comparison gcc/lambda-code.c @ 55:77e2b8dfacca gcc-4.4.5

update it from 4.4.3 to 4.5.0
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Fri, 12 Feb 2010 23:39:51 +0900
parents a06113de4d67
children b7f97abdc517
comparison
equal deleted inserted replaced
52:c156f1bd5cd9 55:77e2b8dfacca
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc. 3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dberlin@dberlin.org> 4 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 5
6 This file is part of GCC. 6 This file is part of GCC.
7 7
8 GCC is free software; you can redistribute it and/or modify it under 8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free 9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later 10 Software Foundation; either version 3, or (at your option) any later
11 version. 11 version.
12 12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details. 16 for more details.
17 17
18 You should have received a copy of the GNU General Public License 18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see 19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */ 20 <http://www.gnu.org/licenses/>. */
21 21
22 #include "config.h" 22 #include "config.h"
45 #include "vecprim.h" 45 #include "vecprim.h"
46 #include "pointer-set.h" 46 #include "pointer-set.h"
47 47
48 /* This loop nest code generation is based on non-singular matrix 48 /* This loop nest code generation is based on non-singular matrix
49 math. 49 math.
50 50
51 A little terminology and a general sketch of the algorithm. See "A singular 51 A little terminology and a general sketch of the algorithm. See "A singular
52 loop transformation framework based on non-singular matrices" by Wei Li and 52 loop transformation framework based on non-singular matrices" by Wei Li and
53 Keshav Pingali for formal proofs that the various statements below are 53 Keshav Pingali for formal proofs that the various statements below are
54 correct. 54 correct.
55 55
56 A loop iteration space represents the points traversed by the loop. A point in the 56 A loop iteration space represents the points traversed by the loop. A point in the
57 iteration space can be represented by a vector of size <loop depth>. You can 57 iteration space can be represented by a vector of size <loop depth>. You can
58 therefore represent the iteration space as an integral combinations of a set 58 therefore represent the iteration space as an integral combinations of a set
59 of basis vectors. 59 of basis vectors.
60 60
61 A loop iteration space is dense if every integer point between the loop 61 A loop iteration space is dense if every integer point between the loop
62 bounds is a point in the iteration space. Every loop with a step of 1 62 bounds is a point in the iteration space. Every loop with a step of 1
63 therefore has a dense iteration space. 63 therefore has a dense iteration space.
64 64
65 for i = 1 to 3, step 1 is a dense iteration space. 65 for i = 1 to 3, step 1 is a dense iteration space.
66 66
67 A loop iteration space is sparse if it is not dense. That is, the iteration 67 A loop iteration space is sparse if it is not dense. That is, the iteration
68 space skips integer points that are within the loop bounds. 68 space skips integer points that are within the loop bounds.
69 69
70 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point 70 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
71 2 is skipped. 71 2 is skipped.
72 72
73 Dense source spaces are easy to transform, because they don't skip any 73 Dense source spaces are easy to transform, because they don't skip any
74 points to begin with. Thus we can compute the exact bounds of the target 74 points to begin with. Thus we can compute the exact bounds of the target
75 space using min/max and floor/ceil. 75 space using min/max and floor/ceil.
76 76
77 For a dense source space, we take the transformation matrix, decompose it 77 For a dense source space, we take the transformation matrix, decompose it
78 into a lower triangular part (H) and a unimodular part (U). 78 into a lower triangular part (H) and a unimodular part (U).
79 We then compute the auxiliary space from the unimodular part (source loop 79 We then compute the auxiliary space from the unimodular part (source loop
80 nest . U = auxiliary space) , which has two important properties: 80 nest . U = auxiliary space) , which has two important properties:
81 1. It traverses the iterations in the same lexicographic order as the source 81 1. It traverses the iterations in the same lexicographic order as the source
82 space. 82 space.
83 2. It is a dense space when the source is a dense space (even if the target 83 2. It is a dense space when the source is a dense space (even if the target
84 space is going to be sparse). 84 space is going to be sparse).
85 85
86 Given the auxiliary space, we use the lower triangular part to compute the 86 Given the auxiliary space, we use the lower triangular part to compute the
87 bounds in the target space by simple matrix multiplication. 87 bounds in the target space by simple matrix multiplication.
88 The gaps in the target space (IE the new loop step sizes) will be the 88 The gaps in the target space (IE the new loop step sizes) will be the
89 diagonals of the H matrix. 89 diagonals of the H matrix.
90 90
102 the dense space to make it sparse). We then compose this transform with the 102 the dense space to make it sparse). We then compose this transform with the
103 transformation matrix specified by the user (since our matrix transformations 103 transformation matrix specified by the user (since our matrix transformations
104 are closed under composition, this is okay). We can then use the base space 104 are closed under composition, this is okay). We can then use the base space
105 (which is dense) plus the composed transformation matrix, to compute the rest 105 (which is dense) plus the composed transformation matrix, to compute the rest
106 of the transform using the dense space algorithm above. 106 of the transform using the dense space algorithm above.
107 107
108 In other words, our sparse source space (B) is decomposed into a dense base 108 In other words, our sparse source space (B) is decomposed into a dense base
109 space (A), and a matrix (L) that transforms A into B, such that A.L = B. 109 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
110 We then compute the composition of L and the user transformation matrix (T), 110 We then compute the composition of L and the user transformation matrix (T),
111 so that T is now a transform from A to the result, instead of from B to the 111 so that T is now a transform from A to the result, instead of from B to the
112 result. 112 result.
113 IE A.(LT) = result instead of B.T = result 113 IE A.(LT) = result instead of B.T = result
114 Since A is now a dense source space, we can use the dense source space 114 Since A is now a dense source space, we can use the dense source space
115 algorithm above to compute the result of applying transform (LT) to A. 115 algorithm above to compute the result of applying transform (LT) to A.
116 116
117 Fourier-Motzkin elimination is used to compute the bounds of the base space 117 Fourier-Motzkin elimination is used to compute the bounds of the base space
118 of the lattice. */ 118 of the lattice. */
119 119
120 static bool perfect_nestify (struct loop *, VEC(tree,heap) *, 120 static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
121 VEC(tree,heap) *, VEC(int,heap) *, 121 VEC(tree,heap) *, VEC(int,heap) *,
122 VEC(tree,heap) *); 122 VEC(tree,heap) *);
123 /* Lattice stuff that is internal to the code generation algorithm. */ 123 /* Lattice stuff that is internal to the code generation algorithm. */
124 124
125 typedef struct lambda_lattice_s 125 typedef struct lambda_lattice_s
291 invariants, 'A'); 291 invariants, 'A');
292 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr)); 292 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
293 } 293 }
294 294
295 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of 295 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
296 coefficients is given by DEPTH, the number of invariants is 296 coefficients is given by DEPTH, the number of invariants is
297 given by INVARIANTS, and the character to start variable names with is given 297 given by INVARIANTS, and the character to start variable names with is given
298 by START. */ 298 by START. */
299 299
300 void 300 void
301 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth, 301 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
418 else 418 else
419 { 419 {
420 /* Otherwise, we need the lower bound expression (which must 420 /* Otherwise, we need the lower bound expression (which must
421 be an affine function) to determine the base. */ 421 be an affine function) to determine the base. */
422 expression = LL_LOWER_BOUND (loop); 422 expression = LL_LOWER_BOUND (loop);
423 gcc_assert (expression && !LLE_NEXT (expression) 423 gcc_assert (expression && !LLE_NEXT (expression)
424 && LLE_DENOMINATOR (expression) == 1); 424 && LLE_DENOMINATOR (expression) == 1);
425 425
426 /* The lower triangular portion of the base is going to be the 426 /* The lower triangular portion of the base is going to be the
427 coefficient times the step */ 427 coefficient times the step */
428 for (j = 0; j < i; j++) 428 for (j = 0; j < i; j++)
465 in b1 ... bk, and some a in a1...ai) 465 in b1 ... bk, and some a in a1...ai)
466 You can then eliminate this x from the non-constant inequalities by 466 You can then eliminate this x from the non-constant inequalities by
467 rewriting these as a <= b, x >= constant, and delete the x variable. 467 rewriting these as a <= b, x >= constant, and delete the x variable.
468 You can then repeat this for any remaining x variables, and then we have 468 You can then repeat this for any remaining x variables, and then we have
469 an easy to use variable <= constant (or no variables at all) form that we 469 an easy to use variable <= constant (or no variables at all) form that we
470 can construct our bounds from. 470 can construct our bounds from.
471 471
472 In our case, each time we eliminate, we construct part of the bound from 472 In our case, each time we eliminate, we construct part of the bound from
473 the ith variable, then delete the ith variable. 473 the ith variable, then delete the ith variable.
474 474
475 Remember the constant are in our vector a, our coefficient matrix is A, 475 Remember the constant are in our vector a, our coefficient matrix is A,
476 and our invariant coefficient matrix is B. 476 and our invariant coefficient matrix is B.
477 477
478 SIZE is the size of the matrices being passed. 478 SIZE is the size of the matrices being passed.
479 DEPTH is the loop nest depth. 479 DEPTH is the loop nest depth.
480 INVARIANTS is the number of loop invariants. 480 INVARIANTS is the number of loop invariants.
481 A, B, and a are the coefficient matrix, invariant coefficient, and a 481 A, B, and a are the coefficient matrix, invariant coefficient, and a
482 vector of constants, respectively. */ 482 vector of constants, respectively. */
483 483
484 static lambda_loopnest 484 static lambda_loopnest
485 compute_nest_using_fourier_motzkin (int size, 485 compute_nest_using_fourier_motzkin (int size,
486 int depth, 486 int depth,
487 int invariants, 487 int invariants,
488 lambda_matrix A, 488 lambda_matrix A,
489 lambda_matrix B, 489 lambda_matrix B,
490 lambda_vector a, 490 lambda_vector a,
491 struct obstack * lambda_obstack) 491 struct obstack * lambda_obstack)
515 for (j = 0; j < size; j++) 515 for (j = 0; j < size; j++)
516 { 516 {
517 if (A[j][i] < 0) 517 if (A[j][i] < 0)
518 { 518 {
519 /* Any linear expression in the matrix with a coefficient less 519 /* Any linear expression in the matrix with a coefficient less
520 than 0 becomes part of the new lower bound. */ 520 than 0 becomes part of the new lower bound. */
521 expression = lambda_linear_expression_new (depth, invariants, 521 expression = lambda_linear_expression_new (depth, invariants,
522 lambda_obstack); 522 lambda_obstack);
523 523
524 for (k = 0; k < i; k++) 524 for (k = 0; k < i; k++)
525 LLE_COEFFICIENTS (expression)[k] = A[j][k]; 525 LLE_COEFFICIENTS (expression)[k] = A[j][k];
540 540
541 } 541 }
542 else if (A[j][i] > 0) 542 else if (A[j][i] > 0)
543 { 543 {
544 /* Any linear expression with a coefficient greater than 0 544 /* Any linear expression with a coefficient greater than 0
545 becomes part of the new upper bound. */ 545 becomes part of the new upper bound. */
546 expression = lambda_linear_expression_new (depth, invariants, 546 expression = lambda_linear_expression_new (depth, invariants,
547 lambda_obstack); 547 lambda_obstack);
548 for (k = 0; k < i; k++) 548 for (k = 0; k < i; k++)
549 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k]; 549 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
550 550
618 618
619 return auxillary_nest; 619 return auxillary_nest;
620 } 620 }
621 621
622 /* Compute the loop bounds for the auxiliary space NEST. 622 /* Compute the loop bounds for the auxiliary space NEST.
623 Input system used is Ax <= b. TRANS is the unimodular transformation. 623 Input system used is Ax <= b. TRANS is the unimodular transformation.
624 Given the original nest, this function will 624 Given the original nest, this function will
625 1. Convert the nest into matrix form, which consists of a matrix for the 625 1. Convert the nest into matrix form, which consists of a matrix for the
626 coefficients, a matrix for the 626 coefficients, a matrix for the
627 invariant coefficients, and a vector for the constants. 627 invariant coefficients, and a vector for the constants.
628 2. Use the matrix form to calculate the lattice base for the nest (which is 628 2. Use the matrix form to calculate the lattice base for the nest (which is
629 a dense space) 629 a dense space)
630 3. Compose the dense space transform with the user specified transform, to 630 3. Compose the dense space transform with the user specified transform, to
631 get a transform we can easily calculate transformed bounds for. 631 get a transform we can easily calculate transformed bounds for.
632 4. Multiply the composed transformation matrix times the matrix form of the 632 4. Multiply the composed transformation matrix times the matrix form of the
633 loop. 633 loop.
634 5. Transform the newly created matrix (from step 4) back into a loop nest 634 5. Transform the newly created matrix (from step 4) back into a loop nest
635 using Fourier-Motzkin elimination to figure out the bounds. */ 635 using Fourier-Motzkin elimination to figure out the bounds. */
698 B[size][j] *= -1; 698 B[size][j] *= -1;
699 699
700 size++; 700 size++;
701 /* Need to increase matrix sizes above. */ 701 /* Need to increase matrix sizes above. */
702 gcc_assert (size <= 127); 702 gcc_assert (size <= 127);
703 703
704 } 704 }
705 705
706 /* Then do the exact same thing for the upper bounds. */ 706 /* Then do the exact same thing for the upper bounds. */
707 if (LL_STEP (loop) > 0) 707 if (LL_STEP (loop) > 0)
708 expression = LL_UPPER_BOUND (loop); 708 expression = LL_UPPER_BOUND (loop);
766 return compute_nest_using_fourier_motzkin (size, depth, invariants, 766 return compute_nest_using_fourier_motzkin (size, depth, invariants,
767 A, B1, a1, lambda_obstack); 767 A, B1, a1, lambda_obstack);
768 } 768 }
769 769
770 /* Compute the loop bounds for the target space, using the bounds of 770 /* Compute the loop bounds for the target space, using the bounds of
771 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. 771 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
772 The target space loop bounds are computed by multiplying the triangular 772 The target space loop bounds are computed by multiplying the triangular
773 matrix H by the auxiliary nest, to get the new loop bounds. The sign of 773 matrix H by the auxiliary nest, to get the new loop bounds. The sign of
774 the loop steps (positive or negative) is then used to swap the bounds if 774 the loop steps (positive or negative) is then used to swap the bounds if
775 the loop counts downwards. 775 the loop counts downwards.
776 Return the target loopnest. */ 776 Return the target loopnest. */
1028 /* Transform NEST according to TRANS, and return the new loopnest. 1028 /* Transform NEST according to TRANS, and return the new loopnest.
1029 This involves 1029 This involves
1030 1. Computing a lattice base for the transformation 1030 1. Computing a lattice base for the transformation
1031 2. Composing the dense base with the specified transformation (TRANS) 1031 2. Composing the dense base with the specified transformation (TRANS)
1032 3. Decomposing the combined transformation into a lower triangular portion, 1032 3. Decomposing the combined transformation into a lower triangular portion,
1033 and a unimodular portion. 1033 and a unimodular portion.
1034 4. Computing the auxiliary nest using the unimodular portion. 1034 4. Computing the auxiliary nest using the unimodular portion.
1035 5. Computing the target nest using the auxiliary nest and the lower 1035 5. Computing the target nest using the auxiliary nest and the lower
1036 triangular portion. */ 1036 triangular portion. */
1037 1037
1038 lambda_loopnest 1038 lambda_loopnest
1039 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans, 1039 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
1040 struct obstack * lambda_obstack) 1040 struct obstack * lambda_obstack)
1041 { 1041 {
1185 return lle; 1185 return lle;
1186 } 1186 }
1187 1187
1188 /* Return the depth of the loopnest NEST */ 1188 /* Return the depth of the loopnest NEST */
1189 1189
1190 static int 1190 static int
1191 depth_of_nest (struct loop *nest) 1191 depth_of_nest (struct loop *nest)
1192 { 1192 {
1193 size_t depth = 0; 1193 size_t depth = 0;
1194 while (nest) 1194 while (nest)
1195 { 1195 {
1360 lboundvar = PHI_ARG_DEF (phi, 0); 1360 lboundvar = PHI_ARG_DEF (phi, 0);
1361 lbound = gcc_tree_to_linear_expression (depth, lboundvar, 1361 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1362 outerinductionvars, *invariants, 1362 outerinductionvars, *invariants,
1363 0, lambda_obstack); 1363 0, lambda_obstack);
1364 } 1364 }
1365 1365
1366 if (!lbound) 1366 if (!lbound)
1367 { 1367 {
1368 1368
1369 if (dump_file && (dump_flags & TDF_DETAILS)) 1369 if (dump_file && (dump_flags & TDF_DETAILS))
1370 fprintf (dump_file, 1370 fprintf (dump_file,
1381 && invariant_in_loop_and_outer_loops (loop, test_rhs)) 1381 && invariant_in_loop_and_outer_loops (loop, test_rhs))
1382 VEC_quick_push (tree, *invariants, test_rhs); 1382 VEC_quick_push (tree, *invariants, test_rhs);
1383 else if (TREE_CODE (test_lhs) == SSA_NAME 1383 else if (TREE_CODE (test_lhs) == SSA_NAME
1384 && invariant_in_loop_and_outer_loops (loop, test_lhs)) 1384 && invariant_in_loop_and_outer_loops (loop, test_lhs))
1385 VEC_quick_push (tree, *invariants, test_lhs); 1385 VEC_quick_push (tree, *invariants, test_lhs);
1386 1386
1387 /* The non-induction variable part of the test is the upper bound variable. 1387 /* The non-induction variable part of the test is the upper bound variable.
1388 */ 1388 */
1389 if (test_lhs == inductionvar) 1389 if (test_lhs == inductionvar)
1390 uboundvar = test_rhs; 1390 uboundvar = test_rhs;
1391 else 1391 else
1392 uboundvar = test_lhs; 1392 uboundvar = test_lhs;
1393 1393
1394 /* We only size the vectors assuming we have, at max, 2 times as many 1394 /* We only size the vectors assuming we have, at max, 2 times as many
1395 invariants as we do loops (one for each bound). 1395 invariants as we do loops (one for each bound).
1396 This is just an arbitrary number, but it has to be matched against the 1396 This is just an arbitrary number, but it has to be matched against the
1397 code below. */ 1397 code below. */
1398 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth)); 1398 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1399 1399
1400 1400
1401 /* We might have some leftover. */ 1401 /* We might have some leftover. */
1402 if (gimple_cond_code (exit_cond) == LT_EXPR) 1402 if (gimple_cond_code (exit_cond) == LT_EXPR)
1403 extra = -1 * stepint; 1403 extra = -1 * stepint;
1404 else if (gimple_cond_code (exit_cond) == NE_EXPR) 1404 else if (gimple_cond_code (exit_cond) == NE_EXPR)
1405 extra = -1 * stepint; 1405 extra = -1 * stepint;
1406 else if (gimple_cond_code (exit_cond) == GT_EXPR) 1406 else if (gimple_cond_code (exit_cond) == GT_EXPR)
1407 extra = -1 * stepint; 1407 extra = -1 * stepint;
1408 else if (gimple_cond_code (exit_cond) == EQ_EXPR) 1408 else if (gimple_cond_code (exit_cond) == EQ_EXPR)
1409 extra = 1 * stepint; 1409 extra = 1 * stepint;
1410 1410
1411 ubound = gcc_tree_to_linear_expression (depth, uboundvar, 1411 ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1412 outerinductionvars, 1412 outerinductionvars,
1413 *invariants, extra, lambda_obstack); 1413 *invariants, extra, lambda_obstack);
1414 uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar, 1414 uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1415 build_int_cst (TREE_TYPE (uboundvar), extra)); 1415 build_int_cst (TREE_TYPE (uboundvar), extra));
1447 test_lhs = gimple_cond_lhs (expr); 1447 test_lhs = gimple_cond_lhs (expr);
1448 test_rhs = gimple_cond_rhs (expr); 1448 test_rhs = gimple_cond_rhs (expr);
1449 1449
1450 /* Find the side that is invariant in this loop. The ivar must be the other 1450 /* Find the side that is invariant in this loop. The ivar must be the other
1451 side. */ 1451 side. */
1452 1452
1453 if (expr_invariant_in_loop_p (loop, test_lhs)) 1453 if (expr_invariant_in_loop_p (loop, test_lhs))
1454 ivarop = test_rhs; 1454 ivarop = test_rhs;
1455 else if (expr_invariant_in_loop_p (loop, test_rhs)) 1455 else if (expr_invariant_in_loop_p (loop, test_rhs))
1456 ivarop = test_lhs; 1456 ivarop = test_lhs;
1457 else 1457 else
1464 1464
1465 DEF_VEC_P(lambda_loop); 1465 DEF_VEC_P(lambda_loop);
1466 DEF_VEC_ALLOC_P(lambda_loop,heap); 1466 DEF_VEC_ALLOC_P(lambda_loop,heap);
1467 1467
1468 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST. 1468 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1469 Return the new loop nest. 1469 Return the new loop nest.
1470 INDUCTIONVARS is a pointer to an array of induction variables for the 1470 INDUCTIONVARS is a pointer to an array of induction variables for the
1471 loopnest that will be filled in during this process. 1471 loopnest that will be filled in during this process.
1472 INVARIANTS is a pointer to an array of invariants that will be filled in 1472 INVARIANTS is a pointer to an array of invariants that will be filled in
1473 during this process. */ 1473 during this process. */
1474 1474
1512 if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps, 1512 if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1513 *inductionvars)) 1513 *inductionvars))
1514 { 1514 {
1515 if (dump_file) 1515 if (dump_file)
1516 fprintf (dump_file, 1516 fprintf (dump_file,
1517 "Not a perfect loop nest and couldn't convert to one.\n"); 1517 "Not a perfect loop nest and couldn't convert to one.\n");
1518 goto fail; 1518 goto fail;
1519 } 1519 }
1520 else if (dump_file) 1520 else if (dump_file)
1521 fprintf (dump_file, 1521 fprintf (dump_file,
1522 "Successfully converted loop nest to perfect loop nest.\n"); 1522 "Successfully converted loop nest to perfect loop nest.\n");
1530 fail: 1530 fail:
1531 VEC_free (lambda_loop, heap, loops); 1531 VEC_free (lambda_loop, heap, loops);
1532 VEC_free (tree, heap, uboundvars); 1532 VEC_free (tree, heap, uboundvars);
1533 VEC_free (tree, heap, lboundvars); 1533 VEC_free (tree, heap, lboundvars);
1534 VEC_free (int, heap, steps); 1534 VEC_free (int, heap, steps);
1535 1535
1536 return ret; 1536 return ret;
1537 } 1537 }
1538 1538
1539 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree. 1539 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1540 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be 1540 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1541 inserted for us are stored. INDUCTION_VARS is the array of induction 1541 inserted for us are stored. INDUCTION_VARS is the array of induction
1542 variables for the loop this LBV is from. TYPE is the tree type to use for 1542 variables for the loop this LBV is from. TYPE is the tree type to use for
1543 the variables and trees involved. */ 1543 the variables and trees involved. */
1544 1544
1545 static tree 1545 static tree
1546 lbv_to_gcc_expression (lambda_body_vector lbv, 1546 lbv_to_gcc_expression (lambda_body_vector lbv,
1547 tree type, VEC(tree,heap) *induction_vars, 1547 tree type, VEC(tree,heap) *induction_vars,
1548 gimple_seq *stmts_to_insert) 1548 gimple_seq *stmts_to_insert)
1549 { 1549 {
1550 int k; 1550 int k;
1551 tree resvar; 1551 tree resvar;
1552 tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars); 1552 tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1564 /* Convert a linear expression from coefficient and constant form to a 1564 /* Convert a linear expression from coefficient and constant form to a
1565 gcc tree. 1565 gcc tree.
1566 Return the tree that represents the final value of the expression. 1566 Return the tree that represents the final value of the expression.
1567 LLE is the linear expression to convert. 1567 LLE is the linear expression to convert.
1568 OFFSET is the linear offset to apply to the expression. 1568 OFFSET is the linear offset to apply to the expression.
1569 TYPE is the tree type to use for the variables and math. 1569 TYPE is the tree type to use for the variables and math.
1570 INDUCTION_VARS is a vector of induction variables for the loops. 1570 INDUCTION_VARS is a vector of induction variables for the loops.
1571 INVARIANTS is a vector of the loop nest invariants. 1571 INVARIANTS is a vector of the loop nest invariants.
1572 WRAP specifies what tree code to wrap the results in, if there is more than 1572 WRAP specifies what tree code to wrap the results in, if there is more than
1573 one (it is either MAX_EXPR, or MIN_EXPR). 1573 one (it is either MAX_EXPR, or MIN_EXPR).
1574 STMTS_TO_INSERT Is a pointer to the statement list we fill in with 1574 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1592 /* Build up the linear expressions. */ 1592 /* Build up the linear expressions. */
1593 for (; lle != NULL; lle = LLE_NEXT (lle)) 1593 for (; lle != NULL; lle = LLE_NEXT (lle))
1594 { 1594 {
1595 expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars); 1595 expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1596 expr = fold_build2 (PLUS_EXPR, type, expr, 1596 expr = fold_build2 (PLUS_EXPR, type, expr,
1597 build_linear_expr (type, 1597 build_linear_expr (type,
1598 LLE_INVARIANT_COEFFICIENTS (lle), 1598 LLE_INVARIANT_COEFFICIENTS (lle),
1599 invariants)); 1599 invariants));
1600 1600
1601 k = LLE_CONSTANT (lle); 1601 k = LLE_CONSTANT (lle);
1602 if (k) 1602 if (k)
1667 remove_phi_node (&si, true); 1667 remove_phi_node (&si, true);
1668 } 1668 }
1669 else 1669 else
1670 { 1670 {
1671 gsi_remove (&si, true); 1671 gsi_remove (&si, true);
1672 release_defs (iv_stmt); 1672 release_defs (iv_stmt);
1673 } 1673 }
1674 } 1674 }
1675 1675
1676 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to 1676 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1677 it, back into gcc code. This changes the 1677 it, back into gcc code. This changes the
1678 loops, their induction variables, and their bodies, so that they 1678 loops, their induction variables, and their bodies, so that they
1679 match the transformed loopnest. 1679 match the transformed loopnest.
1680 OLD_LOOPNEST is the loopnest before we've replaced it with the new 1680 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1681 loopnest. 1681 loopnest.
1682 OLD_IVS is a vector of induction variables from the old loopnest. 1682 OLD_IVS is a vector of induction variables from the old loopnest.
1683 INVARIANTS is a vector of loop invariants from the old loopnest. 1683 INVARIANTS is a vector of loop invariants from the old loopnest.
1684 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with. 1684 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1685 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get 1685 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1686 NEW_LOOPNEST. */ 1686 NEW_LOOPNEST. */
1687 1687
1688 void 1688 void
1689 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest, 1689 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1690 VEC(tree,heap) *old_ivs, 1690 VEC(tree,heap) *old_ivs,
1740 newloop = LN_LOOPS (new_loopnest)[i]; 1740 newloop = LN_LOOPS (new_loopnest)[i];
1741 1741
1742 /* Linear offset is a bit tricky to handle. Punt on the unhandled 1742 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1743 cases for now. */ 1743 cases for now. */
1744 offset = LL_LINEAR_OFFSET (newloop); 1744 offset = LL_LINEAR_OFFSET (newloop);
1745 1745
1746 gcc_assert (LLE_DENOMINATOR (offset) == 1 && 1746 gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1747 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth)); 1747 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1748 1748
1749 /* Now build the new lower bounds, and insert the statements 1749 /* Now build the new lower bounds, and insert the statements
1750 necessary to generate it on the loop preheader. */ 1750 necessary to generate it on the loop preheader. */
1751 stmts = NULL; 1751 stmts = NULL;
1752 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop), 1752 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1753 LL_LINEAR_OFFSET (newloop), 1753 LL_LINEAR_OFFSET (newloop),
1796 bsi = gsi_for_stmt (exitcond); 1796 bsi = gsi_for_stmt (exitcond);
1797 gsi_insert_before (&bsi, inc_stmt, GSI_SAME_STMT); 1797 gsi_insert_before (&bsi, inc_stmt, GSI_SAME_STMT);
1798 1798
1799 /* Replace the exit condition with the new upper bound 1799 /* Replace the exit condition with the new upper bound
1800 comparison. */ 1800 comparison. */
1801 1801
1802 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR; 1802 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1803 1803
1804 /* We want to build a conditional where true means exit the loop, and 1804 /* We want to build a conditional where true means exit the loop, and
1805 false means continue the loop. 1805 false means continue the loop.
1806 So swap the testtype if this isn't the way things are.*/ 1806 So swap the testtype if this isn't the way things are.*/
1807 1807
1808 if (exit->flags & EDGE_FALSE_VALUE) 1808 if (exit->flags & EDGE_FALSE_VALUE)
1842 /* Compute the new expression for the induction 1842 /* Compute the new expression for the induction
1843 variable. */ 1843 variable. */
1844 depth = VEC_length (tree, new_ivs); 1844 depth = VEC_length (tree, new_ivs);
1845 lbv = lambda_body_vector_new (depth, lambda_obstack); 1845 lbv = lambda_body_vector_new (depth, lambda_obstack);
1846 LBV_COEFFICIENTS (lbv)[i] = 1; 1846 LBV_COEFFICIENTS (lbv)[i] = 1;
1847 1847
1848 newlbv = lambda_body_vector_compute_new (transform, lbv, 1848 newlbv = lambda_body_vector_compute_new (transform, lbv,
1849 lambda_obstack); 1849 lambda_obstack);
1850 1850
1851 stmts = NULL; 1851 stmts = NULL;
1852 newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv), 1852 newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1907 1907
1908 static bool 1908 static bool
1909 stmt_uses_phi_result (gimple stmt, tree phi_result) 1909 stmt_uses_phi_result (gimple stmt, tree phi_result)
1910 { 1910 {
1911 tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE); 1911 tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1912 1912
1913 /* This is conservatively true, because we only want SIMPLE bumpers 1913 /* This is conservatively true, because we only want SIMPLE bumpers
1914 of the form x +- constant for our pass. */ 1914 of the form x +- constant for our pass. */
1915 return (use == phi_result); 1915 return (use == phi_result);
1916 } 1916 }
1917 1917
1918 /* STMT is a bumper stmt for LOOP if the version it defines is used in the 1918 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
1919 in-loop-edge in a phi node, and the operand it uses is the result of that 1919 in-loop-edge in a phi node, and the operand it uses is the result of that
1920 phi node. 1920 phi node.
1921 I.E. i_29 = i_3 + 1 1921 I.E. i_29 = i_3 + 1
1922 i_3 = PHI (0, i_29); */ 1922 i_3 = PHI (0, i_29); */
1923 1923
1924 static bool 1924 static bool
1925 stmt_is_bumper_for_loop (struct loop *loop, gimple stmt) 1925 stmt_is_bumper_for_loop (struct loop *loop, gimple stmt)
1926 { 1926 {
1927 gimple use; 1927 gimple use;
1928 tree def; 1928 tree def;
1929 imm_use_iterator iter; 1929 imm_use_iterator iter;
1930 use_operand_p use_p; 1930 use_operand_p use_p;
1931 1931
1932 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF); 1932 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1933 if (!def) 1933 if (!def)
1934 return false; 1934 return false;
1935 1935
1936 FOR_EACH_IMM_USE_FAST (use_p, iter, def) 1936 FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1939 if (gimple_code (use) == GIMPLE_PHI) 1939 if (gimple_code (use) == GIMPLE_PHI)
1940 { 1940 {
1941 if (phi_loop_edge_uses_def (loop, use, def)) 1941 if (phi_loop_edge_uses_def (loop, use, def))
1942 if (stmt_uses_phi_result (stmt, PHI_RESULT (use))) 1942 if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1943 return true; 1943 return true;
1944 } 1944 }
1945 } 1945 }
1946 return false; 1946 return false;
1947 } 1947 }
1948 1948
1949 1949
1950 /* Return true if LOOP is a perfect loop nest. 1950 /* Return true if LOOP is a perfect loop nest.
1951 Perfect loop nests are those loop nests where all code occurs in the 1951 Perfect loop nests are those loop nests where all code occurs in the
1952 innermost loop body. 1952 innermost loop body.
1953 If S is a program statement, then 1953 If S is a program statement, then
1954 1954
1955 i.e. 1955 i.e.
1956 DO I = 1, 20 1956 DO I = 1, 20
1957 S1 1957 S1
1958 DO J = 1, 20 1958 DO J = 1, 20
1959 ... 1959 ...
1960 END DO 1960 END DO
1961 END DO 1961 END DO
1962 is not a perfect loop nest because of S1. 1962 is not a perfect loop nest because of S1.
1963 1963
1964 DO I = 1, 20 1964 DO I = 1, 20
1965 DO J = 1, 20 1965 DO J = 1, 20
1966 S1 1966 S1
1967 ... 1967 ...
1968 END DO 1968 END DO
1969 END DO 1969 END DO
1970 is a perfect loop nest. 1970 is a perfect loop nest.
1971 1971
1972 Since we don't have high level loops anymore, we basically have to walk our 1972 Since we don't have high level loops anymore, we basically have to walk our
1973 statements and ignore those that are there because the loop needs them (IE 1973 statements and ignore those that are there because the loop needs them (IE
1974 the induction variable increment, and jump back to the top of the loop). */ 1974 the induction variable increment, and jump back to the top of the loop). */
1975 1975
2023 avoid creating duplicate temporaries and FIRSTBSI is statement 2023 avoid creating duplicate temporaries and FIRSTBSI is statement
2024 iterator where new temporaries should be inserted at the beginning 2024 iterator where new temporaries should be inserted at the beginning
2025 of body basic block. */ 2025 of body basic block. */
2026 2026
2027 static void 2027 static void
2028 replace_uses_equiv_to_x_with_y (struct loop *loop, gimple stmt, tree x, 2028 replace_uses_equiv_to_x_with_y (struct loop *loop, gimple stmt, tree x,
2029 int xstep, tree y, tree yinit, 2029 int xstep, tree y, tree yinit,
2030 htab_t replacements, 2030 htab_t replacements,
2031 gimple_stmt_iterator *firstbsi) 2031 gimple_stmt_iterator *firstbsi)
2032 { 2032 {
2033 ssa_op_iter iter; 2033 ssa_op_iter iter;
2126 { 2126 {
2127 if (gimple_code (stmt) != GIMPLE_PHI 2127 if (gimple_code (stmt) != GIMPLE_PHI
2128 || gimple_phi_num_args (stmt) != 1 2128 || gimple_phi_num_args (stmt) != 1
2129 || gimple_bb (stmt) != single_exit (loop)->dest) 2129 || gimple_bb (stmt) != single_exit (loop)->dest)
2130 return false; 2130 return false;
2131 2131
2132 return true; 2132 return true;
2133 } 2133 }
2134 2134
2135 /* Return true if STMT can be put back into the loop INNER, by 2135 /* Return true if STMT can be put back into the loop INNER, by
2136 copying it to the beginning of that loop and changing the uses. */ 2136 copying it to the beginning of that loop and changing the uses. */
2138 static bool 2138 static bool
2139 can_put_in_inner_loop (struct loop *inner, gimple stmt) 2139 can_put_in_inner_loop (struct loop *inner, gimple stmt)
2140 { 2140 {
2141 imm_use_iterator imm_iter; 2141 imm_use_iterator imm_iter;
2142 use_operand_p use_p; 2142 use_operand_p use_p;
2143 2143
2144 gcc_assert (is_gimple_assign (stmt)); 2144 gcc_assert (is_gimple_assign (stmt));
2145 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS) 2145 if (gimple_vuse (stmt)
2146 || !stmt_invariant_in_loop_p (inner, stmt)) 2146 || !stmt_invariant_in_loop_p (inner, stmt))
2147 return false; 2147 return false;
2148 2148
2149 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt)) 2149 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2150 { 2150 {
2151 if (!exit_phi_for_loop_p (inner, USE_STMT (use_p))) 2151 if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2152 { 2152 {
2153 basic_block immbb = gimple_bb (USE_STMT (use_p)); 2153 basic_block immbb = gimple_bb (USE_STMT (use_p));
2154 2154
2155 if (!flow_bb_inside_loop_p (inner, immbb)) 2155 if (!flow_bb_inside_loop_p (inner, immbb))
2156 return false; 2156 return false;
2157 } 2157 }
2158 } 2158 }
2159 return true; 2159 return true;
2160 } 2160 }
2161 2161
2162 /* Return true if STMT can be put *after* the inner loop of LOOP. */ 2162 /* Return true if STMT can be put *after* the inner loop of LOOP. */
2163 2163
2164 static bool 2164 static bool
2165 can_put_after_inner_loop (struct loop *loop, gimple stmt) 2165 can_put_after_inner_loop (struct loop *loop, gimple stmt)
2166 { 2166 {
2167 imm_use_iterator imm_iter; 2167 imm_use_iterator imm_iter;
2168 use_operand_p use_p; 2168 use_operand_p use_p;
2169 2169
2170 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)) 2170 if (gimple_vuse (stmt))
2171 return false; 2171 return false;
2172 2172
2173 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt)) 2173 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2174 { 2174 {
2175 if (!exit_phi_for_loop_p (loop, USE_STMT (use_p))) 2175 if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2176 { 2176 {
2177 basic_block immbb = gimple_bb (USE_STMT (use_p)); 2177 basic_block immbb = gimple_bb (USE_STMT (use_p));
2178 2178
2179 if (!dominated_by_p (CDI_DOMINATORS, 2179 if (!dominated_by_p (CDI_DOMINATORS,
2180 immbb, 2180 immbb,
2181 loop->inner->header) 2181 loop->inner->header)
2182 && !can_put_in_inner_loop (loop->inner, stmt)) 2182 && !can_put_in_inner_loop (loop->inner, stmt))
2183 return false; 2183 return false;
2269 { 2269 {
2270 gimple_stmt_iterator bsi; 2270 gimple_stmt_iterator bsi;
2271 gimple exit_condition = get_loop_exit_condition (loop); 2271 gimple exit_condition = get_loop_exit_condition (loop);
2272 2272
2273 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2273 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2274 { 2274 {
2275 gimple stmt = gsi_stmt (bsi); 2275 gimple stmt = gsi_stmt (bsi);
2276 2276
2277 if (stmt == exit_condition 2277 if (stmt == exit_condition
2278 || not_interesting_stmt (stmt) 2278 || not_interesting_stmt (stmt)
2279 || stmt_is_bumper_for_loop (loop, stmt)) 2279 || stmt_is_bumper_for_loop (loop, stmt))
2295 /* If the bb of a statement we care about isn't dominated by the 2295 /* If the bb of a statement we care about isn't dominated by the
2296 header of the inner loop, then we can't handle this case 2296 header of the inner loop, then we can't handle this case
2297 right now. This test ensures that the statement comes 2297 right now. This test ensures that the statement comes
2298 completely *after* the inner loop. */ 2298 completely *after* the inner loop. */
2299 if (!dominated_by_p (CDI_DOMINATORS, 2299 if (!dominated_by_p (CDI_DOMINATORS,
2300 gimple_bb (stmt), 2300 gimple_bb (stmt),
2301 loop->inner->header)) 2301 loop->inner->header))
2302 return true; 2302 return true;
2303 } 2303 }
2304 2304
2305 return false; 2305 return false;
2318 gimple_stmt_iterator si; 2318 gimple_stmt_iterator si;
2319 2319
2320 /* Can't handle triply nested+ loops yet. */ 2320 /* Can't handle triply nested+ loops yet. */
2321 if (!loop->inner || loop->inner->inner) 2321 if (!loop->inner || loop->inner->inner)
2322 return false; 2322 return false;
2323 2323
2324 bbs = get_loop_body (loop); 2324 bbs = get_loop_body (loop);
2325 for (i = 0; i < loop->num_nodes; i++) 2325 for (i = 0; i < loop->num_nodes; i++)
2326 if (bbs[i]->loop_father == loop 2326 if (bbs[i]->loop_father == loop
2327 && cannot_convert_bb_to_perfect_nest (bbs[i], loop)) 2327 && cannot_convert_bb_to_perfect_nest (bbs[i], loop))
2328 goto fail; 2328 goto fail;
2332 for (si = gsi_start_phis (single_exit (loop)->dest); 2332 for (si = gsi_start_phis (single_exit (loop)->dest);
2333 !gsi_end_p (si); 2333 !gsi_end_p (si);
2334 gsi_next (&si)) 2334 gsi_next (&si))
2335 if (gimple_phi_num_args (gsi_stmt (si)) != 1) 2335 if (gimple_phi_num_args (gsi_stmt (si)) != 1)
2336 goto fail; 2336 goto fail;
2337 2337
2338 free (bbs); 2338 free (bbs);
2339 return true; 2339 return true;
2340 2340
2341 fail: 2341 fail:
2342 free (bbs); 2342 free (bbs);
2343 return false; 2343 return false;
2344 } 2344 }
2345
2346
2347 DEF_VEC_I(source_location);
2348 DEF_VEC_ALLOC_I(source_location,heap);
2345 2349
2346 /* Transform the loop nest into a perfect nest, if possible. 2350 /* Transform the loop nest into a perfect nest, if possible.
2347 LOOP is the loop nest to transform into a perfect nest 2351 LOOP is the loop nest to transform into a perfect nest
2348 LBOUNDS are the lower bounds for the loops to transform 2352 LBOUNDS are the lower bounds for the loops to transform
2349 UBOUNDS are the upper bounds for the loops to transform 2353 UBOUNDS are the upper bounds for the loops to transform
2350 STEPS is the STEPS for the loops to transform. 2354 STEPS is the STEPS for the loops to transform.
2351 LOOPIVS is the induction variables for the loops to transform. 2355 LOOPIVS is the induction variables for the loops to transform.
2352 2356
2353 Basically, for the case of 2357 Basically, for the case of
2354 2358
2355 FOR (i = 0; i < 50; i++) 2359 FOR (i = 0; i < 50; i++)
2356 { 2360 {
2357 FOR (j =0; j < 50; j++) 2361 FOR (j =0; j < 50; j++)
2369 FOR (j = 0; j < 50; j++) 2373 FOR (j = 0; j < 50; j++)
2370 { 2374 {
2371 <whatever> 2375 <whatever>
2372 } 2376 }
2373 } 2377 }
2374 2378
2375 FOR (i = 0; i < 50; i ++) 2379 FOR (i = 0; i < 50; i ++)
2376 { 2380 {
2377 <some code> 2381 <some code>
2378 } 2382 }
2379 2383
2398 gimple phi; 2402 gimple phi;
2399 tree uboundvar; 2403 tree uboundvar;
2400 gimple stmt; 2404 gimple stmt;
2401 tree oldivvar, ivvar, ivvarinced; 2405 tree oldivvar, ivvar, ivvarinced;
2402 VEC(tree,heap) *phis = NULL; 2406 VEC(tree,heap) *phis = NULL;
2407 VEC(source_location,heap) *locations = NULL;
2403 htab_t replacements = NULL; 2408 htab_t replacements = NULL;
2404 2409
2405 /* Create the new loop. */ 2410 /* Create the new loop. */
2406 olddest = single_exit (loop)->dest; 2411 olddest = single_exit (loop)->dest;
2407 preheaderbb = split_edge (single_exit (loop)); 2412 preheaderbb = split_edge (single_exit (loop));
2408 headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); 2413 headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2409 2414
2410 /* Push the exit phi nodes that we are moving. */ 2415 /* Push the exit phi nodes that we are moving. */
2411 for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); gsi_next (&bsi)) 2416 for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); gsi_next (&bsi))
2412 { 2417 {
2413 phi = gsi_stmt (bsi); 2418 phi = gsi_stmt (bsi);
2414 VEC_reserve (tree, heap, phis, 2); 2419 VEC_reserve (tree, heap, phis, 2);
2420 VEC_reserve (source_location, heap, locations, 1);
2415 VEC_quick_push (tree, phis, PHI_RESULT (phi)); 2421 VEC_quick_push (tree, phis, PHI_RESULT (phi));
2416 VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0)); 2422 VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2423 VEC_quick_push (source_location, locations,
2424 gimple_phi_arg_location (phi, 0));
2417 } 2425 }
2418 e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb); 2426 e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2419 2427
2420 /* Remove the exit phis from the old basic block. */ 2428 /* Remove the exit phis from the old basic block. */
2421 for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); ) 2429 for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); )
2424 /* and add them back to the new basic block. */ 2432 /* and add them back to the new basic block. */
2425 while (VEC_length (tree, phis) != 0) 2433 while (VEC_length (tree, phis) != 0)
2426 { 2434 {
2427 tree def; 2435 tree def;
2428 tree phiname; 2436 tree phiname;
2437 source_location locus;
2429 def = VEC_pop (tree, phis); 2438 def = VEC_pop (tree, phis);
2430 phiname = VEC_pop (tree, phis); 2439 phiname = VEC_pop (tree, phis);
2440 locus = VEC_pop (source_location, locations);
2431 phi = create_phi_node (phiname, preheaderbb); 2441 phi = create_phi_node (phiname, preheaderbb);
2432 add_phi_arg (phi, def, single_pred_edge (preheaderbb)); 2442 add_phi_arg (phi, def, single_pred_edge (preheaderbb), locus);
2433 } 2443 }
2434 flush_pending_stmts (e); 2444 flush_pending_stmts (e);
2435 VEC_free (tree, heap, phis); 2445 VEC_free (tree, heap, phis);
2436 2446
2437 bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); 2447 bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2438 latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); 2448 latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2439 make_edge (headerbb, bodybb, EDGE_FALLTHRU); 2449 make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2440 cond_stmt = gimple_build_cond (NE_EXPR, integer_one_node, integer_zero_node, 2450 cond_stmt = gimple_build_cond (NE_EXPR, integer_one_node, integer_zero_node,
2441 NULL_TREE, NULL_TREE); 2451 NULL_TREE, NULL_TREE);
2442 bsi = gsi_start_bb (bodybb); 2452 bsi = gsi_start_bb (bodybb);
2443 gsi_insert_after (&bsi, cond_stmt, GSI_NEW_STMT); 2453 gsi_insert_after (&bsi, cond_stmt, GSI_NEW_STMT);
2444 e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE); 2454 e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2445 make_edge (bodybb, latchbb, EDGE_TRUE_VALUE); 2455 make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2446 make_edge (latchbb, headerbb, EDGE_FALLTHRU); 2456 make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2447 2457
2448 /* Update the loop structures. */ 2458 /* Update the loop structures. */
2449 newloop = duplicate_loop (loop, olddest->loop_father); 2459 newloop = duplicate_loop (loop, olddest->loop_father);
2450 newloop->header = headerbb; 2460 newloop->header = headerbb;
2451 newloop->latch = latchbb; 2461 newloop->latch = latchbb;
2452 add_bb_to_loop (latchbb, newloop); 2462 add_bb_to_loop (latchbb, newloop);
2453 add_bb_to_loop (bodybb, newloop); 2463 add_bb_to_loop (bodybb, newloop);
2454 add_bb_to_loop (headerbb, newloop); 2464 add_bb_to_loop (headerbb, newloop);
2455 set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb); 2465 set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2456 set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb); 2466 set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2457 set_immediate_dominator (CDI_DOMINATORS, preheaderbb, 2467 set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2458 single_exit (loop)->src); 2468 single_exit (loop)->src);
2459 set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb); 2469 set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2460 set_immediate_dominator (CDI_DOMINATORS, olddest, 2470 set_immediate_dominator (CDI_DOMINATORS, olddest,
2461 recompute_dominator (CDI_DOMINATORS, olddest)); 2471 recompute_dominator (CDI_DOMINATORS, olddest));
2462 /* Create the new iv. */ 2472 /* Create the new iv. */
2464 ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv"); 2474 ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2465 add_referenced_var (ivvar); 2475 add_referenced_var (ivvar);
2466 standard_iv_increment_position (newloop, &bsi, &insert_after); 2476 standard_iv_increment_position (newloop, &bsi, &insert_after);
2467 create_iv (VEC_index (tree, lbounds, 0), 2477 create_iv (VEC_index (tree, lbounds, 0),
2468 build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)), 2478 build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2469 ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced); 2479 ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2470 2480
2471 /* Create the new upper bound. This may be not just a variable, so we copy 2481 /* Create the new upper bound. This may be not just a variable, so we copy
2472 it to one just in case. */ 2482 it to one just in case. */
2473 2483
2474 exit_condition = get_loop_exit_condition (newloop); 2484 exit_condition = get_loop_exit_condition (newloop);
2486 update_stmt (stmt); 2496 update_stmt (stmt);
2487 gimple_cond_set_condition (exit_condition, GE_EXPR, uboundvar, ivvarinced); 2497 gimple_cond_set_condition (exit_condition, GE_EXPR, uboundvar, ivvarinced);
2488 update_stmt (exit_condition); 2498 update_stmt (exit_condition);
2489 replacements = htab_create_ggc (20, tree_map_hash, 2499 replacements = htab_create_ggc (20, tree_map_hash,
2490 tree_map_eq, NULL); 2500 tree_map_eq, NULL);
2491 bbs = get_loop_body_in_dom_order (loop); 2501 bbs = get_loop_body_in_dom_order (loop);
2492 /* Now move the statements, and replace the induction variable in the moved 2502 /* Now move the statements, and replace the induction variable in the moved
2493 statements with the correct loop induction variable. */ 2503 statements with the correct loop induction variable. */
2494 oldivvar = VEC_index (tree, loopivs, 0); 2504 oldivvar = VEC_index (tree, loopivs, 0);
2495 firstbsi = gsi_start_bb (bodybb); 2505 firstbsi = gsi_start_bb (bodybb);
2496 for (i = loop->num_nodes - 1; i >= 0 ; i--) 2506 for (i = loop->num_nodes - 1; i >= 0 ; i--)
2501 /* If this is true, we are *before* the inner loop. 2511 /* If this is true, we are *before* the inner loop.
2502 If this isn't true, we are *after* it. 2512 If this isn't true, we are *after* it.
2503 2513
2504 The only time can_convert_to_perfect_nest returns true when we 2514 The only time can_convert_to_perfect_nest returns true when we
2505 have statements before the inner loop is if they can be moved 2515 have statements before the inner loop is if they can be moved
2506 into the inner loop. 2516 into the inner loop.
2507 2517
2508 The only time can_convert_to_perfect_nest returns true when we 2518 The only time can_convert_to_perfect_nest returns true when we
2509 have statements after the inner loop is if they can be moved into 2519 have statements after the inner loop is if they can be moved into
2510 the new split loop. */ 2520 the new split loop. */
2511 2521
2512 if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i])) 2522 if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2513 { 2523 {
2514 gimple_stmt_iterator header_bsi 2524 gimple_stmt_iterator header_bsi
2515 = gsi_after_labels (loop->inner->header); 2525 = gsi_after_labels (loop->inner->header);
2516 2526
2517 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);) 2527 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2518 { 2528 {
2519 gimple stmt = gsi_stmt (bsi); 2529 gimple stmt = gsi_stmt (bsi);
2520 2530
2521 if (stmt == exit_condition 2531 if (stmt == exit_condition
2522 || not_interesting_stmt (stmt) 2532 || not_interesting_stmt (stmt)
2523 || stmt_is_bumper_for_loop (loop, stmt)) 2533 || stmt_is_bumper_for_loop (loop, stmt))
2528 2538
2529 gsi_move_before (&bsi, &header_bsi); 2539 gsi_move_before (&bsi, &header_bsi);
2530 } 2540 }
2531 } 2541 }
2532 else 2542 else
2533 { 2543 {
2534 /* Note that the bsi only needs to be explicitly incremented 2544 /* Note that the bsi only needs to be explicitly incremented
2535 when we don't move something, since it is automatically 2545 when we don't move something, since it is automatically
2536 incremented when we do. */ 2546 incremented when we do. */
2537 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);) 2547 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2538 { 2548 {
2539 ssa_op_iter i;
2540 tree n;
2541 gimple stmt = gsi_stmt (bsi); 2549 gimple stmt = gsi_stmt (bsi);
2542 2550
2543 if (stmt == exit_condition 2551 if (stmt == exit_condition
2544 || not_interesting_stmt (stmt) 2552 || not_interesting_stmt (stmt)
2545 || stmt_is_bumper_for_loop (loop, stmt)) 2553 || stmt_is_bumper_for_loop (loop, stmt))
2546 { 2554 {
2547 gsi_next (&bsi); 2555 gsi_next (&bsi);
2548 continue; 2556 continue;
2549 } 2557 }
2550 2558
2551 replace_uses_equiv_to_x_with_y 2559 replace_uses_equiv_to_x_with_y
2552 (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar, 2560 (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2553 VEC_index (tree, lbounds, 0), replacements, &firstbsi); 2561 VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2554 2562
2555 gsi_move_before (&bsi, &tobsi); 2563 gsi_move_before (&bsi, &tobsi);
2556 2564
2557 /* If the statement has any virtual operands, they may 2565 /* If the statement has any virtual operands, they may
2558 need to be rewired because the original loop may 2566 need to be rewired because the original loop may
2559 still reference them. */ 2567 still reference them. */
2560 FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS) 2568 if (gimple_vuse (stmt))
2561 mark_sym_for_renaming (SSA_NAME_VAR (n)); 2569 mark_sym_for_renaming (gimple_vop (cfun));
2562 } 2570 }
2563 } 2571 }
2564 2572
2565 } 2573 }
2566 } 2574 }
2567 2575
2568 free (bbs); 2576 free (bbs);
2569 htab_delete (replacements); 2577 htab_delete (replacements);
2582 distance vectors to lexicographically positive vectors. Note that 2590 distance vectors to lexicographically positive vectors. Note that
2583 a unimodular matrix must transform the zero vector (and only it) to 2591 a unimodular matrix must transform the zero vector (and only it) to
2584 the zero vector." S.Muchnick. */ 2592 the zero vector." S.Muchnick. */
2585 2593
2586 bool 2594 bool
2587 lambda_transform_legal_p (lambda_trans_matrix trans, 2595 lambda_transform_legal_p (lambda_trans_matrix trans,
2588 int nb_loops, 2596 int nb_loops,
2589 VEC (ddr_p, heap) *dependence_relations) 2597 VEC (ddr_p, heap) *dependence_relations)
2590 { 2598 {
2591 unsigned int i, j; 2599 unsigned int i, j;
2592 lambda_vector distres; 2600 lambda_vector distres;
2621 continue; 2629 continue;
2622 2630
2623 /* Conservatively answer: "this transformation is not valid". */ 2631 /* Conservatively answer: "this transformation is not valid". */
2624 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 2632 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2625 return false; 2633 return false;
2626 2634
2627 /* If the dependence could not be captured by a distance vector, 2635 /* If the dependence could not be captured by a distance vector,
2628 conservatively answer that the transform is not valid. */ 2636 conservatively answer that the transform is not valid. */
2629 if (DDR_NUM_DIST_VECTS (ddr) == 0) 2637 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2630 return false; 2638 return false;
2631 2639
2632 /* Compute trans.dist_vect */ 2640 /* Compute trans.dist_vect */
2633 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++) 2641 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2634 { 2642 {
2635 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops, 2643 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2636 DDR_DIST_VECT (ddr, j), distres); 2644 DDR_DIST_VECT (ddr, j), distres);
2637 2645
2638 if (!lambda_vector_lexico_pos (distres, nb_loops)) 2646 if (!lambda_vector_lexico_pos (distres, nb_loops))
2639 return false; 2647 return false;
2640 } 2648 }
2726 return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst) 2734 return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2727 && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, -1 * cst); 2735 && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, -1 * cst);
2728 2736
2729 case MULT_EXPR: 2737 case MULT_EXPR:
2730 if (TREE_CODE (TREE_OPERAND (base_expr, 0)) == INTEGER_CST) 2738 if (TREE_CODE (TREE_OPERAND (base_expr, 0)) == INTEGER_CST)
2731 result = av_for_af_base (TREE_OPERAND (base_expr, 1), 2739 result = av_for_af_base (TREE_OPERAND (base_expr, 1),
2732 cy, am, cst * 2740 cy, am, cst *
2733 int_cst_value (TREE_OPERAND (base_expr, 0))); 2741 int_cst_value (TREE_OPERAND (base_expr, 0)));
2734 else if (TREE_CODE (TREE_OPERAND (base_expr, 1)) == INTEGER_CST) 2742 else if (TREE_CODE (TREE_OPERAND (base_expr, 1)) == INTEGER_CST)
2735 result = av_for_af_base (TREE_OPERAND (base_expr, 0), 2743 result = av_for_af_base (TREE_OPERAND (base_expr, 0),
2736 cy, am, cst * 2744 cy, am, cst *