comparison gcc/tree-ssa-loop-ivopts.c @ 55:77e2b8dfacca gcc-4.4.5

update it from 4.4.3 to 4.5.0
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Fri, 12 Feb 2010 23:39:51 +0900
parents a06113de4d67
children b7f97abdc517
comparison of 52:c156f1bd5cd9 with 55:77e2b8dfacca
1 /* Induction variable optimizations. 1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc. 3 Free Software Foundation, Inc.
4 4
5 This file is part of GCC. 5 This file is part of GCC.
6 6
7 GCC is free software; you can redistribute it and/or modify it 7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the 8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any 9 Free Software Foundation; either version 3, or (at your option) any
10 later version. 10 later version.
11 11
12 GCC is distributed in the hope that it will be useful, but WITHOUT 12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details. 15 for more details.
16 16
17 You should have received a copy of the GNU General Public License 17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see 18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */ 19 <http://www.gnu.org/licenses/>. */
20 20
21 /* This pass tries to find the optimal set of induction variables for the loop. 21 /* This pass tries to find the optimal set of induction variables for the loop.
54 All the costs are defined in a machine-specific way, using the target 54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them. 55 hooks and machine descriptions to determine them.
56 56
57 4) The trees are transformed to use the new variables, the dead code is 57 4) The trees are transformed to use the new variables, the dead code is
58 removed. 58 removed.
59 59
60 All of this is done loop by loop. Doing it globally is theoretically 60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us 61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right 62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */ 63 would be complicated. */
64 64
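
To make the four steps described in the comment above concrete, here is a small hand-written illustration of the kind of rewrite the pass performs. It is a sketch at the C source level, not code from this file: the counter i and the repeatedly recomputed address a + 4*i (assuming 4-byte int) are merged into one pointer induction variable.

/* Illustrative sketch only: source-level view of an ivopts-style rewrite.
   Before: two induction quantities, the counter i and the address &a[i].  */
int
sum_before (int *a, int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s += a[i];                  /* address recomputed as a + 4*i each time */
  return s;
}

/* After: a single pointer induction variable serves both purposes.  */
int
sum_after (int *a, int n)
{
  int s = 0;
  for (int *p = a, *end = a + n; p != end; p++)
    s += *p;                    /* the candidate replaces every use of i */
  return s;
}
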
133 }; 133 };
134 134
135 /* Cost of a computation. */ 135 /* Cost of a computation. */
136 typedef struct 136 typedef struct
137 { 137 {
138 unsigned cost; /* The runtime cost. */ 138 int cost; /* The runtime cost. */
139 unsigned complexity; /* The estimate of the complexity of the code for 139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units -- 140 the computation (in no concrete units --
141 complexity field should be larger for more 141 complexity field should be larger for more
142 complex expressions and addressing modes). */ 142 complex expressions and addressing modes). */
143 } comp_cost; 143 } comp_cost;
179 /* The position where the iv is computed. */ 179 /* The position where the iv is computed. */
180 enum iv_position 180 enum iv_position
181 { 181 {
182 IP_NORMAL, /* At the end, just before the exit condition. */ 182 IP_NORMAL, /* At the end, just before the exit condition. */
183 IP_END, /* At the end of the latch block. */ 183 IP_END, /* At the end of the latch block. */
184 IP_BEFORE_USE, /* Immediately before a specific use. */
185 IP_AFTER_USE, /* Immediately after a specific use. */
184 IP_ORIGINAL /* The original biv. */ 186 IP_ORIGINAL /* The original biv. */
185 }; 187 };
186 188
187 /* The induction variable candidate. */ 189 /* The induction variable candidate. */
188 struct iv_cand 190 struct iv_cand
198 struct iv *iv; /* The value of the candidate. NULL for 200 struct iv *iv; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility 201 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct 202 to replace the final value of an iv by direct
201 computation of the value. */ 203 computation of the value. */
202 unsigned cost; /* Cost of the candidate. */ 204 unsigned cost; /* Cost of the candidate. */
205 unsigned cost_step; /* Cost of the candidate's increment operation. */
206 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
207 where it is incremented. */
203 bitmap depends_on; /* The list of invariants that are used in step of the 208 bitmap depends_on; /* The list of invariants that are used in step of the
204 biv. */ 209 biv. */
205 }; 210 };
206 211
207 /* The data used by the induction variable optimizations. */ 212 /* The data used by the induction variable optimizations. */
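
The new IP_BEFORE_USE and IP_AFTER_USE positions, together with the ainc_use field added above, describe candidates whose increment is placed immediately before or after one particular memory access, so that a target with auto-increment addressing can fold the step into the access itself. A rough source-level analogy, not taken from GCC:

/* Illustrative analogy only.  With 4-byte elements and a step of +4,
   an IP_AFTER_USE candidate has the shape of a post-increment access
   and an IP_BEFORE_USE candidate the shape of a pre-increment access.  */
int
addressing_shapes (int *p)
{
  int after  = *p++;        /* use the address, then step: IP_AFTER_USE  */
  int before = *++p;        /* step first, then use:       IP_BEFORE_USE */
  return after + before;    /* reads p[0] and p[2] of the original p     */
}
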
513 { 518 {
514 case IP_NORMAL: 519 case IP_NORMAL:
515 fprintf (file, " incremented before exit test\n"); 520 fprintf (file, " incremented before exit test\n");
516 break; 521 break;
517 522
523 case IP_BEFORE_USE:
524 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
525 break;
526
527 case IP_AFTER_USE:
528 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
529 break;
530
518 case IP_END: 531 case IP_END:
519 fprintf (file, " incremented at end\n"); 532 fprintf (file, " incremented at end\n");
520 break; 533 break;
521 534
522 case IP_ORIGINAL: 535 case IP_ORIGINAL:
561 574
562 return stmt == last_stmt (bb); 575 return stmt == last_stmt (bb);
563 } 576 }
564 577
 565 /* Returns true if STMT is after the place where the original induction 578 /* Returns true if STMT is after the place where the original induction
566 variable CAND is incremented. */ 579 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
580 if the positions are identical. */
567 581
568 static bool 582 static bool
569 stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt) 583 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
570 { 584 {
571 basic_block cand_bb = gimple_bb (cand->incremented_at); 585 basic_block cand_bb = gimple_bb (cand->incremented_at);
572 basic_block stmt_bb = gimple_bb (stmt); 586 basic_block stmt_bb = gimple_bb (stmt);
573 gimple_stmt_iterator bsi;
574 587
575 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb)) 588 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
576 return false; 589 return false;
577 590
578 if (stmt_bb != cand_bb) 591 if (stmt_bb != cand_bb)
579 return true; 592 return true;
580 593
581 /* Scan the block from the end, since the original ivs are usually 594 if (true_if_equal
582 incremented at the end of the loop body. */ 595 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
583 for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi)) 596 return true;
584 { 597 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
585 if (gsi_stmt (bsi) == cand->incremented_at)
586 return false;
587 if (gsi_stmt (bsi) == stmt)
588 return true;
589 }
590 } 598 }
591 599
 592 /* Returns true if STMT is after the place where the induction variable 600 /* Returns true if STMT is after the place where the induction variable
593 CAND is incremented in LOOP. */ 601 CAND is incremented in LOOP. */
594 602
602 610
603 case IP_NORMAL: 611 case IP_NORMAL:
604 return stmt_after_ip_normal_pos (loop, stmt); 612 return stmt_after_ip_normal_pos (loop, stmt);
605 613
606 case IP_ORIGINAL: 614 case IP_ORIGINAL:
607 return stmt_after_ip_original_pos (cand, stmt); 615 case IP_AFTER_USE:
616 return stmt_after_inc_pos (cand, stmt, false);
617
618 case IP_BEFORE_USE:
619 return stmt_after_inc_pos (cand, stmt, true);
608 620
609 default: 621 default:
610 gcc_unreachable (); 622 gcc_unreachable ();
611 } 623 }
612 } 624 }
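
The rewritten stmt_after_inc_pos no longer walks the basic block backwards looking for the increment; it relies on every statement in a block carrying an ordering uid (assigned elsewhere by the pass) and simply compares uids, with TRUE_IF_EQUAL deciding how a tie is treated. A standalone restatement of that comparison, using plain ints instead of the GIMPLE API:

/* Sketch only; in the real code gimple_bb (), dominated_by_p () and
   gimple_uid () supply the block, dominance and ordering information.  */
static int
after_inc_pos (int same_block, int inc_block_dominates_stmt,
               int stmt_uid, int inc_uid, int true_if_equal)
{
  if (!inc_block_dominates_stmt)
    return 0;                  /* the statement cannot follow the increment */
  if (!same_block)
    return 1;                  /* a dominated, different block is "after"   */
  if (true_if_equal && stmt_uid == inc_uid)
    return 1;                  /* identical positions count as "after"      */
  return stmt_uid > inc_uid;   /* larger uid means later in the block       */
}
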
1062 { 1074 {
1063 fprintf (dump_file, " number of iterations "); 1075 fprintf (dump_file, " number of iterations ");
1064 print_generic_expr (dump_file, niter, TDF_SLIM); 1076 print_generic_expr (dump_file, niter, TDF_SLIM);
1065 fprintf (dump_file, "\n\n"); 1077 fprintf (dump_file, "\n\n");
1066 }; 1078 };
1067 1079
1068 fprintf (dump_file, "Induction variables:\n\n"); 1080 fprintf (dump_file, "Induction variables:\n\n");
1069 1081
1070 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 1082 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1071 { 1083 {
1072 if (ver_info (data, i)->iv) 1084 if (ver_info (data, i)->iv)
1145 return NULL; 1157 return NULL;
1146 1158
1147 iv = get_iv (data, op); 1159 iv = get_iv (data, op);
1148 if (!iv) 1160 if (!iv)
1149 return NULL; 1161 return NULL;
1150 1162
1151 if (iv->have_use_for) 1163 if (iv->have_use_for)
1152 { 1164 {
1153 use = iv_use (data, iv->use_id); 1165 use = iv_use (data, iv->use_id);
1154 1166
1155 gcc_assert (use->type == USE_NONLINEAR_EXPR); 1167 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1531 1543
1532 if (base_align < GET_MODE_ALIGNMENT (mode) 1544 if (base_align < GET_MODE_ALIGNMENT (mode)
1533 || bitpos % GET_MODE_ALIGNMENT (mode) != 0 1545 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1534 || bitpos % BITS_PER_UNIT != 0) 1546 || bitpos % BITS_PER_UNIT != 0)
1535 return true; 1547 return true;
1536 1548
1537 if (!constant_multiple_of (step, al, &mul)) 1549 if (!constant_multiple_of (step, al, &mul))
1538 return true; 1550 return true;
1539 } 1551 }
1540 1552
1541 return false; 1553 return false;
1835 find_interesting_uses_outside (data, e); 1847 find_interesting_uses_outside (data, e);
1836 1848
1837 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 1849 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1838 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 1850 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1839 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 1851 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1840 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 1852 if (!is_gimple_debug (gsi_stmt (bsi)))
1853 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1841 } 1854 }
1842 1855
1843 if (dump_file && (dump_flags & TDF_DETAILS)) 1856 if (dump_file && (dump_flags & TDF_DETAILS))
1844 { 1857 {
1845 bitmap_iterator bi; 1858 bitmap_iterator bi;
1920 else 1933 else
1921 expr = fold_build2 (code, type, op0, op1); 1934 expr = fold_build2 (code, type, op0, op1);
1922 1935
1923 return fold_convert (orig_type, expr); 1936 return fold_convert (orig_type, expr);
1924 1937
1938 case MULT_EXPR:
1939 op1 = TREE_OPERAND (expr, 1);
1940 if (!cst_and_fits_in_hwi (op1))
1941 return orig_expr;
1942
1943 op0 = TREE_OPERAND (expr, 0);
1944 op0 = strip_offset_1 (op0, false, false, &off0);
1945 if (op0 == TREE_OPERAND (expr, 0))
1946 return orig_expr;
1947
1948 *offset = off0 * int_cst_value (op1);
1949 if (integer_zerop (op0))
1950 expr = op0;
1951 else
1952 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1953
1954 return fold_convert (orig_type, expr);
1955
1925 case ARRAY_REF: 1956 case ARRAY_REF:
1926 case ARRAY_RANGE_REF: 1957 case ARRAY_RANGE_REF:
1927 if (!inside_addr) 1958 if (!inside_addr)
1928 return orig_expr; 1959 return orig_expr;
1929 1960
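
The new MULT_EXPR case lets strip_offset_1 pull a constant offset through a multiplication: if the inner operand strips to op0 plus offset off0 and the multiplier op1 is a constant that fits in a HOST_WIDE_INT, the whole expression contributes an offset of off0 * op1. A tiny numeric illustration of that arithmetic, using hypothetical values rather than GCC trees:

#include <stdio.h>

/* Stripping the constant offset out of (x + off0) * c: the stripped base
   becomes x * c and the recorded offset becomes off0 * c.  */
int
main (void)
{
  long off0 = 3;              /* offset recovered from the inner operand */
  long c = 8;                 /* constant multiplier                     */
  long offset = off0 * c;     /* what the MULT_EXPR case records: 24     */

  printf ("(x + %ld) * %ld  ==>  x * %ld + %ld\n", off0, c, c, offset);
  return 0;
}
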
2064 struct iv_use *use, gimple incremented_at) 2095 struct iv_use *use, gimple incremented_at)
2065 { 2096 {
2066 unsigned i; 2097 unsigned i;
2067 struct iv_cand *cand = NULL; 2098 struct iv_cand *cand = NULL;
2068 tree type, orig_type; 2099 tree type, orig_type;
2069 2100
2070 if (base) 2101 if (base)
2071 { 2102 {
2072 orig_type = TREE_TYPE (base); 2103 orig_type = TREE_TYPE (base);
2073 type = generic_type_for (orig_type); 2104 type = generic_type_for (orig_type);
2074 if (type != orig_type) 2105 if (type != orig_type)
2083 cand = iv_cand (data, i); 2114 cand = iv_cand (data, i);
2084 2115
2085 if (cand->pos != pos) 2116 if (cand->pos != pos)
2086 continue; 2117 continue;
2087 2118
2088 if (cand->incremented_at != incremented_at) 2119 if (cand->incremented_at != incremented_at
2120 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2121 && cand->ainc_use != use))
2089 continue; 2122 continue;
2090 2123
2091 if (!cand->iv) 2124 if (!cand->iv)
2092 { 2125 {
2093 if (!base && !step) 2126 if (!base && !step)
2129 { 2162 {
2130 fd_ivopts_data = data; 2163 fd_ivopts_data = data;
2131 walk_tree (&step, find_depends, &cand->depends_on, NULL); 2164 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2132 } 2165 }
2133 2166
2167 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2168 cand->ainc_use = use;
2169 else
2170 cand->ainc_use = NULL;
2171
2134 if (dump_file && (dump_flags & TDF_DETAILS)) 2172 if (dump_file && (dump_flags & TDF_DETAILS))
2135 dump_cand (dump_file, cand); 2173 dump_cand (dump_file, cand);
2136 } 2174 }
2137 2175
2138 if (important && !cand->important) 2176 if (important && !cand->important)
2172 return true; 2210 return true;
2173 2211
2174 return false; 2212 return false;
2175 } 2213 }
2176 2214
2215 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2216 Important field is set to IMPORTANT. */
2217
2218 static void
2219 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2220 bool important, struct iv_use *use)
2221 {
2222 basic_block use_bb = gimple_bb (use->stmt);
2223 enum machine_mode mem_mode;
2224 unsigned HOST_WIDE_INT cstepi;
2225
2226 /* If we insert the increment in any position other than the standard
2227 ones, we must ensure that it is incremented once per iteration.
2228 It must not be in an inner nested loop, or one side of an if
2229 statement. */
2230 if (use_bb->loop_father != data->current_loop
2231 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2232 || stmt_could_throw_p (use->stmt)
2233 || !cst_and_fits_in_hwi (step))
2234 return;
2235
2236 cstepi = int_cst_value (step);
2237
2238 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2239 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2240 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2241 {
2242 enum tree_code code = MINUS_EXPR;
2243 tree new_base;
2244 tree new_step = step;
2245
2246 if (POINTER_TYPE_P (TREE_TYPE (base)))
2247 {
2248 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2249 code = POINTER_PLUS_EXPR;
2250 }
2251 else
2252 new_step = fold_convert (TREE_TYPE (base), new_step);
2253 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2254 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2255 use->stmt);
2256 }
2257 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2258 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2259 {
2260 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2261 use->stmt);
2262 }
2263 }
2264
2177 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 2265 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2178 position to POS. If USE is not NULL, the candidate is set as related to 2266 position to POS. If USE is not NULL, the candidate is set as related to
2179 it. The candidate computation is scheduled on all available positions. */ 2267 it. The candidate computation is scheduled on all available positions. */
2180 2268
2181 static void 2269 static void
2182 add_candidate (struct ivopts_data *data, 2270 add_candidate (struct ivopts_data *data,
2183 tree base, tree step, bool important, struct iv_use *use) 2271 tree base, tree step, bool important, struct iv_use *use)
2184 { 2272 {
2185 if (ip_normal_pos (data->current_loop)) 2273 if (ip_normal_pos (data->current_loop))
2186 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL); 2274 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2187 if (ip_end_pos (data->current_loop) 2275 if (ip_end_pos (data->current_loop)
2188 && allow_ip_end_pos_p (data->current_loop)) 2276 && allow_ip_end_pos_p (data->current_loop))
2189 add_candidate_1 (data, base, step, important, IP_END, use, NULL); 2277 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2278
2279 if (use != NULL && use->type == USE_ADDRESS)
2280 add_autoinc_candidates (data, base, step, important, use);
2190 } 2281 }
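
add_autoinc_candidates only fires when the increment can legitimately be placed at the use itself: the use's block must execute exactly once per iteration (it dominates the latch and is not in an inner loop), the statement must not throw, and the constant step must equal plus or minus the access size. For the pre-increment/decrement form the candidate's base is shifted to base - step, because the increment now happens before the value is used. A small standalone illustration of that base adjustment:

#include <stdio.h>

/* Why an IP_BEFORE_USE candidate starts at base - step: if the step is
   applied before the use, the variable must start one step early so the
   first value actually used equals the original base.  */
int
main (void)
{
  long base = 1000, step = 4;
  long post = base;           /* IP_AFTER_USE: increment after the use   */
  long pre = base - step;     /* IP_BEFORE_USE: increment before the use */

  for (int i = 0; i < 3; i++)
    {
      pre += step;                           /* step emitted before the use */
      printf ("pre uses %ld, post uses %ld\n", pre, post);
      post += step;                          /* step emitted after the use  */
    }
  return 0;
}
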
2191 2282
2192 /* Add a standard "0 + 1 * iteration" iv candidate for a 2283 /* Add a standard "0 + 1 * iteration" iv candidate for a
2193 type with SIZE bits. */ 2284 type with SIZE bits. */
2194 2285
2356 /* Add important candidates to the related_cands bitmaps. */ 2447 /* Add important candidates to the related_cands bitmaps. */
2357 for (i = 0; i < n_iv_uses (data); i++) 2448 for (i = 0; i < n_iv_uses (data); i++)
2358 bitmap_ior_into (iv_use (data, i)->related_cands, 2449 bitmap_ior_into (iv_use (data, i)->related_cands,
2359 data->important_candidates); 2450 data->important_candidates);
2360 } 2451 }
2361 }
2362
2363 /* Finds the candidates for the induction variables. */
2364
2365 static void
2366 find_iv_candidates (struct ivopts_data *data)
2367 {
2368 /* Add commonly used ivs. */
2369 add_standard_iv_candidates (data);
2370
2371 /* Add old induction variables. */
2372 add_old_ivs_candidates (data);
2373
2374 /* Add induction variables derived from uses. */
2375 add_derived_ivs_candidates (data);
2376
2377 /* Record the important candidates. */
2378 record_important_candidates (data);
2379 } 2452 }
2380 2453
2381 /* Allocates the data structure mapping the (use, candidate) pairs to costs. 2454 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2382 If consider_all_candidates is true, we use a two-dimensional array, otherwise 2455 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2383 we allocate a simple list to every use. */ 2456 we allocate a simple list to every use. */
2467 return cost.cost == INFTY; 2540 return cost.cost == INFTY;
2468 } 2541 }
2469 2542
2470 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends 2543 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2471 on invariants DEPENDS_ON and that the value used in expressing it 2544 on invariants DEPENDS_ON and that the value used in expressing it
2472 is VALUE.*/ 2545 is VALUE. */
2473 2546
2474 static void 2547 static void
2475 set_use_iv_cost (struct ivopts_data *data, 2548 set_use_iv_cost (struct ivopts_data *data,
2476 struct iv_use *use, struct iv_cand *cand, 2549 struct iv_use *use, struct iv_cand *cand,
2477 comp_cost cost, bitmap depends_on, tree value) 2550 comp_cost cost, bitmap depends_on, tree value)
2529 if (!ret->cand) 2602 if (!ret->cand)
2530 return NULL; 2603 return NULL;
2531 2604
2532 return ret; 2605 return ret;
2533 } 2606 }
2534 2607
2535 /* n_map_members is a power of two, so this computes modulo. */ 2608 /* n_map_members is a power of two, so this computes modulo. */
2536 s = cand->id & (use->n_map_members - 1); 2609 s = cand->id & (use->n_map_members - 1);
2537 for (i = s; i < use->n_map_members; i++) 2610 for (i = s; i < use->n_map_members; i++)
2538 if (use->cost_map[i].cand == cand) 2611 if (use->cost_map[i].cand == cand)
2539 return use->cost_map + i; 2612 return use->cost_map + i;
2567 2640
2568 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ 2641 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2569 static rtx 2642 static rtx
2570 produce_memory_decl_rtl (tree obj, int *regno) 2643 produce_memory_decl_rtl (tree obj, int *regno)
2571 { 2644 {
2645 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2646 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2572 rtx x; 2647 rtx x;
2573 2648
2574 gcc_assert (obj); 2649 gcc_assert (obj);
2575 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) 2650 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2576 { 2651 {
2577 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); 2652 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2578 x = gen_rtx_SYMBOL_REF (Pmode, name); 2653 x = gen_rtx_SYMBOL_REF (address_mode, name);
2579 SET_SYMBOL_REF_DECL (x, obj); 2654 SET_SYMBOL_REF_DECL (x, obj);
2580 x = gen_rtx_MEM (DECL_MODE (obj), x); 2655 x = gen_rtx_MEM (DECL_MODE (obj), x);
2656 set_mem_addr_space (x, as);
2581 targetm.encode_section_info (obj, x, true); 2657 targetm.encode_section_info (obj, x, true);
2582 } 2658 }
2583 else 2659 else
2584 { 2660 {
2585 x = gen_raw_REG (Pmode, (*regno)++); 2661 x = gen_raw_REG (address_mode, (*regno)++);
2586 x = gen_rtx_MEM (DECL_MODE (obj), x); 2662 x = gen_rtx_MEM (DECL_MODE (obj), x);
2663 set_mem_addr_space (x, as);
2587 } 2664 }
2588 2665
2589 return x; 2666 return x;
2590 } 2667 }
2591 2668
2669 default_rtl_profile (); 2746 default_rtl_profile ();
2670 cfun->function_frequency = real_frequency; 2747 cfun->function_frequency = real_frequency;
2671 2748
2672 cost = seq_cost (seq, speed); 2749 cost = seq_cost (seq, speed);
2673 if (MEM_P (rslt)) 2750 if (MEM_P (rslt))
2674 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed); 2751 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2752 TYPE_ADDR_SPACE (type), speed);
2675 2753
2676 return cost; 2754 return cost;
2677 } 2755 }
2678 2756
2679 /* Returns variable containing the value of candidate CAND at statement AT. */ 2757 /* Returns variable containing the value of candidate CAND at statement AT. */
2796 2874
2797 /* We need to shift the value if we are after the increment. */ 2875 /* We need to shift the value if we are after the increment. */
2798 if (stmt_after_increment (loop, cand, at)) 2876 if (stmt_after_increment (loop, cand, at))
2799 { 2877 {
2800 aff_tree cstep_aff; 2878 aff_tree cstep_aff;
2801 2879
2802 if (common_type != uutype) 2880 if (common_type != uutype)
2803 cstep_common = fold_convert (common_type, cstep); 2881 cstep_common = fold_convert (common_type, cstep);
2804 else 2882 else
2805 cstep_common = cstep; 2883 cstep_common = cstep;
2806 2884
2867 cost = seq_cost (seq, speed); 2945 cost = seq_cost (seq, speed);
2868 if (!cost) 2946 if (!cost)
2869 cost = 1; 2947 cost = 1;
2870 2948
2871 costs[mode] = cost; 2949 costs[mode] = cost;
2872 2950
2873 if (dump_file && (dump_flags & TDF_DETAILS)) 2951 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, "Addition in %s costs %d\n", 2952 fprintf (dump_file, "Addition in %s costs %d\n",
2875 GET_MODE_NAME (mode), cost); 2953 GET_MODE_NAME (mode), cost);
2876 return cost; 2954 return cost;
2877 } 2955 }
2932 start_sequence (); 3010 start_sequence ();
2933 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1), 3011 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2934 gen_int_mode (cst, mode), NULL_RTX, 0); 3012 gen_int_mode (cst, mode), NULL_RTX, 0);
2935 seq = get_insns (); 3013 seq = get_insns ();
2936 end_sequence (); 3014 end_sequence ();
2937 3015
2938 cost = seq_cost (seq, speed); 3016 cost = seq_cost (seq, speed);
2939 3017
2940 if (dump_file && (dump_flags & TDF_DETAILS)) 3018 if (dump_file && (dump_flags & TDF_DETAILS))
2941 fprintf (dump_file, "Multiplication by %d in %s costs %d\n", 3019 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2942 (int) cst, GET_MODE_NAME (mode), cost); 3020 (int) cst, GET_MODE_NAME (mode), cost);
2945 3023
2946 return cost; 3024 return cost;
2947 } 3025 }
2948 3026
2949 /* Returns true if multiplying by RATIO is allowed in an address. Test the 3027 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2950 validity for a memory reference accessing memory of mode MODE. */ 3028 validity for a memory reference accessing memory of mode MODE in
3029 address space AS. */
3030
3031 DEF_VEC_P (sbitmap);
3032 DEF_VEC_ALLOC_P (sbitmap, heap);
2951 3033
2952 bool 3034 bool
2953 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode) 3035 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3036 addr_space_t as)
2954 { 3037 {
2955 #define MAX_RATIO 128 3038 #define MAX_RATIO 128
2956 static sbitmap valid_mult[MAX_MACHINE_MODE]; 3039 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
2957 3040 static VEC (sbitmap, heap) *valid_mult_list;
2958 if (!valid_mult[mode]) 3041 sbitmap valid_mult;
2959 { 3042
2960 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1); 3043 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3044 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3045
3046 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3047 if (!valid_mult)
3048 {
3049 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3050 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
2961 rtx addr; 3051 rtx addr;
2962 HOST_WIDE_INT i; 3052 HOST_WIDE_INT i;
2963 3053
2964 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1); 3054 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2965 sbitmap_zero (valid_mult[mode]); 3055 sbitmap_zero (valid_mult);
2966 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX); 3056 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
2967 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3057 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2968 { 3058 {
2969 XEXP (addr, 1) = gen_int_mode (i, Pmode); 3059 XEXP (addr, 1) = gen_int_mode (i, address_mode);
2970 if (memory_address_p (mode, addr)) 3060 if (memory_address_addr_space_p (mode, addr, as))
2971 SET_BIT (valid_mult[mode], i + MAX_RATIO); 3061 SET_BIT (valid_mult, i + MAX_RATIO);
2972 } 3062 }
2973 3063
2974 if (dump_file && (dump_flags & TDF_DETAILS)) 3064 if (dump_file && (dump_flags & TDF_DETAILS))
2975 { 3065 {
2976 fprintf (dump_file, " allowed multipliers:"); 3066 fprintf (dump_file, " allowed multipliers:");
2977 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3067 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2978 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO)) 3068 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2979 fprintf (dump_file, " %d", (int) i); 3069 fprintf (dump_file, " %d", (int) i);
2980 fprintf (dump_file, "\n"); 3070 fprintf (dump_file, "\n");
2981 fprintf (dump_file, "\n"); 3071 fprintf (dump_file, "\n");
2982 } 3072 }
3073
3074 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
2983 } 3075 }
2984 3076
2985 if (ratio > MAX_RATIO || ratio < -MAX_RATIO) 3077 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2986 return false; 3078 return false;
2987 3079
2988 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO); 3080 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
2989 } 3081 }
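
multiplier_allowed_in_address_p used to keep one static sbitmap per machine mode; with named address spaces in the picture it now caches one bitmap per (address space, mode) pair in a growable vector, using the flat index as * MAX_MACHINE_MODE + mode, and builds the trial addresses in the address mode the target reports for that space instead of Pmode. A standalone sketch of the lazy, flat-indexed cache, with plain C arrays standing in for the VEC/sbitmap API:

#include <stdlib.h>

#define NUM_MODES 16                   /* stands in for MAX_MACHINE_MODE  */
#define NUM_FLAGS 257                  /* stands in for 2 * MAX_RATIO + 1 */

static unsigned char **cache;          /* one bitmap per (as, mode) pair  */
static size_t cache_len;

static unsigned char *
get_valid_mult (int as, int mode)
{
  size_t index = (size_t) as * NUM_MODES + (size_t) mode;

  if (index >= cache_len)              /* grow the table, zero-filled     */
    {
      unsigned char **bigger = realloc (cache, (index + 1) * sizeof *cache);
      if (!bigger)
        abort ();
      cache = bigger;
      for (size_t i = cache_len; i <= index; i++)
        cache[i] = NULL;
      cache_len = index + 1;
    }
  if (!cache[index])                   /* first query for this pair:      */
    cache[index] = calloc (1, NUM_FLAGS);  /* compute it once and keep it */
  return cache[index];
}
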
2990 3082
2991 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index. 3083 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2992 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false, 3084 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2993 variable is omitted. Compute the cost for a memory reference that accesses 3085 variable is omitted. Compute the cost for a memory reference that accesses
2994 a memory location of mode MEM_MODE. 3086 a memory location of mode MEM_MODE in address space AS.
3087
3088 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3089 size of MEM_MODE / RATIO) is available. To make this determination, we
3090 look at the size of the increment to be made, which is given in CSTEP.
3091 CSTEP may be zero if the step is unknown.
3092 STMT_AFTER_INC is true iff the statement we're looking at is after the
3093 increment of the original biv.
2995 3094
2996 TODO -- there must be some better way. This all is quite crude. */ 3095 TODO -- there must be some better way. This all is quite crude. */
3096
3097 typedef struct
3098 {
3099 HOST_WIDE_INT min_offset, max_offset;
3100 unsigned costs[2][2][2][2];
3101 } *address_cost_data;
3102
3103 DEF_VEC_P (address_cost_data);
3104 DEF_VEC_ALLOC_P (address_cost_data, heap);
2997 3105
2998 static comp_cost 3106 static comp_cost
2999 get_address_cost (bool symbol_present, bool var_present, 3107 get_address_cost (bool symbol_present, bool var_present,
3000 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio, 3108 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3001 enum machine_mode mem_mode, 3109 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3002 bool speed) 3110 addr_space_t as, bool speed,
3003 { 3111 bool stmt_after_inc, bool *may_autoinc)
3004 static bool initialized[MAX_MACHINE_MODE]; 3112 {
3005 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE]; 3113 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3006 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE]; 3114 static VEC(address_cost_data, heap) *address_cost_data_list;
3007 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2]; 3115 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3116 address_cost_data data;
3117 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3118 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3008 unsigned cost, acost, complexity; 3119 unsigned cost, acost, complexity;
3009 bool offset_p, ratio_p; 3120 bool offset_p, ratio_p, autoinc;
3010 HOST_WIDE_INT s_offset; 3121 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3011 unsigned HOST_WIDE_INT mask; 3122 unsigned HOST_WIDE_INT mask;
3012 unsigned bits; 3123 unsigned bits;
3013 3124
3014 if (!initialized[mem_mode]) 3125 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3126 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3127 data_index + 1);
3128
3129 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3130 if (!data)
3015 { 3131 {
3016 HOST_WIDE_INT i; 3132 HOST_WIDE_INT i;
3017 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT; 3133 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3134 HOST_WIDE_INT rat, off;
3018 int old_cse_not_expected; 3135 int old_cse_not_expected;
3019 unsigned sym_p, var_p, off_p, rat_p, add_c; 3136 unsigned sym_p, var_p, off_p, rat_p, add_c;
3020 rtx seq, addr, base; 3137 rtx seq, addr, base;
3021 rtx reg0, reg1; 3138 rtx reg0, reg1;
3022 3139
3023 initialized[mem_mode] = true; 3140 data = (address_cost_data) xcalloc (1, sizeof (*data));
3024 3141
3025 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1); 3142 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3026 3143
3027 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX); 3144 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3028 for (i = start; i <= 1 << 20; i <<= 1) 3145 for (i = start; i <= 1 << 20; i <<= 1)
3029 { 3146 {
3030 XEXP (addr, 1) = gen_int_mode (i, Pmode); 3147 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3031 if (!memory_address_p (mem_mode, addr)) 3148 if (!memory_address_addr_space_p (mem_mode, addr, as))
3032 break; 3149 break;
3033 } 3150 }
3034 max_offset[mem_mode] = i == start ? 0 : i >> 1; 3151 data->max_offset = i == start ? 0 : i >> 1;
3035 off[mem_mode] = max_offset[mem_mode]; 3152 off = data->max_offset;
3036 3153
3037 for (i = start; i <= 1 << 20; i <<= 1) 3154 for (i = start; i <= 1 << 20; i <<= 1)
3038 { 3155 {
3039 XEXP (addr, 1) = gen_int_mode (-i, Pmode); 3156 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3040 if (!memory_address_p (mem_mode, addr)) 3157 if (!memory_address_addr_space_p (mem_mode, addr, as))
3041 break; 3158 break;
3042 } 3159 }
3043 min_offset[mem_mode] = i == start ? 0 : -(i >> 1); 3160 data->min_offset = i == start ? 0 : -(i >> 1);
3044 3161
3045 if (dump_file && (dump_flags & TDF_DETAILS)) 3162 if (dump_file && (dump_flags & TDF_DETAILS))
3046 { 3163 {
3047 fprintf (dump_file, "get_address_cost:\n"); 3164 fprintf (dump_file, "get_address_cost:\n");
3048 fprintf (dump_file, " min offset %s %d\n", 3165 fprintf (dump_file, " min offset %s %d\n",
3049 GET_MODE_NAME (mem_mode), 3166 GET_MODE_NAME (mem_mode),
3050 (int) min_offset[mem_mode]); 3167 (int) data->min_offset);
3051 fprintf (dump_file, " max offset %s %d\n", 3168 fprintf (dump_file, " max offset %s %d\n",
3052 GET_MODE_NAME (mem_mode), 3169 GET_MODE_NAME (mem_mode),
3053 (int) max_offset[mem_mode]); 3170 (int) data->max_offset);
3054 } 3171 }
3055 3172
3056 rat[mem_mode] = 1; 3173 rat = 1;
3057 for (i = 2; i <= MAX_RATIO; i++) 3174 for (i = 2; i <= MAX_RATIO; i++)
3058 if (multiplier_allowed_in_address_p (i, mem_mode)) 3175 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3059 { 3176 {
3060 rat[mem_mode] = i; 3177 rat = i;
3061 break; 3178 break;
3062 } 3179 }
3063 3180
3064 /* Compute the cost of various addressing modes. */ 3181 /* Compute the cost of various addressing modes. */
3065 acost = 0; 3182 acost = 0;
3066 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1); 3183 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3067 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2); 3184 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3068 3185
3186 if (HAVE_PRE_DECREMENT)
3187 {
3188 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3189 has_predec[mem_mode]
3190 = memory_address_addr_space_p (mem_mode, addr, as);
3191 }
3192 if (HAVE_POST_DECREMENT)
3193 {
3194 addr = gen_rtx_POST_DEC (address_mode, reg0);
3195 has_postdec[mem_mode]
3196 = memory_address_addr_space_p (mem_mode, addr, as);
3197 }
3198 if (HAVE_PRE_INCREMENT)
3199 {
3200 addr = gen_rtx_PRE_INC (address_mode, reg0);
3201 has_preinc[mem_mode]
3202 = memory_address_addr_space_p (mem_mode, addr, as);
3203 }
3204 if (HAVE_POST_INCREMENT)
3205 {
3206 addr = gen_rtx_POST_INC (address_mode, reg0);
3207 has_postinc[mem_mode]
3208 = memory_address_addr_space_p (mem_mode, addr, as);
3209 }
3069 for (i = 0; i < 16; i++) 3210 for (i = 0; i < 16; i++)
3070 { 3211 {
3071 sym_p = i & 1; 3212 sym_p = i & 1;
3072 var_p = (i >> 1) & 1; 3213 var_p = (i >> 1) & 1;
3073 off_p = (i >> 2) & 1; 3214 off_p = (i >> 2) & 1;
3074 rat_p = (i >> 3) & 1; 3215 rat_p = (i >> 3) & 1;
3075 3216
3076 addr = reg0; 3217 addr = reg0;
3077 if (rat_p) 3218 if (rat_p)
3078 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, 3219 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3079 gen_int_mode (rat[mem_mode], Pmode)); 3220 gen_int_mode (rat, address_mode));
3080 3221
3081 if (var_p) 3222 if (var_p)
3082 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1); 3223 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3083 3224
3084 if (sym_p) 3225 if (sym_p)
3085 { 3226 {
3086 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup ("")); 3227 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3087 /* ??? We can run into trouble with some backends by presenting 3228 /* ??? We can run into trouble with some backends by presenting
3088 it with symbols which haven't been properly passed through 3229 it with symbols which haven't been properly passed through
3089 targetm.encode_section_info. By setting the local bit, we 3230 targetm.encode_section_info. By setting the local bit, we
3090 enhance the probability of things working. */ 3231 enhance the probability of things working. */
3091 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL; 3232 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3092 3233
3093 if (off_p) 3234 if (off_p)
3094 base = gen_rtx_fmt_e (CONST, Pmode, 3235 base = gen_rtx_fmt_e (CONST, address_mode,
3095 gen_rtx_fmt_ee (PLUS, Pmode, 3236 gen_rtx_fmt_ee
3096 base, 3237 (PLUS, address_mode, base,
3097 gen_int_mode (off[mem_mode], 3238 gen_int_mode (off, address_mode)));
3098 Pmode)));
3099 } 3239 }
3100 else if (off_p) 3240 else if (off_p)
3101 base = gen_int_mode (off[mem_mode], Pmode); 3241 base = gen_int_mode (off, address_mode);
3102 else 3242 else
3103 base = NULL_RTX; 3243 base = NULL_RTX;
3104 3244
3105 if (base) 3245 if (base)
3106 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base); 3246 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3107 3247
3108 start_sequence (); 3248 start_sequence ();
3109 /* To avoid splitting addressing modes, pretend that no cse will 3249 /* To avoid splitting addressing modes, pretend that no cse will
3110 follow. */ 3250 follow. */
3111 old_cse_not_expected = cse_not_expected; 3251 old_cse_not_expected = cse_not_expected;
3112 cse_not_expected = true; 3252 cse_not_expected = true;
3113 addr = memory_address (mem_mode, addr); 3253 addr = memory_address_addr_space (mem_mode, addr, as);
3114 cse_not_expected = old_cse_not_expected; 3254 cse_not_expected = old_cse_not_expected;
3115 seq = get_insns (); 3255 seq = get_insns ();
3116 end_sequence (); 3256 end_sequence ();
3117 3257
3118 acost = seq_cost (seq, speed); 3258 acost = seq_cost (seq, speed);
3119 acost += address_cost (addr, mem_mode, speed); 3259 acost += address_cost (addr, mem_mode, as, speed);
3120 3260
3121 if (!acost) 3261 if (!acost)
3122 acost = 1; 3262 acost = 1;
3123 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost; 3263 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3124 } 3264 }
3125 3265
3126 /* On some targets, it is quite expensive to load symbol to a register, 3266 /* On some targets, it is quite expensive to load symbol to a register,
3127 which makes addresses that contain symbols look much more expensive. 3267 which makes addresses that contain symbols look much more expensive.
3128 However, the symbol will have to be loaded in any case before the 3268 However, the symbol will have to be loaded in any case before the
3133 If VAR_PRESENT is false, and the mode obtained by changing symbol to 3273 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3134 var is cheaper, use this mode with small penalty. 3274 var is cheaper, use this mode with small penalty.
3135 If VAR_PRESENT is true, try whether the mode with 3275 If VAR_PRESENT is true, try whether the mode with
3136 SYMBOL_PRESENT = false is cheaper even with cost of addition, and 3276 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3137 if this is the case, use it. */ 3277 if this is the case, use it. */
3138 add_c = add_cost (Pmode, speed); 3278 add_c = add_cost (address_mode, speed);
3139 for (i = 0; i < 8; i++) 3279 for (i = 0; i < 8; i++)
3140 { 3280 {
3141 var_p = i & 1; 3281 var_p = i & 1;
3142 off_p = (i >> 1) & 1; 3282 off_p = (i >> 1) & 1;
3143 rat_p = (i >> 2) & 1; 3283 rat_p = (i >> 2) & 1;
3144 3284
3145 acost = costs[mem_mode][0][1][off_p][rat_p] + 1; 3285 acost = data->costs[0][1][off_p][rat_p] + 1;
3146 if (var_p) 3286 if (var_p)
3147 acost += add_c; 3287 acost += add_c;
3148 3288
3149 if (acost < costs[mem_mode][1][var_p][off_p][rat_p]) 3289 if (acost < data->costs[1][var_p][off_p][rat_p])
3150 costs[mem_mode][1][var_p][off_p][rat_p] = acost; 3290 data->costs[1][var_p][off_p][rat_p] = acost;
3151 } 3291 }
3152 3292
3153 if (dump_file && (dump_flags & TDF_DETAILS)) 3293 if (dump_file && (dump_flags & TDF_DETAILS))
3154 { 3294 {
3155 fprintf (dump_file, "Address costs:\n"); 3295 fprintf (dump_file, "Address costs:\n");
3156 3296
3157 for (i = 0; i < 16; i++) 3297 for (i = 0; i < 16; i++)
3158 { 3298 {
3159 sym_p = i & 1; 3299 sym_p = i & 1;
3160 var_p = (i >> 1) & 1; 3300 var_p = (i >> 1) & 1;
3161 off_p = (i >> 2) & 1; 3301 off_p = (i >> 2) & 1;
3169 if (off_p) 3309 if (off_p)
3170 fprintf (dump_file, "cst + "); 3310 fprintf (dump_file, "cst + ");
3171 if (rat_p) 3311 if (rat_p)
3172 fprintf (dump_file, "rat * "); 3312 fprintf (dump_file, "rat * ");
3173 3313
3174 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p]; 3314 acost = data->costs[sym_p][var_p][off_p][rat_p];
3175 fprintf (dump_file, "index costs %d\n", acost); 3315 fprintf (dump_file, "index costs %d\n", acost);
3176 } 3316 }
3317 if (has_predec[mem_mode] || has_postdec[mem_mode]
3318 || has_preinc[mem_mode] || has_postinc[mem_mode])
3319 fprintf (dump_file, " May include autoinc/dec\n");
3177 fprintf (dump_file, "\n"); 3320 fprintf (dump_file, "\n");
3178 } 3321 }
3179 } 3322
3180 3323 VEC_replace (address_cost_data, address_cost_data_list,
3181 bits = GET_MODE_BITSIZE (Pmode); 3324 data_index, data);
3325 }
3326
3327 bits = GET_MODE_BITSIZE (address_mode);
3182 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1); 3328 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3183 offset &= mask; 3329 offset &= mask;
3184 if ((offset >> (bits - 1) & 1)) 3330 if ((offset >> (bits - 1) & 1))
3185 offset |= ~mask; 3331 offset |= ~mask;
3186 s_offset = offset; 3332 s_offset = offset;
3187 3333
3334 autoinc = false;
3335 msize = GET_MODE_SIZE (mem_mode);
3336 autoinc_offset = offset;
3337 if (stmt_after_inc)
3338 autoinc_offset += ratio * cstep;
3339 if (symbol_present || var_present || ratio != 1)
3340 autoinc = false;
3341 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3342 && msize == cstep)
3343 || (has_postdec[mem_mode] && autoinc_offset == 0
3344 && msize == -cstep)
3345 || (has_preinc[mem_mode] && autoinc_offset == msize
3346 && msize == cstep)
3347 || (has_predec[mem_mode] && autoinc_offset == -msize
3348 && msize == -cstep))
3349 autoinc = true;
3350
3188 cost = 0; 3351 cost = 0;
3189 offset_p = (s_offset != 0 3352 offset_p = (s_offset != 0
3190 && min_offset[mem_mode] <= s_offset 3353 && data->min_offset <= s_offset
3191 && s_offset <= max_offset[mem_mode]); 3354 && s_offset <= data->max_offset);
3192 ratio_p = (ratio != 1 3355 ratio_p = (ratio != 1
3193 && multiplier_allowed_in_address_p (ratio, mem_mode)); 3356 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3194 3357
3195 if (ratio != 1 && !ratio_p) 3358 if (ratio != 1 && !ratio_p)
3196 cost += multiply_by_cost (ratio, Pmode, speed); 3359 cost += multiply_by_cost (ratio, address_mode, speed);
3197 3360
3198 if (s_offset && !offset_p && !symbol_present) 3361 if (s_offset && !offset_p && !symbol_present)
3199 cost += add_cost (Pmode, speed); 3362 cost += add_cost (address_mode, speed);
3200 3363
3201 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p]; 3364 if (may_autoinc)
3365 *may_autoinc = autoinc;
3366 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3202 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p; 3367 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3203 return new_cost (cost + acost, complexity); 3368 return new_cost (cost + acost, complexity);
3204 } 3369 }
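
Like the multiplier table, the precomputed address-cost data is now cached per (address space, mode) pair, and it additionally records whether the target supports pre/post increment/decrement addressing for that mode. The tail of get_address_cost then decides whether this particular use can be auto-incremented: only if there is no symbol and no extra variable, the ratio is 1, and the offset and step line up with the access size. A standalone restatement of that final test, with plain ints standing in for the cached flags:

/* Mirrors the autoinc test above; the has_* arguments stand for the
   per-mode HAVE_{PRE,POST}_{INC,DEC}REMENT capabilities probed earlier.  */
static int
can_autoinc_p (int symbol_present, int var_present, long ratio,
               long offset, long cstep, long msize,
               int has_postinc, int has_postdec,
               int has_preinc, int has_predec,
               int stmt_after_inc)
{
  long autoinc_offset = offset + (stmt_after_inc ? ratio * cstep : 0);

  if (symbol_present || var_present || ratio != 1)
    return 0;
  return (has_postinc && autoinc_offset == 0 && msize == cstep)
         || (has_postdec && autoinc_offset == 0 && msize == -cstep)
         || (has_preinc && autoinc_offset == msize && msize == cstep)
         || (has_predec && autoinc_offset == -msize && msize == -cstep);
}
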
3205 3370
3206 /* Estimates cost of forcing expression EXPR into a variable. */ 3371 /* Estimates cost of forcing expression EXPR into a variable. */
3300 else 3465 else
3301 cost1 = force_expr_to_var_cost (op1, speed); 3466 cost1 = force_expr_to_var_cost (op1, speed);
3302 3467
3303 break; 3468 break;
3304 3469
3470 case NEGATE_EXPR:
3471 op0 = TREE_OPERAND (expr, 0);
3472 STRIP_NOPS (op0);
3473 op1 = NULL_TREE;
3474
3475 if (is_gimple_val (op0))
3476 cost0 = zero_cost;
3477 else
3478 cost0 = force_expr_to_var_cost (op0, speed);
3479
3480 cost1 = zero_cost;
3481 break;
3482
3305 default: 3483 default:
3306 /* Just an arbitrary value, FIXME. */ 3484 /* Just an arbitrary value, FIXME. */
3307 return new_cost (target_spill_cost[speed], 0); 3485 return new_cost (target_spill_cost[speed], 0);
3308 } 3486 }
3309 3487
3311 switch (TREE_CODE (expr)) 3489 switch (TREE_CODE (expr))
3312 { 3490 {
3313 case POINTER_PLUS_EXPR: 3491 case POINTER_PLUS_EXPR:
3314 case PLUS_EXPR: 3492 case PLUS_EXPR:
3315 case MINUS_EXPR: 3493 case MINUS_EXPR:
3494 case NEGATE_EXPR:
3316 cost = new_cost (add_cost (mode, speed), 0); 3495 cost = new_cost (add_cost (mode, speed), 0);
3317 break; 3496 break;
3318 3497
3319 case MULT_EXPR: 3498 case MULT_EXPR:
3320 if (cst_and_fits_in_hwi (op0)) 3499 if (cst_and_fits_in_hwi (op0))
3321 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0); 3500 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3322 else if (cst_and_fits_in_hwi (op1)) 3501 else if (cst_and_fits_in_hwi (op1))
3323 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0); 3502 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3324 else 3503 else
3325 return new_cost (target_spill_cost [speed], 0); 3504 return new_cost (target_spill_cost [speed], 0);
3326 break; 3505 break;
3327 3506
3334 3513
3335 /* Bound the cost by target_spill_cost. The parts of complicated 3514 /* Bound the cost by target_spill_cost. The parts of complicated
3336 computations often are either loop invariant or at least can 3515 computations often are either loop invariant or at least can
3337 be shared between several iv uses, so letting this grow without 3516 be shared between several iv uses, so letting this grow without
3338 limits would not give reasonable results. */ 3517 limits would not give reasonable results. */
3339 if (cost.cost > target_spill_cost [speed]) 3518 if (cost.cost > (int) target_spill_cost [speed])
3340 cost.cost = target_spill_cost [speed]; 3519 cost.cost = target_spill_cost [speed];
3341 3520
3342 return cost; 3521 return cost;
3343 } 3522 }
3344 3523
3372 HOST_WIDE_INT bitsize; 3551 HOST_WIDE_INT bitsize;
3373 HOST_WIDE_INT bitpos; 3552 HOST_WIDE_INT bitpos;
3374 tree toffset; 3553 tree toffset;
3375 enum machine_mode mode; 3554 enum machine_mode mode;
3376 int unsignedp, volatilep; 3555 int unsignedp, volatilep;
3377 3556
3378 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode, 3557 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3379 &unsignedp, &volatilep, false); 3558 &unsignedp, &volatilep, false);
3380 3559
3381 if (toffset != 0 3560 if (toffset != 0
3382 || bitpos % BITS_PER_UNIT != 0 3561 || bitpos % BITS_PER_UNIT != 0
3395 { 3574 {
3396 *symbol_present = true; 3575 *symbol_present = true;
3397 *var_present = false; 3576 *var_present = false;
3398 return zero_cost; 3577 return zero_cost;
3399 } 3578 }
3400 3579
3401 *symbol_present = false; 3580 *symbol_present = false;
3402 *var_present = true; 3581 *var_present = true;
3403 return zero_cost; 3582 return zero_cost;
3404 } 3583 }
3405 3584
3413 ptr_difference_cost (struct ivopts_data *data, 3592 ptr_difference_cost (struct ivopts_data *data,
3414 tree e1, tree e2, bool *symbol_present, bool *var_present, 3593 tree e1, tree e2, bool *symbol_present, bool *var_present,
3415 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3594 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3416 { 3595 {
3417 HOST_WIDE_INT diff = 0; 3596 HOST_WIDE_INT diff = 0;
3418 comp_cost cost; 3597 aff_tree aff_e1, aff_e2;
3419 bool speed = optimize_loop_for_speed_p (data->current_loop); 3598 tree type;
3420 3599
3421 gcc_assert (TREE_CODE (e1) == ADDR_EXPR); 3600 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3422 3601
3423 if (ptr_difference_const (e1, e2, &diff)) 3602 if (ptr_difference_const (e1, e2, &diff))
3424 { 3603 {
3432 return split_address_cost (data, TREE_OPERAND (e1, 0), 3611 return split_address_cost (data, TREE_OPERAND (e1, 0),
3433 symbol_present, var_present, offset, depends_on); 3612 symbol_present, var_present, offset, depends_on);
3434 3613
3435 *symbol_present = false; 3614 *symbol_present = false;
3436 *var_present = true; 3615 *var_present = true;
3437 3616
3438 cost = force_var_cost (data, e1, depends_on); 3617 type = signed_type_for (TREE_TYPE (e1));
3439 cost = add_costs (cost, force_var_cost (data, e2, depends_on)); 3618 tree_to_aff_combination (e1, type, &aff_e1);
3440 cost.cost += add_cost (Pmode, speed); 3619 tree_to_aff_combination (e2, type, &aff_e2);
3441 3620 aff_combination_scale (&aff_e2, double_int_minus_one);
3442 return cost; 3621 aff_combination_add (&aff_e1, &aff_e2);
3622
3623 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3443 } 3624 }
3444 3625
3445 /* Estimates cost of expressing difference E1 - E2 as 3626 /* Estimates cost of expressing difference E1 - E2 as
3446 var + symbol + offset. The value of offset is added to OFFSET, 3627 var + symbol + offset. The value of offset is added to OFFSET,
3447 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 3628 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3451 static comp_cost 3632 static comp_cost
3452 difference_cost (struct ivopts_data *data, 3633 difference_cost (struct ivopts_data *data,
3453 tree e1, tree e2, bool *symbol_present, bool *var_present, 3634 tree e1, tree e2, bool *symbol_present, bool *var_present,
3454 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3635 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3455 { 3636 {
3456 comp_cost cost;
3457 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1)); 3637 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3458 unsigned HOST_WIDE_INT off1, off2; 3638 unsigned HOST_WIDE_INT off1, off2;
3639 aff_tree aff_e1, aff_e2;
3640 tree type;
3459 3641
3460 e1 = strip_offset (e1, &off1); 3642 e1 = strip_offset (e1, &off1);
3461 e2 = strip_offset (e2, &off2); 3643 e2 = strip_offset (e2, &off2);
3462 *offset += off1 - off2; 3644 *offset += off1 - off2;
3463 3645
3464 STRIP_NOPS (e1); 3646 STRIP_NOPS (e1);
3465 STRIP_NOPS (e2); 3647 STRIP_NOPS (e2);
3466 3648
3467 if (TREE_CODE (e1) == ADDR_EXPR) 3649 if (TREE_CODE (e1) == ADDR_EXPR)
3468 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset, 3650 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3469 depends_on); 3651 offset, depends_on);
3470 *symbol_present = false; 3652 *symbol_present = false;
3471 3653
3472 if (operand_equal_p (e1, e2, 0)) 3654 if (operand_equal_p (e1, e2, 0))
3473 { 3655 {
3474 *var_present = false; 3656 *var_present = false;
3475 return zero_cost; 3657 return zero_cost;
3476 } 3658 }
3659
3477 *var_present = true; 3660 *var_present = true;
3661
3478 if (integer_zerop (e2)) 3662 if (integer_zerop (e2))
3479 return force_var_cost (data, e1, depends_on); 3663 return force_var_cost (data, e1, depends_on);
3480 3664
3481 if (integer_zerop (e1)) 3665 if (integer_zerop (e1))
3482 { 3666 {
3483 cost = force_var_cost (data, e2, depends_on); 3667 comp_cost cost = force_var_cost (data, e2, depends_on);
3484 cost.cost += multiply_by_cost (-1, mode, data->speed); 3668 cost.cost += multiply_by_cost (-1, mode, data->speed);
3485
3486 return cost; 3669 return cost;
3487 } 3670 }
3488 3671
3489 cost = force_var_cost (data, e1, depends_on); 3672 type = signed_type_for (TREE_TYPE (e1));
3490 cost = add_costs (cost, force_var_cost (data, e2, depends_on)); 3673 tree_to_aff_combination (e1, type, &aff_e1);
3491 cost.cost += add_cost (mode, data->speed); 3674 tree_to_aff_combination (e2, type, &aff_e2);
3492 3675 aff_combination_scale (&aff_e2, double_int_minus_one);
3493 return cost; 3676 aff_combination_add (&aff_e1, &aff_e2);
3677
3678 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3494 } 3679 }
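
Both ptr_difference_cost and difference_cost now express e1 - e2 as a single affine combination (e1 plus e2 scaled by -1) and price forcing that one tree into a register, instead of pricing e1 and e2 separately plus an addition; any parts shared by the two expressions cancel before anything is costed. A toy affine-combination example showing the cancellation, not the aff_tree API:

#include <stdio.h>

/* Toy affine combination: a constant plus coefficient * variable terms.  */
struct aff { long cst; long coef_a; long coef_b; };  /* cst + coef_a*a + coef_b*b */

static struct aff
aff_scale (struct aff x, long s)
{
  x.cst *= s; x.coef_a *= s; x.coef_b *= s;
  return x;
}

static struct aff
aff_add (struct aff x, struct aff y)
{
  x.cst += y.cst; x.coef_a += y.coef_a; x.coef_b += y.coef_b;
  return x;
}

int
main (void)
{
  struct aff e1 = { 8, 1, 4 };                /* a + 4*b + 8 */
  struct aff e2 = { 4, 1, 4 };                /* a + 4*b + 4 */
  struct aff diff = aff_add (e1, aff_scale (e2, -1));

  /* Prints "diff = 4 + 0*a + 0*b": nothing left to force into a register.  */
  printf ("diff = %ld + %ld*a + %ld*b\n", diff.cst, diff.coef_a, diff.coef_b);
  return 0;
}
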
3495 3680
3496 /* Determines the cost of the computation by that USE is expressed 3681 /* Determines the cost of the computation by that USE is expressed
3497 from induction variable CAND. If ADDRESS_P is true, we just need 3682 from induction variable CAND. If ADDRESS_P is true, we just need
3498 to create an address from it, otherwise we want to get it into 3683 to create an address from it, otherwise we want to get it into
3499 register. A set of invariants we depend on is stored in 3684 register. A set of invariants we depend on is stored in
3500 DEPENDS_ON. AT is the statement at that the value is computed. */ 3685 DEPENDS_ON. AT is the statement at that the value is computed.
3686 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3687 addressing is likely. */
3501 3688
3502 static comp_cost 3689 static comp_cost
3503 get_computation_cost_at (struct ivopts_data *data, 3690 get_computation_cost_at (struct ivopts_data *data,
3504 struct iv_use *use, struct iv_cand *cand, 3691 struct iv_use *use, struct iv_cand *cand,
3505 bool address_p, bitmap *depends_on, gimple at) 3692 bool address_p, bitmap *depends_on, gimple at,
3693 bool *can_autoinc)
3506 { 3694 {
3507 tree ubase = use->iv->base, ustep = use->iv->step; 3695 tree ubase = use->iv->base, ustep = use->iv->step;
3508 tree cbase, cstep; 3696 tree cbase, cstep;
3509 tree utype = TREE_TYPE (ubase), ctype; 3697 tree utype = TREE_TYPE (ubase), ctype;
3510 unsigned HOST_WIDE_INT cstepi, offset = 0; 3698 unsigned HOST_WIDE_INT cstepi, offset = 0;
3511 HOST_WIDE_INT ratio, aratio; 3699 HOST_WIDE_INT ratio, aratio;
3512 bool var_present, symbol_present; 3700 bool var_present, symbol_present, stmt_is_after_inc;
3513 comp_cost cost; 3701 comp_cost cost;
3514 unsigned n_sums;
3515 double_int rat; 3702 double_int rat;
3516 bool speed = optimize_bb_for_speed_p (gimple_bb (at)); 3703 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3517 3704
3518 *depends_on = NULL; 3705 *depends_on = NULL;
3519 3706
3542 && cand->iv->base_object 3729 && cand->iv->base_object
3543 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0)) 3730 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3544 return infinite_cost; 3731 return infinite_cost;
3545 } 3732 }
3546 3733
3547 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype)) 3734 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3548 { 3735 {
3549 /* TODO -- add direct handling of this case. */ 3736 /* TODO -- add direct handling of this case. */
3550 goto fallback; 3737 goto fallback;
3551 } 3738 }
3552 3739
3561 else 3748 else
3562 cstepi = 0; 3749 cstepi = 0;
3563 3750
3564 if (!constant_multiple_of (ustep, cstep, &rat)) 3751 if (!constant_multiple_of (ustep, cstep, &rat))
3565 return infinite_cost; 3752 return infinite_cost;
3566 3753
3567 if (double_int_fits_in_shwi_p (rat)) 3754 if (double_int_fits_in_shwi_p (rat))
3568 ratio = double_int_to_shwi (rat); 3755 ratio = double_int_to_shwi (rat);
3569 else 3756 else
3570 return infinite_cost; 3757 return infinite_cost;
3571 3758
3759 STRIP_NOPS (cbase);
3760 ctype = TREE_TYPE (cbase);
3761
3572 /* use = ubase + ratio * (var - cbase). If either cbase is a constant 3762 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3573 or ratio == 1, it is better to handle this like 3763 or ratio == 1, it is better to handle this like
3574 3764
3575 ubase - ratio * cbase + ratio * var 3765 ubase - ratio * cbase + ratio * var
3576 3766
3577 (also holds in the case ratio == -1, TODO. */ 3767 (also holds in the case ratio == -1, TODO. */
3578 3768
3579 if (cst_and_fits_in_hwi (cbase)) 3769 if (cst_and_fits_in_hwi (cbase))
3580 { 3770 {
3581 offset = - ratio * int_cst_value (cbase); 3771 offset = - ratio * int_cst_value (cbase);
3582 cost = difference_cost (data, 3772 cost = difference_cost (data,
3583 ubase, build_int_cst (utype, 0), 3773 ubase, build_int_cst (utype, 0),
3584 &symbol_present, &var_present, &offset, 3774 &symbol_present, &var_present, &offset,
3585 depends_on); 3775 depends_on);
3586 } 3776 }
3587 else if (ratio == 1) 3777 else if (ratio == 1)
3588 { 3778 {
3779 cost = difference_cost (data,
3780 ubase, cbase,
3781 &symbol_present, &var_present, &offset,
3782 depends_on);
3783 }
3784 else if (address_p
3785 && !POINTER_TYPE_P (ctype)
3786 && multiplier_allowed_in_address_p
3787 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3788 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3789 {
3790 cbase
3791 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3589 cost = difference_cost (data, 3792 cost = difference_cost (data,
3590 ubase, cbase, 3793 ubase, cbase,
3591 &symbol_present, &var_present, &offset, 3794 &symbol_present, &var_present, &offset,
3592 depends_on); 3795 depends_on);
3593 } 3796 }
3602 &offset, depends_on)); 3805 &offset, depends_on));
3603 } 3806 }
3604 3807
3605 /* If we are after the increment, the value of the candidate is higher by 3808 /* If we are after the increment, the value of the candidate is higher by
3606 one iteration. */ 3809 one iteration. */
3607 if (stmt_after_increment (data->current_loop, cand, at)) 3810 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3811 if (stmt_is_after_inc)
3608 offset -= ratio * cstepi; 3812 offset -= ratio * cstepi;
3609 3813
3610 /* Now the computation is in shape symbol + var1 + const + ratio * var2. 3814 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3611 (symbol/var/const parts may be omitted). If we are looking for an address, 3815 (symbol/var1/const parts may be omitted). If we are looking for an
3612 find the cost of addressing this. */ 3816 address, find the cost of addressing this. */
3613 if (address_p) 3817 if (address_p)
3614 return add_costs (cost, get_address_cost (symbol_present, var_present, 3818 return add_costs (cost,
3615 offset, ratio, 3819 get_address_cost (symbol_present, var_present,
3616 TYPE_MODE (TREE_TYPE (*use->op_p)), speed)); 3820 offset, ratio, cstepi,
3821 TYPE_MODE (TREE_TYPE (utype)),
3822 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3823 speed, stmt_is_after_inc,
3824 can_autoinc));
3617 3825
3618 /* Otherwise estimate the costs for computing the expression. */ 3826 /* Otherwise estimate the costs for computing the expression. */
3619 aratio = ratio > 0 ? ratio : -ratio;
3620 if (!symbol_present && !var_present && !offset) 3827 if (!symbol_present && !var_present && !offset)
3621 { 3828 {
3622 if (ratio != 1) 3829 if (ratio != 1)
3623 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed); 3830 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3624
3625 return cost; 3831 return cost;
3626 } 3832 }
3627 3833
3628 if (aratio != 1) 3834 /* Symbol + offset should be compile-time computable so consider that they
3629 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed); 3835 are added once to the variable, if present. */
3630 3836 if (var_present && (symbol_present || offset))
3631 n_sums = 1; 3837 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3632 if (var_present 3838 / AVG_LOOP_NITER (data->current_loop);
3633 /* Symbol + offset should be compile-time computable. */
3634 && (symbol_present || offset))
3635 n_sums++;
3636 3839
3637 /* Having offset does not affect runtime cost in case it is added to 3840 /* Having offset does not affect runtime cost in case it is added to
3638 symbol, but it increases complexity. */ 3841 symbol, but it increases complexity. */
3639 if (offset) 3842 if (offset)
3640 cost.complexity++; 3843 cost.complexity++;
3641 3844
3642 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed); 3845 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3643 return cost; 3846
3847 aratio = ratio > 0 ? ratio : -ratio;
3848 if (aratio != 1)
3849 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
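/* A sketch of how these pieces add up, with made-up target costs: for a use
   of the form sym + off + 2 * var on a target where an add costs 1 and the
   multiply by 2 costs 1, the loop body pays 1 (add) + 1 (multiply), while
   the invariant part sym + off only contributes about 1 / AVG_LOOP_NITER of
   an add, since it can be computed once before the loop.  */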
3644 3850
3645 fallback: 3851 fallback:
3852 if (can_autoinc)
3853 *can_autoinc = false;
3854
3646 { 3855 {
3647 /* Just get the expression, expand it and measure the cost. */ 3856 /* Just get the expression, expand it and measure the cost. */
3648 tree comp = get_computation_at (data->current_loop, use, cand, at); 3857 tree comp = get_computation_at (data->current_loop, use, cand, at);
3649 3858
3650 if (!comp) 3859 if (!comp)
3659 3868
3660 /* Determines the cost of the computation by which USE is expressed 3869 /* Determines the cost of the computation by which USE is expressed
3661 from induction variable CAND. If ADDRESS_P is true, we just need 3870 from induction variable CAND. If ADDRESS_P is true, we just need
3662 to create an address from it, otherwise we want to get it into 3871 to create an address from it, otherwise we want to get it into
3663 a register. A set of invariants we depend on is stored in 3872 a register. A set of invariants we depend on is stored in
3664 DEPENDS_ON. */ 3873 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3874 autoinc addressing is likely. */
3665 3875
3666 static comp_cost 3876 static comp_cost
3667 get_computation_cost (struct ivopts_data *data, 3877 get_computation_cost (struct ivopts_data *data,
3668 struct iv_use *use, struct iv_cand *cand, 3878 struct iv_use *use, struct iv_cand *cand,
3669 bool address_p, bitmap *depends_on) 3879 bool address_p, bitmap *depends_on, bool *can_autoinc)
3670 { 3880 {
3671 return get_computation_cost_at (data, 3881 return get_computation_cost_at (data,
3672 use, cand, address_p, depends_on, use->stmt); 3882 use, cand, address_p, depends_on, use->stmt,
3883 can_autoinc);
3673 } 3884 }
3674 3885
3675 /* Determines cost of basing replacement of USE on CAND in a generic 3886 /* Determines cost of basing replacement of USE on CAND in a generic
3676 expression. */ 3887 expression. */
3677 3888
3691 { 3902 {
3692 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE); 3903 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3693 return true; 3904 return true;
3694 } 3905 }
3695 3906
3696 cost = get_computation_cost (data, use, cand, false, &depends_on); 3907 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3697 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE); 3908 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3698 3909
3699 return !infinite_cost_p (cost); 3910 return !infinite_cost_p (cost);
3700 } 3911 }
3701 3912
3704 static bool 3915 static bool
3705 determine_use_iv_cost_address (struct ivopts_data *data, 3916 determine_use_iv_cost_address (struct ivopts_data *data,
3706 struct iv_use *use, struct iv_cand *cand) 3917 struct iv_use *use, struct iv_cand *cand)
3707 { 3918 {
3708 bitmap depends_on; 3919 bitmap depends_on;
3709 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on); 3920 bool can_autoinc;
3710 3921 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3922 &can_autoinc);
3923
3924 if (cand->ainc_use == use)
3925 {
3926 if (can_autoinc)
3927 cost.cost -= cand->cost_step;
3928 /* If we generated the candidate solely for exploiting autoincrement
3929 opportunities, and it turns out it can't be used, set the cost to
3930 infinity to make sure we ignore it. */
3931 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3932 cost = infinite_cost;
3933 }
3711 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE); 3934 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3712 3935
3713 return !infinite_cost_p (cost); 3936 return !infinite_cost_p (cost);
3714 } 3937 }
3715 3938
3885 note that we cannot get rid of it. */ 4108 note that we cannot get rid of it. */
3886 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv); 4109 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3887 gcc_assert (ok); 4110 gcc_assert (ok);
3888 4111
3889 express_cost = get_computation_cost (data, use, cand, false, 4112 express_cost = get_computation_cost (data, use, cand, false,
3890 &depends_on_express); 4113 &depends_on_express, NULL);
3891 fd_ivopts_data = data; 4114 fd_ivopts_data = data;
3892 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL); 4115 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3893 4116
3894 /* Choose the better approach, preferring the eliminated IV. */ 4117 /* Choose the better approach, preferring the eliminated IV. */
3895 if (compare_costs (elim_cost, express_cost) <= 0) 4118 if (compare_costs (elim_cost, express_cost) <= 0)
3935 return determine_use_iv_cost_condition (data, use, cand); 4158 return determine_use_iv_cost_condition (data, use, cand);
3936 4159
3937 default: 4160 default:
3938 gcc_unreachable (); 4161 gcc_unreachable ();
3939 } 4162 }
4163 }
4164
4165 /* Return true if get_computation_cost indicates that autoincrement is
4166 a possibility for the pair of USE and CAND, false otherwise. */
4167
4168 static bool
4169 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4170 struct iv_cand *cand)
4171 {
4172 bitmap depends_on;
4173 bool can_autoinc;
4174 comp_cost cost;
4175
4176 if (use->type != USE_ADDRESS)
4177 return false;
4178
4179 cost = get_computation_cost (data, use, cand, true, &depends_on,
4180 &can_autoinc);
4181
4182 BITMAP_FREE (depends_on);
4183
4184 return !infinite_cost_p (cost) && can_autoinc;
4185 }
4186
4187 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4188 use that allows autoincrement, and set their AINC_USE if possible. */
4189
4190 static void
4191 set_autoinc_for_original_candidates (struct ivopts_data *data)
4192 {
4193 unsigned i, j;
4194
4195 for (i = 0; i < n_iv_cands (data); i++)
4196 {
4197 struct iv_cand *cand = iv_cand (data, i);
4198 struct iv_use *closest = NULL;
4199 if (cand->pos != IP_ORIGINAL)
4200 continue;
4201 for (j = 0; j < n_iv_uses (data); j++)
4202 {
4203 struct iv_use *use = iv_use (data, j);
4204 unsigned uid = gimple_uid (use->stmt);
4205 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4206 || uid > gimple_uid (cand->incremented_at))
4207 continue;
4208 if (closest == NULL || uid > gimple_uid (closest->stmt))
4209 closest = use;
4210 }
4211 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4212 continue;
4213 cand->ainc_use = closest;
4214 }
4215 }
4216
4217 /* Finds the candidates for the induction variables. */
4218
4219 static void
4220 find_iv_candidates (struct ivopts_data *data)
4221 {
4222 /* Add commonly used ivs. */
4223 add_standard_iv_candidates (data);
4224
4225 /* Add old induction variables. */
4226 add_old_ivs_candidates (data);
4227
4228 /* Add induction variables derived from uses. */
4229 add_derived_ivs_candidates (data);
4230
4231 set_autoinc_for_original_candidates (data);
4232
4233 /* Record the important candidates. */
4234 record_important_candidates (data);
3940 } 4235 }
3941 4236
3942 /* Determines costs of basing the use of the iv on an iv candidate. */ 4237 /* Determines costs of basing the use of the iv on an iv candidate. */
3943 4238
3944 static void 4239 static void
4044 The reason is to make debugging simpler, so this is not relevant for 4339 The reason is to make debugging simpler, so this is not relevant for
4045 artificial ivs created by other optimization passes. */ 4340 artificial ivs created by other optimization passes. */
4046 if (cand->pos != IP_ORIGINAL 4341 if (cand->pos != IP_ORIGINAL
4047 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before))) 4342 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4048 cost++; 4343 cost++;
4049 4344
4050 /* Prefer not to insert statements into latch unless there are some 4345 /* Prefer not to insert statements into latch unless there are some
4051 already (so that we do not create unnecessary jumps). */ 4346 already (so that we do not create unnecessary jumps). */
4052 if (cand->pos == IP_END 4347 if (cand->pos == IP_END
4053 && empty_block_p (ip_end_pos (data->current_loop))) 4348 && empty_block_p (ip_end_pos (data->current_loop)))
4054 cost++; 4349 cost++;
4055 4350
4056 cand->cost = cost; 4351 cand->cost = cost;
4352 cand->cost_step = cost_step;
4057 } 4353 }
4058 4354
4059 /* Determines costs of computation of the candidates. */ 4355 /* Determines costs of computation of the candidates. */
4060 4356
4061 static void 4357 static void
4076 determine_iv_cost (data, cand); 4372 determine_iv_cost (data, cand);
4077 4373
4078 if (dump_file && (dump_flags & TDF_DETAILS)) 4374 if (dump_file && (dump_flags & TDF_DETAILS))
4079 fprintf (dump_file, " %d\t%d\n", i, cand->cost); 4375 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4080 } 4376 }
4081 4377
4082 if (dump_file && (dump_flags & TDF_DETAILS)) 4378 if (dump_file && (dump_flags & TDF_DETAILS))
4083 fprintf (dump_file, "\n"); 4379 fprintf (dump_file, "\n");
4084 } 4380 }
4085 4381
4086 /* Calculates cost for having SIZE induction variables. */ 4382 /* Calculates cost for having SIZE induction variables. */
4116 4412
4117 We set a reserve R (free regs that are used for temporary computations, 4413 We set a reserve R (free regs that are used for temporary computations,
4118 etc.). For now the reserve is a constant 3. 4414 etc.). For now the reserve is a constant 3.
4119 4415
4120 Let I be the number of induction variables. 4416 Let I be the number of induction variables.
4121 4417
4122 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to 4418 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4123 encourage making a lot of ivs without a reason). 4419 encourage making a lot of ivs without a reason).
4124 -- if A - R < U + I <= A, the cost is I * PRES_COST 4420 -- if A - R < U + I <= A, the cost is I * PRES_COST
4125 -- if U + I > A, the cost is I * PRES_COST and 4421 -- if U + I > A, the cost is I * PRES_COST and
4126 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */ 4422 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
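/* The decision procedure above can be read as a small cost function.  The
   sketch below is only an illustration of that three-way formula; the
   function name and the concrete SMALL_COST/PRES_COST/SPILL_COST values are
   made up for the example and are not the ones this file uses.  */

static unsigned
sketch_global_reg_cost (unsigned n_ivs, unsigned n_global_uses,
                        unsigned n_avail_regs, unsigned n_uses)
{
  const unsigned reserve = 3;                        /* R above.  */
  const unsigned small_cost = 1, pres_cost = 4, spill_cost = 13;
  unsigned regs_needed = n_global_uses + n_ivs;      /* U + I above.  */

  if (regs_needed + reserve <= n_avail_regs)
    return n_ivs * small_cost;

  if (regs_needed <= n_avail_regs)
    return n_ivs * pres_cost;

  /* Not everything fits into registers; add the estimated spill cost for
     the fraction of values expected to overflow the register file.  */
  return n_ivs * pres_cost
         + n_uses * spill_cost * (regs_needed - n_avail_regs) / regs_needed;
}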
4595 if (!new_cp) 4891 if (!new_cp)
4596 continue; 4892 continue;
4597 4893
4598 if (!iv_ca_has_deps (ivs, new_cp)) 4894 if (!iv_ca_has_deps (ivs, new_cp))
4599 continue; 4895 continue;
4600 4896
4601 if (!cheaper_cost_pair (new_cp, old_cp)) 4897 if (!cheaper_cost_pair (new_cp, old_cp))
4602 continue; 4898 continue;
4603 4899
4604 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta); 4900 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4605 } 4901 }
4650 cp = get_use_iv_cost (data, use, cnd); 4946 cp = get_use_iv_cost (data, use, cnd);
4651 if (!cp) 4947 if (!cp)
4652 continue; 4948 continue;
4653 if (!iv_ca_has_deps (ivs, cp)) 4949 if (!iv_ca_has_deps (ivs, cp))
4654 continue; 4950 continue;
4655 4951
4656 if (!cheaper_cost_pair (cp, new_cp)) 4952 if (!cheaper_cost_pair (cp, new_cp))
4657 continue; 4953 continue;
4658 4954
4659 new_cp = cp; 4955 new_cp = cp;
4660 } 4956 }
4671 cp = get_use_iv_cost (data, use, cnd); 4967 cp = get_use_iv_cost (data, use, cnd);
4672 if (!cp) 4968 if (!cp)
4673 continue; 4969 continue;
4674 if (!iv_ca_has_deps (ivs, cp)) 4970 if (!iv_ca_has_deps (ivs, cp))
4675 continue; 4971 continue;
4676 4972
4677 if (!cheaper_cost_pair (cp, new_cp)) 4973 if (!cheaper_cost_pair (cp, new_cp))
4678 continue; 4974 continue;
4679 4975
4680 new_cp = cp; 4976 new_cp = cp;
4681 } 4977 }
4819 continue; 5115 continue;
4820 5116
4821 /* Already tried this. */ 5117 /* Already tried this. */
4822 if (cand->important && cand->iv->base_object == NULL_TREE) 5118 if (cand->important && cand->iv->base_object == NULL_TREE)
4823 continue; 5119 continue;
4824 5120
4825 if (iv_ca_cand_used_p (ivs, cand)) 5121 if (iv_ca_cand_used_p (ivs, cand))
4826 continue; 5122 continue;
4827 5123
4828 act_delta = NULL; 5124 act_delta = NULL;
4829 iv_ca_set_cp (data, ivs, use, cp); 5125 iv_ca_set_cp (data, ivs, use, cp);
4881 5177
4882 /* Try extending the set of induction variables by one. */ 5178 /* Try extending the set of induction variables by one. */
4883 for (i = 0; i < n_iv_cands (data); i++) 5179 for (i = 0; i < n_iv_cands (data); i++)
4884 { 5180 {
4885 cand = iv_cand (data, i); 5181 cand = iv_cand (data, i);
4886 5182
4887 if (iv_ca_cand_used_p (ivs, cand)) 5183 if (iv_ca_cand_used_p (ivs, cand))
4888 continue; 5184 continue;
4889 5185
4890 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs); 5186 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4891 if (!act_delta) 5187 if (!act_delta)
4998 case IP_END: 5294 case IP_END:
4999 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop)); 5295 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5000 after = true; 5296 after = true;
5001 break; 5297 break;
5002 5298
5299 case IP_AFTER_USE:
5300 after = true;
5301 /* fall through */
5302 case IP_BEFORE_USE:
5303 incr_pos = gsi_for_stmt (cand->incremented_at);
5304 break;
5305
5003 case IP_ORIGINAL: 5306 case IP_ORIGINAL:
5004 /* Mark that the iv is preserved. */ 5307 /* Mark that the iv is preserved. */
5005 name_info (data, cand->var_before)->preserve_biv = true; 5308 name_info (data, cand->var_before)->preserve_biv = true;
5006 name_info (data, cand->var_after)->preserve_biv = true; 5309 name_info (data, cand->var_after)->preserve_biv = true;
5007 5310
5008 /* Rewrite the increment so that it uses var_before directly. */ 5311 /* Rewrite the increment so that it uses var_before directly. */
5009 find_interesting_uses_op (data, cand->var_after)->selected = cand; 5312 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5010 5313
5011 return; 5314 return;
5012 } 5315 }
5013 5316
5014 gimple_add_tmp_var (cand->var_before); 5317 gimple_add_tmp_var (cand->var_before);
5015 add_referenced_var (cand->var_before); 5318 add_referenced_var (cand->var_before);
5016 5319
5017 base = unshare_expr (cand->iv->base); 5320 base = unshare_expr (cand->iv->base);
5018 5321
5035 cand = iv_cand (data, i); 5338 cand = iv_cand (data, i);
5036 create_new_iv (data, cand); 5339 create_new_iv (data, cand);
5037 } 5340 }
5038 } 5341 }
5039 5342
5040 /* Returns the phi-node in BB with result RESULT. */
5041
5042 static gimple
5043 get_phi_with_result (basic_block bb, tree result)
5044 {
5045 gimple_stmt_iterator i = gsi_start_phis (bb);
5046
5047 for (; !gsi_end_p (i); gsi_next (&i))
5048 if (gimple_phi_result (gsi_stmt (i)) == result)
5049 return gsi_stmt (i);
5050
5051 gcc_unreachable ();
5052 return NULL;
5053 }
5054
5055
5056 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5057 is true, remove also the ssa name defined by the statement. */
5058
5059 static void
5060 remove_statement (gimple stmt, bool including_defined_name)
5061 {
5062 if (gimple_code (stmt) == GIMPLE_PHI)
5063 {
5064 gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
5065 gimple_phi_result (stmt));
5066 gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
5067 remove_phi_node (&bsi, including_defined_name);
5068 }
5069 else
5070 {
5071 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5072 gsi_remove (&bsi, true);
5073 release_defs (stmt);
5074 }
5075 }
5076 5343
5077 /* Rewrites USE (definition of iv used in a nonlinear expression) 5344 /* Rewrites USE (definition of iv used in a nonlinear expression)
5078 using candidate CAND. */ 5345 using candidate CAND. */
5079 5346
5080 static void 5347 static void
5173 5440
5174 if (gimple_code (use->stmt) == GIMPLE_PHI) 5441 if (gimple_code (use->stmt) == GIMPLE_PHI)
5175 { 5442 {
5176 ass = gimple_build_assign (tgt, op); 5443 ass = gimple_build_assign (tgt, op);
5177 gsi_insert_before (&bsi, ass, GSI_SAME_STMT); 5444 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5178 remove_statement (use->stmt, false); 5445
5446 bsi = gsi_for_stmt (use->stmt);
5447 remove_phi_node (&bsi, false);
5179 } 5448 }
5180 else 5449 else
5181 { 5450 {
5182 gimple_assign_set_rhs_from_tree (&bsi, op); 5451 gimple_assign_set_rhs_from_tree (&bsi, op);
5183 use->stmt = gsi_stmt (bsi); 5452 use->stmt = gsi_stmt (bsi);
5220 for_each_index (&ref, idx_remove_ssa_names, NULL); 5489 for_each_index (&ref, idx_remove_ssa_names, NULL);
5221 5490
5222 return ref; 5491 return ref;
5223 } 5492 }
5224 5493
5225 /* Extract the alias analysis info for the memory reference REF. There are
5226 several ways in which this information may be stored, and its precise
5227 semantics depends on the type of the reference, but there is always one
5228 _DECL node hidden somewhere that is used to determine the set of
5229 virtual operands for the reference. The code below deciphers this jungle
5230 and extracts this single useful piece of information. */
5231
5232 static tree
5233 get_ref_tag (tree ref, tree orig)
5234 {
5235 tree var = get_base_address (ref);
5236 tree aref = NULL_TREE, tag, sv;
5237 HOST_WIDE_INT offset, size, maxsize;
5238
5239 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5240 {
5241 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5242 if (ref)
5243 break;
5244 }
5245
5246 if (!var)
5247 return NULL_TREE;
5248
5249 if (TREE_CODE (var) == INDIRECT_REF)
5250 {
5251 /* If the base is a dereference of a pointer, first check its name memory
5252 tag. If it does not have one, use its symbol memory tag. */
5253 var = TREE_OPERAND (var, 0);
5254 if (TREE_CODE (var) != SSA_NAME)
5255 return NULL_TREE;
5256
5257 if (SSA_NAME_PTR_INFO (var))
5258 {
5259 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5260 if (tag)
5261 return tag;
5262 }
5263
5264 var = SSA_NAME_VAR (var);
5265 tag = symbol_mem_tag (var);
5266 gcc_assert (tag != NULL_TREE);
5267 return tag;
5268 }
5269 else
5270 {
5271 if (!DECL_P (var))
5272 return NULL_TREE;
5273
5274 tag = symbol_mem_tag (var);
5275 if (tag)
5276 return tag;
5277
5278 return var;
5279 }
5280 }
5281
5282 /* Copies the reference information from OLD_REF to NEW_REF. */ 5494 /* Copies the reference information from OLD_REF to NEW_REF. */
5283 5495
5284 static void 5496 static void
5285 copy_ref_info (tree new_ref, tree old_ref) 5497 copy_ref_info (tree new_ref, tree old_ref)
5286 { 5498 {
5287 if (TREE_CODE (old_ref) == TARGET_MEM_REF) 5499 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5288 copy_mem_ref_info (new_ref, old_ref); 5500 copy_mem_ref_info (new_ref, old_ref);
5289 else 5501 else
5290 { 5502 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5291 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5292 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5293 }
5294 } 5503 }
5295 5504
5296 /* Rewrites USE (address that is an iv) using candidate CAND. */ 5505 /* Rewrites USE (address that is an iv) using candidate CAND. */
5297 5506
5298 static void 5507 static void
5299 rewrite_use_address (struct ivopts_data *data, 5508 rewrite_use_address (struct ivopts_data *data,
5300 struct iv_use *use, struct iv_cand *cand) 5509 struct iv_use *use, struct iv_cand *cand)
5301 { 5510 {
5302 aff_tree aff; 5511 aff_tree aff;
5303 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 5512 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5513 tree base_hint = NULL_TREE;
5304 tree ref; 5514 tree ref;
5305 bool ok; 5515 bool ok;
5306 5516
5307 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff); 5517 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5308 gcc_assert (ok); 5518 gcc_assert (ok);
5309 unshare_aff_combination (&aff); 5519 unshare_aff_combination (&aff);
5310 5520
5311 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed); 5521 /* To avoid undefined overflow problems, all IV candidates use unsigned
5522 integer types. The drawback is that this makes it impossible for
5523 create_mem_ref to distinguish an IV that is based on a memory object
5524 from one that represents simply an offset.
5525
5526 To work around this problem, we pass a hint to create_mem_ref that
5527 indicates which variable (if any) in aff is an IV based on a memory
5528 object. Note that we only consider the candidate. If this is not
5529 based on an object, the base of the reference is in some subexpression
5530 of the use -- but these will use pointer types, so they are recognized
5531 by the create_mem_ref heuristics anyway. */
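/* A hedged illustration of the hint (the situation is hypothetical): for a
   use such as *p where the chosen candidate was derived from the pointer p,
   the affine form aff holds an unsigned variable plus an offset rather than
   a pointer.  Passing the candidate's variable at this statement as the hint
   suggests to create_mem_ref which part of aff may serve as the base object
   of the resulting TARGET_MEM_REF, instead of leaving it to guess from
   pointer-typed subexpressions alone.  */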
5532 if (cand->iv->base_object)
5533 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5534
5535 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5536 data->speed);
5312 copy_ref_info (ref, *use->op_p); 5537 copy_ref_info (ref, *use->op_p);
5313 *use->op_p = ref; 5538 *use->op_p = ref;
5314 } 5539 }
5315 5540
5316 /* Rewrites USE (the condition such that one of the arguments is an iv) using 5541 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5362 /* Rewrites USE using candidate CAND. */ 5587 /* Rewrites USE using candidate CAND. */
5363 5588
5364 static void 5589 static void
5365 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand) 5590 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5366 { 5591 {
5367 push_stmt_changes (&use->stmt);
5368
5369 switch (use->type) 5592 switch (use->type)
5370 { 5593 {
5371 case USE_NONLINEAR_EXPR: 5594 case USE_NONLINEAR_EXPR:
5372 rewrite_use_nonlinear_expr (data, use, cand); 5595 rewrite_use_nonlinear_expr (data, use, cand);
5373 break; 5596 break;
5382 5605
5383 default: 5606 default:
5384 gcc_unreachable (); 5607 gcc_unreachable ();
5385 } 5608 }
5386 5609
5387 pop_stmt_changes (&use->stmt); 5610 update_stmt (use->stmt);
5388 } 5611 }
5389 5612
5390 /* Rewrite the uses using the selected induction variables. */ 5613 /* Rewrite the uses using the selected induction variables. */
5391 5614
5392 static void 5615 static void
5411 static void 5634 static void
5412 remove_unused_ivs (struct ivopts_data *data) 5635 remove_unused_ivs (struct ivopts_data *data)
5413 { 5636 {
5414 unsigned j; 5637 unsigned j;
5415 bitmap_iterator bi; 5638 bitmap_iterator bi;
5416 5639 bitmap toremove = BITMAP_ALLOC (NULL);
5640
5641 /* Figure out an order in which to release SSA DEFs so that we don't
5642 release something that we'd have to propagate into a debug stmt
5643 afterwards. */
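/* A hypothetical illustration of why the order matters: if both iv1 and iv2
   are unused and the statement defining iv2 still uses iv1, releasing iv1
   first would leave nothing to propagate into debug statements that mention
   it; collecting the names in TOREMOVE and letting release_defs_bitset pick
   the order is meant to avoid that.  */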
5417 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 5644 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5418 { 5645 {
5419 struct version_info *info; 5646 struct version_info *info;
5420 5647
5421 info = ver_info (data, j); 5648 info = ver_info (data, j);
5422 if (info->iv 5649 if (info->iv
5423 && !integer_zerop (info->iv->step) 5650 && !integer_zerop (info->iv->step)
5424 && !info->inv_id 5651 && !info->inv_id
5425 && !info->iv->have_use_for 5652 && !info->iv->have_use_for
5426 && !info->preserve_biv) 5653 && !info->preserve_biv)
5427 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true); 5654 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5428 } 5655 }
5656
5657 release_defs_bitset (toremove);
5658
5659 BITMAP_FREE (toremove);
5429 } 5660 }
5430 5661
5431 /* Frees data allocated by the optimization of a single loop. */ 5662 /* Frees data allocated by the optimization of a single loop. */
5432 5663
5433 static void 5664 static void
5521 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) 5752 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5522 { 5753 {
5523 bool changed = false; 5754 bool changed = false;
5524 struct iv_ca *iv_ca; 5755 struct iv_ca *iv_ca;
5525 edge exit; 5756 edge exit;
5757 basic_block *body;
5526 5758
5527 gcc_assert (!data->niters); 5759 gcc_assert (!data->niters);
5528 data->current_loop = loop; 5760 data->current_loop = loop;
5529 data->speed = optimize_loop_for_speed_p (loop); 5761 data->speed = optimize_loop_for_speed_p (loop);
5530 5762
5531 if (dump_file && (dump_flags & TDF_DETAILS)) 5763 if (dump_file && (dump_flags & TDF_DETAILS))
5532 { 5764 {
5533 fprintf (dump_file, "Processing loop %d\n", loop->num); 5765 fprintf (dump_file, "Processing loop %d\n", loop->num);
5534 5766
5535 exit = single_dom_exit (loop); 5767 exit = single_dom_exit (loop);
5536 if (exit) 5768 if (exit)
5537 { 5769 {
5538 fprintf (dump_file, " single exit %d -> %d, exit condition ", 5770 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5539 exit->src->index, exit->dest->index); 5771 exit->src->index, exit->dest->index);
5542 } 5774 }
5543 5775
5544 fprintf (dump_file, "\n"); 5776 fprintf (dump_file, "\n");
5545 } 5777 }
5546 5778
5779 body = get_loop_body (loop);
5780 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5781 free (body);
5782
5547 /* For each ssa name determines whether it behaves as an induction variable 5783 /* For each ssa name determines whether it behaves as an induction variable
5548 in some loop. */ 5784 in some loop. */
5549 if (!find_induction_variables (data)) 5785 if (!find_induction_variables (data))
5550 goto finish; 5786 goto finish;
5551 5787
5556 5792
5557 /* Finds candidates for the induction variables (item 2). */ 5793 /* Finds candidates for the induction variables (item 2). */
5558 find_iv_candidates (data); 5794 find_iv_candidates (data);
5559 5795
5560 /* Calculates the costs (item 3, part 1). */ 5796 /* Calculates the costs (item 3, part 1). */
5797 determine_iv_costs (data);
5561 determine_use_iv_costs (data); 5798 determine_use_iv_costs (data);
5562 determine_iv_costs (data);
5563 determine_set_costs (data); 5799 determine_set_costs (data);
5564 5800
5565 /* Find the optimal set of induction variables (item 3, part 2). */ 5801 /* Find the optimal set of induction variables (item 3, part 2). */
5566 iv_ca = find_optimal_iv_set (data); 5802 iv_ca = find_optimal_iv_set (data);
5567 if (!iv_ca) 5803 if (!iv_ca)
5569 changed = true; 5805 changed = true;
5570 5806
5571 /* Create the new induction variables (item 4, part 1). */ 5807 /* Create the new induction variables (item 4, part 1). */
5572 create_new_ivs (data, iv_ca); 5808 create_new_ivs (data, iv_ca);
5573 iv_ca_free (&iv_ca); 5809 iv_ca_free (&iv_ca);
5574 5810
5575 /* Rewrite the uses (item 4, part 2). */ 5811 /* Rewrite the uses (item 4, part 2). */
5576 rewrite_uses (data); 5812 rewrite_uses (data);
5577 5813
5578 /* Remove the ivs that are unused after rewriting. */ 5814 /* Remove the ivs that are unused after rewriting. */
5579 remove_unused_ivs (data); 5815 remove_unused_ivs (data);