Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-predcom.c @ 132:d34655255c78
update gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 10:21:07 +0900 |
parents | 84e7813d76e9 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
130:e108057fa461 | 132:d34655255c78 |
---|---|
1 /* Predictive commoning. | 1 /* Predictive commoning. |
2 Copyright (C) 2005-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2005-2018 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify it | 6 GCC is free software; you can redistribute it and/or modify it |
7 under the terms of the GNU General Public License as published by the | 7 under the terms of the GNU General Public License as published by the |
189 a[n] = 2; | 189 a[n] = 2; |
190 a[n+1] = 2; | 190 a[n+1] = 2; |
191 | 191 |
192 The interesting part is this can be viewed either as general store motion | 192 The interesting part is this can be viewed either as general store motion |
193 or general dead store elimination in either intra/inter-iterations way. | 193 or general dead store elimination in either intra/inter-iterations way. |
194 | |
195 With trivial effort, we also support load inside Store-Store chains if the | |
196 load is dominated by a store statement in the same iteration of loop. You | |
197 can see this as a restricted Store-Mixed-Load-Store chain. | |
194 | 198 |
195 TODO: For now, we don't support store-store chains in multi-exit loops. We | 199 TODO: For now, we don't support store-store chains in multi-exit loops. We |
196 force to not unroll in case of store-store chain even if other chains might | 200 force to not unroll in case of store-store chain even if other chains might |
197 ask for unroll. | 201 ask for unroll. |
198 | 202 |
674 tree type = TREE_TYPE (DR_OFFSET (dr)); | 678 tree type = TREE_TYPE (DR_OFFSET (dr)); |
675 aff_tree delta; | 679 aff_tree delta; |
676 | 680 |
677 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, | 681 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, |
678 &name_expansions); | 682 &name_expansions); |
679 aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr))); | 683 aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr))); |
680 aff_combination_add (offset, &delta); | 684 aff_combination_add (offset, &delta); |
681 } | 685 } |
682 | 686 |
683 /* Determines number of iterations of the innermost enclosing loop before B | 687 /* Determines number of iterations of the innermost enclosing loop before B |
684 refers to exactly the same location as A and stores it to OFF. If A and | 688 refers to exactly the same location as A and stores it to OFF. If A and |
686 returns false, otherwise returns true. Both A and B are assumed to | 690 returns false, otherwise returns true. Both A and B are assumed to |
687 satisfy suitable_reference_p. */ | 691 satisfy suitable_reference_p. */ |
688 | 692 |
689 static bool | 693 static bool |
690 determine_offset (struct data_reference *a, struct data_reference *b, | 694 determine_offset (struct data_reference *a, struct data_reference *b, |
691 widest_int *off) | 695 poly_widest_int *off) |
692 { | 696 { |
693 aff_tree diff, baseb, step; | 697 aff_tree diff, baseb, step; |
694 tree typea, typeb; | 698 tree typea, typeb; |
695 | 699 |
696 /* Check that both the references access the location in the same type. */ | 700 /* Check that both the references access the location in the same type. */ |
795 } | 799 } |
796 } | 800 } |
797 | 801 |
798 FOR_EACH_VEC_ELT (depends, i, ddr) | 802 FOR_EACH_VEC_ELT (depends, i, ddr) |
799 { | 803 { |
800 widest_int dummy_off; | 804 poly_widest_int dummy_off; |
801 | 805 |
802 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | 806 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) |
803 continue; | 807 continue; |
804 | 808 |
805 dra = DDR_A (ddr); | 809 dra = DDR_A (ddr); |
900 dataref->always_accessed | 904 dataref->always_accessed |
901 = dominated_by_p (CDI_DOMINATORS, last_always_executed, | 905 = dominated_by_p (CDI_DOMINATORS, last_always_executed, |
902 gimple_bb (dataref->stmt)); | 906 gimple_bb (dataref->stmt)); |
903 dataref->pos = comp->refs.length (); | 907 dataref->pos = comp->refs.length (); |
904 comp->refs.quick_push (dataref); | 908 comp->refs.quick_push (dataref); |
905 if (DR_IS_READ (dr)) | |
906 comp->eliminate_store_p = false; | |
907 } | 909 } |
908 | 910 |
909 for (i = 0; i < n; i++) | 911 for (i = 0; i < n; i++) |
910 { | 912 { |
911 comp = comps[i]; | 913 comp = comps[i]; |
954 gcc_assert (ok); | 956 gcc_assert (ok); |
955 first->offset = 0; | 957 first->offset = 0; |
956 | 958 |
957 for (i = 1; comp->refs.iterate (i, &a); i++) | 959 for (i = 1; comp->refs.iterate (i, &a); i++) |
958 { | 960 { |
959 if (!determine_offset (first->ref, a->ref, &a->offset)) | 961 /* Polynomial offsets are no use, since we need to know the |
962 gap between iteration numbers at compile time. */ | |
963 poly_widest_int offset; | |
964 if (!determine_offset (first->ref, a->ref, &offset) | |
965 || !offset.is_constant (&a->offset)) | |
960 return false; | 966 return false; |
961 | 967 |
962 enum ref_step_type a_step; | 968 enum ref_step_type a_step; |
963 gcc_checking_assert (suitable_reference_p (a->ref, &a_step) | 969 gcc_checking_assert (suitable_reference_p (a->ref, &a_step) |
964 && a_step == comp->comp_step); | 970 && a_step == comp->comp_step); |
1018 return offcmp; | 1024 return offcmp; |
1019 | 1025 |
1020 return (*da)->pos - (*db)->pos; | 1026 return (*da)->pos - (*db)->pos; |
1021 } | 1027 } |
1022 | 1028 |
1029 /* Compares two drefs A and B by their position. Callback for qsort. */ | |
1030 | |
1031 static int | |
1032 order_drefs_by_pos (const void *a, const void *b) | |
1033 { | |
1034 const dref *const da = (const dref *) a; | |
1035 const dref *const db = (const dref *) b; | |
1036 | |
1037 return (*da)->pos - (*db)->pos; | |
1038 } | |
1039 | |
1023 /* Returns root of the CHAIN. */ | 1040 /* Returns root of the CHAIN. */ |
1024 | 1041 |
1025 static inline dref | 1042 static inline dref |
1026 get_chain_root (chain_p chain) | 1043 get_chain_root (chain_p chain) |
1027 { | 1044 { |
1028 return chain->refs[0]; | 1045 return chain->refs[0]; |
1029 } | 1046 } |
1030 | 1047 |
1031 /* Given CHAIN, returns the last ref at DISTANCE, or NULL if it doesn't | 1048 /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't |
1032 exist. */ | 1049 exist. */ |
1033 | 1050 |
1034 static inline dref | 1051 static inline dref |
1035 get_chain_last_ref_at (chain_p chain, unsigned distance) | 1052 get_chain_last_write_at (chain_p chain, unsigned distance) |
1036 { | 1053 { |
1037 unsigned i; | 1054 for (unsigned i = chain->refs.length (); i > 0; i--) |
1038 | 1055 if (DR_IS_WRITE (chain->refs[i - 1]->ref) |
1039 for (i = chain->refs.length (); i > 0; i--) | 1056 && distance == chain->refs[i - 1]->distance) |
1040 if (distance == chain->refs[i - 1]->distance) | 1057 return chain->refs[i - 1]; |
1041 break; | 1058 |
1042 | 1059 return NULL; |
1043 return (i > 0) ? chain->refs[i - 1] : NULL; | 1060 } |
1061 | |
1062 /* Given CHAIN, returns the last write ref with the same distance before load | |
1063 at index LOAD_IDX, or NULL if it doesn't exist. */ | |
1064 | |
1065 static inline dref | |
1066 get_chain_last_write_before_load (chain_p chain, unsigned load_idx) | |
1067 { | |
1068 gcc_assert (load_idx < chain->refs.length ()); | |
1069 | |
1070 unsigned distance = chain->refs[load_idx]->distance; | |
1071 | |
1072 for (unsigned i = load_idx; i > 0; i--) | |
1073 if (DR_IS_WRITE (chain->refs[i - 1]->ref) | |
1074 && distance == chain->refs[i - 1]->distance) | |
1075 return chain->refs[i - 1]; | |
1076 | |
1077 return NULL; | |
1044 } | 1078 } |
1045 | 1079 |
1046 /* Adds REF to the chain CHAIN. */ | 1080 /* Adds REF to the chain CHAIN. */ |
1047 | 1081 |
1048 static void | 1082 static void |
1050 { | 1084 { |
1051 dref root = get_chain_root (chain); | 1085 dref root = get_chain_root (chain); |
1052 | 1086 |
1053 gcc_assert (wi::les_p (root->offset, ref->offset)); | 1087 gcc_assert (wi::les_p (root->offset, ref->offset)); |
1054 widest_int dist = ref->offset - root->offset; | 1088 widest_int dist = ref->offset - root->offset; |
1055 if (wi::leu_p (MAX_DISTANCE, dist)) | |
1056 { | |
1057 free (ref); | |
1058 return; | |
1059 } | |
1060 gcc_assert (wi::fits_uhwi_p (dist)); | 1089 gcc_assert (wi::fits_uhwi_p (dist)); |
1061 | 1090 |
1062 chain->refs.safe_push (ref); | 1091 chain->refs.safe_push (ref); |
1063 | 1092 |
1064 ref->distance = dist.to_uhwi (); | 1093 ref->distance = dist.to_uhwi (); |
1066 if (ref->distance >= chain->length) | 1095 if (ref->distance >= chain->length) |
1067 { | 1096 { |
1068 chain->length = ref->distance; | 1097 chain->length = ref->distance; |
1069 chain->has_max_use_after = false; | 1098 chain->has_max_use_after = false; |
1070 } | 1099 } |
1100 | |
1101 /* Promote this chain to CT_STORE_STORE if it has multiple stores. */ | |
1102 if (DR_IS_WRITE (ref->ref)) | |
1103 chain->type = CT_STORE_STORE; | |
1071 | 1104 |
1072 /* Don't set the flag for store-store chain since there is no use. */ | 1105 /* Don't set the flag for store-store chain since there is no use. */ |
1073 if (chain->type != CT_STORE_STORE | 1106 if (chain->type != CT_STORE_STORE |
1074 && ref->distance == chain->length | 1107 && ref->distance == chain->length |
1075 && ref->pos > root->pos) | 1108 && ref->pos > root->pos) |
1156 static bool | 1189 static bool |
1157 valid_initializer_p (struct data_reference *ref, | 1190 valid_initializer_p (struct data_reference *ref, |
1158 unsigned distance, struct data_reference *root) | 1191 unsigned distance, struct data_reference *root) |
1159 { | 1192 { |
1160 aff_tree diff, base, step; | 1193 aff_tree diff, base, step; |
1161 widest_int off; | 1194 poly_widest_int off; |
1162 | 1195 |
1163 /* Both REF and ROOT must be accessing the same object. */ | 1196 /* Both REF and ROOT must be accessing the same object. */ |
1164 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0)) | 1197 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0)) |
1165 return false; | 1198 return false; |
1166 | 1199 |
1184 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)), | 1217 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)), |
1185 &step, &name_expansions); | 1218 &step, &name_expansions); |
1186 if (!aff_combination_constant_multiple_p (&diff, &step, &off)) | 1219 if (!aff_combination_constant_multiple_p (&diff, &step, &off)) |
1187 return false; | 1220 return false; |
1188 | 1221 |
1189 if (off != distance) | 1222 if (maybe_ne (off, distance)) |
1190 return false; | 1223 return false; |
1191 | 1224 |
1192 return true; | 1225 return true; |
1193 } | 1226 } |
1194 | 1227 |
1245 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost | 1278 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost |
1246 loop enclosing PHI). */ | 1279 loop enclosing PHI). */ |
1247 memset (&init_dr, 0, sizeof (struct data_reference)); | 1280 memset (&init_dr, 0, sizeof (struct data_reference)); |
1248 DR_REF (&init_dr) = init_ref; | 1281 DR_REF (&init_dr) = init_ref; |
1249 DR_STMT (&init_dr) = phi; | 1282 DR_STMT (&init_dr) = phi; |
1250 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop)) | 1283 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop, |
1284 init_stmt)) | |
1251 return NULL; | 1285 return NULL; |
1252 | 1286 |
1253 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) | 1287 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) |
1254 return NULL; | 1288 return NULL; |
1255 | 1289 |
1329 return; | 1363 return; |
1330 } | 1364 } |
1331 | 1365 |
1332 /* Trivial component. */ | 1366 /* Trivial component. */ |
1333 if (comp->refs.length () <= 1) | 1367 if (comp->refs.length () <= 1) |
1334 return; | 1368 { |
1369 if (comp->refs.length () == 1) | |
1370 { | |
1371 free (comp->refs[0]); | |
1372 comp->refs.truncate (0); | |
1373 } | |
1374 return; | |
1375 } | |
1335 | 1376 |
1336 comp->refs.qsort (order_drefs); | 1377 comp->refs.qsort (order_drefs); |
1378 | |
1379 /* For Store-Store chain, we only support load if it is dominated by a | |
1380 store statement in the same iteration of loop. */ | |
1381 if (comp->eliminate_store_p) | |
1382 for (a = NULL, i = 0; i < comp->refs.length (); i++) | |
1383 { | |
1384 if (DR_IS_WRITE (comp->refs[i]->ref)) | |
1385 a = comp->refs[i]; | |
1386 else if (a == NULL || a->offset != comp->refs[i]->offset) | |
1387 { | |
1388 /* If there is load that is not dominated by a store in the | |
1389 same iteration of loop, clear the flag so no Store-Store | |
1390 chain is generated for this component. */ | |
1391 comp->eliminate_store_p = false; | |
1392 break; | |
1393 } | |
1394 } | |
1395 | |
1396 /* Determine roots and create chains for components. */ | |
1337 FOR_EACH_VEC_ELT (comp->refs, i, a) | 1397 FOR_EACH_VEC_ELT (comp->refs, i, a) |
1338 { | 1398 { |
1339 if (!chain | 1399 if (!chain |
1400 || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref)) | |
1340 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref)) | 1401 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref)) |
1341 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs)) | 1402 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs)) |
1342 { | 1403 { |
1343 if (nontrivial_chain_p (chain)) | 1404 if (nontrivial_chain_p (chain)) |
1344 { | 1405 { |
1346 chains->safe_push (chain); | 1407 chains->safe_push (chain); |
1347 } | 1408 } |
1348 else | 1409 else |
1349 release_chain (chain); | 1410 release_chain (chain); |
1350 | 1411 |
1351 if (DR_IS_READ (a->ref)) | 1412 /* Determine type of the chain. If the root reference is a load, |
1352 type = CT_LOAD; | 1413 this can only be a CT_LOAD chain; other chains are intialized |
1353 else | 1414 to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when |
1354 type = comp->eliminate_store_p ? CT_STORE_STORE : CT_STORE_LOAD; | 1415 new reference is added. */ |
1355 | 1416 type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD; |
1356 chain = make_rooted_chain (a, type); | 1417 chain = make_rooted_chain (a, type); |
1357 last_ofs = a->offset; | 1418 last_ofs = a->offset; |
1358 continue; | 1419 continue; |
1359 } | 1420 } |
1360 | 1421 |
1661 | 1722 |
1662 /* Check stores in chain for elimination if they only store loop invariant | 1723 /* Check stores in chain for elimination if they only store loop invariant |
1663 values. */ | 1724 values. */ |
1664 for (unsigned i = 0; i < chain->length; i++) | 1725 for (unsigned i = 0; i < chain->length; i++) |
1665 { | 1726 { |
1666 dref a = get_chain_last_ref_at (chain, i); | 1727 dref a = get_chain_last_write_at (chain, i); |
1667 if (a == NULL) | 1728 if (a == NULL) |
1668 continue; | 1729 continue; |
1669 | 1730 |
1670 gimple *def_stmt, *stmt = a->stmt; | 1731 gimple *def_stmt, *stmt = a->stmt; |
1671 if (!gimple_assign_single_p (stmt)) | 1732 if (!gimple_assign_single_p (stmt)) |
1705 chain->vars.safe_grow_cleared (n); | 1766 chain->vars.safe_grow_cleared (n); |
1706 | 1767 |
1707 /* Initialize root value for eliminated stores at each distance. */ | 1768 /* Initialize root value for eliminated stores at each distance. */ |
1708 for (i = 0; i < n; i++) | 1769 for (i = 0; i < n; i++) |
1709 { | 1770 { |
1710 dref a = get_chain_last_ref_at (chain, i); | 1771 dref a = get_chain_last_write_at (chain, i); |
1711 if (a == NULL) | 1772 if (a == NULL) |
1712 continue; | 1773 continue; |
1713 | 1774 |
1714 var = gimple_assign_rhs1 (a->stmt); | 1775 var = gimple_assign_rhs1 (a->stmt); |
1715 chain->vars[a->distance] = var; | 1776 chain->vars[a->distance] = var; |
1768 init = get_init_expr (chain, i); | 1829 init = get_init_expr (chain, i); |
1769 if (init == NULL_TREE) | 1830 if (init == NULL_TREE) |
1770 { | 1831 { |
1771 /* Root value is rhs operand of the store to be eliminated if | 1832 /* Root value is rhs operand of the store to be eliminated if |
1772 it isn't loaded from memory before loop. */ | 1833 it isn't loaded from memory before loop. */ |
1773 dref a = get_chain_last_ref_at (chain, i); | 1834 dref a = get_chain_last_write_at (chain, i); |
1774 val = gimple_assign_rhs1 (a->stmt); | 1835 val = gimple_assign_rhs1 (a->stmt); |
1775 if (TREE_CLOBBER_P (val)) | 1836 if (TREE_CLOBBER_P (val)) |
1776 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var)); | 1837 { |
1838 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var)); | |
1839 gimple_assign_set_rhs1 (a->stmt, val); | |
1840 } | |
1777 | 1841 |
1778 vtemps[n - i - 1] = val; | 1842 vtemps[n - i - 1] = val; |
1779 } | 1843 } |
1780 else | 1844 else |
1781 { | 1845 { |
2039 | 2103 |
2040 static void | 2104 static void |
2041 execute_pred_commoning_chain (struct loop *loop, chain_p chain, | 2105 execute_pred_commoning_chain (struct loop *loop, chain_p chain, |
2042 bitmap tmp_vars) | 2106 bitmap tmp_vars) |
2043 { | 2107 { |
2044 unsigned i, n; | 2108 unsigned i; |
2045 dref a; | 2109 dref a; |
2046 tree var; | 2110 tree var; |
2047 bool in_lhs; | 2111 bool in_lhs; |
2048 | 2112 |
2049 if (chain->combined) | 2113 if (chain->combined) |
2076 be eliminated, because there is no following killing store. | 2140 be eliminated, because there is no following killing store. |
2077 We need to generate these stores after loop. */ | 2141 We need to generate these stores after loop. */ |
2078 finalize_eliminated_stores (loop, chain); | 2142 finalize_eliminated_stores (loop, chain); |
2079 } | 2143 } |
2080 | 2144 |
2081 /* Eliminate the stores killed by following store. */ | 2145 bool last_store_p = true; |
2082 n = chain->refs.length (); | 2146 for (i = chain->refs.length (); i > 0; i--) |
2083 for (i = 0; i < n - 1; i++) | 2147 { |
2084 remove_stmt (chain->refs[i]->stmt); | 2148 a = chain->refs[i - 1]; |
2149 /* Preserve the last store of the chain. Eliminate other stores | |
2150 which are killed by the last one. */ | |
2151 if (DR_IS_WRITE (a->ref)) | |
2152 { | |
2153 if (last_store_p) | |
2154 last_store_p = false; | |
2155 else | |
2156 remove_stmt (a->stmt); | |
2157 | |
2158 continue; | |
2159 } | |
2160 | |
2161 /* Any load in Store-Store chain must be dominated by a previous | |
2162 store, we replace the load reference with rhs of the store. */ | |
2163 dref b = get_chain_last_write_before_load (chain, i - 1); | |
2164 gcc_assert (b != NULL); | |
2165 var = gimple_assign_rhs1 (b->stmt); | |
2166 replace_ref_with (a->stmt, var, false, false); | |
2167 } | |
2085 } | 2168 } |
2086 else | 2169 else |
2087 { | 2170 { |
2088 /* For non-combined chains, set up the variables that hold its value. */ | 2171 /* For non-combined chains, set up the variables that hold its value. */ |
2089 initialize_root_vars (loop, chain, tmp_vars); | 2172 initialize_root_vars (loop, chain, tmp_vars); |
2518 | 2601 |
2519 update_stmt (stmt); | 2602 update_stmt (stmt); |
2520 } | 2603 } |
2521 | 2604 |
2522 /* Reassociates the expression in that NAME1 and NAME2 are used so that they | 2605 /* Reassociates the expression in that NAME1 and NAME2 are used so that they |
2523 are combined in a single statement, and returns this statement. Note the | 2606 are combined in a single statement, and returns this statement. */ |
2524 statement is inserted before INSERT_BEFORE if it's not NULL. */ | |
2525 | 2607 |
2526 static gimple * | 2608 static gimple * |
2527 reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before) | 2609 reassociate_to_the_same_stmt (tree name1, tree name2) |
2528 { | 2610 { |
2529 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2; | 2611 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2; |
2530 gassign *new_stmt, *tmp_stmt; | 2612 gassign *new_stmt, *tmp_stmt; |
2531 tree new_name, tmp_name, var, r1, r2; | 2613 tree new_name, tmp_name, var, r1, r2; |
2532 unsigned dist1, dist2; | 2614 unsigned dist1, dist2; |
2579 /* Insert the new statement combining NAME1 and NAME2 before S1, and | 2661 /* Insert the new statement combining NAME1 and NAME2 before S1, and |
2580 combine it with the rhs of S1. */ | 2662 combine it with the rhs of S1. */ |
2581 var = create_tmp_reg (type, "predreastmp"); | 2663 var = create_tmp_reg (type, "predreastmp"); |
2582 new_name = make_ssa_name (var); | 2664 new_name = make_ssa_name (var); |
2583 new_stmt = gimple_build_assign (new_name, code, name1, name2); | 2665 new_stmt = gimple_build_assign (new_name, code, name1, name2); |
2584 if (insert_before && stmt_dominates_stmt_p (insert_before, s1)) | |
2585 bsi = gsi_for_stmt (insert_before); | |
2586 else | |
2587 bsi = gsi_for_stmt (s1); | |
2588 | |
2589 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT); | |
2590 | 2666 |
2591 var = create_tmp_reg (type, "predreastmp"); | 2667 var = create_tmp_reg (type, "predreastmp"); |
2592 tmp_name = make_ssa_name (var); | 2668 tmp_name = make_ssa_name (var); |
2593 | 2669 |
2594 /* Rhs of S1 may now be either a binary expression with operation | 2670 /* Rhs of S1 may now be either a binary expression with operation |
2601 bsi = gsi_for_stmt (s1); | 2677 bsi = gsi_for_stmt (s1); |
2602 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name); | 2678 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name); |
2603 s1 = gsi_stmt (bsi); | 2679 s1 = gsi_stmt (bsi); |
2604 update_stmt (s1); | 2680 update_stmt (s1); |
2605 | 2681 |
2682 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT); | |
2606 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT); | 2683 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT); |
2607 | 2684 |
2608 return new_stmt; | 2685 return new_stmt; |
2609 } | 2686 } |
2610 | 2687 |
2611 /* Returns the statement that combines references R1 and R2. In case R1 | 2688 /* Returns the statement that combines references R1 and R2. In case R1 |
2612 and R2 are not used in the same statement, but they are used with an | 2689 and R2 are not used in the same statement, but they are used with an |
2613 associative and commutative operation in the same expression, reassociate | 2690 associative and commutative operation in the same expression, reassociate |
2614 the expression so that they are used in the same statement. The combined | 2691 the expression so that they are used in the same statement. */ |
2615 statement is inserted before INSERT_BEFORE if it's not NULL. */ | |
2616 | 2692 |
2617 static gimple * | 2693 static gimple * |
2618 stmt_combining_refs (dref r1, dref r2, gimple *insert_before) | 2694 stmt_combining_refs (dref r1, dref r2) |
2619 { | 2695 { |
2620 gimple *stmt1, *stmt2; | 2696 gimple *stmt1, *stmt2; |
2621 tree name1 = name_for_ref (r1); | 2697 tree name1 = name_for_ref (r1); |
2622 tree name2 = name_for_ref (r2); | 2698 tree name2 = name_for_ref (r2); |
2623 | 2699 |
2624 stmt1 = find_use_stmt (&name1); | 2700 stmt1 = find_use_stmt (&name1); |
2625 stmt2 = find_use_stmt (&name2); | 2701 stmt2 = find_use_stmt (&name2); |
2626 if (stmt1 == stmt2) | 2702 if (stmt1 == stmt2) |
2627 return stmt1; | 2703 return stmt1; |
2628 | 2704 |
2629 return reassociate_to_the_same_stmt (name1, name2, insert_before); | 2705 return reassociate_to_the_same_stmt (name1, name2); |
2630 } | 2706 } |
2631 | 2707 |
2632 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the | 2708 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the |
2633 description of the new chain is returned, otherwise we return NULL. */ | 2709 description of the new chain is returned, otherwise we return NULL. */ |
2634 | 2710 |
2637 { | 2713 { |
2638 dref r1, r2, nw; | 2714 dref r1, r2, nw; |
2639 enum tree_code op = ERROR_MARK; | 2715 enum tree_code op = ERROR_MARK; |
2640 bool swap = false; | 2716 bool swap = false; |
2641 chain_p new_chain; | 2717 chain_p new_chain; |
2642 int i, j, num; | 2718 unsigned i; |
2643 gimple *root_stmt; | |
2644 tree rslt_type = NULL_TREE; | 2719 tree rslt_type = NULL_TREE; |
2645 | 2720 |
2646 if (ch1 == ch2) | 2721 if (ch1 == ch2) |
2647 return NULL; | 2722 return NULL; |
2648 if (ch1->length != ch2->length) | 2723 if (ch1->length != ch2->length) |
2658 return NULL; | 2733 return NULL; |
2659 | 2734 |
2660 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type)) | 2735 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type)) |
2661 return NULL; | 2736 return NULL; |
2662 } | 2737 } |
2663 | |
2664 ch1->combined = true; | |
2665 ch2->combined = true; | |
2666 | 2738 |
2667 if (swap) | 2739 if (swap) |
2668 std::swap (ch1, ch2); | 2740 std::swap (ch1, ch2); |
2669 | 2741 |
2670 new_chain = XCNEW (struct chain); | 2742 new_chain = XCNEW (struct chain); |
2673 new_chain->ch1 = ch1; | 2745 new_chain->ch1 = ch1; |
2674 new_chain->ch2 = ch2; | 2746 new_chain->ch2 = ch2; |
2675 new_chain->rslt_type = rslt_type; | 2747 new_chain->rslt_type = rslt_type; |
2676 new_chain->length = ch1->length; | 2748 new_chain->length = ch1->length; |
2677 | 2749 |
2678 gimple *insert = NULL; | 2750 for (i = 0; (ch1->refs.iterate (i, &r1) |
2679 num = ch1->refs.length (); | 2751 && ch2->refs.iterate (i, &r2)); i++) |
2680 i = (new_chain->length == 0) ? num - 1 : 0; | 2752 { |
2681 j = (new_chain->length == 0) ? -1 : 1; | |
2682 /* For ZERO length chain, process refs in reverse order so that dominant | |
2683 position is ready when it comes to the root ref. | |
2684 For non-ZERO length chain, process refs in order. See PR79663. */ | |
2685 for (; num > 0; num--, i += j) | |
2686 { | |
2687 r1 = ch1->refs[i]; | |
2688 r2 = ch2->refs[i]; | |
2689 nw = XCNEW (struct dref_d); | 2753 nw = XCNEW (struct dref_d); |
2754 nw->stmt = stmt_combining_refs (r1, r2); | |
2690 nw->distance = r1->distance; | 2755 nw->distance = r1->distance; |
2691 | 2756 |
2692 /* For ZERO length chain, insert combined stmt of root ref at dominant | |
2693 position. */ | |
2694 nw->stmt = stmt_combining_refs (r1, r2, i == 0 ? insert : NULL); | |
2695 /* For ZERO length chain, record dominant position where combined stmt | |
2696 of root ref should be inserted. In this case, though all root refs | |
2697 dominate following ones, it's possible that combined stmt doesn't. | |
2698 See PR70754. */ | |
2699 if (new_chain->length == 0 | |
2700 && (insert == NULL || stmt_dominates_stmt_p (nw->stmt, insert))) | |
2701 insert = nw->stmt; | |
2702 | |
2703 new_chain->refs.safe_push (nw); | 2757 new_chain->refs.safe_push (nw); |
2704 } | 2758 } |
2705 if (new_chain->length == 0) | 2759 |
2706 { | 2760 ch1->combined = true; |
2707 /* Restore the order for ZERO length chain's refs. */ | 2761 ch2->combined = true; |
2708 num = new_chain->refs.length () >> 1; | |
2709 for (i = 0, j = new_chain->refs.length () - 1; i < num; i++, j--) | |
2710 std::swap (new_chain->refs[i], new_chain->refs[j]); | |
2711 | |
2712 /* For ZERO length chain, has_max_use_after must be true since root | |
2713 combined stmt must dominates others. */ | |
2714 new_chain->has_max_use_after = true; | |
2715 return new_chain; | |
2716 } | |
2717 | |
2718 new_chain->has_max_use_after = false; | |
2719 root_stmt = get_chain_root (new_chain)->stmt; | |
2720 for (i = 1; new_chain->refs.iterate (i, &nw); i++) | |
2721 { | |
2722 if (nw->distance == new_chain->length | |
2723 && !stmt_dominates_stmt_p (nw->stmt, root_stmt)) | |
2724 { | |
2725 new_chain->has_max_use_after = true; | |
2726 break; | |
2727 } | |
2728 } | |
2729 | |
2730 return new_chain; | 2762 return new_chain; |
2731 } | 2763 } |
2732 | 2764 |
2733 /* Try to combine the CHAINS. */ | 2765 /* Recursively update position information of all offspring chains to ROOT |
2766 chain's position information. */ | |
2734 | 2767 |
2735 static void | 2768 static void |
2736 try_combine_chains (vec<chain_p> *chains) | 2769 update_pos_for_combined_chains (chain_p root) |
2770 { | |
2771 chain_p ch1 = root->ch1, ch2 = root->ch2; | |
2772 dref ref, ref1, ref2; | |
2773 for (unsigned j = 0; (root->refs.iterate (j, &ref) | |
2774 && ch1->refs.iterate (j, &ref1) | |
2775 && ch2->refs.iterate (j, &ref2)); ++j) | |
2776 ref1->pos = ref2->pos = ref->pos; | |
2777 | |
2778 if (ch1->type == CT_COMBINATION) | |
2779 update_pos_for_combined_chains (ch1); | |
2780 if (ch2->type == CT_COMBINATION) | |
2781 update_pos_for_combined_chains (ch2); | |
2782 } | |
2783 | |
2784 /* Returns true if statement S1 dominates statement S2. */ | |
2785 | |
2786 static bool | |
2787 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2) | |
2788 { | |
2789 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); | |
2790 | |
2791 if (!bb1 || s1 == s2) | |
2792 return true; | |
2793 | |
2794 if (bb1 == bb2) | |
2795 return gimple_uid (s1) < gimple_uid (s2); | |
2796 | |
2797 return dominated_by_p (CDI_DOMINATORS, bb2, bb1); | |
2798 } | |
2799 | |
2800 /* Try to combine the CHAINS in LOOP. */ | |
2801 | |
2802 static void | |
2803 try_combine_chains (struct loop *loop, vec<chain_p> *chains) | |
2737 { | 2804 { |
2738 unsigned i, j; | 2805 unsigned i, j; |
2739 chain_p ch1, ch2, cch; | 2806 chain_p ch1, ch2, cch; |
2740 auto_vec<chain_p> worklist; | 2807 auto_vec<chain_p> worklist; |
2808 bool combined_p = false; | |
2741 | 2809 |
2742 FOR_EACH_VEC_ELT (*chains, i, ch1) | 2810 FOR_EACH_VEC_ELT (*chains, i, ch1) |
2743 if (chain_can_be_combined_p (ch1)) | 2811 if (chain_can_be_combined_p (ch1)) |
2744 worklist.safe_push (ch1); | 2812 worklist.safe_push (ch1); |
2745 | 2813 |
2757 cch = combine_chains (ch1, ch2); | 2825 cch = combine_chains (ch1, ch2); |
2758 if (cch) | 2826 if (cch) |
2759 { | 2827 { |
2760 worklist.safe_push (cch); | 2828 worklist.safe_push (cch); |
2761 chains->safe_push (cch); | 2829 chains->safe_push (cch); |
2830 combined_p = true; | |
2831 break; | |
2832 } | |
2833 } | |
2834 } | |
2835 if (!combined_p) | |
2836 return; | |
2837 | |
2838 /* Setup UID for all statements in dominance order. */ | |
2839 basic_block *bbs = get_loop_body (loop); | |
2840 renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes); | |
2841 free (bbs); | |
2842 | |
2843 /* Re-association in combined chains may generate statements different to | |
2844 order of references of the original chain. We need to keep references | |
2845 of combined chain in dominance order so that all uses will be inserted | |
2846 after definitions. Note: | |
2847 A) This is necessary for all combined chains. | |
2848 B) This is only necessary for ZERO distance references because other | |
2849 references inherit value from loop carried PHIs. | |
2850 | |
2851 We first update position information for all combined chains. */ | |
2852 dref ref; | |
2853 for (i = 0; chains->iterate (i, &ch1); ++i) | |
2854 { | |
2855 if (ch1->type != CT_COMBINATION || ch1->combined) | |
2856 continue; | |
2857 | |
2858 for (j = 0; ch1->refs.iterate (j, &ref); ++j) | |
2859 ref->pos = gimple_uid (ref->stmt); | |
2860 | |
2861 update_pos_for_combined_chains (ch1); | |
2862 } | |
2863 /* Then sort references according to newly updated position information. */ | |
2864 for (i = 0; chains->iterate (i, &ch1); ++i) | |
2865 { | |
2866 if (ch1->type != CT_COMBINATION && !ch1->combined) | |
2867 continue; | |
2868 | |
2869 /* Find the first reference with non-ZERO distance. */ | |
2870 if (ch1->length == 0) | |
2871 j = ch1->refs.length(); | |
2872 else | |
2873 { | |
2874 for (j = 0; ch1->refs.iterate (j, &ref); ++j) | |
2875 if (ref->distance != 0) | |
2876 break; | |
2877 } | |
2878 | |
2879 /* Sort all ZERO distance references by position. */ | |
2880 qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos); | |
2881 | |
2882 if (ch1->combined) | |
2883 continue; | |
2884 | |
2885 /* For ZERO length chain, has_max_use_after must be true since root | |
2886 combined stmt must dominates others. */ | |
2887 if (ch1->length == 0) | |
2888 { | |
2889 ch1->has_max_use_after = true; | |
2890 continue; | |
2891 } | |
2892 /* Check if there is use at max distance after root for combined chains | |
2893 and set flag accordingly. */ | |
2894 ch1->has_max_use_after = false; | |
2895 gimple *root_stmt = get_chain_root (ch1)->stmt; | |
2896 for (j = 1; ch1->refs.iterate (j, &ref); ++j) | |
2897 { | |
2898 if (ref->distance == ch1->length | |
2899 && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt)) | |
2900 { | |
2901 ch1->has_max_use_after = true; | |
2762 break; | 2902 break; |
2763 } | 2903 } |
2764 } | 2904 } |
2765 } | 2905 } |
2766 } | 2906 } |
3094 } | 3234 } |
3095 prepare_initializers (loop, chains); | 3235 prepare_initializers (loop, chains); |
3096 loop_closed_ssa = prepare_finalizers (loop, chains); | 3236 loop_closed_ssa = prepare_finalizers (loop, chains); |
3097 | 3237 |
3098 /* Try to combine the chains that are always worked with together. */ | 3238 /* Try to combine the chains that are always worked with together. */ |
3099 try_combine_chains (&chains); | 3239 try_combine_chains (loop, &chains); |
3100 | 3240 |
3101 insert_init_seqs (loop, chains); | 3241 insert_init_seqs (loop, chains); |
3102 | 3242 |
3103 if (dump_file && (dump_flags & TDF_DETAILS)) | 3243 if (dump_file && (dump_flags & TDF_DETAILS)) |
3104 { | 3244 { |