Mercurial > hg > CbC > CbC_gcc
comparison gcc/cfgloopmanip.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Loop manipulation code for GNU compiler. | 1 /* Loop manipulation code for GNU compiler. |
2 Copyright (C) 2002-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2002-2018 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify it under | 6 GCC is free software; you can redistribute it and/or modify it under |
7 the terms of the GNU General Public License as published by the Free | 7 the terms of the GNU General Public License as published by the Free |
500 free (bbs); | 500 free (bbs); |
501 } | 501 } |
502 | 502 |
503 /* Scale profile in LOOP by P. | 503 /* Scale profile in LOOP by P. |
504 If ITERATION_BOUND is non-zero, scale even further if loop is predicted | 504 If ITERATION_BOUND is non-zero, scale even further if loop is predicted |
505 to iterate too many times. */ | 505 to iterate too many times. |
506 Before caling this function, preheader block profile should be already | |
507 scaled to final count. This is necessary because loop iterations are | |
508 determined by comparing header edge count to latch ege count and thus | |
509 they need to be scaled synchronously. */ | |
506 | 510 |
507 void | 511 void |
508 scale_loop_profile (struct loop *loop, profile_probability p, | 512 scale_loop_profile (struct loop *loop, profile_probability p, |
509 gcov_type iteration_bound) | 513 gcov_type iteration_bound) |
510 { | 514 { |
511 gcov_type iterations = expected_loop_iterations_unbounded (loop); | 515 edge e, preheader_e; |
512 edge e; | |
513 edge_iterator ei; | 516 edge_iterator ei; |
514 | 517 |
515 if (dump_file && (dump_flags & TDF_DETAILS)) | 518 if (dump_file && (dump_flags & TDF_DETAILS)) |
516 { | 519 { |
517 fprintf (dump_file, ";; Scaling loop %i with scale ", | 520 fprintf (dump_file, ";; Scaling loop %i with scale ", |
518 loop->num); | 521 loop->num); |
519 p.dump (dump_file); | 522 p.dump (dump_file); |
520 fprintf (dump_file, " bounding iterations to %i from guessed %i\n", | 523 fprintf (dump_file, " bounding iterations to %i\n", |
521 (int)iteration_bound, (int)iterations); | 524 (int)iteration_bound); |
525 } | |
526 | |
527 /* Scale the probabilities. */ | |
528 scale_loop_frequencies (loop, p); | |
529 | |
530 if (iteration_bound == 0) | |
531 return; | |
532 | |
533 gcov_type iterations = expected_loop_iterations_unbounded (loop, NULL, true); | |
534 | |
535 if (dump_file && (dump_flags & TDF_DETAILS)) | |
536 { | |
537 fprintf (dump_file, ";; guessed iterations after scaling %i\n", | |
538 (int)iterations); | |
522 } | 539 } |
523 | 540 |
524 /* See if loop is predicted to iterate too many times. */ | 541 /* See if loop is predicted to iterate too many times. */ |
525 if (iteration_bound && iterations > 0 | 542 if (iterations <= iteration_bound) |
526 && p.apply (iterations) > iteration_bound) | 543 return; |
527 { | 544 |
528 /* Fixing loop profile for different trip count is not trivial; the exit | 545 preheader_e = loop_preheader_edge (loop); |
529 probabilities has to be updated to match and frequencies propagated down | 546 |
530 to the loop body. | 547 /* We could handle also loops without preheaders, but bounding is |
531 | 548 currently used only by optimizers that have preheaders constructed. */ |
532 We fully update only the simple case of loop with single exit that is | 549 gcc_checking_assert (preheader_e); |
533 either from the latch or BB just before latch and leads from BB with | 550 profile_count count_in = preheader_e->count (); |
534 simple conditional jump. This is OK for use in vectorizer. */ | 551 |
552 if (count_in > profile_count::zero () | |
553 && loop->header->count.initialized_p ()) | |
554 { | |
555 profile_count count_delta = profile_count::zero (); | |
556 | |
535 e = single_exit (loop); | 557 e = single_exit (loop); |
536 if (e) | 558 if (e) |
537 { | 559 { |
538 edge other_e; | 560 edge other_e; |
539 int freq_delta; | 561 FOR_EACH_EDGE (other_e, ei, e->src->succs) |
540 profile_count count_delta; | |
541 | |
542 FOR_EACH_EDGE (other_e, ei, e->src->succs) | |
543 if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE)) | 562 if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE)) |
544 && e != other_e) | 563 && e != other_e) |
545 break; | 564 break; |
546 | 565 |
547 /* Probability of exit must be 1/iterations. */ | 566 /* Probability of exit must be 1/iterations. */ |
548 freq_delta = EDGE_FREQUENCY (e); | |
549 count_delta = e->count (); | 567 count_delta = e->count (); |
550 e->probability = profile_probability::always () | 568 e->probability = profile_probability::always () |
551 .apply_scale (1, iteration_bound); | 569 .apply_scale (1, iteration_bound); |
552 other_e->probability = e->probability.invert (); | 570 other_e->probability = e->probability.invert (); |
553 freq_delta -= EDGE_FREQUENCY (e); | 571 |
554 count_delta -= e->count (); | 572 /* In code below we only handle the following two updates. */ |
555 | 573 if (other_e->dest != loop->header |
556 /* If latch exists, change its frequency and count, since we changed | 574 && other_e->dest != loop->latch |
557 probability of exit. Theoretically we should update everything from | 575 && (dump_file && (dump_flags & TDF_DETAILS))) |
558 source of exit edge to latch, but for vectorizer this is enough. */ | |
559 if (loop->latch | |
560 && loop->latch != e->src) | |
561 { | 576 { |
562 loop->latch->frequency += freq_delta; | 577 fprintf (dump_file, ";; giving up on update of paths from " |
563 if (loop->latch->frequency < 0) | 578 "exit condition to latch\n"); |
564 loop->latch->frequency = 0; | |
565 loop->latch->count += count_delta; | |
566 } | 579 } |
567 } | 580 } |
581 else | |
582 if (dump_file && (dump_flags & TDF_DETAILS)) | |
583 fprintf (dump_file, ";; Loop has multiple exit edges; " | |
584 "giving up on exit condition update\n"); | |
568 | 585 |
569 /* Roughly speaking we want to reduce the loop body profile by the | 586 /* Roughly speaking we want to reduce the loop body profile by the |
570 difference of loop iterations. We however can do better if | 587 difference of loop iterations. We however can do better if |
571 we look at the actual profile, if it is available. */ | 588 we look at the actual profile, if it is available. */ |
572 p = p.apply_scale (iteration_bound, iterations); | 589 p = profile_probability::always (); |
573 | 590 |
574 bool determined = false; | 591 count_in = count_in.apply_scale (iteration_bound, 1); |
575 if (loop->header->count.initialized_p ()) | 592 p = count_in.probability_in (loop->header->count); |
576 { | |
577 profile_count count_in = profile_count::zero (); | |
578 | |
579 FOR_EACH_EDGE (e, ei, loop->header->preds) | |
580 if (e->src != loop->latch) | |
581 count_in += e->count (); | |
582 | |
583 if (count_in > profile_count::zero () ) | |
584 { | |
585 p = count_in.probability_in (loop->header->count.apply_scale | |
586 (iteration_bound, 1)); | |
587 determined = true; | |
588 } | |
589 } | |
590 if (!determined && loop->header->frequency) | |
591 { | |
592 int freq_in = 0; | |
593 | |
594 FOR_EACH_EDGE (e, ei, loop->header->preds) | |
595 if (e->src != loop->latch) | |
596 freq_in += EDGE_FREQUENCY (e); | |
597 | |
598 if (freq_in != 0) | |
599 p = profile_probability::probability_in_gcov_type | |
600 (freq_in * iteration_bound, loop->header->frequency); | |
601 } | |
602 if (!(p > profile_probability::never ())) | 593 if (!(p > profile_probability::never ())) |
603 p = profile_probability::very_unlikely (); | 594 p = profile_probability::very_unlikely (); |
604 } | 595 |
605 | 596 if (p == profile_probability::always () |
606 if (p >= profile_probability::always () | 597 || !p.initialized_p ()) |
607 || !p.initialized_p ()) | 598 return; |
608 return; | 599 |
609 | 600 /* If latch exists, change its count, since we changed |
610 /* Scale the actual probabilities. */ | 601 probability of exit. Theoretically we should update everything from |
611 scale_loop_frequencies (loop, p); | 602 source of exit edge to latch, but for vectorizer this is enough. */ |
612 if (dump_file && (dump_flags & TDF_DETAILS)) | 603 if (loop->latch && loop->latch != e->src) |
613 fprintf (dump_file, ";; guessed iterations are now %i\n", | 604 loop->latch->count += count_delta; |
614 (int)expected_loop_iterations_unbounded (loop)); | 605 |
606 /* Scale the probabilities. */ | |
607 scale_loop_frequencies (loop, p); | |
608 | |
609 /* Change latch's count back. */ | |
610 if (loop->latch && loop->latch != e->src) | |
611 loop->latch->count -= count_delta; | |
612 | |
613 if (dump_file && (dump_flags & TDF_DETAILS)) | |
614 fprintf (dump_file, ";; guessed iterations are now %i\n", | |
615 (int)expected_loop_iterations_unbounded (loop, NULL, true)); | |
616 } | |
615 } | 617 } |
616 | 618 |
617 /* Recompute dominance information for basic blocks outside LOOP. */ | 619 /* Recompute dominance information for basic blocks outside LOOP. */ |
618 | 620 |
619 static void | 621 static void |
798 loop = alloc_loop (); | 800 loop = alloc_loop (); |
799 loop->header = loop_header; | 801 loop->header = loop_header; |
800 loop->latch = loop_latch; | 802 loop->latch = loop_latch; |
801 add_loop (loop, outer); | 803 add_loop (loop, outer); |
802 | 804 |
803 /* TODO: Fix frequencies and counts. */ | 805 /* TODO: Fix counts. */ |
804 scale_loop_frequencies (loop, profile_probability::even ()); | 806 scale_loop_frequencies (loop, profile_probability::even ()); |
805 | 807 |
806 /* Update dominators. */ | 808 /* Update dominators. */ |
807 update_dominators_in_loop (loop); | 809 update_dominators_in_loop (loop); |
808 | 810 |
864 { | 866 { |
865 basic_block succ_bb = latch_edge->dest; | 867 basic_block succ_bb = latch_edge->dest; |
866 basic_block pred_bb = header_edge->src; | 868 basic_block pred_bb = header_edge->src; |
867 struct loop *loop = alloc_loop (); | 869 struct loop *loop = alloc_loop (); |
868 struct loop *outer = loop_outer (succ_bb->loop_father); | 870 struct loop *outer = loop_outer (succ_bb->loop_father); |
869 int freq; | |
870 profile_count cnt; | 871 profile_count cnt; |
871 | 872 |
872 loop->header = header_edge->dest; | 873 loop->header = header_edge->dest; |
873 loop->latch = latch_edge->src; | 874 loop->latch = latch_edge->src; |
874 | 875 |
875 freq = EDGE_FREQUENCY (header_edge); | |
876 cnt = header_edge->count (); | 876 cnt = header_edge->count (); |
877 | 877 |
878 /* Redirect edges. */ | 878 /* Redirect edges. */ |
879 loop_redirect_edge (latch_edge, loop->header); | 879 loop_redirect_edge (latch_edge, loop->header); |
880 loop_redirect_edge (true_edge, succ_bb); | 880 loop_redirect_edge (true_edge, succ_bb); |
899 /* Add switch_bb to appropriate loop. */ | 899 /* Add switch_bb to appropriate loop. */ |
900 if (switch_bb->loop_father) | 900 if (switch_bb->loop_father) |
901 remove_bb_from_loops (switch_bb); | 901 remove_bb_from_loops (switch_bb); |
902 add_bb_to_loop (switch_bb, outer); | 902 add_bb_to_loop (switch_bb, outer); |
903 | 903 |
904 /* Fix frequencies. */ | 904 /* Fix counts. */ |
905 if (redirect_all_edges) | 905 if (redirect_all_edges) |
906 { | 906 { |
907 switch_bb->frequency = freq; | |
908 switch_bb->count = cnt; | 907 switch_bb->count = cnt; |
909 } | 908 } |
910 scale_loop_frequencies (loop, false_scale); | 909 scale_loop_frequencies (loop, false_scale); |
911 scale_loop_frequencies (succ_bb->loop_father, true_scale); | 910 scale_loop_frequencies (succ_bb->loop_father, true_scale); |
912 update_dominators_in_loop (loop); | 911 update_dominators_in_loop (loop); |
1021 |= loop->warned_aggressive_loop_optimizations; | 1020 |= loop->warned_aggressive_loop_optimizations; |
1022 target->in_oacc_kernels_region = loop->in_oacc_kernels_region; | 1021 target->in_oacc_kernels_region = loop->in_oacc_kernels_region; |
1023 } | 1022 } |
1024 | 1023 |
1025 /* Copies copy of LOOP as subloop of TARGET loop, placing newly | 1024 /* Copies copy of LOOP as subloop of TARGET loop, placing newly |
1026 created loop into loops structure. */ | 1025 created loop into loops structure. If AFTER is non-null |
1026 the new loop is added at AFTER->next, otherwise in front of TARGETs | |
1027 sibling list. */ | |
1027 struct loop * | 1028 struct loop * |
1028 duplicate_loop (struct loop *loop, struct loop *target) | 1029 duplicate_loop (struct loop *loop, struct loop *target, struct loop *after) |
1029 { | 1030 { |
1030 struct loop *cloop; | 1031 struct loop *cloop; |
1031 cloop = alloc_loop (); | 1032 cloop = alloc_loop (); |
1032 place_new_loop (cfun, cloop); | 1033 place_new_loop (cfun, cloop); |
1033 | 1034 |
1035 | 1036 |
1036 /* Mark the new loop as copy of LOOP. */ | 1037 /* Mark the new loop as copy of LOOP. */ |
1037 set_loop_copy (loop, cloop); | 1038 set_loop_copy (loop, cloop); |
1038 | 1039 |
1039 /* Add it to target. */ | 1040 /* Add it to target. */ |
1040 flow_loop_tree_node_add (target, cloop); | 1041 flow_loop_tree_node_add (target, cloop, after); |
1041 | 1042 |
1042 return cloop; | 1043 return cloop; |
1043 } | 1044 } |
1044 | 1045 |
1045 /* Copies structure of subloops of LOOP into TARGET loop, placing | 1046 /* Copies structure of subloops of LOOP into TARGET loop, placing |
1046 newly created loops into loop tree. */ | 1047 newly created loops into loop tree at the end of TARGETs sibling |
1048 list in the original order. */ | |
1047 void | 1049 void |
1048 duplicate_subloops (struct loop *loop, struct loop *target) | 1050 duplicate_subloops (struct loop *loop, struct loop *target) |
1049 { | 1051 { |
1050 struct loop *aloop, *cloop; | 1052 struct loop *aloop, *cloop, *tail; |
1051 | 1053 |
1054 for (tail = target->inner; tail && tail->next; tail = tail->next) | |
1055 ; | |
1052 for (aloop = loop->inner; aloop; aloop = aloop->next) | 1056 for (aloop = loop->inner; aloop; aloop = aloop->next) |
1053 { | 1057 { |
1054 cloop = duplicate_loop (aloop, target); | 1058 cloop = duplicate_loop (aloop, target, tail); |
1059 tail = cloop; | |
1060 gcc_assert(!tail->next); | |
1055 duplicate_subloops (aloop, cloop); | 1061 duplicate_subloops (aloop, cloop); |
1056 } | 1062 } |
1057 } | 1063 } |
1058 | 1064 |
1059 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS, | 1065 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS, |
1060 into TARGET loop, placing newly created loops into loop tree. */ | 1066 into TARGET loop, placing newly created loops into loop tree adding |
1067 them to TARGETs sibling list at the end in order. */ | |
1061 static void | 1068 static void |
1062 copy_loops_to (struct loop **copied_loops, int n, struct loop *target) | 1069 copy_loops_to (struct loop **copied_loops, int n, struct loop *target) |
1063 { | 1070 { |
1064 struct loop *aloop; | 1071 struct loop *aloop, *tail; |
1065 int i; | 1072 int i; |
1066 | 1073 |
1074 for (tail = target->inner; tail && tail->next; tail = tail->next) | |
1075 ; | |
1067 for (i = 0; i < n; i++) | 1076 for (i = 0; i < n; i++) |
1068 { | 1077 { |
1069 aloop = duplicate_loop (copied_loops[i], target); | 1078 aloop = duplicate_loop (copied_loops[i], target, tail); |
1079 tail = aloop; | |
1080 gcc_assert(!tail->next); | |
1070 duplicate_subloops (copied_loops[i], aloop); | 1081 duplicate_subloops (copied_loops[i], aloop); |
1071 } | 1082 } |
1072 } | 1083 } |
1073 | 1084 |
1074 /* Redirects edge E to basic block DEST. */ | 1085 /* Redirects edge E to basic block DEST. */ |
1093 | 1104 |
1094 return ret; | 1105 return ret; |
1095 } | 1106 } |
1096 | 1107 |
1097 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating | 1108 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating |
1098 loop structure and dominators. E's destination must be LOOP header for | 1109 loop structure and dominators (order of inner subloops is retained). |
1099 this to work, i.e. it must be entry or latch edge of this loop; these are | 1110 E's destination must be LOOP header for this to work, i.e. it must be entry |
1100 unique, as the loops must have preheaders for this function to work | 1111 or latch edge of this loop; these are unique, as the loops must have |
1101 correctly (in case E is latch, the function unrolls the loop, if E is entry | 1112 preheaders for this function to work correctly (in case E is latch, the |
1102 edge, it peels the loop). Store edges created by copying ORIG edge from | 1113 function unrolls the loop, if E is entry edge, it peels the loop). Store |
1103 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to | 1114 edges created by copying ORIG edge from copies corresponding to set bits in |
1104 original LOOP body, the other copies are numbered in order given by control | 1115 WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies |
1105 flow through them) into TO_REMOVE array. Returns false if duplication is | 1116 are numbered in order given by control flow through them) into TO_REMOVE |
1117 array. Returns false if duplication is | |
1106 impossible. */ | 1118 impossible. */ |
1107 | 1119 |
1108 bool | 1120 bool |
1109 duplicate_loop_to_header_edge (struct loop *loop, edge e, | 1121 duplicate_loop_to_header_edge (struct loop *loop, edge e, |
1110 unsigned int ndupl, sbitmap wont_exit, | 1122 unsigned int ndupl, sbitmap wont_exit, |
1117 basic_block header = loop->header, latch = loop->latch; | 1129 basic_block header = loop->header, latch = loop->latch; |
1118 basic_block *new_bbs, *bbs, *first_active; | 1130 basic_block *new_bbs, *bbs, *first_active; |
1119 basic_block new_bb, bb, first_active_latch = NULL; | 1131 basic_block new_bb, bb, first_active_latch = NULL; |
1120 edge ae, latch_edge; | 1132 edge ae, latch_edge; |
1121 edge spec_edges[2], new_spec_edges[2]; | 1133 edge spec_edges[2], new_spec_edges[2]; |
1122 #define SE_LATCH 0 | 1134 const int SE_LATCH = 0; |
1123 #define SE_ORIG 1 | 1135 const int SE_ORIG = 1; |
1124 unsigned i, j, n; | 1136 unsigned i, j, n; |
1125 int is_latch = (latch == e->src); | 1137 int is_latch = (latch == e->src); |
1126 int scale_act = 0, *scale_step = NULL, scale_main = 0; | 1138 profile_probability *scale_step = NULL; |
1127 int scale_after_exit = 0; | 1139 profile_probability scale_main = profile_probability::always (); |
1128 int p, freq_in, freq_le, freq_out_orig; | 1140 profile_probability scale_act = profile_probability::always (); |
1129 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main; | 1141 profile_count after_exit_num = profile_count::zero (), |
1142 after_exit_den = profile_count::zero (); | |
1143 bool scale_after_exit = false; | |
1130 int add_irreducible_flag; | 1144 int add_irreducible_flag; |
1131 basic_block place_after; | 1145 basic_block place_after; |
1132 bitmap bbs_to_scale = NULL; | 1146 bitmap bbs_to_scale = NULL; |
1133 bitmap_iterator bi; | 1147 bitmap_iterator bi; |
1134 | 1148 |
1163 /* Find edge from latch. */ | 1177 /* Find edge from latch. */ |
1164 latch_edge = loop_latch_edge (loop); | 1178 latch_edge = loop_latch_edge (loop); |
1165 | 1179 |
1166 if (flags & DLTHE_FLAG_UPDATE_FREQ) | 1180 if (flags & DLTHE_FLAG_UPDATE_FREQ) |
1167 { | 1181 { |
1168 /* Calculate coefficients by that we have to scale frequencies | 1182 /* Calculate coefficients by that we have to scale counts |
1169 of duplicated loop bodies. */ | 1183 of duplicated loop bodies. */ |
1170 freq_in = header->frequency; | 1184 profile_count count_in = header->count; |
1171 freq_le = EDGE_FREQUENCY (latch_edge); | 1185 profile_count count_le = latch_edge->count (); |
1172 if (freq_in == 0) | 1186 profile_count count_out_orig = orig ? orig->count () : count_in - count_le; |
1173 freq_in = 1; | 1187 profile_probability prob_pass_thru = count_le.probability_in (count_in); |
1174 if (freq_in < freq_le) | 1188 profile_probability prob_pass_wont_exit = |
1175 freq_in = freq_le; | 1189 (count_le + count_out_orig).probability_in (count_in); |
1176 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le; | |
1177 if (freq_out_orig > freq_in - freq_le) | |
1178 freq_out_orig = freq_in - freq_le; | |
1179 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in); | |
1180 prob_pass_wont_exit = | |
1181 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in); | |
1182 | 1190 |
1183 if (orig && orig->probability.initialized_p () | 1191 if (orig && orig->probability.initialized_p () |
1184 && !(orig->probability == profile_probability::always ())) | 1192 && !(orig->probability == profile_probability::always ())) |
1185 { | 1193 { |
1186 /* The blocks that are dominated by a removed exit edge ORIG have | 1194 /* The blocks that are dominated by a removed exit edge ORIG have |
1187 frequencies scaled by this. */ | 1195 frequencies scaled by this. */ |
1188 if (orig->probability.initialized_p ()) | 1196 if (orig->count ().initialized_p ()) |
1189 scale_after_exit | 1197 { |
1190 = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, | 1198 after_exit_num = orig->src->count; |
1191 REG_BR_PROB_BASE | 1199 after_exit_den = after_exit_num - orig->count (); |
1192 - orig->probability.to_reg_br_prob_base ()); | 1200 scale_after_exit = true; |
1193 else | 1201 } |
1194 scale_after_exit = REG_BR_PROB_BASE; | |
1195 bbs_to_scale = BITMAP_ALLOC (NULL); | 1202 bbs_to_scale = BITMAP_ALLOC (NULL); |
1196 for (i = 0; i < n; i++) | 1203 for (i = 0; i < n; i++) |
1197 { | 1204 { |
1198 if (bbs[i] != orig->src | 1205 if (bbs[i] != orig->src |
1199 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src)) | 1206 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src)) |
1200 bitmap_set_bit (bbs_to_scale, i); | 1207 bitmap_set_bit (bbs_to_scale, i); |
1201 } | 1208 } |
1202 } | 1209 } |
1203 | 1210 |
1204 scale_step = XNEWVEC (int, ndupl); | 1211 scale_step = XNEWVEC (profile_probability, ndupl); |
1205 | 1212 |
1206 for (i = 1; i <= ndupl; i++) | 1213 for (i = 1; i <= ndupl; i++) |
1207 scale_step[i - 1] = bitmap_bit_p (wont_exit, i) | 1214 scale_step[i - 1] = bitmap_bit_p (wont_exit, i) |
1208 ? prob_pass_wont_exit | 1215 ? prob_pass_wont_exit |
1209 : prob_pass_thru; | 1216 : prob_pass_thru; |
1210 | 1217 |
1211 /* Complete peeling is special as the probability of exit in last | 1218 /* Complete peeling is special as the probability of exit in last |
1212 copy becomes 1. */ | 1219 copy becomes 1. */ |
1213 if (flags & DLTHE_FLAG_COMPLETTE_PEEL) | 1220 if (flags & DLTHE_FLAG_COMPLETTE_PEEL) |
1214 { | 1221 { |
1215 int wanted_freq = EDGE_FREQUENCY (e); | 1222 profile_count wanted_count = e->count (); |
1216 | |
1217 if (wanted_freq > freq_in) | |
1218 wanted_freq = freq_in; | |
1219 | 1223 |
1220 gcc_assert (!is_latch); | 1224 gcc_assert (!is_latch); |
1221 /* First copy has frequency of incoming edge. Each subsequent | 1225 /* First copy has count of incoming edge. Each subsequent |
1222 frequency should be reduced by prob_pass_wont_exit. Caller | 1226 count should be reduced by prob_pass_wont_exit. Caller |
1223 should've managed the flags so all except for original loop | 1227 should've managed the flags so all except for original loop |
1224 has won't exist set. */ | 1228 has won't exist set. */ |
1225 scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in); | 1229 scale_act = wanted_count.probability_in (count_in); |
1226 /* Now simulate the duplication adjustments and compute header | 1230 /* Now simulate the duplication adjustments and compute header |
1227 frequency of the last copy. */ | 1231 frequency of the last copy. */ |
1228 for (i = 0; i < ndupl; i++) | 1232 for (i = 0; i < ndupl; i++) |
1229 wanted_freq = combine_probabilities (wanted_freq, scale_step[i]); | 1233 wanted_count = wanted_count.apply_probability (scale_step [i]); |
1230 scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in); | 1234 scale_main = wanted_count.probability_in (count_in); |
1231 } | 1235 } |
1236 /* Here we insert loop bodies inside the loop itself (for loop unrolling). | |
1237 First iteration will be original loop followed by duplicated bodies. | |
1238 It is necessary to scale down the original so we get right overall | |
1239 number of iterations. */ | |
1232 else if (is_latch) | 1240 else if (is_latch) |
1233 { | 1241 { |
1234 prob_pass_main = bitmap_bit_p (wont_exit, 0) | 1242 profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0) |
1235 ? prob_pass_wont_exit | 1243 ? prob_pass_wont_exit |
1236 : prob_pass_thru; | 1244 : prob_pass_thru; |
1237 p = prob_pass_main; | 1245 profile_probability p = prob_pass_main; |
1238 scale_main = REG_BR_PROB_BASE; | 1246 profile_count scale_main_den = count_in; |
1239 for (i = 0; i < ndupl; i++) | 1247 for (i = 0; i < ndupl; i++) |
1240 { | 1248 { |
1241 scale_main += p; | 1249 scale_main_den += count_in.apply_probability (p); |
1242 p = combine_probabilities (p, scale_step[i]); | 1250 p = p * scale_step[i]; |
1243 } | 1251 } |
1244 scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main); | 1252 /* If original loop is executed COUNT_IN times, the unrolled |
1245 scale_act = combine_probabilities (scale_main, prob_pass_main); | 1253 loop will account SCALE_MAIN_DEN times. */ |
1254 scale_main = count_in.probability_in (scale_main_den); | |
1255 scale_act = scale_main * prob_pass_main; | |
1246 } | 1256 } |
1247 else | 1257 else |
1248 { | 1258 { |
1249 int preheader_freq = EDGE_FREQUENCY (e); | 1259 profile_count preheader_count = e->count (); |
1250 scale_main = REG_BR_PROB_BASE; | |
1251 for (i = 0; i < ndupl; i++) | 1260 for (i = 0; i < ndupl; i++) |
1252 scale_main = combine_probabilities (scale_main, scale_step[i]); | 1261 scale_main = scale_main * scale_step[i]; |
1253 if (preheader_freq > freq_in) | 1262 scale_act = preheader_count.probability_in (count_in); |
1254 preheader_freq = freq_in; | 1263 } |
1255 scale_act = GCOV_COMPUTE_SCALE (preheader_freq, freq_in); | |
1256 } | |
1257 for (i = 0; i < ndupl; i++) | |
1258 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE); | |
1259 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE | |
1260 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE); | |
1261 } | 1264 } |
1262 | 1265 |
1263 /* Loop the new bbs will belong to. */ | 1266 /* Loop the new bbs will belong to. */ |
1264 target = e->src->loop_father; | 1267 target = e->src->loop_father; |
1265 | 1268 |
1348 if (to_remove) | 1351 if (to_remove) |
1349 to_remove->safe_push (new_spec_edges[SE_ORIG]); | 1352 to_remove->safe_push (new_spec_edges[SE_ORIG]); |
1350 force_edge_cold (new_spec_edges[SE_ORIG], true); | 1353 force_edge_cold (new_spec_edges[SE_ORIG], true); |
1351 | 1354 |
1352 /* Scale the frequencies of the blocks dominated by the exit. */ | 1355 /* Scale the frequencies of the blocks dominated by the exit. */ |
1353 if (bbs_to_scale) | 1356 if (bbs_to_scale && scale_after_exit) |
1354 { | 1357 { |
1355 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) | 1358 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) |
1356 { | 1359 scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num, |
1357 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit, | 1360 after_exit_den); |
1358 REG_BR_PROB_BASE); | |
1359 } | |
1360 } | 1361 } |
1361 } | 1362 } |
1362 | 1363 |
1363 /* Record the first copy in the control flow order if it is not | 1364 /* Record the first copy in the control flow order if it is not |
1364 the original loop (i.e. in case of peeling). */ | 1365 the original loop (i.e. in case of peeling). */ |
1369 } | 1370 } |
1370 | 1371 |
1371 /* Set counts and frequencies. */ | 1372 /* Set counts and frequencies. */ |
1372 if (flags & DLTHE_FLAG_UPDATE_FREQ) | 1373 if (flags & DLTHE_FLAG_UPDATE_FREQ) |
1373 { | 1374 { |
1374 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE); | 1375 scale_bbs_frequencies (new_bbs, n, scale_act); |
1375 scale_act = combine_probabilities (scale_act, scale_step[j]); | 1376 scale_act = scale_act * scale_step[j]; |
1376 } | 1377 } |
1377 } | 1378 } |
1378 free (new_bbs); | 1379 free (new_bbs); |
1379 free (orig_loops); | 1380 free (orig_loops); |
1380 | 1381 |
1384 if (to_remove) | 1385 if (to_remove) |
1385 to_remove->safe_push (orig); | 1386 to_remove->safe_push (orig); |
1386 force_edge_cold (orig, true); | 1387 force_edge_cold (orig, true); |
1387 | 1388 |
1388 /* Scale the frequencies of the blocks dominated by the exit. */ | 1389 /* Scale the frequencies of the blocks dominated by the exit. */ |
1389 if (bbs_to_scale) | 1390 if (bbs_to_scale && scale_after_exit) |
1390 { | 1391 { |
1391 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) | 1392 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) |
1392 { | 1393 scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num, |
1393 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit, | 1394 after_exit_den); |
1394 REG_BR_PROB_BASE); | |
1395 } | |
1396 } | 1395 } |
1397 } | 1396 } |
1398 | 1397 |
1399 /* Update the original loop. */ | 1398 /* Update the original loop. */ |
1400 if (!is_latch) | 1399 if (!is_latch) |
1401 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); | 1400 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); |
1402 if (flags & DLTHE_FLAG_UPDATE_FREQ) | 1401 if (flags & DLTHE_FLAG_UPDATE_FREQ) |
1403 { | 1402 { |
1404 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE); | 1403 scale_bbs_frequencies (bbs, n, scale_main); |
1405 free (scale_step); | 1404 free (scale_step); |
1406 } | 1405 } |
1407 | 1406 |
1408 /* Update dominators of outer blocks if affected. */ | 1407 /* Update dominators of outer blocks if affected. */ |
1409 for (i = 0; i < n; i++) | 1408 for (i = 0; i < n; i++) |
1515 return NULL; | 1514 return NULL; |
1516 } | 1515 } |
1517 | 1516 |
1518 mfb_kj_edge = loop_latch_edge (loop); | 1517 mfb_kj_edge = loop_latch_edge (loop); |
1519 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0; | 1518 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0; |
1520 if (nentry == 1) | 1519 if (nentry == 1 |
1520 && ((flags & CP_FALLTHRU_PREHEADERS) == 0 | |
1521 || (single_entry->flags & EDGE_CROSSING) == 0)) | |
1521 dummy = split_edge (single_entry); | 1522 dummy = split_edge (single_entry); |
1522 else | 1523 else |
1523 { | 1524 { |
1524 edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL); | 1525 edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL); |
1525 dummy = fallthru->src; | 1526 dummy = fallthru->src; |