comparison gcc/cfgloopmanip.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
comparison
equal deleted inserted replaced
111:04ced10e8804 131:84e7813d76e9
1 /* Loop manipulation code for GNU compiler. 1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002-2017 Free Software Foundation, Inc. 2 Copyright (C) 2002-2018 Free Software Foundation, Inc.
3 3
4 This file is part of GCC. 4 This file is part of GCC.
5 5
6 GCC is free software; you can redistribute it and/or modify it under 6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free 7 the terms of the GNU General Public License as published by the Free
500 free (bbs); 500 free (bbs);
501 } 501 }
502 502
503 /* Scale profile in LOOP by P. 503 /* Scale profile in LOOP by P.
504 If ITERATION_BOUND is non-zero, scale even further if loop is predicted 504 If ITERATION_BOUND is non-zero, scale even further if loop is predicted
505 to iterate too many times. */ 505 to iterate too many times.
506 Before caling this function, preheader block profile should be already
507 scaled to final count. This is necessary because loop iterations are
508 determined by comparing header edge count to latch ege count and thus
509 they need to be scaled synchronously. */
506 510
507 void 511 void
508 scale_loop_profile (struct loop *loop, profile_probability p, 512 scale_loop_profile (struct loop *loop, profile_probability p,
509 gcov_type iteration_bound) 513 gcov_type iteration_bound)
510 { 514 {
511 gcov_type iterations = expected_loop_iterations_unbounded (loop); 515 edge e, preheader_e;
512 edge e;
513 edge_iterator ei; 516 edge_iterator ei;
514 517
515 if (dump_file && (dump_flags & TDF_DETAILS)) 518 if (dump_file && (dump_flags & TDF_DETAILS))
516 { 519 {
517 fprintf (dump_file, ";; Scaling loop %i with scale ", 520 fprintf (dump_file, ";; Scaling loop %i with scale ",
518 loop->num); 521 loop->num);
519 p.dump (dump_file); 522 p.dump (dump_file);
520 fprintf (dump_file, " bounding iterations to %i from guessed %i\n", 523 fprintf (dump_file, " bounding iterations to %i\n",
521 (int)iteration_bound, (int)iterations); 524 (int)iteration_bound);
525 }
526
527 /* Scale the probabilities. */
528 scale_loop_frequencies (loop, p);
529
530 if (iteration_bound == 0)
531 return;
532
533 gcov_type iterations = expected_loop_iterations_unbounded (loop, NULL, true);
534
535 if (dump_file && (dump_flags & TDF_DETAILS))
536 {
537 fprintf (dump_file, ";; guessed iterations after scaling %i\n",
538 (int)iterations);
522 } 539 }
523 540
524 /* See if loop is predicted to iterate too many times. */ 541 /* See if loop is predicted to iterate too many times. */
525 if (iteration_bound && iterations > 0 542 if (iterations <= iteration_bound)
526 && p.apply (iterations) > iteration_bound) 543 return;
527 { 544
528 /* Fixing loop profile for different trip count is not trivial; the exit 545 preheader_e = loop_preheader_edge (loop);
529 probabilities has to be updated to match and frequencies propagated down 546
530 to the loop body. 547 /* We could handle also loops without preheaders, but bounding is
531 548 currently used only by optimizers that have preheaders constructed. */
532 We fully update only the simple case of loop with single exit that is 549 gcc_checking_assert (preheader_e);
533 either from the latch or BB just before latch and leads from BB with 550 profile_count count_in = preheader_e->count ();
534 simple conditional jump. This is OK for use in vectorizer. */ 551
552 if (count_in > profile_count::zero ()
553 && loop->header->count.initialized_p ())
554 {
555 profile_count count_delta = profile_count::zero ();
556
535 e = single_exit (loop); 557 e = single_exit (loop);
536 if (e) 558 if (e)
537 { 559 {
538 edge other_e; 560 edge other_e;
539 int freq_delta; 561 FOR_EACH_EDGE (other_e, ei, e->src->succs)
540 profile_count count_delta;
541
542 FOR_EACH_EDGE (other_e, ei, e->src->succs)
543 if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE)) 562 if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
544 && e != other_e) 563 && e != other_e)
545 break; 564 break;
546 565
547 /* Probability of exit must be 1/iterations. */ 566 /* Probability of exit must be 1/iterations. */
548 freq_delta = EDGE_FREQUENCY (e);
549 count_delta = e->count (); 567 count_delta = e->count ();
550 e->probability = profile_probability::always () 568 e->probability = profile_probability::always ()
551 .apply_scale (1, iteration_bound); 569 .apply_scale (1, iteration_bound);
552 other_e->probability = e->probability.invert (); 570 other_e->probability = e->probability.invert ();
553 freq_delta -= EDGE_FREQUENCY (e); 571
554 count_delta -= e->count (); 572 /* In code below we only handle the following two updates. */
555 573 if (other_e->dest != loop->header
556 /* If latch exists, change its frequency and count, since we changed 574 && other_e->dest != loop->latch
557 probability of exit. Theoretically we should update everything from 575 && (dump_file && (dump_flags & TDF_DETAILS)))
558 source of exit edge to latch, but for vectorizer this is enough. */
559 if (loop->latch
560 && loop->latch != e->src)
561 { 576 {
562 loop->latch->frequency += freq_delta; 577 fprintf (dump_file, ";; giving up on update of paths from "
563 if (loop->latch->frequency < 0) 578 "exit condition to latch\n");
564 loop->latch->frequency = 0;
565 loop->latch->count += count_delta;
566 } 579 }
567 } 580 }
581 else
582 if (dump_file && (dump_flags & TDF_DETAILS))
583 fprintf (dump_file, ";; Loop has multiple exit edges; "
584 "giving up on exit condition update\n");
568 585
569 /* Roughly speaking we want to reduce the loop body profile by the 586 /* Roughly speaking we want to reduce the loop body profile by the
570 difference of loop iterations. We however can do better if 587 difference of loop iterations. We however can do better if
571 we look at the actual profile, if it is available. */ 588 we look at the actual profile, if it is available. */
572 p = p.apply_scale (iteration_bound, iterations); 589 p = profile_probability::always ();
573 590
574 bool determined = false; 591 count_in = count_in.apply_scale (iteration_bound, 1);
575 if (loop->header->count.initialized_p ()) 592 p = count_in.probability_in (loop->header->count);
576 {
577 profile_count count_in = profile_count::zero ();
578
579 FOR_EACH_EDGE (e, ei, loop->header->preds)
580 if (e->src != loop->latch)
581 count_in += e->count ();
582
583 if (count_in > profile_count::zero () )
584 {
585 p = count_in.probability_in (loop->header->count.apply_scale
586 (iteration_bound, 1));
587 determined = true;
588 }
589 }
590 if (!determined && loop->header->frequency)
591 {
592 int freq_in = 0;
593
594 FOR_EACH_EDGE (e, ei, loop->header->preds)
595 if (e->src != loop->latch)
596 freq_in += EDGE_FREQUENCY (e);
597
598 if (freq_in != 0)
599 p = profile_probability::probability_in_gcov_type
600 (freq_in * iteration_bound, loop->header->frequency);
601 }
602 if (!(p > profile_probability::never ())) 593 if (!(p > profile_probability::never ()))
603 p = profile_probability::very_unlikely (); 594 p = profile_probability::very_unlikely ();
604 } 595
605 596 if (p == profile_probability::always ()
606 if (p >= profile_probability::always () 597 || !p.initialized_p ())
607 || !p.initialized_p ()) 598 return;
608 return; 599
609 600 /* If latch exists, change its count, since we changed
610 /* Scale the actual probabilities. */ 601 probability of exit. Theoretically we should update everything from
611 scale_loop_frequencies (loop, p); 602 source of exit edge to latch, but for vectorizer this is enough. */
612 if (dump_file && (dump_flags & TDF_DETAILS)) 603 if (loop->latch && loop->latch != e->src)
613 fprintf (dump_file, ";; guessed iterations are now %i\n", 604 loop->latch->count += count_delta;
614 (int)expected_loop_iterations_unbounded (loop)); 605
606 /* Scale the probabilities. */
607 scale_loop_frequencies (loop, p);
608
609 /* Change latch's count back. */
610 if (loop->latch && loop->latch != e->src)
611 loop->latch->count -= count_delta;
612
613 if (dump_file && (dump_flags & TDF_DETAILS))
614 fprintf (dump_file, ";; guessed iterations are now %i\n",
615 (int)expected_loop_iterations_unbounded (loop, NULL, true));
616 }
615 } 617 }
616 618
617 /* Recompute dominance information for basic blocks outside LOOP. */ 619 /* Recompute dominance information for basic blocks outside LOOP. */
618 620
619 static void 621 static void
798 loop = alloc_loop (); 800 loop = alloc_loop ();
799 loop->header = loop_header; 801 loop->header = loop_header;
800 loop->latch = loop_latch; 802 loop->latch = loop_latch;
801 add_loop (loop, outer); 803 add_loop (loop, outer);
802 804
803 /* TODO: Fix frequencies and counts. */ 805 /* TODO: Fix counts. */
804 scale_loop_frequencies (loop, profile_probability::even ()); 806 scale_loop_frequencies (loop, profile_probability::even ());
805 807
806 /* Update dominators. */ 808 /* Update dominators. */
807 update_dominators_in_loop (loop); 809 update_dominators_in_loop (loop);
808 810
864 { 866 {
865 basic_block succ_bb = latch_edge->dest; 867 basic_block succ_bb = latch_edge->dest;
866 basic_block pred_bb = header_edge->src; 868 basic_block pred_bb = header_edge->src;
867 struct loop *loop = alloc_loop (); 869 struct loop *loop = alloc_loop ();
868 struct loop *outer = loop_outer (succ_bb->loop_father); 870 struct loop *outer = loop_outer (succ_bb->loop_father);
869 int freq;
870 profile_count cnt; 871 profile_count cnt;
871 872
872 loop->header = header_edge->dest; 873 loop->header = header_edge->dest;
873 loop->latch = latch_edge->src; 874 loop->latch = latch_edge->src;
874 875
875 freq = EDGE_FREQUENCY (header_edge);
876 cnt = header_edge->count (); 876 cnt = header_edge->count ();
877 877
878 /* Redirect edges. */ 878 /* Redirect edges. */
879 loop_redirect_edge (latch_edge, loop->header); 879 loop_redirect_edge (latch_edge, loop->header);
880 loop_redirect_edge (true_edge, succ_bb); 880 loop_redirect_edge (true_edge, succ_bb);
899 /* Add switch_bb to appropriate loop. */ 899 /* Add switch_bb to appropriate loop. */
900 if (switch_bb->loop_father) 900 if (switch_bb->loop_father)
901 remove_bb_from_loops (switch_bb); 901 remove_bb_from_loops (switch_bb);
902 add_bb_to_loop (switch_bb, outer); 902 add_bb_to_loop (switch_bb, outer);
903 903
904 /* Fix frequencies. */ 904 /* Fix counts. */
905 if (redirect_all_edges) 905 if (redirect_all_edges)
906 { 906 {
907 switch_bb->frequency = freq;
908 switch_bb->count = cnt; 907 switch_bb->count = cnt;
909 } 908 }
910 scale_loop_frequencies (loop, false_scale); 909 scale_loop_frequencies (loop, false_scale);
911 scale_loop_frequencies (succ_bb->loop_father, true_scale); 910 scale_loop_frequencies (succ_bb->loop_father, true_scale);
912 update_dominators_in_loop (loop); 911 update_dominators_in_loop (loop);
1021 |= loop->warned_aggressive_loop_optimizations; 1020 |= loop->warned_aggressive_loop_optimizations;
1022 target->in_oacc_kernels_region = loop->in_oacc_kernels_region; 1021 target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
1023 } 1022 }
1024 1023
1025 /* Copies copy of LOOP as subloop of TARGET loop, placing newly 1024 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1026 created loop into loops structure. */ 1025 created loop into loops structure. If AFTER is non-null
1026 the new loop is added at AFTER->next, otherwise in front of TARGETs
1027 sibling list. */
1027 struct loop * 1028 struct loop *
1028 duplicate_loop (struct loop *loop, struct loop *target) 1029 duplicate_loop (struct loop *loop, struct loop *target, struct loop *after)
1029 { 1030 {
1030 struct loop *cloop; 1031 struct loop *cloop;
1031 cloop = alloc_loop (); 1032 cloop = alloc_loop ();
1032 place_new_loop (cfun, cloop); 1033 place_new_loop (cfun, cloop);
1033 1034
1035 1036
1036 /* Mark the new loop as copy of LOOP. */ 1037 /* Mark the new loop as copy of LOOP. */
1037 set_loop_copy (loop, cloop); 1038 set_loop_copy (loop, cloop);
1038 1039
1039 /* Add it to target. */ 1040 /* Add it to target. */
1040 flow_loop_tree_node_add (target, cloop); 1041 flow_loop_tree_node_add (target, cloop, after);
1041 1042
1042 return cloop; 1043 return cloop;
1043 } 1044 }
1044 1045
1045 /* Copies structure of subloops of LOOP into TARGET loop, placing 1046 /* Copies structure of subloops of LOOP into TARGET loop, placing
1046 newly created loops into loop tree. */ 1047 newly created loops into loop tree at the end of TARGETs sibling
1048 list in the original order. */
1047 void 1049 void
1048 duplicate_subloops (struct loop *loop, struct loop *target) 1050 duplicate_subloops (struct loop *loop, struct loop *target)
1049 { 1051 {
1050 struct loop *aloop, *cloop; 1052 struct loop *aloop, *cloop, *tail;
1051 1053
1054 for (tail = target->inner; tail && tail->next; tail = tail->next)
1055 ;
1052 for (aloop = loop->inner; aloop; aloop = aloop->next) 1056 for (aloop = loop->inner; aloop; aloop = aloop->next)
1053 { 1057 {
1054 cloop = duplicate_loop (aloop, target); 1058 cloop = duplicate_loop (aloop, target, tail);
1059 tail = cloop;
1060 gcc_assert(!tail->next);
1055 duplicate_subloops (aloop, cloop); 1061 duplicate_subloops (aloop, cloop);
1056 } 1062 }
1057 } 1063 }
1058 1064
1059 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS, 1065 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1060 into TARGET loop, placing newly created loops into loop tree. */ 1066 into TARGET loop, placing newly created loops into loop tree adding
1067 them to TARGETs sibling list at the end in order. */
1061 static void 1068 static void
1062 copy_loops_to (struct loop **copied_loops, int n, struct loop *target) 1069 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
1063 { 1070 {
1064 struct loop *aloop; 1071 struct loop *aloop, *tail;
1065 int i; 1072 int i;
1066 1073
1074 for (tail = target->inner; tail && tail->next; tail = tail->next)
1075 ;
1067 for (i = 0; i < n; i++) 1076 for (i = 0; i < n; i++)
1068 { 1077 {
1069 aloop = duplicate_loop (copied_loops[i], target); 1078 aloop = duplicate_loop (copied_loops[i], target, tail);
1079 tail = aloop;
1080 gcc_assert(!tail->next);
1070 duplicate_subloops (copied_loops[i], aloop); 1081 duplicate_subloops (copied_loops[i], aloop);
1071 } 1082 }
1072 } 1083 }
1073 1084
1074 /* Redirects edge E to basic block DEST. */ 1085 /* Redirects edge E to basic block DEST. */
1093 1104
1094 return ret; 1105 return ret;
1095 } 1106 }
1096 1107
1097 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating 1108 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
1098 loop structure and dominators. E's destination must be LOOP header for 1109 loop structure and dominators (order of inner subloops is retained).
1099 this to work, i.e. it must be entry or latch edge of this loop; these are 1110 E's destination must be LOOP header for this to work, i.e. it must be entry
1100 unique, as the loops must have preheaders for this function to work 1111 or latch edge of this loop; these are unique, as the loops must have
1101 correctly (in case E is latch, the function unrolls the loop, if E is entry 1112 preheaders for this function to work correctly (in case E is latch, the
1102 edge, it peels the loop). Store edges created by copying ORIG edge from 1113 function unrolls the loop, if E is entry edge, it peels the loop). Store
1103 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to 1114 edges created by copying ORIG edge from copies corresponding to set bits in
1104 original LOOP body, the other copies are numbered in order given by control 1115 WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
1105 flow through them) into TO_REMOVE array. Returns false if duplication is 1116 are numbered in order given by control flow through them) into TO_REMOVE
1117 array. Returns false if duplication is
1106 impossible. */ 1118 impossible. */
1107 1119
1108 bool 1120 bool
1109 duplicate_loop_to_header_edge (struct loop *loop, edge e, 1121 duplicate_loop_to_header_edge (struct loop *loop, edge e,
1110 unsigned int ndupl, sbitmap wont_exit, 1122 unsigned int ndupl, sbitmap wont_exit,
1117 basic_block header = loop->header, latch = loop->latch; 1129 basic_block header = loop->header, latch = loop->latch;
1118 basic_block *new_bbs, *bbs, *first_active; 1130 basic_block *new_bbs, *bbs, *first_active;
1119 basic_block new_bb, bb, first_active_latch = NULL; 1131 basic_block new_bb, bb, first_active_latch = NULL;
1120 edge ae, latch_edge; 1132 edge ae, latch_edge;
1121 edge spec_edges[2], new_spec_edges[2]; 1133 edge spec_edges[2], new_spec_edges[2];
1122 #define SE_LATCH 0 1134 const int SE_LATCH = 0;
1123 #define SE_ORIG 1 1135 const int SE_ORIG = 1;
1124 unsigned i, j, n; 1136 unsigned i, j, n;
1125 int is_latch = (latch == e->src); 1137 int is_latch = (latch == e->src);
1126 int scale_act = 0, *scale_step = NULL, scale_main = 0; 1138 profile_probability *scale_step = NULL;
1127 int scale_after_exit = 0; 1139 profile_probability scale_main = profile_probability::always ();
1128 int p, freq_in, freq_le, freq_out_orig; 1140 profile_probability scale_act = profile_probability::always ();
1129 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main; 1141 profile_count after_exit_num = profile_count::zero (),
1142 after_exit_den = profile_count::zero ();
1143 bool scale_after_exit = false;
1130 int add_irreducible_flag; 1144 int add_irreducible_flag;
1131 basic_block place_after; 1145 basic_block place_after;
1132 bitmap bbs_to_scale = NULL; 1146 bitmap bbs_to_scale = NULL;
1133 bitmap_iterator bi; 1147 bitmap_iterator bi;
1134 1148
1163 /* Find edge from latch. */ 1177 /* Find edge from latch. */
1164 latch_edge = loop_latch_edge (loop); 1178 latch_edge = loop_latch_edge (loop);
1165 1179
1166 if (flags & DLTHE_FLAG_UPDATE_FREQ) 1180 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1167 { 1181 {
1168 /* Calculate coefficients by that we have to scale frequencies 1182 /* Calculate coefficients by that we have to scale counts
1169 of duplicated loop bodies. */ 1183 of duplicated loop bodies. */
1170 freq_in = header->frequency; 1184 profile_count count_in = header->count;
1171 freq_le = EDGE_FREQUENCY (latch_edge); 1185 profile_count count_le = latch_edge->count ();
1172 if (freq_in == 0) 1186 profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1173 freq_in = 1; 1187 profile_probability prob_pass_thru = count_le.probability_in (count_in);
1174 if (freq_in < freq_le) 1188 profile_probability prob_pass_wont_exit =
1175 freq_in = freq_le; 1189 (count_le + count_out_orig).probability_in (count_in);
1176 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
1177 if (freq_out_orig > freq_in - freq_le)
1178 freq_out_orig = freq_in - freq_le;
1179 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
1180 prob_pass_wont_exit =
1181 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
1182 1190
1183 if (orig && orig->probability.initialized_p () 1191 if (orig && orig->probability.initialized_p ()
1184 && !(orig->probability == profile_probability::always ())) 1192 && !(orig->probability == profile_probability::always ()))
1185 { 1193 {
1186 /* The blocks that are dominated by a removed exit edge ORIG have 1194 /* The blocks that are dominated by a removed exit edge ORIG have
1187 frequencies scaled by this. */ 1195 frequencies scaled by this. */
1188 if (orig->probability.initialized_p ()) 1196 if (orig->count ().initialized_p ())
1189 scale_after_exit 1197 {
1190 = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, 1198 after_exit_num = orig->src->count;
1191 REG_BR_PROB_BASE 1199 after_exit_den = after_exit_num - orig->count ();
1192 - orig->probability.to_reg_br_prob_base ()); 1200 scale_after_exit = true;
1193 else 1201 }
1194 scale_after_exit = REG_BR_PROB_BASE;
1195 bbs_to_scale = BITMAP_ALLOC (NULL); 1202 bbs_to_scale = BITMAP_ALLOC (NULL);
1196 for (i = 0; i < n; i++) 1203 for (i = 0; i < n; i++)
1197 { 1204 {
1198 if (bbs[i] != orig->src 1205 if (bbs[i] != orig->src
1199 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src)) 1206 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1200 bitmap_set_bit (bbs_to_scale, i); 1207 bitmap_set_bit (bbs_to_scale, i);
1201 } 1208 }
1202 } 1209 }
1203 1210
1204 scale_step = XNEWVEC (int, ndupl); 1211 scale_step = XNEWVEC (profile_probability, ndupl);
1205 1212
1206 for (i = 1; i <= ndupl; i++) 1213 for (i = 1; i <= ndupl; i++)
1207 scale_step[i - 1] = bitmap_bit_p (wont_exit, i) 1214 scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1208 ? prob_pass_wont_exit 1215 ? prob_pass_wont_exit
1209 : prob_pass_thru; 1216 : prob_pass_thru;
1210 1217
1211 /* Complete peeling is special as the probability of exit in last 1218 /* Complete peeling is special as the probability of exit in last
1212 copy becomes 1. */ 1219 copy becomes 1. */
1213 if (flags & DLTHE_FLAG_COMPLETTE_PEEL) 1220 if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1214 { 1221 {
1215 int wanted_freq = EDGE_FREQUENCY (e); 1222 profile_count wanted_count = e->count ();
1216
1217 if (wanted_freq > freq_in)
1218 wanted_freq = freq_in;
1219 1223
1220 gcc_assert (!is_latch); 1224 gcc_assert (!is_latch);
1221 /* First copy has frequency of incoming edge. Each subsequent 1225 /* First copy has count of incoming edge. Each subsequent
1222 frequency should be reduced by prob_pass_wont_exit. Caller 1226 count should be reduced by prob_pass_wont_exit. Caller
1223 should've managed the flags so all except for original loop 1227 should've managed the flags so all except for original loop
1224 has won't exist set. */ 1228 has won't exist set. */
1225 scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in); 1229 scale_act = wanted_count.probability_in (count_in);
1226 /* Now simulate the duplication adjustments and compute header 1230 /* Now simulate the duplication adjustments and compute header
1227 frequency of the last copy. */ 1231 frequency of the last copy. */
1228 for (i = 0; i < ndupl; i++) 1232 for (i = 0; i < ndupl; i++)
1229 wanted_freq = combine_probabilities (wanted_freq, scale_step[i]); 1233 wanted_count = wanted_count.apply_probability (scale_step [i]);
1230 scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in); 1234 scale_main = wanted_count.probability_in (count_in);
1231 } 1235 }
1236 /* Here we insert loop bodies inside the loop itself (for loop unrolling).
1237 First iteration will be original loop followed by duplicated bodies.
1238 It is necessary to scale down the original so we get right overall
1239 number of iterations. */
1232 else if (is_latch) 1240 else if (is_latch)
1233 { 1241 {
1234 prob_pass_main = bitmap_bit_p (wont_exit, 0) 1242 profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1235 ? prob_pass_wont_exit 1243 ? prob_pass_wont_exit
1236 : prob_pass_thru; 1244 : prob_pass_thru;
1237 p = prob_pass_main; 1245 profile_probability p = prob_pass_main;
1238 scale_main = REG_BR_PROB_BASE; 1246 profile_count scale_main_den = count_in;
1239 for (i = 0; i < ndupl; i++) 1247 for (i = 0; i < ndupl; i++)
1240 { 1248 {
1241 scale_main += p; 1249 scale_main_den += count_in.apply_probability (p);
1242 p = combine_probabilities (p, scale_step[i]); 1250 p = p * scale_step[i];
1243 } 1251 }
1244 scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main); 1252 /* If original loop is executed COUNT_IN times, the unrolled
1245 scale_act = combine_probabilities (scale_main, prob_pass_main); 1253 loop will account SCALE_MAIN_DEN times. */
1254 scale_main = count_in.probability_in (scale_main_den);
1255 scale_act = scale_main * prob_pass_main;
1246 } 1256 }
1247 else 1257 else
1248 { 1258 {
1249 int preheader_freq = EDGE_FREQUENCY (e); 1259 profile_count preheader_count = e->count ();
1250 scale_main = REG_BR_PROB_BASE;
1251 for (i = 0; i < ndupl; i++) 1260 for (i = 0; i < ndupl; i++)
1252 scale_main = combine_probabilities (scale_main, scale_step[i]); 1261 scale_main = scale_main * scale_step[i];
1253 if (preheader_freq > freq_in) 1262 scale_act = preheader_count.probability_in (count_in);
1254 preheader_freq = freq_in; 1263 }
1255 scale_act = GCOV_COMPUTE_SCALE (preheader_freq, freq_in);
1256 }
1257 for (i = 0; i < ndupl; i++)
1258 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
1259 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
1260 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE);
1261 } 1264 }
1262 1265
1263 /* Loop the new bbs will belong to. */ 1266 /* Loop the new bbs will belong to. */
1264 target = e->src->loop_father; 1267 target = e->src->loop_father;
1265 1268
1348 if (to_remove) 1351 if (to_remove)
1349 to_remove->safe_push (new_spec_edges[SE_ORIG]); 1352 to_remove->safe_push (new_spec_edges[SE_ORIG]);
1350 force_edge_cold (new_spec_edges[SE_ORIG], true); 1353 force_edge_cold (new_spec_edges[SE_ORIG], true);
1351 1354
1352 /* Scale the frequencies of the blocks dominated by the exit. */ 1355 /* Scale the frequencies of the blocks dominated by the exit. */
1353 if (bbs_to_scale) 1356 if (bbs_to_scale && scale_after_exit)
1354 { 1357 {
1355 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) 1358 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1356 { 1359 scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1357 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit, 1360 after_exit_den);
1358 REG_BR_PROB_BASE);
1359 }
1360 } 1361 }
1361 } 1362 }
1362 1363
1363 /* Record the first copy in the control flow order if it is not 1364 /* Record the first copy in the control flow order if it is not
1364 the original loop (i.e. in case of peeling). */ 1365 the original loop (i.e. in case of peeling). */
1369 } 1370 }
1370 1371
1371 /* Set counts and frequencies. */ 1372 /* Set counts and frequencies. */
1372 if (flags & DLTHE_FLAG_UPDATE_FREQ) 1373 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1373 { 1374 {
1374 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE); 1375 scale_bbs_frequencies (new_bbs, n, scale_act);
1375 scale_act = combine_probabilities (scale_act, scale_step[j]); 1376 scale_act = scale_act * scale_step[j];
1376 } 1377 }
1377 } 1378 }
1378 free (new_bbs); 1379 free (new_bbs);
1379 free (orig_loops); 1380 free (orig_loops);
1380 1381
1384 if (to_remove) 1385 if (to_remove)
1385 to_remove->safe_push (orig); 1386 to_remove->safe_push (orig);
1386 force_edge_cold (orig, true); 1387 force_edge_cold (orig, true);
1387 1388
1388 /* Scale the frequencies of the blocks dominated by the exit. */ 1389 /* Scale the frequencies of the blocks dominated by the exit. */
1389 if (bbs_to_scale) 1390 if (bbs_to_scale && scale_after_exit)
1390 { 1391 {
1391 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) 1392 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1392 { 1393 scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1393 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit, 1394 after_exit_den);
1394 REG_BR_PROB_BASE);
1395 }
1396 } 1395 }
1397 } 1396 }
1398 1397
1399 /* Update the original loop. */ 1398 /* Update the original loop. */
1400 if (!is_latch) 1399 if (!is_latch)
1401 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); 1400 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1402 if (flags & DLTHE_FLAG_UPDATE_FREQ) 1401 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1403 { 1402 {
1404 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE); 1403 scale_bbs_frequencies (bbs, n, scale_main);
1405 free (scale_step); 1404 free (scale_step);
1406 } 1405 }
1407 1406
1408 /* Update dominators of outer blocks if affected. */ 1407 /* Update dominators of outer blocks if affected. */
1409 for (i = 0; i < n; i++) 1408 for (i = 0; i < n; i++)
1515 return NULL; 1514 return NULL;
1516 } 1515 }
1517 1516
1518 mfb_kj_edge = loop_latch_edge (loop); 1517 mfb_kj_edge = loop_latch_edge (loop);
1519 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0; 1518 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1520 if (nentry == 1) 1519 if (nentry == 1
1520 && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1521 || (single_entry->flags & EDGE_CROSSING) == 0))
1521 dummy = split_edge (single_entry); 1522 dummy = split_edge (single_entry);
1522 else 1523 else
1523 { 1524 {
1524 edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL); 1525 edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1525 dummy = fallthru->src; 1526 dummy = fallthru->src;