comparison gcc/loop-unroll.c @ 55:77e2b8dfacca gcc-4.4.5

update it from 4.4.3 to 4.5.0
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Fri, 12 Feb 2010 23:39:51 +0900
parents a06113de4d67
children b7f97abdc517
comparison
equal deleted inserted replaced
52:c156f1bd5cd9 55:77e2b8dfacca
30 #include "cfglayout.h" 30 #include "cfglayout.h"
31 #include "params.h" 31 #include "params.h"
32 #include "output.h" 32 #include "output.h"
33 #include "expr.h" 33 #include "expr.h"
34 #include "hashtab.h" 34 #include "hashtab.h"
35 #include "recog.h" 35 #include "recog.h"
36 36
37 /* This pass performs loop unrolling and peeling. We only perform these 37 /* This pass performs loop unrolling and peeling. We only perform these
38 optimizations on innermost loops (with single exception) because 38 optimizations on innermost loops (with single exception) because
39 the impact on performance is greatest here, and we want to avoid 39 the impact on performance is greatest here, and we want to avoid
40 unnecessary code size growth. The gain is caused by greater sequentiality 40 unnecessary code size growth. The gain is caused by greater sequentiality
75 { 75 {
76 rtx insn; /* The insn in that the induction variable occurs. */ 76 rtx insn; /* The insn in that the induction variable occurs. */
77 rtx base_var; /* The variable on that the values in the further 77 rtx base_var; /* The variable on that the values in the further
78 iterations are based. */ 78 iterations are based. */
79 rtx step; /* Step of the induction variable. */ 79 rtx step; /* Step of the induction variable. */
80 struct iv_to_split *next; /* Next entry in walking order. */
80 unsigned n_loc; 81 unsigned n_loc;
81 unsigned loc[3]; /* Location where the definition of the induction 82 unsigned loc[3]; /* Location where the definition of the induction
82 variable occurs in the insn. For example if 83 variable occurs in the insn. For example if
83 N_LOC is 2, the expression is located at 84 N_LOC is 2, the expression is located at
84 XEXP (XEXP (single_set, loc[0]), loc[1]). */ 85 XEXP (XEXP (single_set, loc[0]), loc[1]). */
85 }; 86 };
86 87
87 /* Information about accumulators to expand. */ 88 /* Information about accumulators to expand. */
88 89
89 struct var_to_expand 90 struct var_to_expand
90 { 91 {
91 rtx insn; /* The insn in that the variable expansion occurs. */ 92 rtx insn; /* The insn in that the variable expansion occurs. */
92 rtx reg; /* The accumulator which is expanded. */ 93 rtx reg; /* The accumulator which is expanded. */
93 VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */ 94 VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */
94 enum rtx_code op; /* The type of the accumulation - addition, subtraction 95 struct var_to_expand *next; /* Next entry in walking order. */
96 enum rtx_code op; /* The type of the accumulation - addition, subtraction
95 or multiplication. */ 97 or multiplication. */
96 int expansion_count; /* Count the number of expansions generated so far. */ 98 int expansion_count; /* Count the number of expansions generated so far. */
97 int reuse_expansion; /* The expansion we intend to reuse to expand 99 int reuse_expansion; /* The expansion we intend to reuse to expand
98 the accumulator. If REUSE_EXPANSION is 0 reuse 100 the accumulator. If REUSE_EXPANSION is 0 reuse
99 the original accumulator. Else use 101 the original accumulator. Else use
100 var_expansions[REUSE_EXPANSION - 1]. */ 102 var_expansions[REUSE_EXPANSION - 1]. */
101 unsigned accum_pos; /* The position in which the accumulator is placed in 103 unsigned accum_pos; /* The position in which the accumulator is placed in
102 the insn src. For example in x = x + something 104 the insn src. For example in x = x + something
103 accum_pos is 0 while in x = something + x accum_pos 105 accum_pos is 0 while in x = something + x accum_pos
104 is 1. */ 106 is 1. */
108 the unrolled loop. */ 110 the unrolled loop. */
109 111
110 struct opt_info 112 struct opt_info
111 { 113 {
112 htab_t insns_to_split; /* A hashtable of insns to split. */ 114 htab_t insns_to_split; /* A hashtable of insns to split. */
115 struct iv_to_split *iv_to_split_head; /* The first iv to split. */
116 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
113 htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators 117 htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
114 to expand. */ 118 to expand. */
119 struct var_to_expand *var_to_expand_head; /* The first var to expand. */
120 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
115 unsigned first_new_block; /* The first basic block that was 121 unsigned first_new_block; /* The first basic block that was
116 duplicated. */ 122 duplicated. */
117 basic_block loop_exit; /* The loop exit basic block. */ 123 basic_block loop_exit; /* The loop exit basic block. */
118 basic_block loop_preheader; /* The loop preheader basic block. */ 124 basic_block loop_preheader; /* The loop preheader basic block. */
119 }; 125 };
137 static void free_opt_info (struct opt_info *); 143 static void free_opt_info (struct opt_info *);
138 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx); 144 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
139 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx); 145 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
140 static struct iv_to_split *analyze_iv_to_split_insn (rtx); 146 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
141 static void expand_var_during_unrolling (struct var_to_expand *, rtx); 147 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
142 static int insert_var_expansion_initialization (void **, void *); 148 static void insert_var_expansion_initialization (struct var_to_expand *,
143 static int combine_var_copies_in_loop_exit (void **, void *); 149 basic_block);
144 static int release_var_copies (void **, void *); 150 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
151 basic_block);
145 static rtx get_expansion (struct var_to_expand *); 152 static rtx get_expansion (struct var_to_expand *);
146 153
147 /* Unroll and/or peel (depending on FLAGS) LOOPS. */ 154 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
148 void 155 void
149 unroll_and_peel_loops (int flags) 156 unroll_and_peel_loops (int flags)
453 unsigned i; 460 unsigned i;
454 VEC (edge, heap) *remove_edges; 461 VEC (edge, heap) *remove_edges;
455 edge ein; 462 edge ein;
456 struct niter_desc *desc = get_simple_loop_desc (loop); 463 struct niter_desc *desc = get_simple_loop_desc (loop);
457 struct opt_info *opt_info = NULL; 464 struct opt_info *opt_info = NULL;
458 465
459 npeel = desc->niter; 466 npeel = desc->niter;
460 467
461 if (npeel) 468 if (npeel)
462 { 469 {
463 bool ok; 470 bool ok;
464 471
465 wont_exit = sbitmap_alloc (npeel + 1); 472 wont_exit = sbitmap_alloc (npeel + 1);
466 sbitmap_ones (wont_exit); 473 sbitmap_ones (wont_exit);
467 RESET_BIT (wont_exit, 0); 474 RESET_BIT (wont_exit, 0);
468 if (desc->noloop_assumptions) 475 if (desc->noloop_assumptions)
469 RESET_BIT (wont_exit, 1); 476 RESET_BIT (wont_exit, 1);
470 477
471 remove_edges = NULL; 478 remove_edges = NULL;
472 479
473 if (flag_split_ivs_in_unroller) 480 if (flag_split_ivs_in_unroller)
474 opt_info = analyze_insns_in_loop (loop); 481 opt_info = analyze_insns_in_loop (loop);
475 482
476 opt_info_start_duplication (opt_info); 483 opt_info_start_duplication (opt_info);
477 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 484 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
478 npeel, 485 npeel,
479 wont_exit, desc->out_edge, 486 wont_exit, desc->out_edge,
480 &remove_edges, 487 &remove_edges,
483 | (opt_info 490 | (opt_info
484 ? DLTHE_RECORD_COPY_NUMBER : 0)); 491 ? DLTHE_RECORD_COPY_NUMBER : 0));
485 gcc_assert (ok); 492 gcc_assert (ok);
486 493
487 free (wont_exit); 494 free (wont_exit);
488 495
489 if (opt_info) 496 if (opt_info)
490 { 497 {
491 apply_opt_in_copies (opt_info, npeel, false, true); 498 apply_opt_in_copies (opt_info, npeel, false, true);
492 free_opt_info (opt_info); 499 free_opt_info (opt_info);
493 } 500 }
600 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n", 607 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
601 best_unroll + 1, best_copies, nunroll); 608 best_unroll + 1, best_copies, nunroll);
602 609
603 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; 610 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
604 loop->lpt_decision.times = best_unroll; 611 loop->lpt_decision.times = best_unroll;
605 612
606 if (dump_file) 613 if (dump_file)
607 fprintf (dump_file, 614 fprintf (dump_file,
608 ";; Decided to unroll the constant times rolling loop, %d times.\n", 615 ";; Decided to unroll the constant times rolling loop, %d times.\n",
609 loop->lpt_decision.times); 616 loop->lpt_decision.times);
610 } 617 }
640 unsigned max_unroll = loop->lpt_decision.times; 647 unsigned max_unroll = loop->lpt_decision.times;
641 struct niter_desc *desc = get_simple_loop_desc (loop); 648 struct niter_desc *desc = get_simple_loop_desc (loop);
642 bool exit_at_end = loop_exit_at_end_p (loop); 649 bool exit_at_end = loop_exit_at_end_p (loop);
643 struct opt_info *opt_info = NULL; 650 struct opt_info *opt_info = NULL;
644 bool ok; 651 bool ok;
645 652
646 niter = desc->niter; 653 niter = desc->niter;
647 654
648 /* Should not get here (such loop should be peeled instead). */ 655 /* Should not get here (such loop should be peeled instead). */
649 gcc_assert (niter > max_unroll + 1); 656 gcc_assert (niter > max_unroll + 1);
650 657
652 659
653 wont_exit = sbitmap_alloc (max_unroll + 1); 660 wont_exit = sbitmap_alloc (max_unroll + 1);
654 sbitmap_ones (wont_exit); 661 sbitmap_ones (wont_exit);
655 662
656 remove_edges = NULL; 663 remove_edges = NULL;
657 if (flag_split_ivs_in_unroller 664 if (flag_split_ivs_in_unroller
658 || flag_variable_expansion_in_unroller) 665 || flag_variable_expansion_in_unroller)
659 opt_info = analyze_insns_in_loop (loop); 666 opt_info = analyze_insns_in_loop (loop);
660 667
661 if (!exit_at_end) 668 if (!exit_at_end)
662 { 669 {
663 /* The exit is not at the end of the loop; leave exit test 670 /* The exit is not at the end of the loop; leave exit test
664 in the first copy, so that the loops that start with test 671 in the first copy, so that the loops that start with test
665 of exit condition have continuous body after unrolling. */ 672 of exit condition have continuous body after unrolling. */
684 ? DLTHE_RECORD_COPY_NUMBER 691 ? DLTHE_RECORD_COPY_NUMBER
685 : 0)); 692 : 0));
686 gcc_assert (ok); 693 gcc_assert (ok);
687 694
688 if (opt_info && exit_mod > 1) 695 if (opt_info && exit_mod > 1)
689 apply_opt_in_copies (opt_info, exit_mod, false, false); 696 apply_opt_in_copies (opt_info, exit_mod, false, false);
690 697
691 desc->noloop_assumptions = NULL_RTX; 698 desc->noloop_assumptions = NULL_RTX;
692 desc->niter -= exit_mod; 699 desc->niter -= exit_mod;
693 desc->niter_max -= exit_mod; 700 desc->niter_max -= exit_mod;
694 } 701 }
695 702
710 || desc->noloop_assumptions) 717 || desc->noloop_assumptions)
711 { 718 {
712 RESET_BIT (wont_exit, 0); 719 RESET_BIT (wont_exit, 0);
713 if (desc->noloop_assumptions) 720 if (desc->noloop_assumptions)
714 RESET_BIT (wont_exit, 1); 721 RESET_BIT (wont_exit, 1);
715 722
716 opt_info_start_duplication (opt_info); 723 opt_info_start_duplication (opt_info);
717 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 724 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
718 exit_mod + 1, 725 exit_mod + 1,
719 wont_exit, desc->out_edge, 726 wont_exit, desc->out_edge,
720 &remove_edges, 727 &remove_edges,
721 DLTHE_FLAG_UPDATE_FREQ 728 DLTHE_FLAG_UPDATE_FREQ
722 | (opt_info && exit_mod > 0 729 | (opt_info && exit_mod > 0
723 ? DLTHE_RECORD_COPY_NUMBER 730 ? DLTHE_RECORD_COPY_NUMBER
724 : 0)); 731 : 0));
725 gcc_assert (ok); 732 gcc_assert (ok);
726 733
727 if (opt_info && exit_mod > 0) 734 if (opt_info && exit_mod > 0)
728 apply_opt_in_copies (opt_info, exit_mod + 1, false, false); 735 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
729 736
730 desc->niter -= exit_mod + 1; 737 desc->niter -= exit_mod + 1;
731 desc->niter_max -= exit_mod + 1; 738 desc->niter_max -= exit_mod + 1;
737 744
738 RESET_BIT (wont_exit, max_unroll); 745 RESET_BIT (wont_exit, max_unroll);
739 } 746 }
740 747
741 /* Now unroll the loop. */ 748 /* Now unroll the loop. */
742 749
743 opt_info_start_duplication (opt_info); 750 opt_info_start_duplication (opt_info);
744 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 751 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
745 max_unroll, 752 max_unroll,
746 wont_exit, desc->out_edge, 753 wont_exit, desc->out_edge,
747 &remove_edges, 754 &remove_edges,
761 768
762 if (exit_at_end) 769 if (exit_at_end)
763 { 770 {
764 basic_block exit_block = get_bb_copy (desc->in_edge->src); 771 basic_block exit_block = get_bb_copy (desc->in_edge->src);
765 /* Find a new in and out edge; they are in the last copy we have made. */ 772 /* Find a new in and out edge; they are in the last copy we have made. */
766 773
767 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) 774 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
768 { 775 {
769 desc->out_edge = EDGE_SUCC (exit_block, 0); 776 desc->out_edge = EDGE_SUCC (exit_block, 0);
770 desc->in_edge = EDGE_SUCC (exit_block, 1); 777 desc->in_edge = EDGE_SUCC (exit_block, 1);
771 } 778 }
860 for (i = 1; 2 * i <= nunroll; i *= 2) 867 for (i = 1; 2 * i <= nunroll; i *= 2)
861 continue; 868 continue;
862 869
863 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; 870 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
864 loop->lpt_decision.times = i - 1; 871 loop->lpt_decision.times = i - 1;
865 872
866 if (dump_file) 873 if (dump_file)
867 fprintf (dump_file, 874 fprintf (dump_file,
868 ";; Decided to unroll the runtime computable " 875 ";; Decided to unroll the runtime computable "
869 "times rolling loop, %d times.\n", 876 "times rolling loop, %d times.\n",
870 loop->lpt_decision.times); 877 loop->lpt_decision.times);
879 { 886 {
880 basic_block bb; 887 basic_block bb;
881 888
882 if (!insns) 889 if (!insns)
883 return NULL; 890 return NULL;
884 bb = split_edge (e); 891 bb = split_edge (e);
885 emit_insn_after (insns, BB_END (bb)); 892 emit_insn_after (insns, BB_END (bb));
886 893
887 /* ??? We used to assume that INSNS can contain control flow insns, and 894 /* ??? We used to assume that INSNS can contain control flow insns, and
888 that we had to try to find sub basic blocks in BB to maintain a valid 895 that we had to try to find sub basic blocks in BB to maintain a valid
889 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB 896 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
964 unsigned max_unroll = loop->lpt_decision.times; 971 unsigned max_unroll = loop->lpt_decision.times;
965 struct niter_desc *desc = get_simple_loop_desc (loop); 972 struct niter_desc *desc = get_simple_loop_desc (loop);
966 bool exit_at_end = loop_exit_at_end_p (loop); 973 bool exit_at_end = loop_exit_at_end_p (loop);
967 struct opt_info *opt_info = NULL; 974 struct opt_info *opt_info = NULL;
968 bool ok; 975 bool ok;
969 976
970 if (flag_split_ivs_in_unroller 977 if (flag_split_ivs_in_unroller
971 || flag_variable_expansion_in_unroller) 978 || flag_variable_expansion_in_unroller)
972 opt_info = analyze_insns_in_loop (loop); 979 opt_info = analyze_insns_in_loop (loop);
973 980
974 /* Remember blocks whose dominators will have to be updated. */ 981 /* Remember blocks whose dominators will have to be updated. */
975 dom_bbs = NULL; 982 dom_bbs = NULL;
976 983
977 body = get_loop_body (loop); 984 body = get_loop_body (loop);
978 for (i = 0; i < loop->num_nodes; i++) 985 for (i = 0; i < loop->num_nodes; i++)
1110 /* And unroll loop. */ 1117 /* And unroll loop. */
1111 1118
1112 sbitmap_ones (wont_exit); 1119 sbitmap_ones (wont_exit);
1113 RESET_BIT (wont_exit, may_exit_copy); 1120 RESET_BIT (wont_exit, may_exit_copy);
1114 opt_info_start_duplication (opt_info); 1121 opt_info_start_duplication (opt_info);
1115 1122
1116 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1123 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1117 max_unroll, 1124 max_unroll,
1118 wont_exit, desc->out_edge, 1125 wont_exit, desc->out_edge,
1119 &remove_edges, 1126 &remove_edges,
1120 DLTHE_FLAG_UPDATE_FREQ 1127 DLTHE_FLAG_UPDATE_FREQ
1121 | (opt_info 1128 | (opt_info
1122 ? DLTHE_RECORD_COPY_NUMBER 1129 ? DLTHE_RECORD_COPY_NUMBER
1123 : 0)); 1130 : 0));
1124 gcc_assert (ok); 1131 gcc_assert (ok);
1125 1132
1126 if (opt_info) 1133 if (opt_info)
1127 { 1134 {
1128 apply_opt_in_copies (opt_info, max_unroll, true, true); 1135 apply_opt_in_copies (opt_info, max_unroll, true, true);
1129 free_opt_info (opt_info); 1136 free_opt_info (opt_info);
1130 } 1137 }
1134 if (exit_at_end) 1141 if (exit_at_end)
1135 { 1142 {
1136 basic_block exit_block = get_bb_copy (desc->in_edge->src); 1143 basic_block exit_block = get_bb_copy (desc->in_edge->src);
1137 /* Find a new in and out edge; they are in the last copy we have 1144 /* Find a new in and out edge; they are in the last copy we have
1138 made. */ 1145 made. */
1139 1146
1140 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) 1147 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1141 { 1148 {
1142 desc->out_edge = EDGE_SUCC (exit_block, 0); 1149 desc->out_edge = EDGE_SUCC (exit_block, 0);
1143 desc->in_edge = EDGE_SUCC (exit_block, 1); 1150 desc->in_edge = EDGE_SUCC (exit_block, 1);
1144 } 1151 }
1257 } 1264 }
1258 1265
1259 /* Success. */ 1266 /* Success. */
1260 loop->lpt_decision.decision = LPT_PEEL_SIMPLE; 1267 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1261 loop->lpt_decision.times = npeel; 1268 loop->lpt_decision.times = npeel;
1262 1269
1263 if (dump_file) 1270 if (dump_file)
1264 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n", 1271 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1265 loop->lpt_decision.times); 1272 loop->lpt_decision.times);
1266 } 1273 }
1267 1274
1285 sbitmap wont_exit; 1292 sbitmap wont_exit;
1286 unsigned npeel = loop->lpt_decision.times; 1293 unsigned npeel = loop->lpt_decision.times;
1287 struct niter_desc *desc = get_simple_loop_desc (loop); 1294 struct niter_desc *desc = get_simple_loop_desc (loop);
1288 struct opt_info *opt_info = NULL; 1295 struct opt_info *opt_info = NULL;
1289 bool ok; 1296 bool ok;
1290 1297
1291 if (flag_split_ivs_in_unroller && npeel > 1) 1298 if (flag_split_ivs_in_unroller && npeel > 1)
1292 opt_info = analyze_insns_in_loop (loop); 1299 opt_info = analyze_insns_in_loop (loop);
1293 1300
1294 wont_exit = sbitmap_alloc (npeel + 1); 1301 wont_exit = sbitmap_alloc (npeel + 1);
1295 sbitmap_zero (wont_exit); 1302 sbitmap_zero (wont_exit);
1296 1303
1297 opt_info_start_duplication (opt_info); 1304 opt_info_start_duplication (opt_info);
1298 1305
1299 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 1306 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1300 npeel, wont_exit, NULL, 1307 npeel, wont_exit, NULL,
1301 NULL, DLTHE_FLAG_UPDATE_FREQ 1308 NULL, DLTHE_FLAG_UPDATE_FREQ
1302 | (opt_info 1309 | (opt_info
1303 ? DLTHE_RECORD_COPY_NUMBER 1310 ? DLTHE_RECORD_COPY_NUMBER
1304 : 0)); 1311 : 0));
1305 gcc_assert (ok); 1312 gcc_assert (ok);
1306 1313
1307 free (wont_exit); 1314 free (wont_exit);
1308 1315
1309 if (opt_info) 1316 if (opt_info)
1310 { 1317 {
1311 apply_opt_in_copies (opt_info, npeel, false, false); 1318 apply_opt_in_copies (opt_info, npeel, false, false);
1312 free_opt_info (opt_info); 1319 free_opt_info (opt_info);
1313 } 1320 }
1402 for (i = 1; 2 * i <= nunroll; i *= 2) 1409 for (i = 1; 2 * i <= nunroll; i *= 2)
1403 continue; 1410 continue;
1404 1411
1405 loop->lpt_decision.decision = LPT_UNROLL_STUPID; 1412 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1406 loop->lpt_decision.times = i - 1; 1413 loop->lpt_decision.times = i - 1;
1407 1414
1408 if (dump_file) 1415 if (dump_file)
1409 fprintf (dump_file, 1416 fprintf (dump_file,
1410 ";; Decided to unroll the loop stupidly, %d times.\n", 1417 ";; Decided to unroll the loop stupidly, %d times.\n",
1411 loop->lpt_decision.times); 1418 loop->lpt_decision.times);
1412 } 1419 }
1434 sbitmap wont_exit; 1441 sbitmap wont_exit;
1435 unsigned nunroll = loop->lpt_decision.times; 1442 unsigned nunroll = loop->lpt_decision.times;
1436 struct niter_desc *desc = get_simple_loop_desc (loop); 1443 struct niter_desc *desc = get_simple_loop_desc (loop);
1437 struct opt_info *opt_info = NULL; 1444 struct opt_info *opt_info = NULL;
1438 bool ok; 1445 bool ok;
1439 1446
1440 if (flag_split_ivs_in_unroller 1447 if (flag_split_ivs_in_unroller
1441 || flag_variable_expansion_in_unroller) 1448 || flag_variable_expansion_in_unroller)
1442 opt_info = analyze_insns_in_loop (loop); 1449 opt_info = analyze_insns_in_loop (loop);
1443 1450
1444 1451
1445 wont_exit = sbitmap_alloc (nunroll + 1); 1452 wont_exit = sbitmap_alloc (nunroll + 1);
1446 sbitmap_zero (wont_exit); 1453 sbitmap_zero (wont_exit);
1447 opt_info_start_duplication (opt_info); 1454 opt_info_start_duplication (opt_info);
1448 1455
1449 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1456 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1450 nunroll, wont_exit, 1457 nunroll, wont_exit,
1451 NULL, NULL, 1458 NULL, NULL,
1452 DLTHE_FLAG_UPDATE_FREQ 1459 DLTHE_FLAG_UPDATE_FREQ
1453 | (opt_info 1460 | (opt_info
1454 ? DLTHE_RECORD_COPY_NUMBER 1461 ? DLTHE_RECORD_COPY_NUMBER
1455 : 0)); 1462 : 0));
1456 gcc_assert (ok); 1463 gcc_assert (ok);
1457 1464
1458 if (opt_info) 1465 if (opt_info)
1459 { 1466 {
1460 apply_opt_in_copies (opt_info, nunroll, true, true); 1467 apply_opt_in_copies (opt_info, nunroll, true, true);
1461 free_opt_info (opt_info); 1468 free_opt_info (opt_info);
1462 } 1469 }
1504 ve_info_hash (const void *ves) 1511 ve_info_hash (const void *ves)
1505 { 1512 {
1506 return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn); 1513 return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
1507 } 1514 }
1508 1515
1509 /* Return true if IVTS1 and IVTS2 (which are really both of type 1516 /* Return true if IVTS1 and IVTS2 (which are really both of type
1510 "var_to_expand *") refer to the same instruction. */ 1517 "var_to_expand *") refer to the same instruction. */
1511 1518
1512 static int 1519 static int
1513 ve_info_eq (const void *ivts1, const void *ivts2) 1520 ve_info_eq (const void *ivts1, const void *ivts2)
1514 { 1521 {
1515 const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1; 1522 const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
1516 const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2; 1523 const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;
1517 1524
1518 return i1->insn == i2->insn; 1525 return i1->insn == i2->insn;
1519 } 1526 }
1520 1527
1521 /* Returns true if REG is referenced in one insn in LOOP. */ 1528 /* Returns true if REG is referenced in one insn in LOOP. */
1522 1529
1525 { 1532 {
1526 basic_block *body, bb; 1533 basic_block *body, bb;
1527 unsigned i; 1534 unsigned i;
1528 int count_ref = 0; 1535 int count_ref = 0;
1529 rtx insn; 1536 rtx insn;
1530 1537
1531 body = get_loop_body (loop); 1538 body = get_loop_body (loop);
1532 for (i = 0; i < loop->num_nodes; i++) 1539 for (i = 0; i < loop->num_nodes; i++)
1533 { 1540 {
1534 bb = body[i]; 1541 bb = body[i];
1535 1542
1536 FOR_BB_INSNS (bb, insn) 1543 FOR_BB_INSNS (bb, insn)
1537 { 1544 {
1538 if (rtx_referenced_p (reg, insn)) 1545 if (rtx_referenced_p (reg, insn))
1539 count_ref++; 1546 count_ref++;
1540 } 1547 }
1541 } 1548 }
1542 return (count_ref == 1); 1549 return (count_ref == 1);
1543 } 1550 }
1544 1551
1545 /* Determine whether INSN contains an accumulator 1552 /* Determine whether INSN contains an accumulator
1546 which can be expanded into separate copies, 1553 which can be expanded into separate copies,
1547 one for each copy of the LOOP body. 1554 one for each copy of the LOOP body.
1548 1555
1549 for (i = 0 ; i < n; i++) 1556 for (i = 0 ; i < n; i++)
1550 sum += a[i]; 1557 sum += a[i];
1551 1558
1552 ==> 1559 ==>
1553 1560
1554 sum += a[i] 1561 sum += a[i]
1555 .... 1562 ....
1556 i = i+1; 1563 i = i+1;
1557 sum1 += a[i] 1564 sum1 += a[i]
1558 .... 1565 ....
1559 i = i+1 1566 i = i+1
1560 sum2 += a[i]; 1567 sum2 += a[i];
1561 .... 1568 ....
1562 1569
1563 Return NULL if INSN contains no opportunity for expansion of accumulator. 1570 Return NULL if INSN contains no opportunity for expansion of accumulator.
1564 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant 1571 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1565 information and return a pointer to it. 1572 information and return a pointer to it.
1566 */ 1573 */
1567 1574
1568 static struct var_to_expand * 1575 static struct var_to_expand *
1569 analyze_insn_to_expand_var (struct loop *loop, rtx insn) 1576 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1574 unsigned accum_pos; 1581 unsigned accum_pos;
1575 1582
1576 set = single_set (insn); 1583 set = single_set (insn);
1577 if (!set) 1584 if (!set)
1578 return NULL; 1585 return NULL;
1579 1586
1580 dest = SET_DEST (set); 1587 dest = SET_DEST (set);
1581 src = SET_SRC (set); 1588 src = SET_SRC (set);
1582 1589
1583 if (GET_CODE (src) != PLUS 1590 if (GET_CODE (src) != PLUS
1584 && GET_CODE (src) != MINUS 1591 && GET_CODE (src) != MINUS
1585 && GET_CODE (src) != MULT) 1592 && GET_CODE (src) != MULT)
1586 return NULL; 1593 return NULL;
1587 1594
1597 if (!have_insn_for (GET_CODE (src), GET_MODE (src))) 1604 if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1598 return NULL; 1605 return NULL;
1599 1606
1600 op1 = XEXP (src, 0); 1607 op1 = XEXP (src, 0);
1601 op2 = XEXP (src, 1); 1608 op2 = XEXP (src, 1);
1602 1609
1603 if (!REG_P (dest) 1610 if (!REG_P (dest)
1604 && !(GET_CODE (dest) == SUBREG 1611 && !(GET_CODE (dest) == SUBREG
1605 && REG_P (SUBREG_REG (dest)))) 1612 && REG_P (SUBREG_REG (dest))))
1606 return NULL; 1613 return NULL;
1607 1614
1608 if (rtx_equal_p (dest, op1)) 1615 if (rtx_equal_p (dest, op1))
1609 accum_pos = 0; 1616 accum_pos = 0;
1610 else if (rtx_equal_p (dest, op2)) 1617 else if (rtx_equal_p (dest, op2))
1611 accum_pos = 1; 1618 accum_pos = 1;
1612 else 1619 else
1614 1621
1615 /* The method of expansion that we are using; which includes 1622 /* The method of expansion that we are using; which includes
1616 the initialization of the expansions with zero and the summation of 1623 the initialization of the expansions with zero and the summation of
1617 the expansions at the end of the computation will yield wrong results 1624 the expansions at the end of the computation will yield wrong results
1618 for (x = something - x) thus avoid using it in that case. */ 1625 for (x = something - x) thus avoid using it in that case. */
1619 if (accum_pos == 1 1626 if (accum_pos == 1
1620 && GET_CODE (src) == MINUS) 1627 && GET_CODE (src) == MINUS)
1621 return NULL; 1628 return NULL;
1622 1629
1623 something = (accum_pos == 0)? op2 : op1; 1630 something = (accum_pos == 0)? op2 : op1;
1624 1631
1625 if (!referenced_in_one_insn_in_loop_p (loop, dest)) 1632 if (!referenced_in_one_insn_in_loop_p (loop, dest))
1626 return NULL; 1633 return NULL;
1627 1634
1628 if (rtx_referenced_p (dest, something)) 1635 if (rtx_referenced_p (dest, something))
1629 return NULL; 1636 return NULL;
1630 1637
1631 mode1 = GET_MODE (dest); 1638 mode1 = GET_MODE (dest);
1632 mode2 = GET_MODE (something); 1639 mode2 = GET_MODE (something);
1633 if ((FLOAT_MODE_P (mode1) 1640 if ((FLOAT_MODE_P (mode1)
1634 || FLOAT_MODE_P (mode2)) 1641 || FLOAT_MODE_P (mode2))
1635 && !flag_associative_math) 1642 && !flag_associative_math)
1636 return NULL; 1643 return NULL;
1637 1644
1638 if (dump_file) 1645 if (dump_file)
1639 { 1646 {
1640 fprintf (dump_file, 1647 fprintf (dump_file,
1644 } 1651 }
1645 1652
1646 /* Record the accumulator to expand. */ 1653 /* Record the accumulator to expand. */
1647 ves = XNEW (struct var_to_expand); 1654 ves = XNEW (struct var_to_expand);
1648 ves->insn = insn; 1655 ves->insn = insn;
1656 ves->reg = copy_rtx (dest);
1649 ves->var_expansions = VEC_alloc (rtx, heap, 1); 1657 ves->var_expansions = VEC_alloc (rtx, heap, 1);
1650 ves->reg = copy_rtx (dest); 1658 ves->next = NULL;
1651 ves->op = GET_CODE (src); 1659 ves->op = GET_CODE (src);
1652 ves->expansion_count = 0; 1660 ves->expansion_count = 0;
1653 ves->reuse_expansion = 0; 1661 ves->reuse_expansion = 0;
1654 ves->accum_pos = accum_pos; 1662 ves->accum_pos = accum_pos;
1655 return ves; 1663 return ves;
1656 } 1664 }
1657 1665
1658 /* Determine whether there is an induction variable in INSN that 1666 /* Determine whether there is an induction variable in INSN that
1659 we would like to split during unrolling. 1667 we would like to split during unrolling.
1660 1668
1661 I.e. replace 1669 I.e. replace
1662 1670
1663 i = i + 1; 1671 i = i + 1;
1664 ... 1672 ...
1674 i = i0 + 1 1682 i = i0 + 1
1675 ... 1683 ...
1676 i = i0 + 2 1684 i = i0 + 2
1677 ... 1685 ...
1678 1686
1679 Return NULL if INSN contains no interesting IVs. Otherwise, allocate 1687 Return NULL if INSN contains no interesting IVs. Otherwise, allocate
1680 an IV_TO_SPLIT structure, fill it with the relevant information and return a 1688 an IV_TO_SPLIT structure, fill it with the relevant information and return a
1681 pointer to it. */ 1689 pointer to it. */
1682 1690
1683 static struct iv_to_split * 1691 static struct iv_to_split *
1684 analyze_iv_to_split_insn (rtx insn) 1692 analyze_iv_to_split_insn (rtx insn)
1721 /* Record the insn to split. */ 1729 /* Record the insn to split. */
1722 ivts = XNEW (struct iv_to_split); 1730 ivts = XNEW (struct iv_to_split);
1723 ivts->insn = insn; 1731 ivts->insn = insn;
1724 ivts->base_var = NULL_RTX; 1732 ivts->base_var = NULL_RTX;
1725 ivts->step = iv.step; 1733 ivts->step = iv.step;
1734 ivts->next = NULL;
1726 ivts->n_loc = 1; 1735 ivts->n_loc = 1;
1727 ivts->loc[0] = 1; 1736 ivts->loc[0] = 1;
1728 1737
1729 return ivts; 1738 return ivts;
1730 } 1739 }
1731 1740
1732 /* Determines which of insns in LOOP can be optimized. 1741 /* Determines which of insns in LOOP can be optimized.
1733 Return a OPT_INFO struct with the relevant hash tables filled 1742 Return a OPT_INFO struct with the relevant hash tables filled
1746 PTR *slot1; 1755 PTR *slot1;
1747 PTR *slot2; 1756 PTR *slot2;
1748 VEC (edge, heap) *edges = get_loop_exit_edges (loop); 1757 VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1749 edge exit; 1758 edge exit;
1750 bool can_apply = false; 1759 bool can_apply = false;
1751 1760
1752 iv_analysis_loop_init (loop); 1761 iv_analysis_loop_init (loop);
1753 1762
1754 body = get_loop_body (loop); 1763 body = get_loop_body (loop);
1755 1764
1756 if (flag_split_ivs_in_unroller) 1765 if (flag_split_ivs_in_unroller)
1757 opt_info->insns_to_split = htab_create (5 * loop->num_nodes, 1766 {
1758 si_info_hash, si_info_eq, free); 1767 opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1759 1768 si_info_hash, si_info_eq, free);
1769 opt_info->iv_to_split_head = NULL;
1770 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1771 }
1772
1760 /* Record the loop exit bb and loop preheader before the unrolling. */ 1773 /* Record the loop exit bb and loop preheader before the unrolling. */
1761 opt_info->loop_preheader = loop_preheader_edge (loop)->src; 1774 opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1762 1775
1763 if (VEC_length (edge, edges) == 1) 1776 if (VEC_length (edge, edges) == 1)
1764 { 1777 {
1765 exit = VEC_index (edge, edges, 0); 1778 exit = VEC_index (edge, edges, 0);
1766 if (!(exit->flags & EDGE_COMPLEX)) 1779 if (!(exit->flags & EDGE_COMPLEX))
1767 { 1780 {
1768 opt_info->loop_exit = split_edge (exit); 1781 opt_info->loop_exit = split_edge (exit);
1769 can_apply = true; 1782 can_apply = true;
1770 } 1783 }
1771 } 1784 }
1772 1785
1773 if (flag_variable_expansion_in_unroller 1786 if (flag_variable_expansion_in_unroller
1774 && can_apply) 1787 && can_apply)
1775 opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes, 1788 {
1776 ve_info_hash, ve_info_eq, free); 1789 opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1777 1790 ve_info_hash,
1791 ve_info_eq, free);
1792 opt_info->var_to_expand_head = NULL;
1793 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1794 }
1795
1778 for (i = 0; i < loop->num_nodes; i++) 1796 for (i = 0; i < loop->num_nodes; i++)
1779 { 1797 {
1780 bb = body[i]; 1798 bb = body[i];
1781 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) 1799 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1782 continue; 1800 continue;
1783 1801
1784 FOR_BB_INSNS (bb, insn) 1802 FOR_BB_INSNS (bb, insn)
1785 { 1803 {
1786 if (!INSN_P (insn)) 1804 if (!INSN_P (insn))
1787 continue; 1805 continue;
1788 1806
1789 if (opt_info->insns_to_split) 1807 if (opt_info->insns_to_split)
1790 ivts = analyze_iv_to_split_insn (insn); 1808 ivts = analyze_iv_to_split_insn (insn);
1791 1809
1792 if (ivts) 1810 if (ivts)
1793 { 1811 {
1794 slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT); 1812 slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1813 gcc_assert (*slot1 == NULL);
1795 *slot1 = ivts; 1814 *slot1 = ivts;
1815 *opt_info->iv_to_split_tail = ivts;
1816 opt_info->iv_to_split_tail = &ivts->next;
1796 continue; 1817 continue;
1797 } 1818 }
1798 1819
1799 if (opt_info->insns_with_var_to_expand) 1820 if (opt_info->insns_with_var_to_expand)
1800 ves = analyze_insn_to_expand_var (loop, insn); 1821 ves = analyze_insn_to_expand_var (loop, insn);
1801 1822
1802 if (ves) 1823 if (ves)
1803 { 1824 {
1804 slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT); 1825 slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1826 gcc_assert (*slot2 == NULL);
1805 *slot2 = ves; 1827 *slot2 = ves;
1828 *opt_info->var_to_expand_tail = ves;
1829 opt_info->var_to_expand_tail = &ves->next;
1806 } 1830 }
1807 } 1831 }
1808 } 1832 }
1809 1833
1810 VEC_free (edge, heap, edges); 1834 VEC_free (edge, heap, edges);
1811 free (body); 1835 free (body);
1812 return opt_info; 1836 return opt_info;
1813 } 1837 }
1814 1838
1815 /* Called just before loop duplication. Records start of duplicated area 1839 /* Called just before loop duplication. Records start of duplicated area
1816 to OPT_INFO. */ 1840 to OPT_INFO. */
1817 1841
1818 static void 1842 static void
1819 opt_info_start_duplication (struct opt_info *opt_info) 1843 opt_info_start_duplication (struct opt_info *opt_info)
1820 { 1844 {
1821 if (opt_info) 1845 if (opt_info)
1822 opt_info->first_new_block = last_basic_block; 1846 opt_info->first_new_block = last_basic_block;
1823 } 1847 }
1860 ret = &XEXP (*ret, ivts->loc[i]); 1884 ret = &XEXP (*ret, ivts->loc[i]);
1861 1885
1862 return ret; 1886 return ret;
1863 } 1887 }
1864 1888
1865 /* Allocate basic variable for the induction variable chain. Callback for 1889 /* Allocate basic variable for the induction variable chain. */
1866 htab_traverse. */ 1890
1867 1891 static void
1868 static int 1892 allocate_basic_variable (struct iv_to_split *ivts)
1869 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED) 1893 {
1870 {
1871 struct iv_to_split *ivts = (struct iv_to_split *) *slot;
1872 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts); 1894 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1873 1895
1874 ivts->base_var = gen_reg_rtx (GET_MODE (expr)); 1896 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1875
1876 return 1;
1877 } 1897 }
1878 1898
1879 /* Insert initialization of basic variable of IVTS before INSN, taking 1899 /* Insert initialization of basic variable of IVTS before INSN, taking
1880 the initial value from INSN. */ 1900 the initial value from INSN. */
1881 1901
1930 if (expr != var) 1950 if (expr != var)
1931 emit_move_insn (var, expr); 1951 emit_move_insn (var, expr);
1932 seq = get_insns (); 1952 seq = get_insns ();
1933 end_sequence (); 1953 end_sequence ();
1934 emit_insn_before (seq, insn); 1954 emit_insn_before (seq, insn);
1935 1955
1936 if (validate_change (insn, loc, var, 0)) 1956 if (validate_change (insn, loc, var, 0))
1937 return; 1957 return;
1938 1958
1939 /* The last chance. Try recreating the assignment in insn 1959 /* The last chance. Try recreating the assignment in insn
1940 completely from scratch. */ 1960 completely from scratch. */
1948 src = force_operand (src, dest); 1968 src = force_operand (src, dest);
1949 if (src != dest) 1969 if (src != dest)
1950 emit_move_insn (dest, src); 1970 emit_move_insn (dest, src);
1951 seq = get_insns (); 1971 seq = get_insns ();
1952 end_sequence (); 1972 end_sequence ();
1953 1973
1954 emit_insn_before (seq, insn); 1974 emit_insn_before (seq, insn);
1955 delete_insn (insn); 1975 delete_insn (insn);
1956 } 1976 }
1957 1977
1958 1978
1960 1980
1961 static rtx 1981 static rtx
1962 get_expansion (struct var_to_expand *ve) 1982 get_expansion (struct var_to_expand *ve)
1963 { 1983 {
1964 rtx reg; 1984 rtx reg;
1965 1985
1966 if (ve->reuse_expansion == 0) 1986 if (ve->reuse_expansion == 0)
1967 reg = ve->reg; 1987 reg = ve->reg;
1968 else 1988 else
1969 reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1); 1989 reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1970 1990
1971 if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion) 1991 if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1972 ve->reuse_expansion = 0; 1992 ve->reuse_expansion = 0;
1973 else 1993 else
1974 ve->reuse_expansion++; 1994 ve->reuse_expansion++;
1975 1995
1976 return reg; 1996 return reg;
1977 } 1997 }
1978 1998
1979 1999
1980 /* Given INSN replace the uses of the accumulator recorded in VE 2000 /* Given INSN replace the uses of the accumulator recorded in VE
1981 with a new register. */ 2001 with a new register. */
1982 2002
1983 static void 2003 static void
1984 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn) 2004 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1985 { 2005 {
1986 rtx new_reg, set; 2006 rtx new_reg, set;
1987 bool really_new_expansion = false; 2007 bool really_new_expansion = false;
1988 2008
1989 set = single_set (insn); 2009 set = single_set (insn);
1990 gcc_assert (set); 2010 gcc_assert (set);
1991 2011
1992 /* Generate a new register only if the expansion limit has not been 2012 /* Generate a new register only if the expansion limit has not been
1993 reached. Else reuse an already existing expansion. */ 2013 reached. Else reuse an already existing expansion. */
1994 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count) 2014 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1995 { 2015 {
1996 really_new_expansion = true; 2016 really_new_expansion = true;
1999 else 2019 else
2000 new_reg = get_expansion (ve); 2020 new_reg = get_expansion (ve);
2001 2021
2002 validate_change (insn, &SET_DEST (set), new_reg, 1); 2022 validate_change (insn, &SET_DEST (set), new_reg, 1);
2003 validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1); 2023 validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
2004 2024
2005 if (apply_change_group ()) 2025 if (apply_change_group ())
2006 if (really_new_expansion) 2026 if (really_new_expansion)
2007 { 2027 {
2008 VEC_safe_push (rtx, heap, ve->var_expansions, new_reg); 2028 VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
2009 ve->expansion_count++; 2029 ve->expansion_count++;
2010 } 2030 }
2011 } 2031 }
2012 2032
2013 /* Initialize the variable expansions in loop preheader. 2033 /* Initialize the variable expansions in loop preheader. PLACE is the
2014 Callbacks for htab_traverse. PLACE_P is the loop-preheader 2034 loop-preheader basic block where the initialization of the
2015 basic block where the initialization of the expansions 2035 expansions should take place. The expansions are initialized with
2016 should take place. The expansions are initialized with (-0) 2036 (-0) when the operation is plus or minus to honor sign zero. This
2017 when the operation is plus or minus to honor sign zero. 2037 way we can prevent cases where the sign of the final result is
2018 This way we can prevent cases where the sign of the final result is 2038 affected by the sign of the expansion. Here is an example to
2019 affected by the sign of the expansion. 2039 demonstrate this:
2020 Here is an example to demonstrate this: 2040
2021
2022 for (i = 0 ; i < n; i++) 2041 for (i = 0 ; i < n; i++)
2023 sum += something; 2042 sum += something;
2024 2043
2025 ==> 2044 ==>
2026 2045
2030 sum1 += something 2049 sum1 += something
2031 .... 2050 ....
2032 i = i+1 2051 i = i+1
2033 sum2 += something; 2052 sum2 += something;
2034 .... 2053 ....
2035 2054
2036 When SUM is initialized with -zero and SOMETHING is also -zero; the 2055 When SUM is initialized with -zero and SOMETHING is also -zero; the
2037 final result of sum should be -zero thus the expansions sum1 and sum2 2056 final result of sum should be -zero thus the expansions sum1 and sum2
2038 should be initialized with -zero as well (otherwise we will get +zero 2057 should be initialized with -zero as well (otherwise we will get +zero
2039 as the final result). */ 2058 as the final result). */
2040 2059
2041 static int 2060 static void
2042 insert_var_expansion_initialization (void **slot, void *place_p) 2061 insert_var_expansion_initialization (struct var_to_expand *ve,
2043 { 2062 basic_block place)
2044 struct var_to_expand *ve = (struct var_to_expand *) *slot; 2063 {
2045 basic_block place = (basic_block)place_p;
2046 rtx seq, var, zero_init, insn; 2064 rtx seq, var, zero_init, insn;
2047 unsigned i; 2065 unsigned i;
2048 enum machine_mode mode = GET_MODE (ve->reg); 2066 enum machine_mode mode = GET_MODE (ve->reg);
2049 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode); 2067 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
2050 2068
2051 if (VEC_length (rtx, ve->var_expansions) == 0) 2069 if (VEC_length (rtx, ve->var_expansions) == 0)
2052 return 1; 2070 return;
2053 2071
2054 start_sequence (); 2072 start_sequence ();
2055 if (ve->op == PLUS || ve->op == MINUS) 2073 if (ve->op == PLUS || ve->op == MINUS)
2056 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2074 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2057 { 2075 {
2058 if (honor_signed_zero_p) 2076 if (honor_signed_zero_p)
2059 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); 2077 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
2060 else 2078 else
2061 zero_init = CONST0_RTX (mode); 2079 zero_init = CONST0_RTX (mode);
2062 2080
2063 emit_move_insn (var, zero_init); 2081 emit_move_insn (var, zero_init);
2064 } 2082 }
2065 else if (ve->op == MULT) 2083 else if (ve->op == MULT)
2066 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2084 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2067 { 2085 {
2068 zero_init = CONST1_RTX (GET_MODE (var)); 2086 zero_init = CONST1_RTX (GET_MODE (var));
2069 emit_move_insn (var, zero_init); 2087 emit_move_insn (var, zero_init);
2070 } 2088 }
2071 2089
2072 seq = get_insns (); 2090 seq = get_insns ();
2073 end_sequence (); 2091 end_sequence ();
2074 2092
2075 insn = BB_HEAD (place); 2093 insn = BB_HEAD (place);
2076 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) 2094 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2077 insn = NEXT_INSN (insn); 2095 insn = NEXT_INSN (insn);
2078 2096
2079 emit_insn_after (seq, insn); 2097 emit_insn_after (seq, insn);
2080 /* Continue traversing the hash table. */ 2098 }
2081 return 1; 2099
2082 } 2100 /* Combine the variable expansions at the loop exit. PLACE is the
2083 2101 loop exit basic block where the summation of the expansions should
2084 /* Combine the variable expansions at the loop exit. 2102 take place. */
2085 Callbacks for htab_traverse. PLACE_P is the loop exit 2103
2086 basic block where the summation of the expansions should 2104 static void
2087 take place. */ 2105 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
2088 2106 {
2089 static int
2090 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2091 {
2092 struct var_to_expand *ve = (struct var_to_expand *) *slot;
2093 basic_block place = (basic_block)place_p;
2094 rtx sum = ve->reg; 2107 rtx sum = ve->reg;
2095 rtx expr, seq, var, insn; 2108 rtx expr, seq, var, insn;
2096 unsigned i; 2109 unsigned i;
2097 2110
2098 if (VEC_length (rtx, ve->var_expansions) == 0) 2111 if (VEC_length (rtx, ve->var_expansions) == 0)
2099 return 1; 2112 return;
2100 2113
2101 start_sequence (); 2114 start_sequence ();
2102 if (ve->op == PLUS || ve->op == MINUS) 2115 if (ve->op == PLUS || ve->op == MINUS)
2103 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2116 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2104 { 2117 {
2105 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), 2118 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2109 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2122 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2110 { 2123 {
2111 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), 2124 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2112 var, sum); 2125 var, sum);
2113 } 2126 }
2114 2127
2115 expr = force_operand (sum, ve->reg); 2128 expr = force_operand (sum, ve->reg);
2116 if (expr != ve->reg) 2129 if (expr != ve->reg)
2117 emit_move_insn (ve->reg, expr); 2130 emit_move_insn (ve->reg, expr);
2118 seq = get_insns (); 2131 seq = get_insns ();
2119 end_sequence (); 2132 end_sequence ();
2120 2133
2121 insn = BB_HEAD (place); 2134 insn = BB_HEAD (place);
2122 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) 2135 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2123 insn = NEXT_INSN (insn); 2136 insn = NEXT_INSN (insn);
2124 2137
2125 emit_insn_after (seq, insn); 2138 emit_insn_after (seq, insn);
2126 2139 }
2127 /* Continue traversing the hash table. */ 2140
2128 return 1; 2141 /* Apply loop optimizations in loop copies using the
2129 } 2142 data which was gathered during the unrolling. Structure
2130
2131 /* Apply loop optimizations in loop copies using the
2132 data which was gathered during the unrolling. Structure
2133 OPT_INFO records that data. 2143 OPT_INFO records that data.
2134 2144
2135 UNROLLING is true if we unrolled (not peeled) the loop. 2145 UNROLLING is true if we unrolled (not peeled) the loop.
2136 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of 2146 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2137 the loop (as it should happen in complete unrolling, but not in ordinary 2147 the loop (as it should happen in complete unrolling, but not in ordinary
2138 peeling of the loop). */ 2148 peeling of the loop). */
2139 2149
2140 static void 2150 static void
2141 apply_opt_in_copies (struct opt_info *opt_info, 2151 apply_opt_in_copies (struct opt_info *opt_info,
2142 unsigned n_copies, bool unrolling, 2152 unsigned n_copies, bool unrolling,
2143 bool rewrite_original_loop) 2153 bool rewrite_original_loop)
2144 { 2154 {
2145 unsigned i, delta; 2155 unsigned i, delta;
2146 basic_block bb, orig_bb; 2156 basic_block bb, orig_bb;
2147 rtx insn, orig_insn, next; 2157 rtx insn, orig_insn, next;
2148 struct iv_to_split ivts_templ, *ivts; 2158 struct iv_to_split ivts_templ, *ivts;
2149 struct var_to_expand ve_templ, *ves; 2159 struct var_to_expand ve_templ, *ves;
2150 2160
2151 /* Sanity check -- we need to put initialization in the original loop 2161 /* Sanity check -- we need to put initialization in the original loop
2152 body. */ 2162 body. */
2153 gcc_assert (!unrolling || rewrite_original_loop); 2163 gcc_assert (!unrolling || rewrite_original_loop);
2154 2164
2155 /* Allocate the basic variables (i0). */ 2165 /* Allocate the basic variables (i0). */
2156 if (opt_info->insns_to_split) 2166 if (opt_info->insns_to_split)
2157 htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL); 2167 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2158 2168 allocate_basic_variable (ivts);
2169
2159 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) 2170 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2160 { 2171 {
2161 bb = BASIC_BLOCK (i); 2172 bb = BASIC_BLOCK (i);
2162 orig_bb = get_bb_original (bb); 2173 orig_bb = get_bb_original (bb);
2163 2174
2164 /* bb->aux holds position in copy sequence initialized by 2175 /* bb->aux holds position in copy sequence initialized by
2165 duplicate_loop_to_header_edge. */ 2176 duplicate_loop_to_header_edge. */
2166 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, 2177 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2167 unrolling); 2178 unrolling);
2168 bb->aux = 0; 2179 bb->aux = 0;
2170 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next) 2181 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2171 { 2182 {
2172 next = NEXT_INSN (insn); 2183 next = NEXT_INSN (insn);
2173 if (!INSN_P (insn)) 2184 if (!INSN_P (insn))
2174 continue; 2185 continue;
2175 2186
2176 while (!INSN_P (orig_insn)) 2187 while (!INSN_P (orig_insn))
2177 orig_insn = NEXT_INSN (orig_insn); 2188 orig_insn = NEXT_INSN (orig_insn);
2178 2189
2179 ivts_templ.insn = orig_insn; 2190 ivts_templ.insn = orig_insn;
2180 ve_templ.insn = orig_insn; 2191 ve_templ.insn = orig_insn;
2181 2192
2182 /* Apply splitting iv optimization. */ 2193 /* Apply splitting iv optimization. */
2183 if (opt_info->insns_to_split) 2194 if (opt_info->insns_to_split)
2184 { 2195 {
2185 ivts = (struct iv_to_split *) 2196 ivts = (struct iv_to_split *)
2186 htab_find (opt_info->insns_to_split, &ivts_templ); 2197 htab_find (opt_info->insns_to_split, &ivts_templ);
2187 2198
2188 if (ivts) 2199 if (ivts)
2189 { 2200 {
2190 gcc_assert (GET_CODE (PATTERN (insn)) 2201 gcc_assert (GET_CODE (PATTERN (insn))
2191 == GET_CODE (PATTERN (orig_insn))); 2202 == GET_CODE (PATTERN (orig_insn)));
2192 2203
2193 if (!delta) 2204 if (!delta)
2194 insert_base_initialization (ivts, insn); 2205 insert_base_initialization (ivts, insn);
2195 split_iv (ivts, insn, delta); 2206 split_iv (ivts, insn, delta);
2196 } 2207 }
2197 } 2208 }
2199 if (unrolling && opt_info->insns_with_var_to_expand) 2210 if (unrolling && opt_info->insns_with_var_to_expand)
2200 { 2211 {
2201 ves = (struct var_to_expand *) 2212 ves = (struct var_to_expand *)
2202 htab_find (opt_info->insns_with_var_to_expand, &ve_templ); 2213 htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2203 if (ves) 2214 if (ves)
2204 { 2215 {
2205 gcc_assert (GET_CODE (PATTERN (insn)) 2216 gcc_assert (GET_CODE (PATTERN (insn))
2206 == GET_CODE (PATTERN (orig_insn))); 2217 == GET_CODE (PATTERN (orig_insn)));
2207 expand_var_during_unrolling (ves, insn); 2218 expand_var_during_unrolling (ves, insn);
2208 } 2219 }
2209 } 2220 }
2211 } 2222 }
2212 } 2223 }
2213 2224
2214 if (!rewrite_original_loop) 2225 if (!rewrite_original_loop)
2215 return; 2226 return;
2216 2227
2217 /* Initialize the variable expansions in the loop preheader 2228 /* Initialize the variable expansions in the loop preheader
2218 and take care of combining them at the loop exit. */ 2229 and take care of combining them at the loop exit. */
2219 if (opt_info->insns_with_var_to_expand) 2230 if (opt_info->insns_with_var_to_expand)
2220 { 2231 {
2221 htab_traverse (opt_info->insns_with_var_to_expand, 2232 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2222 insert_var_expansion_initialization, 2233 insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2223 opt_info->loop_preheader); 2234 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2224 htab_traverse (opt_info->insns_with_var_to_expand, 2235 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2225 combine_var_copies_in_loop_exit, 2236 }
2226 opt_info->loop_exit); 2237
2227 }
2228
2229 /* Rewrite also the original loop body. Find them as originals of the blocks 2238 /* Rewrite also the original loop body. Find them as originals of the blocks
2230 in the last copied iteration, i.e. those that have 2239 in the last copied iteration, i.e. those that have
2231 get_bb_copy (get_bb_original (bb)) == bb. */ 2240 get_bb_copy (get_bb_original (bb)) == bb. */
2232 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) 2241 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2233 { 2242 {
2234 bb = BASIC_BLOCK (i); 2243 bb = BASIC_BLOCK (i);
2235 orig_bb = get_bb_original (bb); 2244 orig_bb = get_bb_original (bb);
2236 if (get_bb_copy (orig_bb) != bb) 2245 if (get_bb_copy (orig_bb) != bb)
2237 continue; 2246 continue;
2238 2247
2239 delta = determine_split_iv_delta (0, n_copies, unrolling); 2248 delta = determine_split_iv_delta (0, n_copies, unrolling);
2240 for (orig_insn = BB_HEAD (orig_bb); 2249 for (orig_insn = BB_HEAD (orig_bb);
2241 orig_insn != NEXT_INSN (BB_END (bb)); 2250 orig_insn != NEXT_INSN (BB_END (bb));
2242 orig_insn = next) 2251 orig_insn = next)
2243 { 2252 {
2244 next = NEXT_INSN (orig_insn); 2253 next = NEXT_INSN (orig_insn);
2245 2254
2246 if (!INSN_P (orig_insn)) 2255 if (!INSN_P (orig_insn))
2247 continue; 2256 continue;
2248 2257
2249 ivts_templ.insn = orig_insn; 2258 ivts_templ.insn = orig_insn;
2250 if (opt_info->insns_to_split) 2259 if (opt_info->insns_to_split)
2251 { 2260 {
2252 ivts = (struct iv_to_split *) 2261 ivts = (struct iv_to_split *)
2253 htab_find (opt_info->insns_to_split, &ivts_templ); 2262 htab_find (opt_info->insns_to_split, &ivts_templ);
2257 insert_base_initialization (ivts, orig_insn); 2266 insert_base_initialization (ivts, orig_insn);
2258 split_iv (ivts, orig_insn, delta); 2267 split_iv (ivts, orig_insn, delta);
2259 continue; 2268 continue;
2260 } 2269 }
2261 } 2270 }
2262 2271
2263 } 2272 }
2264 } 2273 }
2265 }
2266
2267 /* Release the data structures used for the variable expansion
2268 optimization. Callbacks for htab_traverse. */
2269
2270 static int
2271 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2272 {
2273 struct var_to_expand *ve = (struct var_to_expand *) *slot;
2274
2275 VEC_free (rtx, heap, ve->var_expansions);
2276
2277 /* Continue traversing the hash table. */
2278 return 1;
2279 } 2274 }
2280 2275
2281 /* Release OPT_INFO. */ 2276 /* Release OPT_INFO. */
2282 2277
2283 static void 2278 static void
2285 { 2280 {
2286 if (opt_info->insns_to_split) 2281 if (opt_info->insns_to_split)
2287 htab_delete (opt_info->insns_to_split); 2282 htab_delete (opt_info->insns_to_split);
2288 if (opt_info->insns_with_var_to_expand) 2283 if (opt_info->insns_with_var_to_expand)
2289 { 2284 {
2290 htab_traverse (opt_info->insns_with_var_to_expand, 2285 struct var_to_expand *ves;
2291 release_var_copies, NULL); 2286
2287 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2288 VEC_free (rtx, heap, ves->var_expansions);
2292 htab_delete (opt_info->insns_with_var_to_expand); 2289 htab_delete (opt_info->insns_with_var_to_expand);
2293 } 2290 }
2294 free (opt_info); 2291 free (opt_info);
2295 } 2292 }