annotate gcc/tree-vect-loop-manip.c @ 158:494b0b89df80 default tip

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 18:13:55 +0900
parents 1830386684a0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 /* Vectorizer Specific Loop Manipulations
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 and Ira Rosen <irar@il.ibm.com>
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
5
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 This file is part of GCC.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 GCC is free software; you can redistribute it and/or modify it under
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 the terms of the GNU General Public License as published by the Free
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 Software Foundation; either version 3, or (at your option) any later
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 version.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 for more details.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 You should have received a copy of the GNU General Public License
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 along with GCC; see the file COPYING3. If not see
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 <http://www.gnu.org/licenses/>. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
21
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 #include "config.h"
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 #include "system.h"
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 #include "coretypes.h"
111
kono
parents: 67
diff changeset
25 #include "backend.h"
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 #include "tree.h"
111
kono
parents: 67
diff changeset
27 #include "gimple.h"
kono
parents: 67
diff changeset
28 #include "cfghooks.h"
kono
parents: 67
diff changeset
29 #include "tree-pass.h"
kono
parents: 67
diff changeset
30 #include "ssa.h"
kono
parents: 67
diff changeset
31 #include "fold-const.h"
kono
parents: 67
diff changeset
32 #include "cfganal.h"
kono
parents: 67
diff changeset
33 #include "gimplify.h"
kono
parents: 67
diff changeset
34 #include "gimple-iterator.h"
kono
parents: 67
diff changeset
35 #include "gimplify-me.h"
kono
parents: 67
diff changeset
36 #include "tree-cfg.h"
kono
parents: 67
diff changeset
37 #include "tree-ssa-loop-manip.h"
kono
parents: 67
diff changeset
38 #include "tree-into-ssa.h"
kono
parents: 67
diff changeset
39 #include "tree-ssa.h"
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 #include "cfgloop.h"
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 #include "tree-scalar-evolution.h"
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 #include "tree-vectorizer.h"
111
kono
parents: 67
diff changeset
43 #include "tree-ssa-loop-ivopts.h"
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
44 #include "gimple-fold.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
45 #include "tree-ssa-loop-niter.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
46 #include "internal-fn.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
47 #include "stor-layout.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
48 #include "optabs-query.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
49 #include "vec-perm-indices.h"
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
50 #include "insn-config.h"
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
51 #include "rtl.h"
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
52 #include "recog.h"
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 /*************************************************************************
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 Simple Loop Peeling Utilities
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 Utilities to support loop peeling for vectorization purposes.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
58 *************************************************************************/
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
59
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
60
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 /* Renames the use *OP_P. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
62
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 static void
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 rename_use_op (use_operand_p op_p)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 tree new_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
67
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 return;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 new_name = get_current_def (USE_FROM_PTR (op_p));
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 /* Something defined outside of the loop. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 if (!new_name)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 return;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
76
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 /* An ordinary ssa name defined in the loop. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
78
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 SET_USE (op_p, new_name);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
81
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
82
111
kono
parents: 67
diff changeset
83 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
kono
parents: 67
diff changeset
84 on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP is
kono
parents: 67
diff changeset
85 true. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
86
111
kono
parents: 67
diff changeset
87 static void
kono
parents: 67
diff changeset
88 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 {
111
kono
parents: 67
diff changeset
90 gimple *stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 use_operand_p use_p;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 ssa_op_iter iter;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 edge e;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
94 edge_iterator ei;
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
95 class loop *loop = bb->loop_father;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
96 class loop *outer_loop = NULL;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97
111
kono
parents: 67
diff changeset
98 if (rename_from_outer_loop)
kono
parents: 67
diff changeset
99 {
kono
parents: 67
diff changeset
100 gcc_assert (loop);
kono
parents: 67
diff changeset
101 outer_loop = loop_outer (loop);
kono
parents: 67
diff changeset
102 }
kono
parents: 67
diff changeset
103
kono
parents: 67
diff changeset
104 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
kono
parents: 67
diff changeset
105 gsi_next (&gsi))
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 stmt = gsi_stmt (gsi);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
109 rename_use_op (use_p);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
111
111
kono
parents: 67
diff changeset
112 FOR_EACH_EDGE (e, ei, bb->preds)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 {
111
kono
parents: 67
diff changeset
114 if (!flow_bb_inside_loop_p (loop, e->src))
kono
parents: 67
diff changeset
115 {
kono
parents: 67
diff changeset
116 if (!rename_from_outer_loop)
kono
parents: 67
diff changeset
117 continue;
kono
parents: 67
diff changeset
118 if (e->src != outer_loop->header)
kono
parents: 67
diff changeset
119 {
kono
parents: 67
diff changeset
120 if (outer_loop->inner->next)
kono
parents: 67
diff changeset
121 {
kono
parents: 67
diff changeset
122 /* If outer_loop has 2 inner loops, allow there to
kono
parents: 67
diff changeset
123 be an extra basic block which decides which of the
kono
parents: 67
diff changeset
124 two loops to use using LOOP_VECTORIZED. */
kono
parents: 67
diff changeset
125 if (!single_pred_p (e->src)
kono
parents: 67
diff changeset
126 || single_pred (e->src) != outer_loop->header)
kono
parents: 67
diff changeset
127 continue;
kono
parents: 67
diff changeset
128 }
kono
parents: 67
diff changeset
129 }
kono
parents: 67
diff changeset
130 }
kono
parents: 67
diff changeset
131 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
kono
parents: 67
diff changeset
132 gsi_next (&gsi))
kono
parents: 67
diff changeset
133 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
134 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
136
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
137
111
kono
parents: 67
diff changeset
138 struct adjust_info
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
139 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
140 tree from, to;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
141 basic_block bb;
111
kono
parents: 67
diff changeset
142 };
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
143
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
144 /* A stack of values to be adjusted in debug stmts. We have to
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
145 process them LIFO, so that the closest substitution applies. If we
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
146 processed them FIFO, without the stack, we might substitute uses
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
147 with a PHI DEF that would soon become non-dominant, and when we got
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
148 to the suitable one, it wouldn't have anything to substitute any
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
149 more. */
111
kono
parents: 67
diff changeset
150 static vec<adjust_info, va_heap> adjust_vec;
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
151
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
152 /* Adjust any debug stmts that referenced AI->from values to use the
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
153 loop-closed AI->to, if the references are dominated by AI->bb and
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
154 not by the definition of AI->from. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
155
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
156 static void
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
157 adjust_debug_stmts_now (adjust_info *ai)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
158 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
159 basic_block bbphi = ai->bb;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
160 tree orig_def = ai->from;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
161 tree new_def = ai->to;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
162 imm_use_iterator imm_iter;
111
kono
parents: 67
diff changeset
163 gimple *stmt;
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
164 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
165
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
166 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
167
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
168 /* Adjust any debug stmts that held onto non-loop-closed
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
169 references. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
170 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
171 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
172 use_operand_p use_p;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
173 basic_block bbuse;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
174
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
175 if (!is_gimple_debug (stmt))
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
176 continue;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
177
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
178 gcc_assert (gimple_debug_bind_p (stmt));
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
179
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
180 bbuse = gimple_bb (stmt);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
181
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
182 if ((bbuse == bbphi
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
183 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
184 && !(bbuse == bbdef
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
185 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
186 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
187 if (new_def)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
188 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
189 SET_USE (use_p, new_def);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
190 else
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
191 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
192 gimple_debug_bind_reset_value (stmt);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
193 update_stmt (stmt);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
194 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
195 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
196 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
197 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
198
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
199 /* Adjust debug stmts as scheduled before. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
200
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
201 static void
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
202 adjust_vec_debug_stmts (void)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
203 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
204 if (!MAY_HAVE_DEBUG_BIND_STMTS)
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
205 return;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
206
111
kono
parents: 67
diff changeset
207 gcc_assert (adjust_vec.exists ());
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
208
111
kono
parents: 67
diff changeset
209 while (!adjust_vec.is_empty ())
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
210 {
111
kono
parents: 67
diff changeset
211 adjust_debug_stmts_now (&adjust_vec.last ());
kono
parents: 67
diff changeset
212 adjust_vec.pop ();
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
213 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
214 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
215
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
216 /* Adjust any debug stmts that referenced FROM values to use the
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
217 loop-closed TO, if the references are dominated by BB and not by
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
218 the definition of FROM. If adjust_vec exists (has been allocated), adjustments
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
219 will be postponed until adjust_vec_debug_stmts is called; otherwise they are applied immediately. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
220
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
221 static void
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
222 adjust_debug_stmts (tree from, tree to, basic_block bb)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
223 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
224 adjust_info ai;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
225
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
226 if (MAY_HAVE_DEBUG_BIND_STMTS
111
kono
parents: 67
diff changeset
227 && TREE_CODE (from) == SSA_NAME
kono
parents: 67
diff changeset
228 && ! SSA_NAME_IS_DEFAULT_DEF (from)
kono
parents: 67
diff changeset
229 && ! virtual_operand_p (from))
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
230 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
231 ai.from = from;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
232 ai.to = to;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
233 ai.bb = bb;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
234
111
kono
parents: 67
diff changeset
235 if (adjust_vec.exists ())
kono
parents: 67
diff changeset
236 adjust_vec.safe_push (ai);
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
237 else
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
238 adjust_debug_stmts_now (&ai);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
239 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
240 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
241
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
242 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
243 to adjust any debug stmts that referenced the old phi arg,
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
244 presumably non-loop-closed references left over from other
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
245 transformations. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
246
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
247 static void
111
kono
parents: 67
diff changeset
248 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
249 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
250 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
251
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
252 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
253
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
254 if (MAY_HAVE_DEBUG_BIND_STMTS)
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
255 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
256 gimple_bb (update_phi));
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
257 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
258
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
259 /* Define one loop mask MASK from loop LOOP. INIT_MASK is the value that
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
260 the mask should have during the first iteration and NEXT_MASK is the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
261 value that it should have on subsequent iterations. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
262
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
263 static void
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
264 vect_set_loop_mask (class loop *loop, tree mask, tree init_mask,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
265 tree next_mask)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
266 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
267 gphi *phi = create_phi_node (mask, loop->header);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
268 add_phi_arg (phi, init_mask, loop_preheader_edge (loop), UNKNOWN_LOCATION);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
269 add_phi_arg (phi, next_mask, loop_latch_edge (loop), UNKNOWN_LOCATION);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
270 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
271
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
272 /* Add SEQ to the end of LOOP's preheader block. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
273
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
274 static void
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
275 add_preheader_seq (class loop *loop, gimple_seq seq)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
276 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
277 if (seq)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
278 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
279 edge pe = loop_preheader_edge (loop);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
280 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
281 gcc_assert (!new_bb);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
282 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
283 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
284
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
285 /* Add SEQ to the beginning of LOOP's header block. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
286
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
287 static void
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
288 add_header_seq (class loop *loop, gimple_seq seq)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
289 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
290 if (seq)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
291 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
292 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
293 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
294 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
295 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
296
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
297 /* Return true if the target can interleave elements of two vectors.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
298 OFFSET is 0 if the first half of the vectors should be interleaved
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
299 or 1 if the second half should. When returning true, store the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
300 associated permutation in INDICES. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
301
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
302 static bool
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
303 interleave_supported_p (vec_perm_indices *indices, tree vectype,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
304 unsigned int offset)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
305 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
306 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
307 poly_uint64 base = exact_div (nelts, 2) * offset;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
308 vec_perm_builder sel (nelts, 2, 3);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
309 for (unsigned int i = 0; i < 3; ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
310 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
311 sel.quick_push (base + i);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
312 sel.quick_push (base + i + nelts);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
313 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
314 indices->new_vector (sel, 2, nelts);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
315 return can_vec_perm_const_p (TYPE_MODE (vectype), *indices);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
316 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
317
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
318 /* Try to use permutes to define the masks in DEST_RGM using the masks
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
319 in SRC_RGM, given that the former has twice as many masks as the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
320 latter. Return true on success, adding any new statements to SEQ. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
321
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
322 static bool
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
323 vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
324 rgroup_masks *src_rgm)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
325 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
326 tree src_masktype = src_rgm->mask_type;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
327 tree dest_masktype = dest_rgm->mask_type;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
328 machine_mode src_mode = TYPE_MODE (src_masktype);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
329 insn_code icode1, icode2;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
330 if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
331 && (icode1 = optab_handler (vec_unpacku_hi_optab,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
332 src_mode)) != CODE_FOR_nothing
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
333 && (icode2 = optab_handler (vec_unpacku_lo_optab,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
334 src_mode)) != CODE_FOR_nothing)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
335 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
336 /* Unpacking the source masks gives at least as many mask bits as
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
337 we need. We can then VIEW_CONVERT any excess bits away. */
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
338 machine_mode dest_mode = insn_data[icode1].operand[0].mode;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
339 gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
340 tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
341 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
342 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
343 tree src = src_rgm->masks[i / 2];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
344 tree dest = dest_rgm->masks[i];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
345 tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
346 ? VEC_UNPACK_HI_EXPR
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
347 : VEC_UNPACK_LO_EXPR);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
348 gassign *stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
349 if (dest_masktype == unpack_masktype)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
350 stmt = gimple_build_assign (dest, code, src);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
351 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
352 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
353 tree temp = make_ssa_name (unpack_masktype);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
354 stmt = gimple_build_assign (temp, code, src);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
355 gimple_seq_add_stmt (seq, stmt);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
356 stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
357 build1 (VIEW_CONVERT_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
358 dest_masktype, temp));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
359 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
360 gimple_seq_add_stmt (seq, stmt);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
361 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
362 return true;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
363 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
364 vec_perm_indices indices[2];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
365 if (dest_masktype == src_masktype
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
366 && interleave_supported_p (&indices[0], src_masktype, 0)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
367 && interleave_supported_p (&indices[1], src_masktype, 1))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
368 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
369 /* The destination requires twice as many mask bits as the source, so
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
370 we can use interleaving permutes to double up the number of bits. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
371 tree masks[2];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
372 for (unsigned int i = 0; i < 2; ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
373 masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
374 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
375 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
376 tree src = src_rgm->masks[i / 2];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
377 tree dest = dest_rgm->masks[i];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
378 gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
379 src, src, masks[i & 1]);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
380 gimple_seq_add_stmt (seq, stmt);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
381 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
382 return true;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
383 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
384 return false;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
385 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
386
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
387 /* Helper for vect_set_loop_condition_masked. Generate definitions for
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
388 all the masks in RGM and return a mask that is nonzero when the loop
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
389 needs to iterate. Add any new preheader statements to PREHEADER_SEQ.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
390 Use LOOP_COND_GSI to insert code before the exit gcond.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
391
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
392 RGM belongs to loop LOOP. The loop originally iterated NITERS
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
393 times and has been vectorized according to LOOP_VINFO.
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
394
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
395 If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
396 starts with NITERS_SKIP dummy iterations of the scalar loop before
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
397 the real work starts. The mask elements for these dummy iterations
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
398 must be 0, to ensure that the extra iterations do not have an effect.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
399
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
400 It is known that:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
401
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
402 NITERS * RGM->max_nscalars_per_iter
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
403
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
404 does not overflow. However, MIGHT_WRAP_P says whether an induction
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
405 variable that starts at 0 and has step:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
406
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
407 VF * RGM->max_nscalars_per_iter
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
408
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
409 might overflow before hitting a value above:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
410
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
411 (NITERS + NITERS_SKIP) * RGM->max_nscalars_per_iter
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
412
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
413 This means that we cannot guarantee that such an induction variable
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
414 would ever hit a value that produces a set of all-false masks for RGM. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
415
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
416 static tree
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
417 vect_set_loop_masks_directly (class loop *loop, loop_vec_info loop_vinfo,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
418 gimple_seq *preheader_seq,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
419 gimple_stmt_iterator loop_cond_gsi,
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
420 rgroup_masks *rgm, tree niters, tree niters_skip,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
421 bool might_wrap_p)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
422 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
423 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
424 tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
425 tree mask_type = rgm->mask_type;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
426 unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
427 poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
428 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
429
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
430 /* Calculate the maximum number of scalar values that the rgroup
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
431 handles in total, the number that it handles for each iteration
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
432 of the vector loop, and the number that it should skip during the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
433 first iteration of the vector loop. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
434 tree nscalars_total = niters;
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
435 tree nscalars_step = build_int_cst (iv_type, vf);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
436 tree nscalars_skip = niters_skip;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
437 if (nscalars_per_iter != 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
438 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
439 /* We checked before choosing to use a fully-masked loop that these
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
440 multiplications don't overflow. */
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
441 tree compare_factor = build_int_cst (compare_type, nscalars_per_iter);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
442 tree iv_factor = build_int_cst (iv_type, nscalars_per_iter);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
443 nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
444 nscalars_total, compare_factor);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
445 nscalars_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
446 nscalars_step, iv_factor);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
447 if (nscalars_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
448 nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
449 nscalars_skip, compare_factor);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
450 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
451
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
452 /* Create an induction variable that counts the number of scalars
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
453 processed. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
454 tree index_before_incr, index_after_incr;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
455 gimple_stmt_iterator incr_gsi;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
456 bool insert_after;
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
457 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
458 create_iv (build_int_cst (iv_type, 0), nscalars_step, NULL_TREE, loop,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
459 &incr_gsi, insert_after, &index_before_incr, &index_after_incr);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
460
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
461 tree zero_index = build_int_cst (compare_type, 0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
462 tree test_index, test_limit, first_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
463 gimple_stmt_iterator *test_gsi;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
464 if (might_wrap_p)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
465 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
466 /* In principle the loop should stop iterating once the incremented
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
467 IV reaches a value greater than or equal to:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
468
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
469 NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
470
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
471 However, there's no guarantee that this addition doesn't overflow
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
472 the comparison type, or that the IV hits a value above it before
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
473 wrapping around. We therefore adjust the limit down by one
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
474 IV step:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
475
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
476 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
477 -[infinite-prec] NSCALARS_STEP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
478
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
479 and compare the IV against this limit _before_ incrementing it.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
480 Since the comparison type is unsigned, we actually want the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
481 subtraction to saturate at zero:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
482
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
483 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
484 -[sat] NSCALARS_STEP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
485
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
486 And since NSCALARS_SKIP < NSCALARS_STEP, we can reassociate this as:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
487
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
488 NSCALARS_TOTAL -[sat] (NSCALARS_STEP - NSCALARS_SKIP)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
489
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
490 where the rightmost subtraction can be done directly in
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
491 COMPARE_TYPE. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
492 test_index = index_before_incr;
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
493 tree adjust = gimple_convert (preheader_seq, compare_type,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
494 nscalars_step);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
495 if (nscalars_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
496 adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
497 adjust, nscalars_skip);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
498 test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
499 nscalars_total, adjust);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
500 test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
501 test_limit, adjust);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
502 test_gsi = &incr_gsi;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
503
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
504 /* Get a safe limit for the first iteration. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
505 if (nscalars_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
506 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
507 /* The first vector iteration can handle at most NSCALARS_STEP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
508 scalars. NSCALARS_STEP <= CONST_LIMIT, and adding
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
509 NSCALARS_SKIP to that cannot overflow. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
510 tree const_limit = build_int_cst (compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
511 LOOP_VINFO_VECT_FACTOR (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
512 * nscalars_per_iter);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
513 first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
514 nscalars_total, const_limit);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
515 first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
516 first_limit, nscalars_skip);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
517 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
518 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
519 /* For the first iteration it doesn't matter whether the IV hits
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
520 a value above NSCALARS_TOTAL. That only matters for the latch
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
521 condition. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
522 first_limit = nscalars_total;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
523 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
524 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
525 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
526 /* Test the incremented IV, which will always hit a value above
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
527 the bound before wrapping. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
528 test_index = index_after_incr;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
529 test_limit = nscalars_total;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
530 if (nscalars_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
531 test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
532 test_limit, nscalars_skip);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
533 test_gsi = &loop_cond_gsi;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
534
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
535 first_limit = test_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
536 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
537
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
538 /* Convert the IV value to the comparison type (either a no-op or
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
539 a demotion). */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
540 gimple_seq test_seq = NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
541 test_index = gimple_convert (&test_seq, compare_type, test_index);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
542 gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
543
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
544 /* Provide a definition of each mask in the group. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
545 tree next_mask = NULL_TREE;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
546 tree mask;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
547 unsigned int i;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
548 FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
549 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
550 /* Previous masks will cover BIAS scalars. This mask covers the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
551 next batch. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
552 poly_uint64 bias = nscalars_per_mask * i;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
553 tree bias_tree = build_int_cst (compare_type, bias);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
554 gimple *tmp_stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
555
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
556 /* See whether the first iteration of the vector loop is known
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
557 to have a full mask. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
558 poly_uint64 const_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
559 bool first_iteration_full
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
560 = (poly_int_tree_p (first_limit, &const_limit)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
561 && known_ge (const_limit, (i + 1) * nscalars_per_mask));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
562
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
563 /* Rather than have a new IV that starts at BIAS and goes up to
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
564 TEST_LIMIT, prefer to use the same 0-based IV for each mask
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
565 and adjust the bound down by BIAS. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
566 tree this_test_limit = test_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
567 if (i != 0)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
568 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
569 this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
570 compare_type, this_test_limit,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
571 bias_tree);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
572 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
573 compare_type, this_test_limit,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
574 bias_tree);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
575 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
576
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
577 /* Create the initial mask. First include all scalars that
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
578 are within the loop limit. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
579 tree init_mask = NULL_TREE;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
580 if (!first_iteration_full)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
581 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
582 tree start, end;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
583 if (first_limit == test_limit)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
584 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
585 /* Use a natural test between zero (the initial IV value)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
586 and the loop limit. The "else" block would be valid too,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
587 but this choice can avoid the need to load BIAS_TREE into
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
588 a register. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
589 start = zero_index;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
590 end = this_test_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
591 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
592 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
593 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
594 /* FIRST_LIMIT is the maximum number of scalars handled by the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
595 first iteration of the vector loop. Test the portion
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
596 associated with this mask. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
597 start = bias_tree;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
598 end = first_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
599 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
600
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
601 init_mask = make_temp_ssa_name (mask_type, NULL, "max_mask");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
602 tmp_stmt = vect_gen_while (init_mask, start, end);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
603 gimple_seq_add_stmt (preheader_seq, tmp_stmt);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
604 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
605
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
606 /* Now AND out the bits that are within the number of skipped
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
607 scalars. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
608 poly_uint64 const_skip;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
609 if (nscalars_skip
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
610 && !(poly_int_tree_p (nscalars_skip, &const_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
611 && known_le (const_skip, bias)))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
612 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
613 tree unskipped_mask = vect_gen_while_not (preheader_seq, mask_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
614 bias_tree, nscalars_skip);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
615 if (init_mask)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
616 init_mask = gimple_build (preheader_seq, BIT_AND_EXPR, mask_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
617 init_mask, unskipped_mask);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
618 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
619 init_mask = unskipped_mask;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
620 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
621
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
622 if (!init_mask)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
623 /* First iteration is full. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
624 init_mask = build_minus_one_cst (mask_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
625
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
626 /* Get the mask value for the next iteration of the loop. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
627 next_mask = make_temp_ssa_name (mask_type, NULL, "next_mask");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
628 gcall *call = vect_gen_while (next_mask, test_index, this_test_limit);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
629 gsi_insert_before (test_gsi, call, GSI_SAME_STMT);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
630
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
631 vect_set_loop_mask (loop, mask, init_mask, next_mask);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
632 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
633 return next_mask;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
634 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
635
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
636 /* Make LOOP iterate NITERS times using masking and WHILE_ULT calls.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
637 LOOP_VINFO describes the vectorization of LOOP. NITERS is the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
638 number of iterations of the original scalar loop that should be
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
639 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
640 as for vect_set_loop_condition.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
641
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
642 Insert the branch-back condition before LOOP_COND_GSI and return the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
643 final gcond. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
644
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
645 static gcond *
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
646 vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
647 tree niters, tree final_iv,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
648 bool niters_maybe_zero,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
649 gimple_stmt_iterator loop_cond_gsi)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
650 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
651 gimple_seq preheader_seq = NULL;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
652 gimple_seq header_seq = NULL;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
653
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
654 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
655 unsigned int compare_precision = TYPE_PRECISION (compare_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
656 tree orig_niters = niters;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
657
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
658 /* Type of the initial value of NITERS. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
659 tree ni_actual_type = TREE_TYPE (niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
660 unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
661 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
662
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
663 /* Convert NITERS to the same size as the compare. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
664 if (compare_precision > ni_actual_precision
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
665 && niters_maybe_zero)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
666 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
667 /* We know that there is always at least one iteration, so if the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
668 count is zero then it must have wrapped. Cope with this by
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
669 subtracting 1 before the conversion and adding 1 to the result. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
670 gcc_assert (TYPE_UNSIGNED (ni_actual_type));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
671 niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
672 niters, build_minus_one_cst (ni_actual_type));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
673 niters = gimple_convert (&preheader_seq, compare_type, niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
674 niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
675 niters, build_one_cst (compare_type));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
676 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
677 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
678 niters = gimple_convert (&preheader_seq, compare_type, niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
679
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
680 widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
681
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
682 /* Iterate over all the rgroups and fill in their masks. We could use
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
683 the first mask from any rgroup for the loop condition; here we
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
684 arbitrarily pick the last. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
685 tree test_mask = NULL_TREE;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
686 rgroup_masks *rgm;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
687 unsigned int i;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
688 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
689 FOR_EACH_VEC_ELT (*masks, i, rgm)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
690 if (!rgm->masks.is_empty ())
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
691 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
692 /* First try using permutes. This adds a single vector
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
693 instruction to the loop for each mask, but needs no extra
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
694 loop invariants or IVs. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
695 unsigned int nmasks = i + 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
696 if ((nmasks & 1) == 0)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
697 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
698 rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
699 if (!half_rgm->masks.is_empty ()
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
700 && vect_maybe_permute_loop_masks (&header_seq, rgm, half_rgm))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
701 continue;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
702 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
703
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
704 /* See whether zero-based IV would ever generate all-false masks
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
705 before wrapping around. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
706 bool might_wrap_p
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
707 = (iv_limit == -1
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
708 || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
709 UNSIGNED)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
710 > compare_precision));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
711
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
712 /* Set up all masks for this group. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
713 test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
714 &preheader_seq,
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
715 loop_cond_gsi, rgm,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
716 niters, niters_skip,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
717 might_wrap_p);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
718 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
719
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
720 /* Emit all accumulated statements. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
721 add_preheader_seq (loop, preheader_seq);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
722 add_header_seq (loop, header_seq);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
723
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
724 /* Get a boolean result that tells us whether to iterate. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
725 edge exit_edge = single_exit (loop);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
726 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
727 tree zero_mask = build_zero_cst (TREE_TYPE (test_mask));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
728 gcond *cond_stmt = gimple_build_cond (code, test_mask, zero_mask,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
729 NULL_TREE, NULL_TREE);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
730 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
731
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
732 /* The loop iterates (NITERS - 1) / VF + 1 times.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
733 Subtract one from this to get the latch count. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
734 tree step = build_int_cst (compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
735 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
736 tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
737 build_minus_one_cst (compare_type));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
738 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
739 niters_minus_one, step);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
740
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
741 if (final_iv)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
742 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
743 gassign *assign = gimple_build_assign (final_iv, orig_niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
744 gsi_insert_on_edge_immediate (single_exit (loop), assign);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
745 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
746
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
747 return cond_stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
748 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
749
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
750 /* Like vect_set_loop_condition, but handle the case in which there
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
751 are no loop masks. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
752
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
753 static gcond *
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
754 vect_set_loop_condition_unmasked (class loop *loop, tree niters,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
755 tree step, tree final_iv,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
756 bool niters_maybe_zero,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
757 gimple_stmt_iterator loop_cond_gsi)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
758 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
759 tree indx_before_incr, indx_after_incr;
111
kono
parents: 67
diff changeset
760 gcond *cond_stmt;
kono
parents: 67
diff changeset
761 gcond *orig_cond;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
762 edge pe = loop_preheader_edge (loop);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
763 edge exit_edge = single_exit (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
764 gimple_stmt_iterator incr_gsi;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
765 bool insert_after;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
766 enum tree_code code;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
767 tree niters_type = TREE_TYPE (niters);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
768
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
769 orig_cond = get_loop_exit_condition (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
770 gcc_assert (orig_cond);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
771 loop_cond_gsi = gsi_for_stmt (orig_cond);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
772
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
773 tree init, limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
774 if (!niters_maybe_zero && integer_onep (step))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
775 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
776 /* In this case we can use a simple 0-based IV:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
777
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
778 A:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
779 x = 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
780 do
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
781 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
782 ...
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
783 x += 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
784 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
785 while (x < NITERS); */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
786 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
787 init = build_zero_cst (niters_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
788 limit = niters;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
789 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
790 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
791 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
792 /* The following works for all values of NITERS except 0:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
793
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
794 B:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
795 x = 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
796 do
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
797 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
798 ...
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
799 x += STEP;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
800 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
801 while (x <= NITERS - STEP);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
802
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
803 so that the loop continues to iterate if x + STEP - 1 < NITERS
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
804 but stops if x + STEP - 1 >= NITERS.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
805
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
806 However, if NITERS is zero, x never hits a value above NITERS - STEP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
807 before wrapping around. There are two obvious ways of dealing with
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
808 this:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
809
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
810 - start at STEP - 1 and compare x before incrementing it
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
811 - start at -1 and compare x after incrementing it
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
812
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
813 The latter is simpler and is what we use. The loop in this case
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
814 looks like:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
815
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
816 C:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
817 x = -1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
818 do
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
819 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
820 ...
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
821 x += STEP;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
822 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
823 while (x < NITERS - STEP);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
824
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
825 In both cases the loop limit is NITERS - STEP. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
826 gimple_seq seq = NULL;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
827 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
828 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
829 if (seq)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
830 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
831 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
832 gcc_assert (!new_bb);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
833 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
834 if (niters_maybe_zero)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
835 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
836 /* Case C. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
837 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
838 init = build_all_ones_cst (niters_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
839 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
840 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
841 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
842 /* Case B. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
843 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
844 init = build_zero_cst (niters_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
845 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
846 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
847
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
848 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
849 create_iv (init, step, NULL_TREE, loop,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
850 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
851 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
852 true, NULL_TREE, true,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
853 GSI_SAME_STMT);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
854 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
855 true, GSI_SAME_STMT);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
856
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
857 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
858 NULL_TREE);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
859
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
860 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
861
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
862 /* Record the number of latch iterations. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
863 if (limit == niters)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
864 /* Case A: the loop iterates NITERS times. Subtract one to get the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
865 latch count. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
866 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
867 build_int_cst (niters_type, 1));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
868 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
869 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
870 Subtract one from this to get the latch count. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
871 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
872 limit, step);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
873
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
874 if (final_iv)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
875 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
876 gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
877 indx_after_incr, init);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
878 gsi_insert_on_edge_immediate (single_exit (loop), assign);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
879 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
880
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
881 return cond_stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
882 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
883
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
884 /* If we're using fully-masked loops, make LOOP iterate:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
885
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
886 N == (NITERS - 1) / STEP + 1
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
887
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
888 times. When NITERS is zero, this is equivalent to making the loop
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
889 execute (1 << M) / STEP times, where M is the precision of NITERS.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
890 NITERS_MAYBE_ZERO is true if this last case might occur.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
891
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
892 If we're not using fully-masked loops, make LOOP iterate:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
893
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
894 N == (NITERS - STEP) / STEP + 1
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
895
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
896 times, where NITERS is known to be outside the range [1, STEP - 1].
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
897 This is equivalent to making the loop execute NITERS / STEP times
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
898 when NITERS is nonzero and (1 << M) / STEP times otherwise.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
899 NITERS_MAYBE_ZERO again indicates whether this last case might occur.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
900
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
901 If FINAL_IV is nonnull, it is an SSA name that should be set to
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
902 N * STEP on exit from the loop.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
903
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
904 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
905
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
906 void
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
907 vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
908 tree niters, tree step, tree final_iv,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
909 bool niters_maybe_zero)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
910 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
911 gcond *cond_stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
912 gcond *orig_cond = get_loop_exit_condition (loop);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
913 gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
914
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
915 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
916 cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
917 final_iv, niters_maybe_zero,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
918 loop_cond_gsi);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
919 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
920 cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
921 final_iv, niters_maybe_zero,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
922 loop_cond_gsi);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
923
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
924 /* Remove old loop exit test. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
925 stmt_vec_info orig_cond_info;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
926 if (loop_vinfo
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
927 && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
928 loop_vinfo->remove_stmt (orig_cond_info);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
929 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
930 gsi_remove (&loop_cond_gsi, true);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
931
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
932 if (dump_enabled_p ())
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
933 dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
934 cond_stmt);
111
kono
parents: 67
diff changeset
935 }
kono
parents: 67
diff changeset
936
kono
parents: 67
diff changeset
937 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
kono
parents: 67
diff changeset
938 For all PHI arguments in FROM->dest and TO->dest from those
kono
parents: 67
diff changeset
939 edges ensure that TO->dest PHI arguments have current_def
kono
parents: 67
diff changeset
940 to that in from. */
kono
parents: 67
diff changeset
941
kono
parents: 67
diff changeset
942 static void
kono
parents: 67
diff changeset
943 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
kono
parents: 67
diff changeset
944 {
kono
parents: 67
diff changeset
945 gimple_stmt_iterator gsi_from, gsi_to;
kono
parents: 67
diff changeset
946
kono
parents: 67
diff changeset
947 for (gsi_from = gsi_start_phis (from->dest),
kono
parents: 67
diff changeset
948 gsi_to = gsi_start_phis (to->dest);
kono
parents: 67
diff changeset
949 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
kono
parents: 67
diff changeset
950 {
kono
parents: 67
diff changeset
951 gimple *from_phi = gsi_stmt (gsi_from);
kono
parents: 67
diff changeset
952 gimple *to_phi = gsi_stmt (gsi_to);
kono
parents: 67
diff changeset
953 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
kono
parents: 67
diff changeset
954 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
kono
parents: 67
diff changeset
955 if (virtual_operand_p (from_arg))
kono
parents: 67
diff changeset
956 {
kono
parents: 67
diff changeset
957 gsi_next (&gsi_from);
kono
parents: 67
diff changeset
958 continue;
kono
parents: 67
diff changeset
959 }
kono
parents: 67
diff changeset
960 if (virtual_operand_p (to_arg))
kono
parents: 67
diff changeset
961 {
kono
parents: 67
diff changeset
962 gsi_next (&gsi_to);
kono
parents: 67
diff changeset
963 continue;
kono
parents: 67
diff changeset
964 }
kono
parents: 67
diff changeset
965 if (TREE_CODE (from_arg) != SSA_NAME)
kono
parents: 67
diff changeset
966 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
967 else if (TREE_CODE (to_arg) == SSA_NAME
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
968 && from_arg != to_arg)
111
kono
parents: 67
diff changeset
969 {
kono
parents: 67
diff changeset
970 if (get_current_def (to_arg) == NULL_TREE)
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
971 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
972 gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
973 TREE_TYPE (get_current_def
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
974 (from_arg))));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
975 set_current_def (to_arg, get_current_def (from_arg));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
976 }
111
kono
parents: 67
diff changeset
977 }
kono
parents: 67
diff changeset
978 gsi_next (&gsi_from);
kono
parents: 67
diff changeset
979 gsi_next (&gsi_to);
kono
parents: 67
diff changeset
980 }
kono
parents: 67
diff changeset
981
kono
parents: 67
diff changeset
982 gphi *from_phi = get_virtual_phi (from->dest);
kono
parents: 67
diff changeset
983 gphi *to_phi = get_virtual_phi (to->dest);
kono
parents: 67
diff changeset
984 if (from_phi)
kono
parents: 67
diff changeset
985 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
kono
parents: 67
diff changeset
986 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
987 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
988
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
989
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.

   Return the new loop, or NULL if E is neither the single exit edge nor
   the preheader edge of LOOP, or if can_copy_bbs_p rejects the body of
   the loop being copied.  */

class loop *
slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop,
                                        class loop *scalar_loop, edge e)
{
  class loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  /* E must be either the single exit edge or the preheader edge of LOOP;
     AT_EXIT records which of the two it is.  */
  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  /* Without a separate SCALAR_LOOP, LOOP itself is the one copied.  */
  if (scalar_loop == NULL)
    scalar_loop = loop;

  /* BBS has one slot more than the loop body: slot 0 is filled with the
     preheader further down, PBBS addresses the body starting at slot 1.  */
  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  /* Remember the exit destination of LOOP and whether LOOP's header is
     its immediate dominator; both are consulted after the CFG has been
     rewired in the AT_EXIT case below.  */
  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  /* EXIT temporarily names the exit of SCALAR_LOOP so that copy_bbs
     reports its copy in NEW_EXIT; afterwards it is reset to the exit
     of LOOP.  */
  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  /* Slot 0 of BBS held the preheader, so slot 0 of NEW_BBS is its copy.  */
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          /* Send the exit of the copy to EXIT_DEST and give each PHI
             there, on the redirected edge, the same argument and
             location it already has on E.  */
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      /* E now enters the copied preheader instead of EXIT_DEST.  */
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          /* From here on PREHEADER and ENTRY_E refer to a freshly split
             preheader of LOOP rather than of SCALAR_LOOP.  */
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      /* Enter the copy first ...  */
      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      /* ... and make the exit of the copy fall into PREHEADER.  */
      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.
         NOTE(review): in this branch the block removed is NEW_PREHEADER
         and the one kept is PREHEADER; the wording mirrors the AT_EXIT
         branch -- confirm it is still accurate here.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  /* Skip new preheader since it's deleted if copy loop is added at entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.
         The two PHI lists are walked in lockstep and the walk stops as
         soon as either one is exhausted.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  /* Only the two temporary block arrays are released; the blocks
     themselves stay in the CFG.  */
  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1166
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1167
111
kono
parents: 67
diff changeset
1168 /* Given the condition expression COND, put it as the last statement of
kono
parents: 67
diff changeset
1169 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
kono
parents: 67
diff changeset
1170 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
kono
parents: 67
diff changeset
1171 skip the loop. PROBABILITY is the skip edge's probability. Mark the
kono
parents: 67
diff changeset
1172 new edge as irreducible if IRREDUCIBLE_P is true. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1173
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1174 static edge
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1175 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
111
kono
parents: 67
diff changeset
1176 basic_block guard_to, basic_block dom_bb,
kono
parents: 67
diff changeset
1177 profile_probability probability, bool irreducible_p)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1178 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1179 gimple_stmt_iterator gsi;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1180 edge new_e, enter_e;
111
kono
parents: 67
diff changeset
1181 gcond *cond_stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1182 gimple_seq gimplify_stmt_list = NULL;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1183
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1184 enter_e = EDGE_SUCC (guard_bb, 0);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1185 enter_e->flags &= ~EDGE_FALLTHRU;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1186 enter_e->flags |= EDGE_FALSE_VALUE;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1187 gsi = gsi_last_bb (guard_bb);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1188
111
kono
parents: 67
diff changeset
1189 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
kono
parents: 67
diff changeset
1190 NULL_TREE);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1191 if (gimplify_stmt_list)
111
kono
parents: 67
diff changeset
1192 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1193
111
kono
parents: 67
diff changeset
1194 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1195 gsi = gsi_last_bb (guard_bb);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1196 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1197
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1198 /* Add new edge to connect guard block to the merge/loop-exit block. */
111
kono
parents: 67
diff changeset
1199 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
kono
parents: 67
diff changeset
1200
kono
parents: 67
diff changeset
1201 new_e->probability = probability;
kono
parents: 67
diff changeset
1202 if (irreducible_p)
kono
parents: 67
diff changeset
1203 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
kono
parents: 67
diff changeset
1204
kono
parents: 67
diff changeset
1205 enter_e->probability = probability.invert ();
kono
parents: 67
diff changeset
1206 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
kono
parents: 67
diff changeset
1207
kono
parents: 67
diff changeset
1208 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
kono
parents: 67
diff changeset
1209 if (enter_e->dest->loop_father->header == enter_e->dest)
kono
parents: 67
diff changeset
1210 split_edge (enter_e);
kono
parents: 67
diff changeset
1211
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1212 return new_e;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1213 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1214
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1215
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1216 /* This function verifies that the following restrictions apply to LOOP:
111
kono
parents: 67
diff changeset
1217 (1) it consists of exactly 2 basic blocks - header, and an empty latch
kono
parents: 67
diff changeset
1218 for innermost loop and 5 basic blocks for outer-loop.
kono
parents: 67
diff changeset
1219 (2) it is single entry, single exit
kono
parents: 67
diff changeset
1220 (3) its exit condition is the last stmt in the header
kono
parents: 67
diff changeset
1221 (4) E is the entry/exit edge of LOOP.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1222 */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1223
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1224 bool
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1225 slpeel_can_duplicate_loop_p (const class loop *loop, const_edge e)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1226 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1227 edge exit_e = single_exit (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1228 edge entry_e = loop_preheader_edge (loop);
111
kono
parents: 67
diff changeset
1229 gcond *orig_cond = get_loop_exit_condition (loop);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1230 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
111
kono
parents: 67
diff changeset
1231 unsigned int num_bb = loop->inner? 5 : 2;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1232
111
kono
parents: 67
diff changeset
1233 /* All loops have an outer scope; the only case loop->outer is NULL is for
kono
parents: 67
diff changeset
1234 the function itself. */
kono
parents: 67
diff changeset
1235 if (!loop_outer (loop)
kono
parents: 67
diff changeset
1236 || loop->num_nodes != num_bb
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1237 || !empty_block_p (loop->latch)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1238 || !single_exit (loop)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1239 /* Verify that new loop exit condition can be trivially modified. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1240 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1241 || (e != exit_e && e != entry_e))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1242 return false;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1243
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1244 return true;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1245 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1246
111
kono
parents: 67
diff changeset
1247 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
kono
parents: 67
diff changeset
1248 in the exit bb and rename all the uses after the loop. This simplifies
kono
parents: 67
diff changeset
1249 the *guard[12] routines, which assume loop closed SSA form for all PHIs
kono
parents: 67
diff changeset
1250 (but normally loop closed SSA form doesn't require virtual PHIs to be
kono
parents: 67
diff changeset
1251 in the same form). Doing this early simplifies the checking what
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1252 uses should be renamed.
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1253
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1254 If we create a new phi after the loop, return the definition that
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1255 applies on entry to the loop, otherwise return null. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1256
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1257 static tree
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1258 create_lcssa_for_virtual_phi (class loop *loop)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1259 {
111
kono
parents: 67
diff changeset
1260 gphi_iterator gsi;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1261 edge exit_e = single_exit (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1262
111
kono
parents: 67
diff changeset
1263 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
1264 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
kono
parents: 67
diff changeset
1265 {
kono
parents: 67
diff changeset
1266 gphi *phi = gsi.phi ();
kono
parents: 67
diff changeset
1267 for (gsi = gsi_start_phis (exit_e->dest);
kono
parents: 67
diff changeset
1268 !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
1269 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
kono
parents: 67
diff changeset
1270 break;
kono
parents: 67
diff changeset
1271 if (gsi_end_p (gsi))
kono
parents: 67
diff changeset
1272 {
kono
parents: 67
diff changeset
1273 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
kono
parents: 67
diff changeset
1274 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
kono
parents: 67
diff changeset
1275 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
kono
parents: 67
diff changeset
1276 imm_use_iterator imm_iter;
kono
parents: 67
diff changeset
1277 gimple *stmt;
kono
parents: 67
diff changeset
1278 use_operand_p use_p;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1279
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1280 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1281 = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
111
kono
parents: 67
diff changeset
1282 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
kono
parents: 67
diff changeset
1283 gimple_phi_set_result (new_phi, new_vop);
kono
parents: 67
diff changeset
1284 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
kono
parents: 67
diff changeset
1285 if (stmt != new_phi
kono
parents: 67
diff changeset
1286 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
kono
parents: 67
diff changeset
1287 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
kono
parents: 67
diff changeset
1288 SET_USE (use_p, new_vop);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1289
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1290 return PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
111
kono
parents: 67
diff changeset
1291 }
kono
parents: 67
diff changeset
1292 break;
kono
parents: 67
diff changeset
1293 }
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1294 return NULL_TREE;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1295 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1296
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1297 /* Function vect_get_loop_location.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1298
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1299 Extract the location of the loop in the source code.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1300 If the loop is not well formed for vectorization, an estimated
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1301 location is calculated.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1302 Return the loop location if succeed and NULL if not. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1303
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1304 dump_user_location_t
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1305 find_loop_location (class loop *loop)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1306 {
111
kono
parents: 67
diff changeset
1307 gimple *stmt = NULL;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1308 basic_block bb;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1309 gimple_stmt_iterator si;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1310
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1311 if (!loop)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1312 return dump_user_location_t ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1313
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1314 stmt = get_loop_exit_condition (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1315
111
kono
parents: 67
diff changeset
1316 if (stmt
kono
parents: 67
diff changeset
1317 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1318 return stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1319
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1320 /* If we got here the loop is probably not "well formed",
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1321 try to estimate the loop location */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1322
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1323 if (!loop->header)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1324 return dump_user_location_t ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1325
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1326 bb = loop->header;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1327
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1328 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1329 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1330 stmt = gsi_stmt (si);
111
kono
parents: 67
diff changeset
1331 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1332 return stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1333 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1334
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1335 return dump_user_location_t ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1336 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1337
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1338 /* Return true if the phi described by STMT_INFO defines an IV of the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1339 loop to be vectorized. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1340
111
kono
parents: 67
diff changeset
1341 static bool
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1342 iv_phi_p (stmt_vec_info stmt_info)
111
kono
parents: 67
diff changeset
1343 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1344 gphi *phi = as_a <gphi *> (stmt_info->stmt);
111
kono
parents: 67
diff changeset
1345 if (virtual_operand_p (PHI_RESULT (phi)))
kono
parents: 67
diff changeset
1346 return false;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1347
111
kono
parents: 67
diff changeset
1348 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
kono
parents: 67
diff changeset
1349 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
kono
parents: 67
diff changeset
1350 return false;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1351
111
kono
parents: 67
diff changeset
1352 return true;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1353 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1354
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1355 /* Function vect_can_advance_ivs_p
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1356
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1357 In case the number of iterations that LOOP iterates is unknown at compile
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1358 time, an epilog loop will be generated, and the loop induction variables
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1359 (IVs) will be "advanced" to the value they are supposed to take just before
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1360 the epilog loop. Here we check that the access function of the loop IVs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1361 and the expression that represents the loop bound are simple enough.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1362 These restrictions will be relaxed in the future. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1363
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1364 bool
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1365 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1366 {
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1367 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1368 basic_block bb = loop->header;
111
kono
parents: 67
diff changeset
1369 gphi_iterator gsi;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1370
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1371 /* Analyze phi functions of the loop header. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1372
111
kono
parents: 67
diff changeset
1373 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1374 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1375 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1376 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1377 tree evolution_part;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1378
111
kono
parents: 67
diff changeset
1379 gphi *phi = gsi.phi ();
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1380 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
111
kono
parents: 67
diff changeset
1381 if (dump_enabled_p ())
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1382 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1383 phi_info->stmt);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1384
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1385 /* Skip virtual phi's. The data dependences that are associated with
111
kono
parents: 67
diff changeset
1386 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1387
111
kono
parents: 67
diff changeset
1388 Skip reduction phis. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1389 if (!iv_phi_p (phi_info))
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1390 {
111
kono
parents: 67
diff changeset
1391 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1392 dump_printf_loc (MSG_NOTE, vect_location,
kono
parents: 67
diff changeset
1393 "reduc or virtual phi. skip.\n");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1394 continue;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1395 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1396
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1397 /* Analyze the evolution function. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1398
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1399 evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
111
kono
parents: 67
diff changeset
1400 if (evolution_part == NULL_TREE)
kono
parents: 67
diff changeset
1401 {
kono
parents: 67
diff changeset
1402 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1403 dump_printf (MSG_MISSED_OPTIMIZATION,
kono
parents: 67
diff changeset
1404 "No access function or evolution.\n");
kono
parents: 67
diff changeset
1405 return false;
kono
parents: 67
diff changeset
1406 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1407
111
kono
parents: 67
diff changeset
1408 /* FORNOW: We do not transform initial conditions of IVs
kono
parents: 67
diff changeset
1409 which evolution functions are not invariants in the loop. */
kono
parents: 67
diff changeset
1410
kono
parents: 67
diff changeset
1411 if (!expr_invariant_in_loop_p (loop, evolution_part))
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1412 {
111
kono
parents: 67
diff changeset
1413 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
kono
parents: 67
diff changeset
1415 "evolution not invariant in loop.\n");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1416 return false;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1417 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1418
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1419 /* FORNOW: We do not transform initial conditions of IVs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1420 which evolution functions are a polynomial of degree >= 2. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1421
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1422 if (tree_is_chrec (evolution_part))
111
kono
parents: 67
diff changeset
1423 {
kono
parents: 67
diff changeset
1424 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
kono
parents: 67
diff changeset
1426 "evolution is chrec.\n");
kono
parents: 67
diff changeset
1427 return false;
kono
parents: 67
diff changeset
1428 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1429 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1430
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1431 return true;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1432 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1433
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1434
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1435 /* Function vect_update_ivs_after_vectorizer.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1436
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1437 "Advance" the induction variables of LOOP to the value they should take
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1438 after the execution of LOOP. This is currently necessary because the
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1439 vectorizer does not handle induction variables that are used after the
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1440 loop. Such a situation occurs when the last iterations of LOOP are
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1441 peeled, because:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1442 1. We introduced new uses after LOOP for IVs that were not originally used
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1443 after LOOP: the IVs of LOOP are now used by an epilog loop.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1444 2. LOOP is going to be vectorized; this means that it will iterate N/VF
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1445 times, whereas the loop IVs should be bumped N times.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1446
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1447 Input:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1448 - LOOP - a loop that is going to be vectorized. The last few iterations
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1449 of LOOP were peeled.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1450 - NITERS - the number of iterations that LOOP executes (before it is
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1451 vectorized), i.e., the number of times the ivs should be bumped.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1452 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1453 coming out from LOOP on which there are uses of the LOOP ivs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1454 (this is the path from LOOP->exit to epilog_loop->preheader).
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1455
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1456 The new definitions of the ivs are placed in LOOP->exit.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1457 The phi args associated with the edge UPDATE_E in the bb
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1458 UPDATE_E->dest are updated accordingly.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1459
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1460 Assumption 1: Like the rest of the vectorizer, this function assumes
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1461 a single loop exit that has a single predecessor.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1462
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1463 Assumption 2: The phi nodes in the LOOP header and in update_bb are
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1464 organized in the same order.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1465
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1466 Assumption 3: The access function of the ivs is simple enough (see
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1467 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1468
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1469 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1470 coming out of LOOP on which the ivs of LOOP are used (this is the path
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1471 that leads to the epilog loop; other paths skip the epilog loop). This
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1472 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1473 needs to have its phis updated.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1474 */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1475
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1476 static void
111
kono
parents: 67
diff changeset
1477 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
kono
parents: 67
diff changeset
1478 tree niters, edge update_e)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1479 {
111
kono
parents: 67
diff changeset
1480 gphi_iterator gsi, gsi1;
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1481 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
111
kono
parents: 67
diff changeset
1482 basic_block update_bb = update_e->dest;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1483 basic_block exit_bb = single_exit (loop)->dest;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1484
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1485 /* Make sure there exists a single-predecessor exit bb: */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1486 gcc_assert (single_pred_p (exit_bb));
111
kono
parents: 67
diff changeset
1487 gcc_assert (single_succ_edge (exit_bb) == update_e);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1488
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1489 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1490 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1491 gsi_next (&gsi), gsi_next (&gsi1))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1492 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1493 tree init_expr;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1494 tree step_expr, off;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1495 tree type;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1496 tree var, ni, ni_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1497 gimple_stmt_iterator last_gsi;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1498
111
kono
parents: 67
diff changeset
1499 gphi *phi = gsi.phi ();
kono
parents: 67
diff changeset
1500 gphi *phi1 = gsi1.phi ();
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1501 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
111
kono
parents: 67
diff changeset
1502 if (dump_enabled_p ())
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1503 dump_printf_loc (MSG_NOTE, vect_location,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1504 "vect_update_ivs_after_vectorizer: phi: %G", phi);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1505
111
kono
parents: 67
diff changeset
1506 /* Skip reduction and virtual phis. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1507 if (!iv_phi_p (phi_info))
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1508 {
111
kono
parents: 67
diff changeset
1509 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1510 dump_printf_loc (MSG_NOTE, vect_location,
kono
parents: 67
diff changeset
1511 "reduc or virtual phi. skip.\n");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1512 continue;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1513 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1514
111
kono
parents: 67
diff changeset
1515 type = TREE_TYPE (gimple_phi_result (phi));
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1516 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
111
kono
parents: 67
diff changeset
1517 step_expr = unshare_expr (step_expr);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1518
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1519 /* FORNOW: We do not support IVs whose evolution function is a polynomial
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1520 of degree >= 2 or exponential. */
111
kono
parents: 67
diff changeset
1521 gcc_assert (!tree_is_chrec (step_expr));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1522
111
kono
parents: 67
diff changeset
1523 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1524
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1525 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1526 fold_convert (TREE_TYPE (step_expr), niters),
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1527 step_expr);
111
kono
parents: 67
diff changeset
1528 if (POINTER_TYPE_P (type))
kono
parents: 67
diff changeset
1529 ni = fold_build_pointer_plus (init_expr, off);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1530 else
111
kono
parents: 67
diff changeset
1531 ni = fold_build2 (PLUS_EXPR, type,
kono
parents: 67
diff changeset
1532 init_expr, fold_convert (type, off));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1533
111
kono
parents: 67
diff changeset
1534 var = create_tmp_var (type, "tmp");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1535
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1536 last_gsi = gsi_last_bb (exit_bb);
111
kono
parents: 67
diff changeset
1537 gimple_seq new_stmts = NULL;
kono
parents: 67
diff changeset
1538 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
kono
parents: 67
diff changeset
1539 /* Exit_bb shouldn't be empty. */
kono
parents: 67
diff changeset
1540 if (!gsi_end_p (last_gsi))
kono
parents: 67
diff changeset
1541 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
kono
parents: 67
diff changeset
1542 else
kono
parents: 67
diff changeset
1543 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1544
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1545 /* Fix phi expressions in the successor bb. */
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1546 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1547 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1548 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1549
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1550 /* Return a gimple value containing the misalignment (measured in vector
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1551 elements) for the loop described by LOOP_VINFO, i.e. how many elements
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1552 it is away from a perfectly aligned address. Add any new statements
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1553 to SEQ. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1554
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1555 static tree
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1556 get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1557 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1558 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1559 stmt_vec_info stmt_info = dr_info->stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1560 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1561
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1562 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1563 unsigned HOST_WIDE_INT target_align_c;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1564 tree target_align_minus_1;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1565
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1566 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1567 size_zero_node) < 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1568 tree offset = (negative
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1569 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1570 : size_zero_node);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1571 tree start_addr = vect_create_addr_base_for_vector_ref (stmt_info, seq,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1572 offset);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1573 tree type = unsigned_type_for (TREE_TYPE (start_addr));
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1574 if (target_align.is_constant (&target_align_c))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1575 target_align_minus_1 = build_int_cst (type, target_align_c - 1);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1576 else
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1577 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1578 tree vla = build_int_cst (type, target_align);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1579 tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1580 fold_build2 (MINUS_EXPR, type,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1581 build_int_cst (type, 0), vla));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1582 target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1583 build_int_cst (type, 1));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1584 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1585
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1586 HOST_WIDE_INT elem_size
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1587 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1588 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1589
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1590 /* Create: misalign_in_bytes = addr & (target_align - 1). */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1591 tree int_start_addr = fold_convert (type, start_addr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1592 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1593 target_align_minus_1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1594
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1595 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1596 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1597 elem_size_log);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1598
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1599 return misalign_in_elems;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1600 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1601
111
kono
parents: 67
diff changeset
1602 /* Function vect_gen_prolog_loop_niters
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1603
111
kono
parents: 67
diff changeset
1604 Generate the number of iterations which should be peeled as prolog for the
kono
parents: 67
diff changeset
1605 loop represented by LOOP_VINFO. It is calculated as the misalignment of
kono
parents: 67
diff changeset
1606 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
kono
parents: 67
diff changeset
1607 As a result, after the execution of this loop, the data reference DR will
kono
parents: 67
diff changeset
1608 refer to an aligned location. The following computation is generated:
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1609
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1610 If the misalignment of DR is known at compile time:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1611 addr_mis = int mis = DR_MISALIGNMENT (dr);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1612 Else, compute address misalignment in bytes:
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1613 addr_mis = addr & (target_align - 1)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1614
111
kono
parents: 67
diff changeset
1615 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1616
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1617 (elem_size = element type size; an element is the scalar element whose type
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1618 is the inner type of the vectype)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1619
111
kono
parents: 67
diff changeset
1620 The computations will be emitted at the end of BB. We also compute and
kono
parents: 67
diff changeset
1621 store upper bound (included) of the result in BOUND.
kono
parents: 67
diff changeset
1622
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1623 When the step of the data-ref in the loop is not 1 (as in interleaved data
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1624 and SLP), the number of iterations of the prolog must be divided by the step
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1625 (which is equal to the size of interleaved group).
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1626
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1627 The above formulas assume that VF == number of elements in the vector. This
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1628 may not hold when there are multiple-types in the loop.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1629 In this case, for some data-references in the loop the VF does not represent
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1630 the number of elements that fit in the vector. Therefore, instead of VF we
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1631 use TYPE_VECTOR_SUBPARTS. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1632
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1633 static tree
111
kono
parents: 67
diff changeset
1634 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
kono
parents: 67
diff changeset
1635 basic_block bb, int *bound)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1636 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1637 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1638 tree var;
111
kono
parents: 67
diff changeset
1639 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
kono
parents: 67
diff changeset
1640 gimple_seq stmts = NULL, new_stmts = NULL;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1641 tree iters, iters_name;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1642 stmt_vec_info stmt_info = dr_info->stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1643 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1644 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1645
111
kono
parents: 67
diff changeset
1646 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1647 {
111
kono
parents: 67
diff changeset
1648 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1649
111
kono
parents: 67
diff changeset
1650 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1651 dump_printf_loc (MSG_NOTE, vect_location,
kono
parents: 67
diff changeset
1652 "known peeling = %d.\n", npeel);
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1653
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1654 iters = build_int_cst (niters_type, npeel);
111
kono
parents: 67
diff changeset
1655 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1656 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1657 else
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1658 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1659 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1660 tree type = TREE_TYPE (misalign_in_elems);
111
kono
parents: 67
diff changeset
1661 HOST_WIDE_INT elem_size
kono
parents: 67
diff changeset
1662 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1663 /* We only do prolog peeling if the target alignment is known at compile
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1664 time. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1665 poly_uint64 align_in_elems =
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1666 exact_div (target_align, elem_size);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1667 tree align_in_elems_minus_1 =
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1668 build_int_cst (type, align_in_elems - 1);
111
kono
parents: 67
diff changeset
1669 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1670
111
kono
parents: 67
diff changeset
1671 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
kono
parents: 67
diff changeset
1672 & (align_in_elems - 1)). */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1673 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1674 size_zero_node) < 0;
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1675 if (negative)
111
kono
parents: 67
diff changeset
1676 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
kono
parents: 67
diff changeset
1677 align_in_elems_tree);
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1678 else
111
kono
parents: 67
diff changeset
1679 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
kono
parents: 67
diff changeset
1680 misalign_in_elems);
kono
parents: 67
diff changeset
1681 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1682 iters = fold_convert (niters_type, iters);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1683 unsigned HOST_WIDE_INT align_in_elems_c;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1684 if (align_in_elems.is_constant (&align_in_elems_c))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1685 *bound = align_in_elems_c - 1;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1686 else
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1687 *bound = -1;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1688 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1689
111
kono
parents: 67
diff changeset
1690 if (dump_enabled_p ())
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1691 dump_printf_loc (MSG_NOTE, vect_location,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1692 "niters for prolog loop: %T\n", iters);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1693
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1694 var = create_tmp_var (niters_type, "prolog_loop_niters");
111
kono
parents: 67
diff changeset
1695 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1696
111
kono
parents: 67
diff changeset
1697 if (new_stmts)
kono
parents: 67
diff changeset
1698 gimple_seq_add_seq (&stmts, new_stmts);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1699 if (stmts)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1700 {
111
kono
parents: 67
diff changeset
1701 gcc_assert (single_succ_p (bb));
kono
parents: 67
diff changeset
1702 gimple_stmt_iterator gsi = gsi_last_bb (bb);
kono
parents: 67
diff changeset
1703 if (gsi_end_p (gsi))
kono
parents: 67
diff changeset
1704 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
kono
parents: 67
diff changeset
1705 else
kono
parents: 67
diff changeset
1706 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1707 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1708 return iters_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1709 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1710
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1711
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1712 /* Function vect_update_init_of_dr
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1713
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1714 If CODE is PLUS, the vector loop starts NITERS iterations after the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1715 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1716 iterations before the scalar one (using masking to skip inactive
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1717 elements). This function updates the information recorded in DR to
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1718 account for the difference. Specifically, it updates the OFFSET
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1719 field of DR_INFO. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1720
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1721 static void
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1722 vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1723 {
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1724 struct data_reference *dr = dr_info->dr;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1725 tree offset = dr_info->offset;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1726 if (!offset)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1727 offset = build_zero_cst (sizetype);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1728
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1729 niters = fold_build2 (MULT_EXPR, sizetype,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1730 fold_convert (sizetype, niters),
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1731 fold_convert (sizetype, DR_STEP (dr)));
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1732 offset = fold_build2 (code, sizetype,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1733 fold_convert (sizetype, offset), niters);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1734 dr_info->offset = offset;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1735 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1736
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1737
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1738 /* Function vect_update_inits_of_drs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1739
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1740 Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1741 CODE and NITERS are as for vect_update_inits_of_dr. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1742
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1743 void
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1744 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1745 tree_code code)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1746 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1747 unsigned int i;
111
kono
parents: 67
diff changeset
1748 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1749 struct data_reference *dr;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1750
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1751 DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
111
kono
parents: 67
diff changeset
1752
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1753 /* Adjust niters to sizetype. We used to insert the stmts on loop preheader
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1754 here, but since we might use these niters to update the epilogues niters
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1755 and data references we can't insert them here as this definition might not
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1756 always dominate its uses. */
111
kono
parents: 67
diff changeset
1757 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1758 niters = fold_convert (sizetype, niters);
111
kono
parents: 67
diff changeset
1759
kono
parents: 67
diff changeset
1760 FOR_EACH_VEC_ELT (datarefs, i, dr)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1761 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1762 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1763 if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt))
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1764 vect_update_init_of_dr (dr_info, niters, code);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1765 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1766 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1767
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1768 /* For the information recorded in LOOP_VINFO prepare the loop for peeling
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1769 by masking. This involves calculating the number of iterations to
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1770 be peeled and then aligning all memory references appropriately. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1771
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1772 void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1773 vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1774 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1775 tree misalign_in_elems;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1776 tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1777
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1778 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1779
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1780 /* From the information recorded in LOOP_VINFO get the number of iterations
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1781 that need to be skipped via masking. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1782 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1783 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1784 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1785 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1786 misalign_in_elems = build_int_cst (type, misalign);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1787 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1788 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1789 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1790 gimple_seq seq1 = NULL, seq2 = NULL;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1791 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1792 misalign_in_elems = fold_convert (type, misalign_in_elems);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1793 misalign_in_elems = force_gimple_operand (misalign_in_elems,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1794 &seq2, true, NULL_TREE);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1795 gimple_seq_add_seq (&seq1, seq2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1796 if (seq1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1797 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1798 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1799 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1800 gcc_assert (!new_bb);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1801 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1802 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1803
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1804 if (dump_enabled_p ())
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1805 dump_printf_loc (MSG_NOTE, vect_location,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1806 "misalignment for fully-masked loop: %T\n",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1807 misalign_in_elems);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1808
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1809 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1810
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1811 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1812 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1813
111
kono
parents: 67
diff changeset
1814 /* This function builds ni_name = number of iterations. Statements
kono
parents: 67
diff changeset
1815 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
kono
parents: 67
diff changeset
1816 it to TRUE if new ssa_var is generated. */
kono
parents: 67
diff changeset
1817
kono
parents: 67
diff changeset
1818 tree
kono
parents: 67
diff changeset
1819 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
kono
parents: 67
diff changeset
1820 {
kono
parents: 67
diff changeset
1821 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
kono
parents: 67
diff changeset
1822 if (TREE_CODE (ni) == INTEGER_CST)
kono
parents: 67
diff changeset
1823 return ni;
kono
parents: 67
diff changeset
1824 else
kono
parents: 67
diff changeset
1825 {
kono
parents: 67
diff changeset
1826 tree ni_name, var;
kono
parents: 67
diff changeset
1827 gimple_seq stmts = NULL;
kono
parents: 67
diff changeset
1828 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
kono
parents: 67
diff changeset
1829
kono
parents: 67
diff changeset
1830 var = create_tmp_var (TREE_TYPE (ni), "niters");
kono
parents: 67
diff changeset
1831 ni_name = force_gimple_operand (ni, &stmts, false, var);
kono
parents: 67
diff changeset
1832 if (stmts)
kono
parents: 67
diff changeset
1833 {
kono
parents: 67
diff changeset
1834 gsi_insert_seq_on_edge_immediate (pe, stmts);
kono
parents: 67
diff changeset
1835 if (new_var_p != NULL)
kono
parents: 67
diff changeset
1836 *new_var_p = true;
kono
parents: 67
diff changeset
1837 }
kono
parents: 67
diff changeset
1838
kono
parents: 67
diff changeset
1839 return ni_name;
kono
parents: 67
diff changeset
1840 }
kono
parents: 67
diff changeset
1841 }
kono
parents: 67
diff changeset
1842
kono
parents: 67
diff changeset
1843 /* Calculate the number of iterations above which vectorized loop will be
kono
parents: 67
diff changeset
1844 preferred than scalar loop. NITERS_PROLOG is the number of iterations
kono
parents: 67
diff changeset
1845 of prolog loop. If it's integer const, the integer number is also passed
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1846 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1847 number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1848 value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1849 threshold below which the scalar (rather than vectorized) loop will be
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1850 executed. This function stores the upper bound (inclusive) of the result
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1851 in BOUND_SCALAR. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1852
111
kono
parents: 67
diff changeset
1853 static tree
kono
parents: 67
diff changeset
1854 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1855 int bound_prolog, poly_int64 bound_epilog, int th,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1856 poly_uint64 *bound_scalar,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1857 bool check_profitability)
111
kono
parents: 67
diff changeset
1858 {
kono
parents: 67
diff changeset
1859 tree type = TREE_TYPE (niters_prolog);
kono
parents: 67
diff changeset
1860 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1861 build_int_cst (type, bound_epilog));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1862
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1863 *bound_scalar = bound_prolog + bound_epilog;
111
kono
parents: 67
diff changeset
1864 if (check_profitability)
kono
parents: 67
diff changeset
1865 {
kono
parents: 67
diff changeset
1866 /* TH indicates the minimum niters of vectorized loop, while we
kono
parents: 67
diff changeset
1867 compute the maximum niters of scalar loop. */
kono
parents: 67
diff changeset
1868 th--;
kono
parents: 67
diff changeset
1869 /* Peeling for constant times. */
kono
parents: 67
diff changeset
1870 if (int_niters_prolog >= 0)
kono
parents: 67
diff changeset
1871 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1872 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
111
kono
parents: 67
diff changeset
1873 return build_int_cst (type, *bound_scalar);
kono
parents: 67
diff changeset
1874 }
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1875 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1876 and BOUND_EPILOG are inclusive upper bounds. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1877 if (known_ge (th, bound_prolog + bound_epilog))
111
kono
parents: 67
diff changeset
1878 {
kono
parents: 67
diff changeset
1879 *bound_scalar = th;
kono
parents: 67
diff changeset
1880 return build_int_cst (type, th);
kono
parents: 67
diff changeset
1881 }
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1882 /* Need to do runtime comparison. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1883 else if (maybe_gt (th, bound_epilog))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1884 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1885 *bound_scalar = upper_bound (*bound_scalar, th);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1886 return fold_build2 (MAX_EXPR, type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1887 build_int_cst (type, th), niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1888 }
111
kono
parents: 67
diff changeset
1889 }
kono
parents: 67
diff changeset
1890 return niters;
kono
parents: 67
diff changeset
1891 }
kono
parents: 67
diff changeset
1892
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1893 /* NITERS is the number of times that the original scalar loop executes
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1894 after peeling. Work out the maximum number of iterations N that can
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1895 be handled by the vectorized form of the loop and then either:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1896
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1897 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1898
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1899 niters_vector = N
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1900
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1901 b) set *STEP_VECTOR_PTR to one and generate:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1902
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1903 niters_vector = N / vf
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1904
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1905 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1906 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1907 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1908
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1909 void
111
kono
parents: 67
diff changeset
1910 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1911 tree *niters_vector_ptr, tree *step_vector_ptr,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1912 bool niters_no_overflow)
111
kono
parents: 67
diff changeset
1913 {
kono
parents: 67
diff changeset
1914 tree ni_minus_gap, var;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1915 tree niters_vector, step_vector, type = TREE_TYPE (niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1916 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
111
kono
parents: 67
diff changeset
1917 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1918 tree log_vf = NULL_TREE;
111
kono
parents: 67
diff changeset
1919
kono
parents: 67
diff changeset
1920 /* If epilogue loop is required because of data accesses with gaps, we
kono
parents: 67
diff changeset
1921 subtract one iteration from the total number of iterations here for
kono
parents: 67
diff changeset
1922 correct calculation of RATIO. */
kono
parents: 67
diff changeset
1923 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
kono
parents: 67
diff changeset
1924 {
kono
parents: 67
diff changeset
1925 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
kono
parents: 67
diff changeset
1926 build_one_cst (type));
kono
parents: 67
diff changeset
1927 if (!is_gimple_val (ni_minus_gap))
kono
parents: 67
diff changeset
1928 {
kono
parents: 67
diff changeset
1929 var = create_tmp_var (type, "ni_gap");
kono
parents: 67
diff changeset
1930 gimple *stmts = NULL;
kono
parents: 67
diff changeset
1931 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
kono
parents: 67
diff changeset
1932 true, var);
kono
parents: 67
diff changeset
1933 gsi_insert_seq_on_edge_immediate (pe, stmts);
kono
parents: 67
diff changeset
1934 }
kono
parents: 67
diff changeset
1935 }
kono
parents: 67
diff changeset
1936 else
kono
parents: 67
diff changeset
1937 ni_minus_gap = niters;
kono
parents: 67
diff changeset
1938
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1939 unsigned HOST_WIDE_INT const_vf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1940 if (vf.is_constant (&const_vf)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1941 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1942 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1943 /* Create: niters >> log2(vf) */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1944 /* If it's known that niters == number of latch executions + 1 doesn't
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1945 overflow, we can generate niters >> log2(vf); otherwise we generate
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1946 ((niters - vf) >> log2(vf)) + 1 by using the fact that we know ratio
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1947 will be at least one. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1948 log_vf = build_int_cst (type, exact_log2 (const_vf));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1949 if (niters_no_overflow)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1950 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1951 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1952 niters_vector
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1953 = fold_build2 (PLUS_EXPR, type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1954 fold_build2 (RSHIFT_EXPR, type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1955 fold_build2 (MINUS_EXPR, type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1956 ni_minus_gap,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1957 build_int_cst (type, vf)),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1958 log_vf),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1959 build_int_cst (type, 1));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1960 step_vector = build_one_cst (type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1961 }
111
kono
parents: 67
diff changeset
1962 else
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1963 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1964 niters_vector = ni_minus_gap;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1965 step_vector = build_int_cst (type, vf);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1966 }
111
kono
parents: 67
diff changeset
1967
kono
parents: 67
diff changeset
1968 if (!is_gimple_val (niters_vector))
kono
parents: 67
diff changeset
1969 {
kono
parents: 67
diff changeset
1970 var = create_tmp_var (type, "bnd");
kono
parents: 67
diff changeset
1971 gimple_seq stmts = NULL;
kono
parents: 67
diff changeset
1972 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
kono
parents: 67
diff changeset
1973 gsi_insert_seq_on_edge_immediate (pe, stmts);
kono
parents: 67
diff changeset
1974 /* Peeling algorithm guarantees that vector loop bound is at least ONE,
kono
parents: 67
diff changeset
1975 we set range information to make niters analyzer's life easier. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1976 if (stmts != NULL && log_vf)
111
kono
parents: 67
diff changeset
1977 set_range_info (niters_vector, VR_RANGE,
kono
parents: 67
diff changeset
1978 wi::to_wide (build_int_cst (type, 1)),
kono
parents: 67
diff changeset
1979 wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
kono
parents: 67
diff changeset
1980 TYPE_MAX_VALUE (type),
kono
parents: 67
diff changeset
1981 log_vf)));
kono
parents: 67
diff changeset
1982 }
kono
parents: 67
diff changeset
1983 *niters_vector_ptr = niters_vector;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1984 *step_vector_ptr = step_vector;
111
kono
parents: 67
diff changeset
1985
kono
parents: 67
diff changeset
1986 return;
kono
parents: 67
diff changeset
1987 }
kono
parents: 67
diff changeset
1988
kono
parents: 67
diff changeset
1989 /* Given NITERS_VECTOR which is the number of iterations for vectorized
kono
parents: 67
diff changeset
1990 loop specified by LOOP_VINFO after vectorization, compute the number
kono
parents: 67
diff changeset
1991 of iterations before vectorization (niters_vector * vf) and store it
kono
parents: 67
diff changeset
1992 to NITERS_VECTOR_MULT_VF_PTR. */
kono
parents: 67
diff changeset
1993
kono
parents: 67
diff changeset
1994 static void
kono
parents: 67
diff changeset
1995 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
kono
parents: 67
diff changeset
1996 tree niters_vector,
kono
parents: 67
diff changeset
1997 tree *niters_vector_mult_vf_ptr)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1998 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1999 /* We should be using a step_vector of VF if VF is variable. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2000 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2001 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
111
kono
parents: 67
diff changeset
2002 tree type = TREE_TYPE (niters_vector);
kono
parents: 67
diff changeset
2003 tree log_vf = build_int_cst (type, exact_log2 (vf));
kono
parents: 67
diff changeset
2004 basic_block exit_bb = single_exit (loop)->dest;
kono
parents: 67
diff changeset
2005
kono
parents: 67
diff changeset
2006 gcc_assert (niters_vector_mult_vf_ptr != NULL);
kono
parents: 67
diff changeset
2007 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
kono
parents: 67
diff changeset
2008 niters_vector, log_vf);
kono
parents: 67
diff changeset
2009 if (!is_gimple_val (niters_vector_mult_vf))
kono
parents: 67
diff changeset
2010 {
kono
parents: 67
diff changeset
2011 tree var = create_tmp_var (type, "niters_vector_mult_vf");
kono
parents: 67
diff changeset
2012 gimple_seq stmts = NULL;
kono
parents: 67
diff changeset
2013 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
kono
parents: 67
diff changeset
2014 &stmts, true, var);
kono
parents: 67
diff changeset
2015 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
kono
parents: 67
diff changeset
2016 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
kono
parents: 67
diff changeset
2017 }
kono
parents: 67
diff changeset
2018 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
kono
parents: 67
diff changeset
2019 }
kono
parents: 67
diff changeset
2020
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2021 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2022 this function searches for the corresponding lcssa phi node in exit
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2023 bb of LOOP. If it is found, return the phi result; otherwise return
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2024 NULL. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2025
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2026 static tree
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2027 find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2028 gphi *lcssa_phi)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2029 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2030 gphi_iterator gsi;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2031 edge e = single_exit (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2032
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2033 gcc_assert (single_pred_p (e->dest));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2034 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2035 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2036 gphi *phi = gsi.phi ();
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2037 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2038 PHI_ARG_DEF (lcssa_phi, 0), 0))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2039 return PHI_RESULT (phi);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2040 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2041 return NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2042 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2043
111
kono
parents: 67
diff changeset
2044 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
kono
parents: 67
diff changeset
2045 from SECOND/FIRST and puts it at the original loop's preheader/exit
kono
parents: 67
diff changeset
2046 edge, the two loops are arranged as below:
kono
parents: 67
diff changeset
2047
kono
parents: 67
diff changeset
2048 preheader_a:
kono
parents: 67
diff changeset
2049 first_loop:
kono
parents: 67
diff changeset
2050 header_a:
kono
parents: 67
diff changeset
2051 i_1 = PHI<i_0, i_2>;
kono
parents: 67
diff changeset
2052 ...
kono
parents: 67
diff changeset
2053 i_2 = i_1 + 1;
kono
parents: 67
diff changeset
2054 if (cond_a)
kono
parents: 67
diff changeset
2055 goto latch_a;
kono
parents: 67
diff changeset
2056 else
kono
parents: 67
diff changeset
2057 goto between_bb;
kono
parents: 67
diff changeset
2058 latch_a:
kono
parents: 67
diff changeset
2059 goto header_a;
kono
parents: 67
diff changeset
2060
kono
parents: 67
diff changeset
2061 between_bb:
kono
parents: 67
diff changeset
2062 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
kono
parents: 67
diff changeset
2063
kono
parents: 67
diff changeset
2064 second_loop:
kono
parents: 67
diff changeset
2065 header_b:
kono
parents: 67
diff changeset
2066 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
kono
parents: 67
diff changeset
2067 or with i_2 if no LCSSA phi is created
kono
parents: 67
diff changeset
2068 under condition of CREATE_LCSSA_FOR_IV_PHIS.
kono
parents: 67
diff changeset
2069 ...
kono
parents: 67
diff changeset
2070 i_4 = i_3 + 1;
kono
parents: 67
diff changeset
2071 if (cond_b)
kono
parents: 67
diff changeset
2072 goto latch_b;
kono
parents: 67
diff changeset
2073 else
kono
parents: 67
diff changeset
2074 goto exit_bb;
kono
parents: 67
diff changeset
2075 latch_b:
kono
parents: 67
diff changeset
2076 goto header_b;
kono
parents: 67
diff changeset
2077
kono
parents: 67
diff changeset
2078 exit_bb:
kono
parents: 67
diff changeset
2079
kono
parents: 67
diff changeset
2080 This function creates loop closed SSA for the first loop and updates the
kono
parents: 67
diff changeset
2081 second loop's PHI nodes by replacing argument on incoming edge with the
kono
parents: 67
diff changeset
2082 result of newly created lcssa PHI nodes. If CREATE_LCSSA_FOR_IV_PHIS
kono
parents: 67
diff changeset
2083 is false, loop closed ssa phis will only be created for non-iv phis for
kono
parents: 67
diff changeset
2084 the first loop.
kono
parents: 67
diff changeset
2085
kono
parents: 67
diff changeset
2086 This function assumes exit bb of the first loop is preheader bb of the
kono
parents: 67
diff changeset
2087 second loop, i.e., between_bb in the example code. With PHIs updated,
kono
parents: 67
diff changeset
2088 the second loop will execute the remaining iterations of the first. */
kono
parents: 67
diff changeset
2089
kono
parents: 67
diff changeset
2090 static void
kono
parents: 67
diff changeset
2091 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2092 class loop *first, class loop *second,
111
kono
parents: 67
diff changeset
2093 bool create_lcssa_for_iv_phis)
kono
parents: 67
diff changeset
2094 {
kono
parents: 67
diff changeset
2095 gphi_iterator gsi_update, gsi_orig;
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2096 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
111
kono
parents: 67
diff changeset
2097
kono
parents: 67
diff changeset
2098 edge first_latch_e = EDGE_SUCC (first->latch, 0);
kono
parents: 67
diff changeset
2099 edge second_preheader_e = loop_preheader_edge (second);
kono
parents: 67
diff changeset
2100 basic_block between_bb = single_exit (first)->dest;
kono
parents: 67
diff changeset
2101
kono
parents: 67
diff changeset
2102 gcc_assert (between_bb == second_preheader_e->src);
kono
parents: 67
diff changeset
2103 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
kono
parents: 67
diff changeset
2104 /* Either the first loop or the second is the loop to be vectorized. */
kono
parents: 67
diff changeset
2105 gcc_assert (loop == first || loop == second);
kono
parents: 67
diff changeset
2106
kono
parents: 67
diff changeset
2107 for (gsi_orig = gsi_start_phis (first->header),
kono
parents: 67
diff changeset
2108 gsi_update = gsi_start_phis (second->header);
kono
parents: 67
diff changeset
2109 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
kono
parents: 67
diff changeset
2110 gsi_next (&gsi_orig), gsi_next (&gsi_update))
kono
parents: 67
diff changeset
2111 {
kono
parents: 67
diff changeset
2112 gphi *orig_phi = gsi_orig.phi ();
kono
parents: 67
diff changeset
2113 gphi *update_phi = gsi_update.phi ();
kono
parents: 67
diff changeset
2114
kono
parents: 67
diff changeset
2115 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
kono
parents: 67
diff changeset
2116 /* Generate lcssa PHI node for the first loop. */
kono
parents: 67
diff changeset
2117 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2118 stmt_vec_info vect_phi_info = loop_vinfo->lookup_stmt (vect_phi);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2119 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi_info))
111
kono
parents: 67
diff changeset
2120 {
kono
parents: 67
diff changeset
2121 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
kono
parents: 67
diff changeset
2122 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
kono
parents: 67
diff changeset
2123 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
kono
parents: 67
diff changeset
2124 arg = new_res;
kono
parents: 67
diff changeset
2125 }
kono
parents: 67
diff changeset
2126
kono
parents: 67
diff changeset
2127 /* Update PHI node in the second loop by replacing arg on the loop's
kono
parents: 67
diff changeset
2128 incoming edge. */
kono
parents: 67
diff changeset
2129 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
kono
parents: 67
diff changeset
2130 }
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2131
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2132 /* For epilogue peeling we have to make sure to copy all LC PHIs
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2133 for correct vectorization of live stmts. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2134 if (loop == first)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2135 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2136 basic_block orig_exit = single_exit (second)->dest;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2137 for (gsi_orig = gsi_start_phis (orig_exit);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2138 !gsi_end_p (gsi_orig); gsi_next (&gsi_orig))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2139 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2140 gphi *orig_phi = gsi_orig.phi ();
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2141 tree orig_arg = PHI_ARG_DEF (orig_phi, 0);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2142 if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p (orig_arg))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2143 continue;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2144
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2145 /* Already created in the above loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2146 if (find_guard_arg (first, second, orig_phi))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2147 continue;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2148
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2149 tree new_res = copy_ssa_name (orig_arg);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2150 gphi *lcphi = create_phi_node (new_res, between_bb);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2151 add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2152 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2153 }
111
kono
parents: 67
diff changeset
2154 }
kono
parents: 67
diff changeset
2155
kono
parents: 67
diff changeset
2156 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
kono
parents: 67
diff changeset
2157 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
kono
parents: 67
diff changeset
2158 are two pred edges of the merge point before UPDATE_LOOP. The two loops
kono
parents: 67
diff changeset
2159 appear like below:
kono
parents: 67
diff changeset
2160
kono
parents: 67
diff changeset
2161 guard_bb:
kono
parents: 67
diff changeset
2162 if (cond)
kono
parents: 67
diff changeset
2163 goto merge_bb;
kono
parents: 67
diff changeset
2164 else
kono
parents: 67
diff changeset
2165 goto skip_loop;
kono
parents: 67
diff changeset
2166
kono
parents: 67
diff changeset
2167 skip_loop:
kono
parents: 67
diff changeset
2168 header_a:
kono
parents: 67
diff changeset
2169 i_1 = PHI<i_0, i_2>;
kono
parents: 67
diff changeset
2170 ...
kono
parents: 67
diff changeset
2171 i_2 = i_1 + 1;
kono
parents: 67
diff changeset
2172 if (cond_a)
kono
parents: 67
diff changeset
2173 goto latch_a;
kono
parents: 67
diff changeset
2174 else
kono
parents: 67
diff changeset
2175 goto exit_a;
kono
parents: 67
diff changeset
2176 latch_a:
kono
parents: 67
diff changeset
2177 goto header_a;
kono
parents: 67
diff changeset
2178
kono
parents: 67
diff changeset
2179 exit_a:
kono
parents: 67
diff changeset
2180 i_5 = PHI<i_2>;
kono
parents: 67
diff changeset
2181
kono
parents: 67
diff changeset
2182 merge_bb:
kono
parents: 67
diff changeset
2183 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
kono
parents: 67
diff changeset
2184
kono
parents: 67
diff changeset
2185 update_loop:
kono
parents: 67
diff changeset
2186 header_b:
kono
parents: 67
diff changeset
2187 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
kono
parents: 67
diff changeset
2188 ...
kono
parents: 67
diff changeset
2189 i_4 = i_3 + 1;
kono
parents: 67
diff changeset
2190 if (cond_b)
kono
parents: 67
diff changeset
2191 goto latch_b;
kono
parents: 67
diff changeset
2192 else
kono
parents: 67
diff changeset
2193 goto exit_bb;
kono
parents: 67
diff changeset
2194 latch_b:
kono
parents: 67
diff changeset
2195 goto header_b;
kono
parents: 67
diff changeset
2196
kono
parents: 67
diff changeset
2197 exit_bb:
kono
parents: 67
diff changeset
2198
kono
parents: 67
diff changeset
2199 This function creates PHI nodes at merge_bb and replaces the use of i_5
kono
parents: 67
diff changeset
2200 in the update_loop's PHI node with the result of the new PHI. */
kono
parents: 67
diff changeset
2201
kono
parents: 67
diff changeset
2202 static void
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2203 slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2204 class loop *update_loop,
111
kono
parents: 67
diff changeset
2205 edge guard_edge, edge merge_edge)
kono
parents: 67
diff changeset
2206 {
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2207 location_t merge_loc, guard_loc;
111
kono
parents: 67
diff changeset
2208 edge orig_e = loop_preheader_edge (skip_loop);
kono
parents: 67
diff changeset
2209 edge update_e = loop_preheader_edge (update_loop);
kono
parents: 67
diff changeset
2210 gphi_iterator gsi_orig, gsi_update;
kono
parents: 67
diff changeset
2211
kono
parents: 67
diff changeset
2212 for ((gsi_orig = gsi_start_phis (skip_loop->header),
kono
parents: 67
diff changeset
2213 gsi_update = gsi_start_phis (update_loop->header));
kono
parents: 67
diff changeset
2214 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
kono
parents: 67
diff changeset
2215 gsi_next (&gsi_orig), gsi_next (&gsi_update))
kono
parents: 67
diff changeset
2216 {
kono
parents: 67
diff changeset
2217 gphi *orig_phi = gsi_orig.phi ();
kono
parents: 67
diff changeset
2218 gphi *update_phi = gsi_update.phi ();
kono
parents: 67
diff changeset
2219
kono
parents: 67
diff changeset
2220 /* Generate new phi node at merge bb of the guard. */
kono
parents: 67
diff changeset
2221 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
kono
parents: 67
diff changeset
2222 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
kono
parents: 67
diff changeset
2223
kono
parents: 67
diff changeset
2224 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
kono
parents: 67
diff changeset
2225 args in NEW_PHI for these edges. */
kono
parents: 67
diff changeset
2226 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
kono
parents: 67
diff changeset
2227 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
kono
parents: 67
diff changeset
2228 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
kono
parents: 67
diff changeset
2229 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
kono
parents: 67
diff changeset
2230 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
kono
parents: 67
diff changeset
2231 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
kono
parents: 67
diff changeset
2232
kono
parents: 67
diff changeset
2233 /* Update UPDATE_PHI's preheader-edge arg to the new phi result. */
kono
parents: 67
diff changeset
2234 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
kono
parents: 67
diff changeset
2235 }
kono
parents: 67
diff changeset
2236 }
kono
parents: 67
diff changeset
2237
kono
parents: 67
diff changeset
2238 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
kono
parents: 67
diff changeset
2239 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
kono
parents: 67
diff changeset
2240 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
kono
parents: 67
diff changeset
2241 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
kono
parents: 67
diff changeset
2242 The CFG looks like:
kono
parents: 67
diff changeset
2243
kono
parents: 67
diff changeset
2244 loop:
kono
parents: 67
diff changeset
2245 header_a:
kono
parents: 67
diff changeset
2246 i_1 = PHI<i_0, i_2>;
kono
parents: 67
diff changeset
2247 ...
kono
parents: 67
diff changeset
2248 i_2 = i_1 + 1;
kono
parents: 67
diff changeset
2249 if (cond_a)
kono
parents: 67
diff changeset
2250 goto latch_a;
kono
parents: 67
diff changeset
2251 else
kono
parents: 67
diff changeset
2252 goto exit_a;
kono
parents: 67
diff changeset
2253 latch_a:
kono
parents: 67
diff changeset
2254 goto header_a;
kono
parents: 67
diff changeset
2255
kono
parents: 67
diff changeset
2256 exit_a:
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2257
111
kono
parents: 67
diff changeset
2258 guard_bb:
kono
parents: 67
diff changeset
2259 if (cond)
kono
parents: 67
diff changeset
2260 goto merge_bb;
kono
parents: 67
diff changeset
2261 else
kono
parents: 67
diff changeset
2262 goto epilog_loop;
kono
parents: 67
diff changeset
2263
kono
parents: 67
diff changeset
2264 ;; fall_through_bb
kono
parents: 67
diff changeset
2265
kono
parents: 67
diff changeset
2266 epilog_loop:
kono
parents: 67
diff changeset
2267 header_b:
kono
parents: 67
diff changeset
2268 i_3 = PHI<i_2, i_4>;
kono
parents: 67
diff changeset
2269 ...
kono
parents: 67
diff changeset
2270 i_4 = i_3 + 1;
kono
parents: 67
diff changeset
2271 if (cond_b)
kono
parents: 67
diff changeset
2272 goto latch_b;
kono
parents: 67
diff changeset
2273 else
kono
parents: 67
diff changeset
2274 goto merge_bb;
kono
parents: 67
diff changeset
2275 latch_b:
kono
parents: 67
diff changeset
2276 goto header_b;
kono
parents: 67
diff changeset
2277
kono
parents: 67
diff changeset
2278 merge_bb:
kono
parents: 67
diff changeset
2279 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
kono
parents: 67
diff changeset
2280
kono
parents: 67
diff changeset
2281 exit_bb:
kono
parents: 67
diff changeset
2282 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
kono
parents: 67
diff changeset
2283
kono
parents: 67
diff changeset
2284 For each name used outside EPILOG (i.e., for each name that has a lcssa
kono
parents: 67
diff changeset
2285 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
kono
parents: 67
diff changeset
2286 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
kono
parents: 67
diff changeset
2287 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
kono
parents: 67
diff changeset
2288 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
kono
parents: 67
diff changeset
2289 in exit_bb will also be updated. */
kono
parents: 67
diff changeset
2290
kono
parents: 67
diff changeset
2291 static void
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2292 slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog,
111
kono
parents: 67
diff changeset
2293 edge guard_edge, edge merge_edge)
kono
parents: 67
diff changeset
2294 {
kono
parents: 67
diff changeset
2295 gphi_iterator gsi;
kono
parents: 67
diff changeset
2296 basic_block merge_bb = guard_edge->dest;
kono
parents: 67
diff changeset
2297
kono
parents: 67
diff changeset
2298 gcc_assert (single_succ_p (merge_bb));
kono
parents: 67
diff changeset
2299 edge e = single_succ_edge (merge_bb);
kono
parents: 67
diff changeset
2300 basic_block exit_bb = e->dest;
kono
parents: 67
diff changeset
2301 gcc_assert (single_pred_p (exit_bb));
kono
parents: 67
diff changeset
2302 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
kono
parents: 67
diff changeset
2303
kono
parents: 67
diff changeset
2304 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
2305 {
kono
parents: 67
diff changeset
2306 gphi *update_phi = gsi.phi ();
kono
parents: 67
diff changeset
2307 tree old_arg = PHI_ARG_DEF (update_phi, 0);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2308
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2309 tree merge_arg = NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2310
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2311 /* If the old argument is a SSA_NAME use its current_def. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2312 if (TREE_CODE (old_arg) == SSA_NAME)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2313 merge_arg = get_current_def (old_arg);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2314 /* If it's a constant or doesn't have a current_def, just use the old
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2315 argument. */
111
kono
parents: 67
diff changeset
2316 if (!merge_arg)
kono
parents: 67
diff changeset
2317 merge_arg = old_arg;
kono
parents: 67
diff changeset
2318
kono
parents: 67
diff changeset
2319 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
kono
parents: 67
diff changeset
2320 /* If the var is live after loop but not a reduction, we simply
kono
parents: 67
diff changeset
2321 use the old arg. */
kono
parents: 67
diff changeset
2322 if (!guard_arg)
kono
parents: 67
diff changeset
2323 guard_arg = old_arg;
kono
parents: 67
diff changeset
2324
kono
parents: 67
diff changeset
2325 /* Create new phi node in MERGE_BB: */
kono
parents: 67
diff changeset
2326 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
kono
parents: 67
diff changeset
2327 gphi *merge_phi = create_phi_node (new_res, merge_bb);
kono
parents: 67
diff changeset
2328
kono
parents: 67
diff changeset
2329 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
kono
parents: 67
diff changeset
2330 the two PHI args in merge_phi for these edges. */
kono
parents: 67
diff changeset
2331 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
kono
parents: 67
diff changeset
2332 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
kono
parents: 67
diff changeset
2333
kono
parents: 67
diff changeset
2334 /* Update the original phi in exit_bb. */
kono
parents: 67
diff changeset
2335 adjust_phi_and_debug_stmts (update_phi, e, new_res);
kono
parents: 67
diff changeset
2336 }
kono
parents: 67
diff changeset
2337 }
kono
parents: 67
diff changeset
2338
kono
parents: 67
diff changeset
2339 /* EPILOG loop is duplicated from the original loop for vectorizing,
kono
parents: 67
diff changeset
2340 the arg of its loop closed ssa PHI needs to be updated. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2341
111
kono
parents: 67
diff changeset
2342 static void
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2343 slpeel_update_phi_nodes_for_lcssa (class loop *epilog)
111
kono
parents: 67
diff changeset
2344 {
kono
parents: 67
diff changeset
2345 gphi_iterator gsi;
kono
parents: 67
diff changeset
2346 basic_block exit_bb = single_exit (epilog)->dest;
kono
parents: 67
diff changeset
2347
kono
parents: 67
diff changeset
2348 gcc_assert (single_pred_p (exit_bb));
kono
parents: 67
diff changeset
2349 edge e = EDGE_PRED (exit_bb, 0);
kono
parents: 67
diff changeset
2350 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
2351 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
kono
parents: 67
diff changeset
2352 }
kono
parents: 67
diff changeset
2353
kono
parents: 67
diff changeset
2354 /* Function vect_do_peeling.
kono
parents: 67
diff changeset
2355
kono
parents: 67
diff changeset
2356 Input:
kono
parents: 67
diff changeset
2357 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
kono
parents: 67
diff changeset
2358
kono
parents: 67
diff changeset
2359 preheader:
kono
parents: 67
diff changeset
2360 LOOP:
kono
parents: 67
diff changeset
2361 header_bb:
kono
parents: 67
diff changeset
2362 loop_body
kono
parents: 67
diff changeset
2363 if (exit_loop_cond) goto exit_bb
kono
parents: 67
diff changeset
2364 else goto header_bb
kono
parents: 67
diff changeset
2365 exit_bb:
kono
parents: 67
diff changeset
2366
kono
parents: 67
diff changeset
2367 - NITERS: The number of iterations of the loop.
kono
parents: 67
diff changeset
2368 - NITERSM1: The number of iterations of the loop's latch.
kono
parents: 67
diff changeset
2369 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
kono
parents: 67
diff changeset
2370 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
kono
parents: 67
diff changeset
2371 CHECK_PROFITABILITY is true.
kono
parents: 67
diff changeset
2372 Output:
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2373 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2374 iterate after vectorization; see vect_set_loop_condition for details.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2375 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2376 should be set to the number of scalar iterations handled by the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2377 vector loop. The SSA name is only used on exit from the loop.
111
kono
parents: 67
diff changeset
2378
kono
parents: 67
diff changeset
2379 This function peels prolog and epilog from the loop, adds guards skipping
kono
parents: 67
diff changeset
2380 PROLOG and EPILOG for various conditions. As a result, the changed CFG
kono
parents: 67
diff changeset
2381 would look like:
kono
parents: 67
diff changeset
2382
kono
parents: 67
diff changeset
2383 guard_bb_1:
kono
parents: 67
diff changeset
2384 if (prefer_scalar_loop) goto merge_bb_1
kono
parents: 67
diff changeset
2385 else goto guard_bb_2
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2386
111
kono
parents: 67
diff changeset
2387 guard_bb_2:
kono
parents: 67
diff changeset
2388 if (skip_prolog) goto merge_bb_2
kono
parents: 67
diff changeset
2389 else goto prolog_preheader
kono
parents: 67
diff changeset
2390
kono
parents: 67
diff changeset
2391 prolog_preheader:
kono
parents: 67
diff changeset
2392 PROLOG:
kono
parents: 67
diff changeset
2393 prolog_header_bb:
kono
parents: 67
diff changeset
2394 prolog_body
kono
parents: 67
diff changeset
2395 if (exit_prolog_cond) goto prolog_exit_bb
kono
parents: 67
diff changeset
2396 else goto prolog_header_bb
kono
parents: 67
diff changeset
2397 prolog_exit_bb:
kono
parents: 67
diff changeset
2398
kono
parents: 67
diff changeset
2399 merge_bb_2:
kono
parents: 67
diff changeset
2400
kono
parents: 67
diff changeset
2401 vector_preheader:
kono
parents: 67
diff changeset
2402 VECTOR LOOP:
kono
parents: 67
diff changeset
2403 vector_header_bb:
kono
parents: 67
diff changeset
2404 vector_body
kono
parents: 67
diff changeset
2405 if (exit_vector_cond) goto vector_exit_bb
kono
parents: 67
diff changeset
2406 else goto vector_header_bb
kono
parents: 67
diff changeset
2407 vector_exit_bb:
kono
parents: 67
diff changeset
2408
kono
parents: 67
diff changeset
2409 guard_bb_3:
kono
parents: 67
diff changeset
2410 if (skip_epilog) goto merge_bb_3
kono
parents: 67
diff changeset
2411 else goto epilog_preheader
kono
parents: 67
diff changeset
2412
kono
parents: 67
diff changeset
2413 merge_bb_1:
kono
parents: 67
diff changeset
2414
kono
parents: 67
diff changeset
2415 epilog_preheader:
kono
parents: 67
diff changeset
2416 EPILOG:
kono
parents: 67
diff changeset
2417 epilog_header_bb:
kono
parents: 67
diff changeset
2418 epilog_body
kono
parents: 67
diff changeset
2419 if (exit_epilog_cond) goto merge_bb_3
kono
parents: 67
diff changeset
2420 else goto epilog_header_bb
kono
parents: 67
diff changeset
2421
kono
parents: 67
diff changeset
2422 merge_bb_3:
kono
parents: 67
diff changeset
2423
kono
parents: 67
diff changeset
2424 Note this function peels prolog and epilog, and adds the guards,
kono
parents: 67
diff changeset
2425 only when they are necessary.
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2426 This function returns the epilogue loop if a decision was made to vectorize
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2427 it, otherwise NULL.
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2428
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2429 The analysis resulting in this epilogue loop's loop_vec_info was performed
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2430 in the same vect_analyze_loop call as the main loop's. At that time
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2431 vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2432 vectorization factors than the main loop. This list is stored in the main
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2433 loop's loop_vec_info in the 'epilogue_vinfos' member. Every time we decide to
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2434 vectorize the epilogue loop for a lower vectorization factor, the
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2435 loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2436 updated and linked to the epilogue loop. This is later used to vectorize
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2437 the epilogue. The reason the loop_vec_info needs updating is that it was
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2438 constructed based on the original main loop, and the epilogue loop is a
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2439 copy of this loop, so all links pointing to statements in the original loop
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2440 need updating. Furthermore, these loop_vec_infos share the
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2441 data_reference's records, which will also need to be updated.
111
kono
parents: 67
diff changeset
2442
kono
parents: 67
diff changeset
2443 TODO: Guard for prefer_scalar_loop should be emitted along with
kono
parents: 67
diff changeset
2444 versioning conditions if loop versioning is needed. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2445
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2446
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2447 class loop *
111
kono
parents: 67
diff changeset
2448 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2449 tree *niters_vector, tree *step_vector,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2450 tree *niters_vector_mult_vf_var, int th,
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2451 bool check_profitability, bool niters_no_overflow,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2452 tree *advance)
111
kono
parents: 67
diff changeset
2453 {
kono
parents: 67
diff changeset
2454 edge e, guard_e;
kono
parents: 67
diff changeset
2455 tree type = TREE_TYPE (niters), guard_cond;
kono
parents: 67
diff changeset
2456 basic_block guard_bb, guard_to;
kono
parents: 67
diff changeset
2457 profile_probability prob_prolog, prob_vector, prob_epilog;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2458 int estimated_vf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2459 int prolog_peeling = 0;
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2460 bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2461 /* We currently do not support prolog peeling if the target alignment is not
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2462 known at compile time. 'vect_gen_prolog_loop_niters' depends on the
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2463 target alignment being constant. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2464 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2465 if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2466 return NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2467
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2468 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2469 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2470
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2471 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2472 poly_uint64 bound_epilog = 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2473 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2474 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2475 bound_epilog += vf - 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2476 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2477 bound_epilog += 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2478 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2479 poly_uint64 bound_scalar = bound_epilog;
111
kono
parents: 67
diff changeset
2480
kono
parents: 67
diff changeset
2481 if (!prolog_peeling && !epilog_peeling)
kono
parents: 67
diff changeset
2482 return NULL;
kono
parents: 67
diff changeset
2483
kono
parents: 67
diff changeset
2484 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2485 estimated_vf = vect_vf_for_cost (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2486 if (estimated_vf == 2)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2487 estimated_vf = 3;
111
kono
parents: 67
diff changeset
2488 prob_prolog = prob_epilog = profile_probability::guessed_always ()
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2489 .apply_scale (estimated_vf - 1, estimated_vf);
111
kono
parents: 67
diff changeset
2490
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2491 class loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2492 class loop *first_loop = loop;
111
kono
parents: 67
diff changeset
2493 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2494
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2495 /* We might have a queued need to update virtual SSA form. As we
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2496 delete the update SSA machinery below after doing a regular
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2497 incremental SSA update during loop copying make sure we don't
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2498 lose that fact.
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2499 ??? Needing to update virtual SSA form by renaming is unfortunate
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2500 but not all of the vectorizer code inserting new loads / stores
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2501 properly assigns virtual operands to those statements. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2502 update_ssa (TODO_update_ssa_only_virtuals);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2503
111
kono
parents: 67
diff changeset
2504 create_lcssa_for_virtual_phi (loop);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2505
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2506 /* If we're vectorizing an epilogue loop, the update_ssa above will
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2507 have ensured that the virtual operand is in SSA form throughout the
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2508 vectorized main loop. Normally it is possible to trace the updated
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2509 vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2510 back to scalar-stmt vuses, meaning that the effect of the SSA update
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2511 remains local to the main loop. However, there are rare cases in
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2512 which the vectorized loop has vdefs even when the original scalar
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2513 loop didn't. For example, vectorizing a load with IFN_LOAD_LANES
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2514 introduces clobbers of the temporary vector array, which in turn
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2515 needs new vdefs. If the scalar loop doesn't write to memory, these
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2516 new vdefs will be the only ones in the vector loop.
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2517
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2518 In that case, update_ssa will have added a new virtual phi to the
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2519 main loop, which previously didn't need one. Ensure that we (locally)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2520 maintain LCSSA form for the virtual operand, just as we would have
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2521 done if the virtual phi had existed from the outset. This makes it
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2522 easier to duplicate the scalar epilogue loop below. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2523 tree vop_to_rename = NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2524 if (loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2525 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2526 class loop *orig_loop = LOOP_VINFO_LOOP (orig_loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2527 vop_to_rename = create_lcssa_for_virtual_phi (orig_loop);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2528 }
111
kono
parents: 67
diff changeset
2529
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2530 if (MAY_HAVE_DEBUG_BIND_STMTS)
111
kono
parents: 67
diff changeset
2531 {
kono
parents: 67
diff changeset
2532 gcc_assert (!adjust_vec.exists ());
kono
parents: 67
diff changeset
2533 adjust_vec.create (32);
kono
parents: 67
diff changeset
2534 }
kono
parents: 67
diff changeset
2535 initialize_original_copy_tables ();
kono
parents: 67
diff changeset
2536
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2537 /* Record the anchor bb at which the guard should be placed if the scalar
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2538 loop might be preferred. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2539 basic_block anchor = loop_preheader_edge (loop)->src;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2540
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2541 /* Generate the number of iterations for the prolog loop. We do this here
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2542 so that we can also get the upper bound on the number of iterations. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2543 tree niters_prolog;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2544 int bound_prolog = 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2545 if (prolog_peeling)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2546 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2547 &bound_prolog);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2548 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2549 niters_prolog = build_int_cst (type, 0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2550
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2551 loop_vec_info epilogue_vinfo = NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2552 if (vect_epilogues)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2553 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2554 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2555 loop_vinfo->epilogue_vinfos.ordered_remove (0);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2556 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2557
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2558 tree niters_vector_mult_vf = NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2559 /* Saving NITERs before the loop, as this may be changed by prologue. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2560 tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2561 edge update_e = NULL, skip_e = NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2562 unsigned int lowest_vf = constant_lower_bound (vf);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2563 /* If we know the number of scalar iterations for the main loop we should
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2564 check whether after the main loop there are enough iterations left over
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2565 for the epilogue. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2566 if (vect_epilogues
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2567 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2568 && prolog_peeling >= 0
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2569 && known_eq (vf, lowest_vf))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2570 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2571 unsigned HOST_WIDE_INT eiters
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2572 = (LOOP_VINFO_INT_NITERS (loop_vinfo)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2573 - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2574
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2575 eiters -= prolog_peeling;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2576 eiters
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2577 = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2578
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2579 unsigned int ratio;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2580 unsigned int epilogue_gaps
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2581 = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2582 while (!(constant_multiple_p
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2583 (GET_MODE_SIZE (loop_vinfo->vector_mode),
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2584 GET_MODE_SIZE (epilogue_vinfo->vector_mode), &ratio)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2585 && eiters >= lowest_vf / ratio + epilogue_gaps))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2586 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2587 delete epilogue_vinfo;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2588 epilogue_vinfo = NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2589 if (loop_vinfo->epilogue_vinfos.length () == 0)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2590 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2591 vect_epilogues = false;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2592 break;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2593 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2594 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2595 loop_vinfo->epilogue_vinfos.ordered_remove (0);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2596 epilogue_gaps = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2597 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2598 }
111
kono
parents: 67
diff changeset
2599 /* Prolog loop may be skipped. */
kono
parents: 67
diff changeset
2600 bool skip_prolog = (prolog_peeling != 0);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2601 /* Skip this loop to epilog when there are not enough iterations to enter this
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2602 vectorized loop. If true we should perform runtime checks on the NITERS
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2603 to check whether we should skip the current vectorized loop. If we know
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2604 the number of scalar iterations we may choose to add a runtime check if
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2605 this number may be smaller than the number of iterations
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2606 required to enter this loop; for this we use the upper
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2607 bounds on the prolog and epilog peeling.
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2608 When we
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2609 don't know the number of iterations and don't require versioning it is
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2610 because we have asserted that there are enough scalar iterations to enter
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2611 the main loop, so this skip is not necessary. When we are versioning then
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2612 we only add such a skip if we have chosen to vectorize the epilogue. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2613 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2614 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2615 bound_prolog + bound_epilog)
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2616 : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2617 || vect_epilogues));
111
kono
parents: 67
diff changeset
2618 /* Epilog loop must be executed if the number of iterations for epilog
kono
parents: 67
diff changeset
2619 loop is known at compile time, otherwise we need to add a check at
kono
parents: 67
diff changeset
2620 the end of vector loop and skip to the end of epilog loop. */
kono
parents: 67
diff changeset
2621 bool skip_epilog = (prolog_peeling < 0
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2622 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2623 || !vf.is_constant ());
111
kono
parents: 67
diff changeset
2624 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
kono
parents: 67
diff changeset
2625 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
kono
parents: 67
diff changeset
2626 skip_epilog = false;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2627
111
kono
parents: 67
diff changeset
2628 if (skip_vector)
kono
parents: 67
diff changeset
2629 {
kono
parents: 67
diff changeset
2630 split_edge (loop_preheader_edge (loop));
kono
parents: 67
diff changeset
2631
kono
parents: 67
diff changeset
2632 /* Due to the order in which we peel prolog and epilog, we first
kono
parents: 67
diff changeset
2633 propagate probability to the whole loop. The purpose is to
kono
parents: 67
diff changeset
2634 avoid adjusting probabilities of both prolog and vector loops
kono
parents: 67
diff changeset
2635 separately. Note in this case, the probability of epilog loop
kono
parents: 67
diff changeset
2636 needs to be scaled back later. */
kono
parents: 67
diff changeset
2637 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
kono
parents: 67
diff changeset
2638 if (prob_vector.initialized_p ())
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2639 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2640 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2641 scale_loop_profile (loop, prob_vector, 0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2642 }
111
kono
parents: 67
diff changeset
2643 }
kono
parents: 67
diff changeset
2644
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2645 dump_user_location_t loop_loc = find_loop_location (loop);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2646 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2647 if (vect_epilogues)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2648 /* Make sure to set the epilogue's epilogue scalar loop, such that we can
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2649 use the original scalar loop as remaining epilogue if necessary. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2650 LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2651 = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2652
111
kono
parents: 67
diff changeset
2653 if (prolog_peeling)
kono
parents: 67
diff changeset
2654 {
kono
parents: 67
diff changeset
2655 e = loop_preheader_edge (loop);
kono
parents: 67
diff changeset
2656 if (!slpeel_can_duplicate_loop_p (loop, e))
kono
parents: 67
diff changeset
2657 {
kono
parents: 67
diff changeset
2658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
kono
parents: 67
diff changeset
2659 "loop can't be duplicated to preheader edge.\n");
kono
parents: 67
diff changeset
2660 gcc_unreachable ();
kono
parents: 67
diff changeset
2661 }
kono
parents: 67
diff changeset
2662 /* Peel prolog and put it on preheader edge of loop. */
kono
parents: 67
diff changeset
2663 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
kono
parents: 67
diff changeset
2664 if (!prolog)
kono
parents: 67
diff changeset
2665 {
kono
parents: 67
diff changeset
2666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
kono
parents: 67
diff changeset
2667 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
kono
parents: 67
diff changeset
2668 gcc_unreachable ();
kono
parents: 67
diff changeset
2669 }
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2670 prolog->force_vectorize = false;
111
kono
parents: 67
diff changeset
2671 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
kono
parents: 67
diff changeset
2672 first_loop = prolog;
kono
parents: 67
diff changeset
2673 reset_original_copy_tables ();
kono
parents: 67
diff changeset
2674
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2675 /* Update the number of iterations for prolog loop. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2676 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2677 vect_set_loop_condition (prolog, NULL, niters_prolog,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2678 step_prolog, NULL_TREE, false);
111
kono
parents: 67
diff changeset
2679
kono
parents: 67
diff changeset
2680 /* Skip the prolog loop. */
kono
parents: 67
diff changeset
2681 if (skip_prolog)
kono
parents: 67
diff changeset
2682 {
kono
parents: 67
diff changeset
2683 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
2684 niters_prolog, build_int_cst (type, 0));
kono
parents: 67
diff changeset
2685 guard_bb = loop_preheader_edge (prolog)->src;
kono
parents: 67
diff changeset
2686 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
kono
parents: 67
diff changeset
2687 guard_to = split_edge (loop_preheader_edge (loop));
kono
parents: 67
diff changeset
2688 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
kono
parents: 67
diff changeset
2689 guard_to, guard_bb,
kono
parents: 67
diff changeset
2690 prob_prolog.invert (),
kono
parents: 67
diff changeset
2691 irred_flag);
kono
parents: 67
diff changeset
2692 e = EDGE_PRED (guard_to, 0);
kono
parents: 67
diff changeset
2693 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
kono
parents: 67
diff changeset
2694 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2695
111
kono
parents: 67
diff changeset
2696 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
kono
parents: 67
diff changeset
2697 scale_loop_profile (prolog, prob_prolog, bound_prolog);
kono
parents: 67
diff changeset
2698 }
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2699
111
kono
parents: 67
diff changeset
2700 /* Update init address of DRs. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2701 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
111
kono
parents: 67
diff changeset
2702 /* Update niters for vector loop. */
kono
parents: 67
diff changeset
2703 LOOP_VINFO_NITERS (loop_vinfo)
kono
parents: 67
diff changeset
2704 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
kono
parents: 67
diff changeset
2705 LOOP_VINFO_NITERSM1 (loop_vinfo)
kono
parents: 67
diff changeset
2706 = fold_build2 (MINUS_EXPR, type,
kono
parents: 67
diff changeset
2707 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
kono
parents: 67
diff changeset
2708 bool new_var_p = false;
kono
parents: 67
diff changeset
2709 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
kono
parents: 67
diff changeset
2710 /* It's guaranteed that vector loop bound before vectorization is at
kono
parents: 67
diff changeset
2711 least VF, so set range information for newly generated var. */
kono
parents: 67
diff changeset
2712 if (new_var_p)
kono
parents: 67
diff changeset
2713 set_range_info (niters, VR_RANGE,
kono
parents: 67
diff changeset
2714 wi::to_wide (build_int_cst (type, vf)),
kono
parents: 67
diff changeset
2715 wi::to_wide (TYPE_MAX_VALUE (type)));
kono
parents: 67
diff changeset
2716
kono
parents: 67
diff changeset
2717 /* Prolog iterates at most bound_prolog times, latch iterates at
kono
parents: 67
diff changeset
2718 most bound_prolog - 1 times. */
kono
parents: 67
diff changeset
2719 record_niter_bound (prolog, bound_prolog - 1, false, true);
kono
parents: 67
diff changeset
2720 delete_update_ssa ();
kono
parents: 67
diff changeset
2721 adjust_vec_debug_stmts ();
kono
parents: 67
diff changeset
2722 scev_reset ();
kono
parents: 67
diff changeset
2723 }
kono
parents: 67
diff changeset
2724
kono
parents: 67
diff changeset
2725 if (epilog_peeling)
kono
parents: 67
diff changeset
2726 {
kono
parents: 67
diff changeset
2727 e = single_exit (loop);
kono
parents: 67
diff changeset
2728 if (!slpeel_can_duplicate_loop_p (loop, e))
kono
parents: 67
diff changeset
2729 {
kono
parents: 67
diff changeset
2730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
kono
parents: 67
diff changeset
2731 "loop can't be duplicated to exit edge.\n");
kono
parents: 67
diff changeset
2732 gcc_unreachable ();
kono
parents: 67
diff changeset
2733 }
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2734 /* Peel epilog and put it on exit edge of loop. If we are vectorizing
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2735 said epilog then we should use a copy of the main loop as a starting
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2736 point. This loop may have already had some preliminary transformations
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2737 to allow for more optimal vectorization, for example if-conversion.
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2738 If we are not vectorizing the epilog then we should use the scalar loop
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2739 as the transformations mentioned above make less or no sense when not
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2740 vectorizing. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2741 epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2742 if (vop_to_rename)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2743 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2744 /* Vectorizing the main loop can sometimes introduce a vdef to
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2745 a loop that previously didn't have one; see the comment above
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2746 the definition of VOP_TO_RENAME for details. The definition
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2747 D that holds on E will then be different from the definition
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2748 VOP_TO_RENAME that holds during SCALAR_LOOP, so we need to
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2749 rename VOP_TO_RENAME to D when copying the loop.
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2750
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2751 The virtual operand is in LCSSA form for the main loop,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2752 and no stmt between the main loop and E needs a vdef,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2753 so we know that D is provided by a phi rather than by a
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2754 vdef on a normal gimple stmt. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2755 basic_block vdef_bb = e->src;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2756 gphi *vphi;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2757 while (!(vphi = get_virtual_phi (vdef_bb)))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2758 vdef_bb = get_immediate_dominator (CDI_DOMINATORS, vdef_bb);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2759 gcc_assert (vop_to_rename != gimple_phi_result (vphi));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2760 set_current_def (vop_to_rename, gimple_phi_result (vphi));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2761 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2762 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e);
111
kono
parents: 67
diff changeset
2763 if (!epilog)
kono
parents: 67
diff changeset
2764 {
kono
parents: 67
diff changeset
2765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
kono
parents: 67
diff changeset
2766 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
kono
parents: 67
diff changeset
2767 gcc_unreachable ();
kono
parents: 67
diff changeset
2768 }
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2769 epilog->force_vectorize = false;
111
kono
parents: 67
diff changeset
2770 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2771
111
kono
parents: 67
diff changeset
2772 /* Scalar version loop may be preferred. In this case, add guard
kono
parents: 67
diff changeset
2773 and skip to epilog. Note this only happens when the number of
kono
parents: 67
diff changeset
2774 iterations of loop is unknown at compile time, otherwise this
kono
parents: 67
diff changeset
2775 won't be vectorized. */
kono
parents: 67
diff changeset
2776 if (skip_vector)
kono
parents: 67
diff changeset
2777 {
kono
parents: 67
diff changeset
2778 /* Additional epilogue iteration is peeled if gap exists. */
kono
parents: 67
diff changeset
2779 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2780 bound_prolog, bound_epilog,
111
kono
parents: 67
diff changeset
2781 th, &bound_scalar,
kono
parents: 67
diff changeset
2782 check_profitability);
kono
parents: 67
diff changeset
2783 /* Build guard against NITERSM1 since NITERS may overflow. */
kono
parents: 67
diff changeset
2784 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
kono
parents: 67
diff changeset
2785 guard_bb = anchor;
kono
parents: 67
diff changeset
2786 guard_to = split_edge (loop_preheader_edge (epilog));
kono
parents: 67
diff changeset
2787 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
kono
parents: 67
diff changeset
2788 guard_to, guard_bb,
kono
parents: 67
diff changeset
2789 prob_vector.invert (),
kono
parents: 67
diff changeset
2790 irred_flag);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2791 skip_e = guard_e;
111
kono
parents: 67
diff changeset
2792 e = EDGE_PRED (guard_to, 0);
kono
parents: 67
diff changeset
2793 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
kono
parents: 67
diff changeset
2794 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
kono
parents: 67
diff changeset
2795
kono
parents: 67
diff changeset
2796 /* Simply propagate profile info from guard_bb to guard_to which is
kono
parents: 67
diff changeset
2797 a merge point of control flow. */
kono
parents: 67
diff changeset
2798 guard_to->count = guard_bb->count;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2799
111
kono
parents: 67
diff changeset
2800 /* Scale probability of epilog loop back.
kono
parents: 67
diff changeset
2801 FIXME: We should avoid scaling down and back up. Profile may
kono
parents: 67
diff changeset
2802 get lost if we scale down to 0. */
kono
parents: 67
diff changeset
2803 basic_block *bbs = get_loop_body (epilog);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2804 for (unsigned int i = 0; i < epilog->num_nodes; i++)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2805 bbs[i]->count = bbs[i]->count.apply_scale
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2806 (bbs[i]->count,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2807 bbs[i]->count.apply_probability
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2808 (prob_vector));
111
kono
parents: 67
diff changeset
2809 free (bbs);
kono
parents: 67
diff changeset
2810 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2811
111
kono
parents: 67
diff changeset
2812 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
kono
parents: 67
diff changeset
2813 /* If loop is peeled for non-zero constant times, now niters refers to
kono
parents: 67
diff changeset
2814 orig_niters - prolog_peeling, it won't overflow even if the orig_niters
kono
parents: 67
diff changeset
2815 overflows. */
kono
parents: 67
diff changeset
2816 niters_no_overflow |= (prolog_peeling > 0);
kono
parents: 67
diff changeset
2817 vect_gen_vector_loop_niters (loop_vinfo, niters,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2818 niters_vector, step_vector,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2819 niters_no_overflow);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2820 if (!integer_onep (*step_vector))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2821 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2822 /* On exit from the loop we will have an easy way of calculating
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2823 NITERS_VECTOR / STEP * STEP. Install a dummy definition
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2824 until then. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2825 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2826 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2827 *niters_vector_mult_vf_var = niters_vector_mult_vf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2828 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2829 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2830 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2831 &niters_vector_mult_vf);
111
kono
parents: 67
diff changeset
2832 /* Update IVs of original loop as if they were advanced by
kono
parents: 67
diff changeset
2833 niters_vector_mult_vf steps. */
kono
parents: 67
diff changeset
2834 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2835 update_e = skip_vector ? e : loop_preheader_edge (epilog);
111
kono
parents: 67
diff changeset
2836 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
kono
parents: 67
diff changeset
2837 update_e);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2838
111
kono
parents: 67
diff changeset
2839 if (skip_epilog)
kono
parents: 67
diff changeset
2840 {
kono
parents: 67
diff changeset
2841 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
2842 niters, niters_vector_mult_vf);
kono
parents: 67
diff changeset
2843 guard_bb = single_exit (loop)->dest;
kono
parents: 67
diff changeset
2844 guard_to = split_edge (single_exit (epilog));
kono
parents: 67
diff changeset
2845 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
kono
parents: 67
diff changeset
2846 skip_vector ? anchor : guard_bb,
kono
parents: 67
diff changeset
2847 prob_epilog.invert (),
kono
parents: 67
diff changeset
2848 irred_flag);
kono
parents: 67
diff changeset
2849 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
kono
parents: 67
diff changeset
2850 single_exit (epilog));
kono
parents: 67
diff changeset
2851 /* Only need to handle basic block before epilog loop if it's not
kono
parents: 67
diff changeset
2852 the guard_bb, which is the case when skip_vector is true. */
kono
parents: 67
diff changeset
2853 if (guard_bb != bb_before_epilog)
kono
parents: 67
diff changeset
2854 {
kono
parents: 67
diff changeset
2855 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2856
111
kono
parents: 67
diff changeset
2857 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
kono
parents: 67
diff changeset
2858 }
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2859 scale_loop_profile (epilog, prob_epilog, 0);
111
kono
parents: 67
diff changeset
2860 }
kono
parents: 67
diff changeset
2861 else
kono
parents: 67
diff changeset
2862 slpeel_update_phi_nodes_for_lcssa (epilog);
kono
parents: 67
diff changeset
2863
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2864 unsigned HOST_WIDE_INT bound;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2865 if (bound_scalar.is_constant (&bound))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2866 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2867 gcc_assert (bound != 0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2868 /* -1 to convert loop iterations to latch iterations. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2869 record_niter_bound (epilog, bound - 1, false, true);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2870 }
111
kono
parents: 67
diff changeset
2871
kono
parents: 67
diff changeset
2872 delete_update_ssa ();
kono
parents: 67
diff changeset
2873 adjust_vec_debug_stmts ();
kono
parents: 67
diff changeset
2874 scev_reset ();
kono
parents: 67
diff changeset
2875 }
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2876
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2877 if (vect_epilogues)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2878 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2879 epilog->aux = epilogue_vinfo;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2880 LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2881
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2882 loop_constraint_clear (epilog, LOOP_C_INFINITE);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2883
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2884 /* We now must calculate the number of NITERS performed by the previous
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2885 loop and EPILOGUE_NITERS to be performed by the epilogue. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2886 tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2887 niters_prolog, niters_vector_mult_vf);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2888
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2889 /* If skip_vector we may skip the previous loop, we insert a phi-node to
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2890 determine whether we are coming from the previous vectorized loop
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2891 using the update_e edge or the skip_vector basic block using the
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2892 skip_e edge. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2893 if (skip_vector)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2894 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2895 gcc_assert (update_e != NULL && skip_e != NULL);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2896 gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2897 update_e->dest);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2898 tree new_ssa = make_ssa_name (TREE_TYPE (niters));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2899 gimple *stmt = gimple_build_assign (new_ssa, niters);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2900 gimple_stmt_iterator gsi;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2901 if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2902 && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2903 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2904 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2905 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2906 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2907 else
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2908 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2909 gsi = gsi_last_bb (update_e->src);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2910 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2911 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2912
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2913 niters = new_ssa;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2914 add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2915 add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2916 UNKNOWN_LOCATION);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2917 niters = PHI_RESULT (new_phi);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2918 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2919
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2920 /* Subtract the number of iterations performed by the vectorized loop
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2921 from the number of total iterations. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2922 tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2923 before_loop_niters,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2924 niters);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2925
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2926 LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2927 LOOP_VINFO_NITERSM1 (epilogue_vinfo)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2928 = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2929 epilogue_niters,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2930 build_one_cst (TREE_TYPE (epilogue_niters)));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2931
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2932 /* Set ADVANCE to the number of iterations performed by the previous
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2933 loop and its prologue. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2934 *advance = niters;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2935
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2936 /* Redo the peeling for niter analysis as the NITERs and alignment
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2937 may have been updated to take the main loop into account. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2938 determine_peel_for_niter (epilogue_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2939 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2940
111
kono
parents: 67
diff changeset
2941 adjust_vec.release ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2942 free_original_copy_tables ();
111
kono
parents: 67
diff changeset
2943
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2944 return vect_epilogues ? epilog : NULL;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2945 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2946
111
kono
parents: 67
diff changeset
2947 /* Function vect_create_cond_for_niters_checks.
kono
parents: 67
diff changeset
2948
kono
parents: 67
diff changeset
2949 Create a conditional expression that represents the run-time checks for
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2950 loop's niter. The loop is guaranteed to terminate if the run-time
111
kono
parents: 67
diff changeset
2951 checks hold.
kono
parents: 67
diff changeset
2952
kono
parents: 67
diff changeset
2953 Input:
kono
parents: 67
diff changeset
2954 COND_EXPR - input conditional expression. New conditions will be chained
kono
parents: 67
diff changeset
2955 with logical AND operation. If *COND_EXPR is NULL, the niter
kono
parents: 67
diff changeset
2956 assumptions alone become the condition.
kono
parents: 67
diff changeset
2957 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
kono
parents: 67
diff changeset
2958 assumptions to be checked.
kono
parents: 67
diff changeset
2959
kono
parents: 67
diff changeset
2960 Output:
kono
parents: 67
diff changeset
2961 COND_EXPR - conditional expression.
kono
parents: 67
diff changeset
2962
kono
parents: 67
diff changeset
2963 The returned COND_EXPR is the conditional expression to be used in the
kono
parents: 67
diff changeset
2964 if statement that controls which version of the loop gets executed at
kono
parents: 67
diff changeset
2965 runtime. */
kono
parents: 67
diff changeset
2966
kono
parents: 67
diff changeset
2967 static void
kono
parents: 67
diff changeset
2968 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
kono
parents: 67
diff changeset
2969 {
kono
parents: 67
diff changeset
2970 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
kono
parents: 67
diff changeset
2971
kono
parents: 67
diff changeset
2972 if (*cond_expr)
kono
parents: 67
diff changeset
2973 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
2974 *cond_expr, part_cond_expr);
kono
parents: 67
diff changeset
2975 else
kono
parents: 67
diff changeset
2976 *cond_expr = part_cond_expr;
kono
parents: 67
diff changeset
2977 }
kono
parents: 67
diff changeset
2978
kono
parents: 67
diff changeset
2979 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
kono
parents: 67
diff changeset
2980 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
kono
parents: 67
diff changeset
2981
kono
parents: 67
diff changeset
2982 static void
kono
parents: 67
diff changeset
2983 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
kono
parents: 67
diff changeset
2984 {
kono
parents: 67
diff changeset
2985 if (*cond_expr)
kono
parents: 67
diff changeset
2986 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
2987 *cond_expr, part_cond_expr);
kono
parents: 67
diff changeset
2988 else
kono
parents: 67
diff changeset
2989 *cond_expr = part_cond_expr;
kono
parents: 67
diff changeset
2990 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2991
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2992 /* Function vect_create_cond_for_align_checks.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2993
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2994 Create a conditional expression that represents the alignment checks for
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2995 all of data references (array element references) whose alignment must be
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2996 checked at runtime.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2997
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2998 Input:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2999 COND_EXPR - input conditional expression. New conditions will be chained
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3000 with logical AND operation.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3001 LOOP_VINFO - two fields of the loop information are used.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3002 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3003 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3004
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3005 Output:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3006 COND_EXPR_STMT_LIST - statements needed to construct the conditional
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3007 expression.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3008 The returned value is the conditional expression to be used in the if
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3009 statement that controls which version of the loop gets executed at runtime.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3010
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3011 The algorithm makes two assumptions:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3012 1) The number of bytes "n" in a vector is a power of 2.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3013 2) An address "a" is aligned if a%n is zero and that this
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3014 test can be done as a&(n-1) == 0. For example, for 16
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3015 byte vectors the test is a&0xf == 0. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3016
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3017 static void
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3018 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3019 tree *cond_expr,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3020 gimple_seq *cond_expr_stmt_list)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3021 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3022 vec<stmt_vec_info> may_misalign_stmts
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3023 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3024 stmt_vec_info stmt_info;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3025 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3026 tree mask_cst;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3027 unsigned int i;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3028 tree int_ptrsize_type;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3029 char tmp_name[20];
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3030 tree or_tmp_name = NULL_TREE;
111
kono
parents: 67
diff changeset
3031 tree and_tmp_name;
kono
parents: 67
diff changeset
3032 gimple *and_stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3033 tree ptrsize_zero;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3034 tree part_cond_expr;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3035
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3036 /* Check that mask is one less than a power of 2, i.e., mask is
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3037 all zeros followed by all ones. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3038 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3039
111
kono
parents: 67
diff changeset
3040 int_ptrsize_type = signed_type_for (ptr_type_node);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3041
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3042 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3043 of the first vector of the i'th data reference. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3044
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3045 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3046 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3047 gimple_seq new_stmt_list = NULL;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3048 tree addr_base;
111
kono
parents: 67
diff changeset
3049 tree addr_tmp_name;
kono
parents: 67
diff changeset
3050 tree new_or_tmp_name;
kono
parents: 67
diff changeset
3051 gimple *addr_stmt, *or_stmt;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3052 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
3053 bool negative = tree_int_cst_compare
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3054 (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
3055 tree offset = negative
111
kono
parents: 67
diff changeset
3056 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3057
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3058 /* create: addr_tmp = (int)(address_of_first_vector) */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3059 addr_base =
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3060 vect_create_addr_base_for_vector_ref (stmt_info, &new_stmt_list,
111
kono
parents: 67
diff changeset
3061 offset);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3062 if (new_stmt_list != NULL)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3063 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3064
111
kono
parents: 67
diff changeset
3065 sprintf (tmp_name, "addr2int%d", i);
kono
parents: 67
diff changeset
3066 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
kono
parents: 67
diff changeset
3067 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3068 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3069
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3070 /* The addresses are OR together. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3071
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3072 if (or_tmp_name != NULL_TREE)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3073 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3074 /* create: or_tmp = or_tmp | addr_tmp */
111
kono
parents: 67
diff changeset
3075 sprintf (tmp_name, "orptrs%d", i);
kono
parents: 67
diff changeset
3076 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
kono
parents: 67
diff changeset
3077 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
kono
parents: 67
diff changeset
3078 or_tmp_name, addr_tmp_name);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3079 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3080 or_tmp_name = new_or_tmp_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3081 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3082 else
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3083 or_tmp_name = addr_tmp_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3084
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3085 } /* end for i */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3086
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3087 mask_cst = build_int_cst (int_ptrsize_type, mask);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3088
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3089 /* create: and_tmp = or_tmp & mask */
111
kono
parents: 67
diff changeset
3090 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3091
111
kono
parents: 67
diff changeset
3092 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
kono
parents: 67
diff changeset
3093 or_tmp_name, mask_cst);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3094 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3095
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3096 /* Make and_tmp the left operand of the conditional test against zero.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3097 if and_tmp has a nonzero bit then some address is unaligned. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3098 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3099 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3100 and_tmp_name, ptrsize_zero);
111
kono
parents: 67
diff changeset
3101 chain_cond_expr (cond_expr, part_cond_expr);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3102 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3103
111
kono
parents: 67
diff changeset
3104 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
kono
parents: 67
diff changeset
3105 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
kono
parents: 67
diff changeset
3106 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
kono
parents: 67
diff changeset
3107 and this new condition are true. Treat a null *COND_EXPR as "true". */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3108
111
kono
parents: 67
diff changeset
3109 static void
kono
parents: 67
diff changeset
3110 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3111 {
111
kono
parents: 67
diff changeset
3112 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
kono
parents: 67
diff changeset
3113 unsigned int i;
kono
parents: 67
diff changeset
3114 vec_object_pair *pair;
kono
parents: 67
diff changeset
3115 FOR_EACH_VEC_ELT (pairs, i, pair)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3116 {
111
kono
parents: 67
diff changeset
3117 tree addr1 = build_fold_addr_expr (pair->first);
kono
parents: 67
diff changeset
3118 tree addr2 = build_fold_addr_expr (pair->second);
kono
parents: 67
diff changeset
3119 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
3120 addr1, addr2);
kono
parents: 67
diff changeset
3121 chain_cond_expr (cond_expr, part_cond_expr);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3122 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3123 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3124
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3125 /* Create an expression that is true when all lower-bound conditions for
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3126 the vectorized loop are met. Chain this condition with *COND_EXPR. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3127
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3128 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3129 vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3130 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3131 vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3132 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3133 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3134 tree expr = lower_bounds[i].expr;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3135 tree type = unsigned_type_for (TREE_TYPE (expr));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3136 expr = fold_convert (type, expr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3137 poly_uint64 bound = lower_bounds[i].min_value;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3138 if (!lower_bounds[i].unsigned_p)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3139 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3140 expr = fold_build2 (PLUS_EXPR, type, expr,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3141 build_int_cstu (type, bound - 1));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3142 bound += bound - 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3143 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3144 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3145 build_int_cstu (type, bound));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3146 chain_cond_expr (cond_expr, part_cond_expr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3147 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3148 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3149
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3150 /* Function vect_create_cond_for_alias_checks.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3151
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3152 Create a conditional expression that represents the run-time checks for
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3153 overlapping of address ranges represented by a list of data references
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3154 relations passed as input.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3155
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3156 Input:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3157 COND_EXPR - input conditional expression. New conditions will be chained
111
kono
parents: 67
diff changeset
3158 with logical AND operation. If it is NULL, then the function
kono
parents: 67
diff changeset
3159 is used to return the number of alias checks.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3160 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3161 to be checked.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3162
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3163 Output:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3164 COND_EXPR - conditional expression.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3165
111
kono
parents: 67
diff changeset
3166 The returned COND_EXPR is the conditional expression to be used in the if
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3167 statement that controls which version of the loop gets executed at runtime.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3168 */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3169
111
kono
parents: 67
diff changeset
3170 void
kono
parents: 67
diff changeset
3171 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3172 {
111
kono
parents: 67
diff changeset
3173 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
kono
parents: 67
diff changeset
3174 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3175
111
kono
parents: 67
diff changeset
3176 if (comp_alias_ddrs.is_empty ())
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3177 return;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3178
111
kono
parents: 67
diff changeset
3179 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
kono
parents: 67
diff changeset
3180 &comp_alias_ddrs, cond_expr);
kono
parents: 67
diff changeset
3181 if (dump_enabled_p ())
kono
parents: 67
diff changeset
3182 dump_printf_loc (MSG_NOTE, vect_location,
kono
parents: 67
diff changeset
3183 "created %u versioning for alias checks.\n",
kono
parents: 67
diff changeset
3184 comp_alias_ddrs.length ());
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3185 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3186
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3187
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3188 /* Function vect_loop_versioning.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3189
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3190 If the loop has data references that may or may not be aligned or/and
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3191 has data reference relations whose independence was not proven then
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3192 two versions of the loop need to be generated, one which is vectorized
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3193 and one which isn't. A test is then generated to control which of the
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3194 loops is executed. The test checks for the alignment of all of the
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3195 data references that may or may not be aligned. An additional
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3196 sequence of runtime tests is generated for each pair of DDRs whose
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3197 independence was not proven. The vectorized version of loop is
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3198 executed only if both alias and alignment tests are passed.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3199
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3200 The test generated to check which version of loop is executed
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3201 is modified to also check for profitability as indicated by the
111
kono
parents: 67
diff changeset
3202 cost model threshold TH.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3203
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3204 The versioning precondition(s) are placed in *COND_EXPR and
111
kono
parents: 67
diff changeset
3205 *COND_EXPR_STMT_LIST. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3206
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3207 class loop *
111
kono
parents: 67
diff changeset
3208 vect_loop_versioning (loop_vec_info loop_vinfo,
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3209 gimple *loop_vectorized_call)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3210 {
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3211 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3212 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3213 basic_block condition_bb;
111
kono
parents: 67
diff changeset
3214 gphi_iterator gsi;
kono
parents: 67
diff changeset
3215 gimple_stmt_iterator cond_exp_gsi;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3216 basic_block merge_bb;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3217 basic_block new_exit_bb;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3218 edge new_exit_e, e;
111
kono
parents: 67
diff changeset
3219 gphi *orig_phi, *new_phi;
kono
parents: 67
diff changeset
3220 tree cond_expr = NULL_TREE;
kono
parents: 67
diff changeset
3221 gimple_seq cond_expr_stmt_list = NULL;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3222 tree arg;
111
kono
parents: 67
diff changeset
3223 profile_probability prob = profile_probability::likely ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3224 gimple_seq gimplify_stmt_list = NULL;
111
kono
parents: 67
diff changeset
3225 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
kono
parents: 67
diff changeset
3226 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
kono
parents: 67
diff changeset
3227 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
kono
parents: 67
diff changeset
3228 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3229 poly_uint64 versioning_threshold
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3230 = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3231 tree version_simd_if_cond
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3232 = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3233 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3234
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3235 if (vect_apply_runtime_profitability_check_p (loop_vinfo)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3236 && !ordered_p (th, versioning_threshold))
111
kono
parents: 67
diff changeset
3237 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
kono
parents: 67
diff changeset
3238 build_int_cst (TREE_TYPE (scalar_loop_iters),
kono
parents: 67
diff changeset
3239 th - 1));
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3240 if (maybe_ne (versioning_threshold, 0U))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3241 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3242 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3243 build_int_cst (TREE_TYPE (scalar_loop_iters),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3244 versioning_threshold - 1));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3245 if (cond_expr)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3246 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3247 expr, cond_expr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3248 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3249 cond_expr = expr;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3250 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3251
111
kono
parents: 67
diff changeset
3252 if (version_niter)
kono
parents: 67
diff changeset
3253 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3254
111
kono
parents: 67
diff changeset
3255 if (cond_expr)
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3256 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3257 &cond_expr_stmt_list,
111
kono
parents: 67
diff changeset
3258 is_gimple_condexpr, NULL_TREE);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3259
111
kono
parents: 67
diff changeset
3260 if (version_align)
kono
parents: 67
diff changeset
3261 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
kono
parents: 67
diff changeset
3262 &cond_expr_stmt_list);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3263
111
kono
parents: 67
diff changeset
3264 if (version_alias)
kono
parents: 67
diff changeset
3265 {
kono
parents: 67
diff changeset
3266 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3267 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
111
kono
parents: 67
diff changeset
3268 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
kono
parents: 67
diff changeset
3269 }
kono
parents: 67
diff changeset
3270
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3271 if (version_simd_if_cond)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3272 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3273 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3274 if (flag_checking)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3275 if (basic_block bb
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3276 = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3277 gcc_assert (bb != loop->header
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3278 && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3279 && (scalar_loop == NULL
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3280 || (bb != scalar_loop->header
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3281 && dominated_by_p (CDI_DOMINATORS,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3282 scalar_loop->header, bb))));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3283 tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3284 tree c = fold_build2 (NE_EXPR, boolean_type_node,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3285 version_simd_if_cond, zero);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3286 if (cond_expr)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3287 cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3288 c, cond_expr);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3289 else
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3290 cond_expr = c;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3291 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3292 dump_printf_loc (MSG_NOTE, vect_location,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3293 "created versioning for simd if condition check.\n");
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3294 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3295
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3296 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3297 &gimplify_stmt_list,
111
kono
parents: 67
diff changeset
3298 is_gimple_condexpr, NULL_TREE);
kono
parents: 67
diff changeset
3299 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3300
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3301 /* Compute the outermost loop cond_expr and cond_expr_stmt_list are
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3302 invariant in. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3303 class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3304 for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3305 !gsi_end_p (gsi); gsi_next (&gsi))
111
kono
parents: 67
diff changeset
3306 {
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3307 gimple *stmt = gsi_stmt (gsi);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3308 update_stmt (stmt);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3309 ssa_op_iter iter;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3310 use_operand_p use_p;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3311 basic_block def_bb;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3312 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3313 if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3314 && flow_bb_inside_loop_p (outermost, def_bb))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3315 outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3316 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3317
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3318 /* Search for the outermost loop we can version. Avoid versioning of
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3319 non-perfect nests but allow if-conversion versioned loops inside. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3320 class loop *loop_to_version = loop;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3321 if (flow_loop_nested_p (outermost, loop))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3322 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3323 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3324 dump_printf_loc (MSG_NOTE, vect_location,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3325 "trying to apply versioning to outer loop %d\n",
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3326 outermost->num);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3327 if (outermost->num == 0)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3328 outermost = superloop_at_depth (loop, 1);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3329 /* And avoid applying versioning on non-perfect nests. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3330 while (loop_to_version != outermost
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3331 && single_exit (loop_outer (loop_to_version))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3332 && (!loop_outer (loop_to_version)->inner->next
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3333 || vect_loop_vectorized_call (loop_to_version))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3334 && (!loop_outer (loop_to_version)->inner->next
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3335 || !loop_outer (loop_to_version)->inner->next->next))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3336 loop_to_version = loop_outer (loop_to_version);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3337 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3338
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3339 /* Apply versioning. If there is already a scalar version created by
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3340 if-conversion re-use that. Note we cannot re-use the copy of
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3341 an if-converted outer-loop when vectorizing the inner loop only. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3342 gcond *cond;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3343 if ((!loop_to_version->inner || loop == loop_to_version)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3344 && loop_vectorized_call)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3345 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3346 gcc_assert (scalar_loop);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3347 condition_bb = gimple_bb (loop_vectorized_call);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3348 cond = as_a <gcond *> (last_stmt (condition_bb));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3349 gimple_cond_set_condition_from_tree (cond, cond_expr);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3350 update_stmt (cond);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3351
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3352 if (cond_expr_stmt_list)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3353 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3354 cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3355 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3356 GSI_SAME_STMT);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3357 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3358
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3359 /* if-conversion uses profile_probability::always () for both paths,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3360 reset the paths probabilities appropriately. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3361 edge te, fe;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3362 extract_true_false_edges_from_block (condition_bb, &te, &fe);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3363 te->probability = prob;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3364 fe->probability = prob.invert ();
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3365 /* We can scale loops counts immediately but have to postpone
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3366 scaling the scalar loop because we re-use it during peeling. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3367 scale_loop_frequencies (loop_to_version, te->probability);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3368 LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = fe->probability;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3369
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3370 nloop = scalar_loop;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3371 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3372 dump_printf_loc (MSG_NOTE, vect_location,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3373 "reusing %sloop version created by if conversion\n",
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3374 loop_to_version != loop ? "outer " : "");
111
kono
parents: 67
diff changeset
3375 }
kono
parents: 67
diff changeset
3376 else
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3377 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3378 if (loop_to_version != loop
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3379 && dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3380 dump_printf_loc (MSG_NOTE, vect_location,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3381 "applying loop versioning to outer loop %d\n",
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3382 loop_to_version->num);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3383
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3384 initialize_original_copy_tables ();
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3385 nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3386 prob, prob.invert (), prob, prob.invert (), true);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3387 gcc_assert (nloop);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3388 nloop = get_loop_copy (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3389
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3390 /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3391 reap those otherwise; they also refer to the original
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3392 loops. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3393 class loop *l = loop;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3394 while (gimple *call = vect_loop_vectorized_call (l))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3395 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3396 call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3397 fold_loop_internal_call (call, boolean_false_node);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3398 l = loop_outer (l);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3399 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3400 free_original_copy_tables ();
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3401
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3402 if (cond_expr_stmt_list)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3403 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3404 cond_exp_gsi = gsi_last_bb (condition_bb);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3405 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3406 GSI_SAME_STMT);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3407 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3408
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3409 /* Loop versioning violates an assumption we try to maintain during
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3410 vectorization - that the loop exit block has a single predecessor.
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3411 After versioning, the exit block of both loop versions is the same
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3412 basic block (i.e. it has two predecessors). Just in order to simplify
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3413 following transformations in the vectorizer, we fix this situation
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3414 here by adding a new (empty) block on the exit-edge of the loop,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3415 with the proper loop-exit phis to maintain loop-closed-form.
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3416 If loop versioning wasn't done from loop, but scalar_loop instead,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3417 merge_bb will have already just a single predecessor. */
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3418
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3419 merge_bb = single_exit (loop_to_version)->dest;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3420 if (EDGE_COUNT (merge_bb->preds) >= 2)
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3421 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3422 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3423 new_exit_bb = split_edge (single_exit (loop_to_version));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3424 new_exit_e = single_exit (loop_to_version);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3425 e = EDGE_SUCC (new_exit_bb, 0);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3426
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3427 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3428 gsi_next (&gsi))
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3429 {
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3430 tree new_res;
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3431 orig_phi = gsi.phi ();
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3432 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3433 new_phi = create_phi_node (new_res, new_exit_bb);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3434 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3435 add_phi_arg (new_phi, arg, new_exit_e,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3436 gimple_phi_arg_location_from_edge (orig_phi, e));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3437 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3438 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3439 }
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3440
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3441 update_ssa (TODO_update_ssa);
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3442 }
111
kono
parents: 67
diff changeset
3443
kono
parents: 67
diff changeset
3444 if (version_niter)
kono
parents: 67
diff changeset
3445 {
kono
parents: 67
diff changeset
3446 /* The versioned loop could be infinite, we need to clear existing
kono
parents: 67
diff changeset
3447 niter information which is copied from the original loop. */
kono
parents: 67
diff changeset
3448 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
kono
parents: 67
diff changeset
3449 vect_free_loop_info_assumptions (nloop);
kono
parents: 67
diff changeset
3450 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
kono
parents: 67
diff changeset
3451 loop_constraint_set (loop, LOOP_C_INFINITE);
kono
parents: 67
diff changeset
3452 }
kono
parents: 67
diff changeset
3453
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3454 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
111
kono
parents: 67
diff changeset
3455 && dump_enabled_p ())
kono
parents: 67
diff changeset
3456 {
kono
parents: 67
diff changeset
3457 if (version_alias)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3458 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3459 vect_location,
111
kono
parents: 67
diff changeset
3460 "loop versioned for vectorization because of "
kono
parents: 67
diff changeset
3461 "possible aliasing\n");
kono
parents: 67
diff changeset
3462 if (version_align)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3463 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3464 vect_location,
111
kono
parents: 67
diff changeset
3465 "loop versioned for vectorization to enhance "
kono
parents: 67
diff changeset
3466 "alignment\n");
kono
parents: 67
diff changeset
3467
kono
parents: 67
diff changeset
3468 }
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3469
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
3470 return nloop;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3471 }