/* Optimization of PHI nodes by converting them into straightline code.
   Copyright (C) 2004-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
19 | |
20 #include "config.h" | |
21 #include "system.h" | |
22 #include "coretypes.h" | |
111 | 23 #include "backend.h" |
24 #include "insn-codes.h" | |
25 #include "rtl.h" | |
0 | 26 #include "tree.h" |
111 | 27 #include "gimple.h" |
28 #include "cfghooks.h" | |
0 | 29 #include "tree-pass.h" |
111 | 30 #include "ssa.h" |
31 #include "optabs-tree.h" | |
32 #include "insn-config.h" | |
33 #include "gimple-pretty-print.h" | |
34 #include "fold-const.h" | |
35 #include "stor-layout.h" | |
36 #include "cfganal.h" | |
37 #include "gimplify.h" | |
38 #include "gimple-iterator.h" | |
39 #include "gimplify-me.h" | |
40 #include "tree-cfg.h" | |
41 #include "tree-dfa.h" | |
0 | 42 #include "domwalk.h" |
111 | 43 #include "cfgloop.h" |
44 #include "tree-data-ref.h" | |
45 #include "tree-scalar-evolution.h" | |
46 #include "tree-inline.h" | |
131 | 47 #include "case-cfn-macros.h" |
48 | |
49 static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); | |
145 | 50 static bool two_value_replacement (basic_block, basic_block, edge, gphi *, |
51 tree, tree); | |
0 | 52 static bool conditional_replacement (basic_block, basic_block, |
111 | 53 edge, edge, gphi *, tree, tree); |
54 static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree, | |
55 gimple *); | |
56 static int value_replacement (basic_block, basic_block, | |
57 edge, edge, gimple *, tree, tree); | |
0 | 58 static bool minmax_replacement (basic_block, basic_block, |
111 | 59 edge, edge, gimple *, tree, tree); |
0 | 60 static bool abs_replacement (basic_block, basic_block, |
111 | 61 edge, edge, gimple *, tree, tree); |
131 | 62 static bool cond_removal_in_popcount_pattern (basic_block, basic_block, |
63 edge, edge, gimple *, tree, tree); | |
0 | 64 static bool cond_store_replacement (basic_block, basic_block, edge, edge, |
111 | 65 hash_set<tree> *); |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
66 static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); |
111 | 67 static hash_set<tree> * get_non_trapping (); |
68 static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); | |
69 static void hoist_adjacent_loads (basic_block, basic_block, | |
70 basic_block, basic_block); | |
71 static bool gate_hoist_loads (void); | |
0 | 72 |
/* This pass tries to transform conditional stores into unconditional
   ones, enabling further simplifications with the simpler then and else
   blocks.  In particular it replaces this:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS;
     bb2:

   with

     bb0:
       if (cond) goto bb1; else goto bb2;
     bb1:
       condtmp' = *p;
     bb2:
       condtmp = PHI <RHS, condtmp'>
       *p = condtmp;

   This transformation can only be done under several constraints,
   documented below.  It also replaces:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS1;
       goto bb3;
     bb2:
       *p = RHS2;
     bb3:

   with

     bb0:
       if (cond) goto bb3; else goto bb1;
     bb1:
     bb3:
       condtmp = PHI <RHS1, RHS2>
       *p = condtmp;  */
0 | 113 |
114 static unsigned int | |
115 tree_ssa_cs_elim (void) | |
116 { | |
111 | 117 unsigned todo; |
118 /* ??? We are not interested in loop related info, but the following | |
119 will create it, ICEing as we didn't init loops with pre-headers. | |
120 An interfacing issue of find_data_references_in_bb. */ | |
121 loop_optimizer_init (LOOPS_NORMAL); | |
122 scev_initialize (); | |
131 | 123 todo = tree_ssa_phiopt_worker (true, false, false); |
111 | 124 scev_finalize (); |
125 loop_optimizer_finalize (); | |
126 return todo; | |
0 | 127 } |
128 | |
111 | 129 /* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */ |
130 | |
131 static gphi * | |
132 single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1) | |
133 { | |
134 gimple_stmt_iterator i; | |
135 gphi *phi = NULL; | |
136 if (gimple_seq_singleton_p (seq)) | |
137 return as_a <gphi *> (gsi_stmt (gsi_start (seq))); | |
138 for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i)) | |
139 { | |
140 gphi *p = as_a <gphi *> (gsi_stmt (i)); | |
141 /* If the PHI arguments are equal then we can skip this PHI. */ | |
142 if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx), | |
143 gimple_phi_arg_def (p, e1->dest_idx))) | |
144 continue; | |
145 | |
146 /* If we already have a PHI that has the two edge arguments are | |
147 different, then return it is not a singleton for these PHIs. */ | |
148 if (phi) | |
149 return NULL; | |
150 | |
151 phi = p; | |
152 } | |
153 return phi; | |
154 } | |
0 | 155 |
156 /* The core routine of conditional store replacement and normal | |
157 phi optimizations. Both share much of the infrastructure in how | |
158 to match applicable basic block patterns. DO_STORE_ELIM is true | |
111 | 159 when we want to do conditional store replacement, false otherwise. |
160 DO_HOIST_LOADS is true when we want to hoist adjacent loads out | |
161 of diamond control flow patterns, false otherwise. */ | |
0 | 162 static unsigned int |
131 | 163 tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) |
0 | 164 { |
165 basic_block bb; | |
166 basic_block *bb_order; | |
167 unsigned n, i; | |
168 bool cfgchanged = false; | |
111 | 169 hash_set<tree> *nontrap = 0; |
0 | 170 |
171 if (do_store_elim) | |
111 | 172 /* Calculate the set of non-trapping memory accesses. */ |
173 nontrap = get_non_trapping (); | |
0 | 174 |
175 /* Search every basic block for COND_EXPR we may be able to optimize. | |
176 | |
177 We walk the blocks in order that guarantees that a block with | |
178 a single predecessor is processed before the predecessor. | |
179 This ensures that we collapse inner ifs before visiting the | |
180 outer ones, and also that we do not try to visit a removed | |
181 block. */ | |
111 | 182 bb_order = single_pred_before_succ_order (); |
183 n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; | |
0 | 184 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
185 for (i = 0; i < n; i++) |
0 | 186 { |
111 | 187 gimple *cond_stmt; |
188 gphi *phi; | |
0 | 189 basic_block bb1, bb2; |
190 edge e1, e2; | |
191 tree arg0, arg1; | |
192 | |
193 bb = bb_order[i]; | |
194 | |
195 cond_stmt = last_stmt (bb); | |
196 /* Check to see if the last statement is a GIMPLE_COND. */ | |
197 if (!cond_stmt | |
198 || gimple_code (cond_stmt) != GIMPLE_COND) | |
199 continue; | |
200 | |
201 e1 = EDGE_SUCC (bb, 0); | |
202 bb1 = e1->dest; | |
203 e2 = EDGE_SUCC (bb, 1); | |
204 bb2 = e2->dest; | |
205 | |
206 /* We cannot do the optimization on abnormal edges. */ | |
207 if ((e1->flags & EDGE_ABNORMAL) != 0 | |
208 || (e2->flags & EDGE_ABNORMAL) != 0) | |
209 continue; | |
210 | |
211 /* If either bb1's succ or bb2 or bb2's succ is non NULL. */ | |
212 if (EDGE_COUNT (bb1->succs) == 0 | |
213 || bb2 == NULL | |
214 || EDGE_COUNT (bb2->succs) == 0) | |
215 continue; | |
216 | |
217 /* Find the bb which is the fall through to the other. */ | |
218 if (EDGE_SUCC (bb1, 0)->dest == bb2) | |
219 ; | |
220 else if (EDGE_SUCC (bb2, 0)->dest == bb1) | |
221 { | |
111 | 222 std::swap (bb1, bb2); |
223 std::swap (e1, e2); | |
0 | 224 } |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
225 else if (do_store_elim |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
226 && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
227 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
228 basic_block bb3 = EDGE_SUCC (bb1, 0)->dest; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
229 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
230 if (!single_succ_p (bb1) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
231 || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
232 || !single_succ_p (bb2) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
233 || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
234 || EDGE_COUNT (bb3->preds) != 2) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
235 continue; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
236 if (cond_if_else_store_replacement (bb1, bb2, bb3)) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
237 cfgchanged = true; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
238 continue; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
239 } |
111 | 240 else if (do_hoist_loads |
241 && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest) | |
242 { | |
243 basic_block bb3 = EDGE_SUCC (bb1, 0)->dest; | |
244 | |
245 if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt))) | |
246 && single_succ_p (bb1) | |
247 && single_succ_p (bb2) | |
248 && single_pred_p (bb1) | |
249 && single_pred_p (bb2) | |
250 && EDGE_COUNT (bb->succs) == 2 | |
251 && EDGE_COUNT (bb3->preds) == 2 | |
252 /* If one edge or the other is dominant, a conditional move | |
253 is likely to perform worse than the well-predicted branch. */ | |
254 && !predictable_edge_p (EDGE_SUCC (bb, 0)) | |
255 && !predictable_edge_p (EDGE_SUCC (bb, 1))) | |
256 hoist_adjacent_loads (bb, bb1, bb2, bb3); | |
257 continue; | |
258 } | |
0 | 259 else |
111 | 260 continue; |
0 | 261 |
262 e1 = EDGE_SUCC (bb1, 0); | |
263 | |
264 /* Make sure that bb1 is just a fall through. */ | |
265 if (!single_succ_p (bb1) | |
266 || (e1->flags & EDGE_FALLTHRU) == 0) | |
267 continue; | |
268 | |
269 /* Also make sure that bb1 only have one predecessor and that it | |
270 is bb. */ | |
271 if (!single_pred_p (bb1) | |
272 || single_pred (bb1) != bb) | |
273 continue; | |
274 | |
275 if (do_store_elim) | |
276 { | |
277 /* bb1 is the middle block, bb2 the join block, bb the split block, | |
278 e1 the fallthrough edge from bb1 to bb2. We can't do the | |
279 optimization if the join block has more than two predecessors. */ | |
280 if (EDGE_COUNT (bb2->preds) > 2) | |
281 continue; | |
282 if (cond_store_replacement (bb1, bb2, e1, e2, nontrap)) | |
283 cfgchanged = true; | |
284 } | |
285 else | |
286 { | |
287 gimple_seq phis = phi_nodes (bb2); | |
111 | 288 gimple_stmt_iterator gsi; |
289 bool candorest = true; | |
290 | |
291 /* Value replacement can work with more than one PHI | |
292 so try that first. */ | |
131 | 293 if (!early_p) |
294 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) | |
295 { | |
296 phi = as_a <gphi *> (gsi_stmt (gsi)); | |
297 arg0 = gimple_phi_arg_def (phi, e1->dest_idx); | |
298 arg1 = gimple_phi_arg_def (phi, e2->dest_idx); | |
299 if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2) | |
300 { | |
301 candorest = false; | |
302 cfgchanged = true; | |
303 break; | |
304 } | |
305 } | |
111 | 306 |
307 if (!candorest) | |
0 | 308 continue; |
309 | |
111 | 310 phi = single_non_singleton_phi_for_edges (phis, e1, e2); |
311 if (!phi) | |
312 continue; | |
313 | |
0 | 314 arg0 = gimple_phi_arg_def (phi, e1->dest_idx); |
315 arg1 = gimple_phi_arg_def (phi, e2->dest_idx); | |
316 | |
317 /* Something is wrong if we cannot find the arguments in the PHI | |
318 node. */ | |
111 | 319 gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE); |
320 | |
321 gphi *newphi = factor_out_conditional_conversion (e1, e2, phi, | |
322 arg0, arg1, | |
323 cond_stmt); | |
324 if (newphi != NULL) | |
325 { | |
326 phi = newphi; | |
327 /* factor_out_conditional_conversion may create a new PHI in | |
328 BB2 and eliminate an existing PHI in BB2. Recompute values | |
329 that may be affected by that change. */ | |
330 arg0 = gimple_phi_arg_def (phi, e1->dest_idx); | |
331 arg1 = gimple_phi_arg_def (phi, e2->dest_idx); | |
332 gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE); | |
333 } | |
0 | 334 |
335 /* Do the replacement of conditional if it can be done. */ | |
145 | 336 if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) |
337 cfgchanged = true; | |
338 else if (!early_p | |
339 && conditional_replacement (bb, bb1, e1, e2, phi, | |
340 arg0, arg1)) | |
0 | 341 cfgchanged = true; |
342 else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) | |
343 cfgchanged = true; | |
131 | 344 else if (!early_p |
345 && cond_removal_in_popcount_pattern (bb, bb1, e1, e2, | |
346 phi, arg0, arg1)) | |
347 cfgchanged = true; | |
0 | 348 else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) |
349 cfgchanged = true; | |
350 } | |
351 } | |
352 | |
353 free (bb_order); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
354 |
0 | 355 if (do_store_elim) |
111 | 356 delete nontrap; |
0 | 357 /* If the CFG has changed, we should cleanup the CFG. */ |
358 if (cfgchanged && do_store_elim) | |
359 { | |
360 /* In cond-store replacement we have added some loads on edges | |
361 and new VOPS (as we moved the store, and created a load). */ | |
362 gsi_commit_edge_inserts (); | |
363 return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals; | |
364 } | |
365 else if (cfgchanged) | |
366 return TODO_cleanup_cfg; | |
367 return 0; | |
368 } | |
369 | |
370 /* Replace PHI node element whose edge is E in block BB with variable NEW. | |
371 Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK | |
372 is known to have two edges, one of which must reach BB). */ | |
373 | |
374 static void | |
375 replace_phi_edge_with_variable (basic_block cond_block, | |
111 | 376 edge e, gimple *phi, tree new_tree) |
0 | 377 { |
378 basic_block bb = gimple_bb (phi); | |
379 basic_block block_to_remove; | |
380 gimple_stmt_iterator gsi; | |
381 | |
382 /* Change the PHI argument to new. */ | |
383 SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree); | |
384 | |
385 /* Remove the empty basic block. */ | |
386 if (EDGE_SUCC (cond_block, 0)->dest == bb) | |
387 { | |
388 EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU; | |
389 EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); | |
111 | 390 EDGE_SUCC (cond_block, 0)->probability = profile_probability::always (); |
0 | 391 |
392 block_to_remove = EDGE_SUCC (cond_block, 1)->dest; | |
393 } | |
394 else | |
395 { | |
396 EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU; | |
397 EDGE_SUCC (cond_block, 1)->flags | |
398 &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); | |
111 | 399 EDGE_SUCC (cond_block, 1)->probability = profile_probability::always (); |
0 | 400 |
401 block_to_remove = EDGE_SUCC (cond_block, 0)->dest; | |
402 } | |
403 delete_basic_block (block_to_remove); | |
404 | |
405 /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ | |
406 gsi = gsi_last_bb (cond_block); | |
407 gsi_remove (&gsi, true); | |
408 | |
409 if (dump_file && (dump_flags & TDF_DETAILS)) | |
410 fprintf (dump_file, | |
411 "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n", | |
412 cond_block->index, | |
413 bb->index); | |
414 } | |
415 | |
111 | 416 /* PR66726: Factor conversion out of COND_EXPR. If the arguments of the PHI |
417 stmt are CONVERT_STMT, factor out the conversion and perform the conversion | |
418 to the result of PHI stmt. COND_STMT is the controlling predicate. | |
419 Return the newly-created PHI, if any. */ | |
420 | |
421 static gphi * | |
422 factor_out_conditional_conversion (edge e0, edge e1, gphi *phi, | |
423 tree arg0, tree arg1, gimple *cond_stmt) | |
424 { | |
425 gimple *arg0_def_stmt = NULL, *arg1_def_stmt = NULL, *new_stmt; | |
426 tree new_arg0 = NULL_TREE, new_arg1 = NULL_TREE; | |
427 tree temp, result; | |
428 gphi *newphi; | |
429 gimple_stmt_iterator gsi, gsi_for_def; | |
145 | 430 location_t locus = gimple_location (phi); |
111 | 431 enum tree_code convert_code; |
432 | |
433 /* Handle only PHI statements with two arguments. TODO: If all | |
434 other arguments to PHI are INTEGER_CST or if their defining | |
435 statement have the same unary operation, we can handle more | |
436 than two arguments too. */ | |
437 if (gimple_phi_num_args (phi) != 2) | |
438 return NULL; | |
439 | |
440 /* First canonicalize to simplify tests. */ | |
441 if (TREE_CODE (arg0) != SSA_NAME) | |
442 { | |
443 std::swap (arg0, arg1); | |
444 std::swap (e0, e1); | |
445 } | |
446 | |
447 if (TREE_CODE (arg0) != SSA_NAME | |
448 || (TREE_CODE (arg1) != SSA_NAME | |
449 && TREE_CODE (arg1) != INTEGER_CST)) | |
450 return NULL; | |
451 | |
452 /* Check if arg0 is an SSA_NAME and the stmt which defines arg0 is | |
453 a conversion. */ | |
454 arg0_def_stmt = SSA_NAME_DEF_STMT (arg0); | |
455 if (!gimple_assign_cast_p (arg0_def_stmt)) | |
456 return NULL; | |
457 | |
458 /* Use the RHS as new_arg0. */ | |
459 convert_code = gimple_assign_rhs_code (arg0_def_stmt); | |
460 new_arg0 = gimple_assign_rhs1 (arg0_def_stmt); | |
461 if (convert_code == VIEW_CONVERT_EXPR) | |
462 { | |
463 new_arg0 = TREE_OPERAND (new_arg0, 0); | |
464 if (!is_gimple_reg_type (TREE_TYPE (new_arg0))) | |
465 return NULL; | |
466 } | |
467 | |
468 if (TREE_CODE (arg1) == SSA_NAME) | |
469 { | |
470 /* Check if arg1 is an SSA_NAME and the stmt which defines arg1 | |
471 is a conversion. */ | |
472 arg1_def_stmt = SSA_NAME_DEF_STMT (arg1); | |
473 if (!is_gimple_assign (arg1_def_stmt) | |
474 || gimple_assign_rhs_code (arg1_def_stmt) != convert_code) | |
475 return NULL; | |
476 | |
477 /* Use the RHS as new_arg1. */ | |
478 new_arg1 = gimple_assign_rhs1 (arg1_def_stmt); | |
479 if (convert_code == VIEW_CONVERT_EXPR) | |
480 new_arg1 = TREE_OPERAND (new_arg1, 0); | |
481 } | |
482 else | |
483 { | |
484 /* If arg1 is an INTEGER_CST, fold it to new type. */ | |
485 if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0)) | |
486 && int_fits_type_p (arg1, TREE_TYPE (new_arg0))) | |
487 { | |
488 if (gimple_assign_cast_p (arg0_def_stmt)) | |
489 { | |
490 /* For the INTEGER_CST case, we are just moving the | |
491 conversion from one place to another, which can often | |
492 hurt as the conversion moves further away from the | |
493 statement that computes the value. So, perform this | |
494 only if new_arg0 is an operand of COND_STMT, or | |
495 if arg0_def_stmt is the only non-debug stmt in | |
496 its basic block, because then it is possible this | |
497 could enable further optimizations (minmax replacement | |
498 etc.). See PR71016. */ | |
499 if (new_arg0 != gimple_cond_lhs (cond_stmt) | |
500 && new_arg0 != gimple_cond_rhs (cond_stmt) | |
501 && gimple_bb (arg0_def_stmt) == e0->src) | |
502 { | |
503 gsi = gsi_for_stmt (arg0_def_stmt); | |
504 gsi_prev_nondebug (&gsi); | |
505 if (!gsi_end_p (gsi)) | |
145 | 506 { |
507 if (gassign *assign | |
508 = dyn_cast <gassign *> (gsi_stmt (gsi))) | |
509 { | |
510 tree lhs = gimple_assign_lhs (assign); | |
511 enum tree_code ass_code | |
512 = gimple_assign_rhs_code (assign); | |
513 if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) | |
514 return NULL; | |
515 if (lhs != gimple_assign_rhs1 (arg0_def_stmt)) | |
516 return NULL; | |
517 gsi_prev_nondebug (&gsi); | |
518 if (!gsi_end_p (gsi)) | |
519 return NULL; | |
520 } | |
521 else | |
522 return NULL; | |
523 } | |
111 | 524 gsi = gsi_for_stmt (arg0_def_stmt); |
525 gsi_next_nondebug (&gsi); | |
526 if (!gsi_end_p (gsi)) | |
527 return NULL; | |
528 } | |
529 new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1); | |
530 } | |
531 else | |
532 return NULL; | |
533 } | |
534 else | |
535 return NULL; | |
536 } | |
537 | |
538 /* If arg0/arg1 have > 1 use, then this transformation actually increases | |
539 the number of expressions evaluated at runtime. */ | |
540 if (!has_single_use (arg0) | |
541 || (arg1_def_stmt && !has_single_use (arg1))) | |
542 return NULL; | |
543 | |
544 /* If types of new_arg0 and new_arg1 are different bailout. */ | |
545 if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1))) | |
546 return NULL; | |
547 | |
548 /* Create a new PHI stmt. */ | |
549 result = PHI_RESULT (phi); | |
550 temp = make_ssa_name (TREE_TYPE (new_arg0), NULL); | |
551 newphi = create_phi_node (temp, gimple_bb (phi)); | |
552 | |
553 if (dump_file && (dump_flags & TDF_DETAILS)) | |
554 { | |
555 fprintf (dump_file, "PHI "); | |
556 print_generic_expr (dump_file, gimple_phi_result (phi)); | |
557 fprintf (dump_file, | |
558 " changed to factor conversion out from COND_EXPR.\n"); | |
559 fprintf (dump_file, "New stmt with CAST that defines "); | |
560 print_generic_expr (dump_file, result); | |
561 fprintf (dump_file, ".\n"); | |
562 } | |
563 | |
564 /* Remove the old cast(s) that has single use. */ | |
565 gsi_for_def = gsi_for_stmt (arg0_def_stmt); | |
566 gsi_remove (&gsi_for_def, true); | |
567 release_defs (arg0_def_stmt); | |
568 | |
569 if (arg1_def_stmt) | |
570 { | |
571 gsi_for_def = gsi_for_stmt (arg1_def_stmt); | |
572 gsi_remove (&gsi_for_def, true); | |
573 release_defs (arg1_def_stmt); | |
574 } | |
575 | |
576 add_phi_arg (newphi, new_arg0, e0, locus); | |
577 add_phi_arg (newphi, new_arg1, e1, locus); | |
578 | |
579 /* Create the conversion stmt and insert it. */ | |
580 if (convert_code == VIEW_CONVERT_EXPR) | |
131 | 581 { |
582 temp = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (result), temp); | |
583 new_stmt = gimple_build_assign (result, temp); | |
584 } | |
585 else | |
586 new_stmt = gimple_build_assign (result, convert_code, temp); | |
111 | 587 gsi = gsi_after_labels (gimple_bb (phi)); |
588 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); | |
589 | |
590 /* Remove the original PHI stmt. */ | |
591 gsi = gsi_for_stmt (phi); | |
592 gsi_remove (&gsi, true); | |
593 return newphi; | |
594 } | |
595 | |
145 | 596 /* Optimize |
597 # x_5 in range [cst1, cst2] where cst2 = cst1 + 1 | |
598 if (x_5 op cstN) # where op is == or != and N is 1 or 2 | |
599 goto bb3; | |
600 else | |
601 goto bb4; | |
602 bb3: | |
603 bb4: | |
604 # r_6 = PHI<cst3(2), cst4(3)> # where cst3 == cst4 + 1 or cst4 == cst3 + 1 | |
605 | |
606 to r_6 = x_5 + (min (cst3, cst4) - cst1) or | |
607 r_6 = (min (cst3, cst4) + cst1) - x_5 depending on op, N and which | |
608 of cst3 and cst4 is smaller. */ | |
609 | |
610 static bool | |
611 two_value_replacement (basic_block cond_bb, basic_block middle_bb, | |
612 edge e1, gphi *phi, tree arg0, tree arg1) | |
613 { | |
614 /* Only look for adjacent integer constants. */ | |
615 if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)) | |
616 || !INTEGRAL_TYPE_P (TREE_TYPE (arg1)) | |
617 || TREE_CODE (arg0) != INTEGER_CST | |
618 || TREE_CODE (arg1) != INTEGER_CST | |
619 || (tree_int_cst_lt (arg0, arg1) | |
620 ? wi::to_widest (arg0) + 1 != wi::to_widest (arg1) | |
621 : wi::to_widest (arg1) + 1 != wi::to_widest (arg0))) | |
622 return false; | |
623 | |
624 if (!empty_block_p (middle_bb)) | |
625 return false; | |
626 | |
627 gimple *stmt = last_stmt (cond_bb); | |
628 tree lhs = gimple_cond_lhs (stmt); | |
629 tree rhs = gimple_cond_rhs (stmt); | |
630 | |
631 if (TREE_CODE (lhs) != SSA_NAME | |
632 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)) | |
633 || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE | |
634 || TREE_CODE (rhs) != INTEGER_CST) | |
635 return false; | |
636 | |
637 switch (gimple_cond_code (stmt)) | |
638 { | |
639 case EQ_EXPR: | |
640 case NE_EXPR: | |
641 break; | |
642 default: | |
643 return false; | |
644 } | |
645 | |
646 wide_int min, max; | |
647 if (get_range_info (lhs, &min, &max) != VR_RANGE | |
648 || min + 1 != max | |
649 || (wi::to_wide (rhs) != min | |
650 && wi::to_wide (rhs) != max)) | |
651 return false; | |
652 | |
653 /* We need to know which is the true edge and which is the false | |
654 edge so that we know when to invert the condition below. */ | |
655 edge true_edge, false_edge; | |
656 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); | |
657 if ((gimple_cond_code (stmt) == EQ_EXPR) | |
658 ^ (wi::to_wide (rhs) == max) | |
659 ^ (e1 == false_edge)) | |
660 std::swap (arg0, arg1); | |
661 | |
662 tree type; | |
663 if (TYPE_PRECISION (TREE_TYPE (lhs)) == TYPE_PRECISION (TREE_TYPE (arg0))) | |
664 { | |
665 /* Avoid performing the arithmetics in bool type which has different | |
666 semantics, otherwise prefer unsigned types from the two with | |
667 the same precision. */ | |
668 if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE | |
669 || !TYPE_UNSIGNED (TREE_TYPE (arg0))) | |
670 type = TREE_TYPE (lhs); | |
671 else | |
672 type = TREE_TYPE (arg0); | |
673 } | |
674 else if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (TREE_TYPE (arg0))) | |
675 type = TREE_TYPE (lhs); | |
676 else | |
677 type = TREE_TYPE (arg0); | |
678 | |
679 min = wide_int::from (min, TYPE_PRECISION (type), | |
680 TYPE_SIGN (TREE_TYPE (lhs))); | |
681 wide_int a = wide_int::from (wi::to_wide (arg0), TYPE_PRECISION (type), | |
682 TYPE_SIGN (TREE_TYPE (arg0))); | |
683 enum tree_code code; | |
684 wi::overflow_type ovf; | |
685 if (tree_int_cst_lt (arg0, arg1)) | |
686 { | |
687 code = PLUS_EXPR; | |
688 a -= min; | |
689 if (!TYPE_UNSIGNED (type)) | |
690 { | |
691 /* lhs is known to be in range [min, min+1] and we want to add a | |
692 to it. Check if that operation can overflow for those 2 values | |
693 and if yes, force unsigned type. */ | |
694 wi::add (min + (wi::neg_p (a) ? 0 : 1), a, SIGNED, &ovf); | |
695 if (ovf) | |
696 type = unsigned_type_for (type); | |
697 } | |
698 } | |
699 else | |
700 { | |
701 code = MINUS_EXPR; | |
702 a += min; | |
703 if (!TYPE_UNSIGNED (type)) | |
704 { | |
705 /* lhs is known to be in range [min, min+1] and we want to subtract | |
706 it from a. Check if that operation can overflow for those 2 | |
707 values and if yes, force unsigned type. */ | |
708 wi::sub (a, min + (wi::neg_p (min) ? 0 : 1), SIGNED, &ovf); | |
709 if (ovf) | |
710 type = unsigned_type_for (type); | |
711 } | |
712 } | |
713 | |
714 tree arg = wide_int_to_tree (type, a); | |
715 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
716 if (!useless_type_conversion_p (type, TREE_TYPE (lhs))) | |
717 lhs = gimplify_build1 (&gsi, NOP_EXPR, type, lhs); | |
718 tree new_rhs; | |
719 if (code == PLUS_EXPR) | |
720 new_rhs = gimplify_build2 (&gsi, PLUS_EXPR, type, lhs, arg); | |
721 else | |
722 new_rhs = gimplify_build2 (&gsi, MINUS_EXPR, type, arg, lhs); | |
723 if (!useless_type_conversion_p (TREE_TYPE (arg0), type)) | |
724 new_rhs = gimplify_build1 (&gsi, NOP_EXPR, TREE_TYPE (arg0), new_rhs); | |
725 | |
726 replace_phi_edge_with_variable (cond_bb, e1, phi, new_rhs); | |
727 | |
728 /* Note that we optimized this PHI. */ | |
729 return true; | |
730 } | |
731 | |
0 | 732 /* The function conditional_replacement does the main work of doing the |
733 conditional replacement. Return true if the replacement is done. | |
734 Otherwise return false. | |
735 BB is the basic block where the replacement is going to be done on. ARG0 | |
736 is argument 0 from PHI. Likewise for ARG1. */ | |
737 | |
738 static bool | |
739 conditional_replacement (basic_block cond_bb, basic_block middle_bb, | |
111 | 740 edge e0, edge e1, gphi *phi, |
0 | 741 tree arg0, tree arg1) |
742 { | |
743 tree result; | |
111 | 744 gimple *stmt; |
745 gassign *new_stmt; | |
0 | 746 tree cond; |
747 gimple_stmt_iterator gsi; | |
748 edge true_edge, false_edge; | |
749 tree new_var, new_var2; | |
111 | 750 bool neg; |
0 | 751 |
752 /* FIXME: Gimplification of complex type is too hard for now. */ | |
111 | 753 /* We aren't prepared to handle vectors either (and it is a question |
754 if it would be worthwhile anyway). */ | |
755 if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0)) | |
756 || POINTER_TYPE_P (TREE_TYPE (arg0))) | |
757 || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1)) | |
758 || POINTER_TYPE_P (TREE_TYPE (arg1)))) | |
0 | 759 return false; |
760 | |
111 | 761 /* The PHI arguments have the constants 0 and 1, or 0 and -1, then |
762 convert it to the conditional. */ | |
0 | 763 if ((integer_zerop (arg0) && integer_onep (arg1)) |
764 || (integer_zerop (arg1) && integer_onep (arg0))) | |
111 | 765 neg = false; |
766 else if ((integer_zerop (arg0) && integer_all_onesp (arg1)) | |
767 || (integer_zerop (arg1) && integer_all_onesp (arg0))) | |
768 neg = true; | |
0 | 769 else |
770 return false; | |
771 | |
772 if (!empty_block_p (middle_bb)) | |
773 return false; | |
774 | |
775 /* At this point we know we have a GIMPLE_COND with two successors. | |
776 One successor is BB, the other successor is an empty block which | |
777 falls through into BB. | |
778 | |
779 There is a single PHI node at the join point (BB) and its arguments | |
111 | 780 are constants (0, 1) or (0, -1). |
0 | 781 |
782 So, given the condition COND, and the two PHI arguments, we can | |
783 rewrite this PHI into non-branching code: | |
784 | |
785 dest = (COND) or dest = COND' | |
786 | |
787 We use the condition as-is if the argument associated with the | |
788 true edge has the value one or the argument associated with the | |
789 false edge as the value zero. Note that those conditions are not | |
790 the same since only one of the outgoing edges from the GIMPLE_COND | |
791 will directly reach BB and thus be associated with an argument. */ | |
792 | |
793 stmt = last_stmt (cond_bb); | |
794 result = PHI_RESULT (phi); | |
795 | |
796 /* To handle special cases like floating point comparison, it is easier and | |
797 less error-prone to build a tree and gimplify it on the fly though it is | |
798 less efficient. */ | |
111 | 799 cond = fold_build2_loc (gimple_location (stmt), |
800 gimple_cond_code (stmt), boolean_type_node, | |
801 gimple_cond_lhs (stmt), gimple_cond_rhs (stmt)); | |
0 | 802 |
803 /* We need to know which is the true edge and which is the false | |
804 edge so that we know when to invert the condition below. */ | |
805 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); | |
806 if ((e0 == true_edge && integer_zerop (arg0)) | |
111 | 807 || (e0 == false_edge && !integer_zerop (arg0)) |
0 | 808 || (e1 == true_edge && integer_zerop (arg1)) |
111 | 809 || (e1 == false_edge && !integer_zerop (arg1))) |
810 cond = fold_build1_loc (gimple_location (stmt), | |
811 TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); | |
812 | |
813 if (neg) | |
814 { | |
815 cond = fold_convert_loc (gimple_location (stmt), | |
816 TREE_TYPE (result), cond); | |
817 cond = fold_build1_loc (gimple_location (stmt), | |
818 NEGATE_EXPR, TREE_TYPE (cond), cond); | |
819 } | |
0 | 820 |
821 /* Insert our new statements at the end of conditional block before the | |
822 COND_STMT. */ | |
823 gsi = gsi_for_stmt (stmt); | |
824 new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true, | |
825 GSI_SAME_STMT); | |
826 | |
827 if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var))) | |
828 { | |
145 | 829 location_t locus_0, locus_1; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
830 |
111 | 831 new_var2 = make_ssa_name (TREE_TYPE (result)); |
832 new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var); | |
0 | 833 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
834 new_var = new_var2; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
835 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
836 /* Set the locus to the first argument, unless is doesn't have one. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
837 locus_0 = gimple_phi_arg_location (phi, 0); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
838 locus_1 = gimple_phi_arg_location (phi, 1); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
839 if (locus_0 == UNKNOWN_LOCATION) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
840 locus_0 = locus_1; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
841 gimple_set_location (new_stmt, locus_0); |
0 | 842 } |
843 | |
844 replace_phi_edge_with_variable (cond_bb, e1, phi, new_var); | |
845 | |
846 /* Note that we optimized this PHI. */ | |
847 return true; | |
848 } | |
849 | |
111 | 850 /* Update *ARG which is defined in STMT so that it contains the |
851 computed value if that seems profitable. Return true if the | |
852 statement is made dead by that rewriting. */ | |
853 | |
854 static bool | |
855 jump_function_from_stmt (tree *arg, gimple *stmt) | |
856 { | |
857 enum tree_code code = gimple_assign_rhs_code (stmt); | |
858 if (code == ADDR_EXPR) | |
859 { | |
860 /* For arg = &p->i transform it to p, if possible. */ | |
861 tree rhs1 = gimple_assign_rhs1 (stmt); | |
131 | 862 poly_int64 offset; |
111 | 863 tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0), |
864 &offset); | |
865 if (tem | |
866 && TREE_CODE (tem) == MEM_REF | |
131 | 867 && known_eq (mem_ref_offset (tem) + offset, 0)) |
111 | 868 { |
869 *arg = TREE_OPERAND (tem, 0); | |
870 return true; | |
871 } | |
872 } | |
873 /* TODO: Much like IPA-CP jump-functions we want to handle constant | |
874 additions symbolically here, and we'd need to update the comparison | |
875 code that compares the arg + cst tuples in our caller. For now the | |
876 code above exactly handles the VEC_BASE pattern from vec.h. */ | |
877 return false; | |
878 } | |
879 | |
880 /* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional | |
881 of the form SSA_NAME NE 0. | |
882 | |
883 If RHS is fed by a simple EQ_EXPR comparison of two values, see if | |
884 the two input values of the EQ_EXPR match arg0 and arg1. | |
885 | |
886 If so update *code and return TRUE. Otherwise return FALSE. */ | |
887 | |
888 static bool | |
889 rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1, | |
890 enum tree_code *code, const_tree rhs) | |
891 { | |
892 /* Obviously if RHS is not an SSA_NAME, we can't look at the defining | |
893 statement. */ | |
894 if (TREE_CODE (rhs) == SSA_NAME) | |
895 { | |
896 gimple *def1 = SSA_NAME_DEF_STMT (rhs); | |
897 | |
898 /* Verify the defining statement has an EQ_EXPR on the RHS. */ | |
899 if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR) | |
900 { | |
901 /* Finally verify the source operands of the EQ_EXPR are equal | |
902 to arg0 and arg1. */ | |
903 tree op0 = gimple_assign_rhs1 (def1); | |
904 tree op1 = gimple_assign_rhs2 (def1); | |
905 if ((operand_equal_for_phi_arg_p (arg0, op0) | |
906 && operand_equal_for_phi_arg_p (arg1, op1)) | |
907 || (operand_equal_for_phi_arg_p (arg0, op1) | |
908 && operand_equal_for_phi_arg_p (arg1, op0))) | |
909 { | |
910 /* We will perform the optimization. */ | |
911 *code = gimple_assign_rhs_code (def1); | |
912 return true; | |
913 } | |
914 } | |
915 } | |
916 return false; | |
917 } | |
918 | |
919 /* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND. | |
920 | |
921 Also return TRUE if arg0/arg1 are equal to the source arguments of a | |
922 an EQ comparison feeding a BIT_AND_EXPR which feeds COND. | |
923 | |
924 Return FALSE otherwise. */ | |
0 | 925 |
926 static bool | |
111 | 927 operand_equal_for_value_replacement (const_tree arg0, const_tree arg1, |
928 enum tree_code *code, gimple *cond) | |
929 { | |
930 gimple *def; | |
931 tree lhs = gimple_cond_lhs (cond); | |
932 tree rhs = gimple_cond_rhs (cond); | |
933 | |
934 if ((operand_equal_for_phi_arg_p (arg0, lhs) | |
935 && operand_equal_for_phi_arg_p (arg1, rhs)) | |
936 || (operand_equal_for_phi_arg_p (arg1, lhs) | |
937 && operand_equal_for_phi_arg_p (arg0, rhs))) | |
938 return true; | |
939 | |
940 /* Now handle more complex case where we have an EQ comparison | |
941 which feeds a BIT_AND_EXPR which feeds COND. | |
942 | |
943 First verify that COND is of the form SSA_NAME NE 0. */ | |
944 if (*code != NE_EXPR || !integer_zerop (rhs) | |
945 || TREE_CODE (lhs) != SSA_NAME) | |
946 return false; | |
947 | |
948 /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR. */ | |
949 def = SSA_NAME_DEF_STMT (lhs); | |
950 if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR) | |
951 return false; | |
952 | |
953 /* Now verify arg0/arg1 correspond to the source arguments of an | |
954 EQ comparison feeding the BIT_AND_EXPR. */ | |
955 | |
956 tree tmp = gimple_assign_rhs1 (def); | |
957 if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp)) | |
958 return true; | |
959 | |
960 tmp = gimple_assign_rhs2 (def); | |
961 if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp)) | |
962 return true; | |
963 | |
964 return false; | |
965 } | |
966 | |
967 /* Returns true if ARG is a neutral element for operation CODE | |
968 on the RIGHT side. */ | |
969 | |
970 static bool | |
971 neutral_element_p (tree_code code, tree arg, bool right) | |
972 { | |
973 switch (code) | |
974 { | |
975 case PLUS_EXPR: | |
976 case BIT_IOR_EXPR: | |
977 case BIT_XOR_EXPR: | |
978 return integer_zerop (arg); | |
979 | |
980 case LROTATE_EXPR: | |
981 case RROTATE_EXPR: | |
982 case LSHIFT_EXPR: | |
983 case RSHIFT_EXPR: | |
984 case MINUS_EXPR: | |
985 case POINTER_PLUS_EXPR: | |
986 return right && integer_zerop (arg); | |
987 | |
988 case MULT_EXPR: | |
989 return integer_onep (arg); | |
990 | |
991 case TRUNC_DIV_EXPR: | |
992 case CEIL_DIV_EXPR: | |
993 case FLOOR_DIV_EXPR: | |
994 case ROUND_DIV_EXPR: | |
995 case EXACT_DIV_EXPR: | |
996 return right && integer_onep (arg); | |
997 | |
998 case BIT_AND_EXPR: | |
999 return integer_all_onesp (arg); | |
1000 | |
1001 default: | |
1002 return false; | |
1003 } | |
1004 } | |
1005 | |
1006 /* Returns true if ARG is an absorbing element for operation CODE. */ | |
1007 | |
1008 static bool | |
1009 absorbing_element_p (tree_code code, tree arg, bool right, tree rval) | |
1010 { | |
1011 switch (code) | |
1012 { | |
1013 case BIT_IOR_EXPR: | |
1014 return integer_all_onesp (arg); | |
1015 | |
1016 case MULT_EXPR: | |
1017 case BIT_AND_EXPR: | |
1018 return integer_zerop (arg); | |
1019 | |
1020 case LSHIFT_EXPR: | |
1021 case RSHIFT_EXPR: | |
1022 case LROTATE_EXPR: | |
1023 case RROTATE_EXPR: | |
1024 return !right && integer_zerop (arg); | |
1025 | |
1026 case TRUNC_DIV_EXPR: | |
1027 case CEIL_DIV_EXPR: | |
1028 case FLOOR_DIV_EXPR: | |
1029 case ROUND_DIV_EXPR: | |
1030 case EXACT_DIV_EXPR: | |
1031 case TRUNC_MOD_EXPR: | |
1032 case CEIL_MOD_EXPR: | |
1033 case FLOOR_MOD_EXPR: | |
1034 case ROUND_MOD_EXPR: | |
1035 return (!right | |
1036 && integer_zerop (arg) | |
1037 && tree_single_nonzero_warnv_p (rval, NULL)); | |
1038 | |
1039 default: | |
1040 return false; | |
1041 } | |
1042 } | |
1043 | |
1044 /* The function value_replacement does the main work of doing the value | |
1045 replacement. Return non-zero if the replacement is done. Otherwise return | |
1046 0. If we remove the middle basic block, return 2. | |
1047 BB is the basic block where the replacement is going to be done on. ARG0 | |
1048 is argument 0 from the PHI. Likewise for ARG1. */ | |
1049 | |
1050 static int | |
0 | 1051 value_replacement (basic_block cond_bb, basic_block middle_bb, |
111 | 1052 edge e0, edge e1, gimple *phi, |
0 | 1053 tree arg0, tree arg1) |
1054 { | |
111 | 1055 gimple_stmt_iterator gsi; |
1056 gimple *cond; | |
0 | 1057 edge true_edge, false_edge; |
1058 enum tree_code code; | |
111 | 1059 bool emtpy_or_with_defined_p = true; |
0 | 1060 |
1061 /* If the type says honor signed zeros we cannot do this | |
1062 optimization. */ | |
111 | 1063 if (HONOR_SIGNED_ZEROS (arg1)) |
1064 return 0; | |
1065 | |
1066 /* If there is a statement in MIDDLE_BB that defines one of the PHI | |
1067 arguments, then adjust arg0 or arg1. */ | |
1068 gsi = gsi_start_nondebug_after_labels_bb (middle_bb); | |
1069 while (!gsi_end_p (gsi)) | |
1070 { | |
1071 gimple *stmt = gsi_stmt (gsi); | |
1072 tree lhs; | |
1073 gsi_next_nondebug (&gsi); | |
1074 if (!is_gimple_assign (stmt)) | |
1075 { | |
131 | 1076 if (gimple_code (stmt) != GIMPLE_PREDICT |
1077 && gimple_code (stmt) != GIMPLE_NOP) | |
1078 emtpy_or_with_defined_p = false; | |
111 | 1079 continue; |
1080 } | |
1081 /* Now try to adjust arg0 or arg1 according to the computation | |
1082 in the statement. */ | |
1083 lhs = gimple_assign_lhs (stmt); | |
1084 if (!(lhs == arg0 | |
1085 && jump_function_from_stmt (&arg0, stmt)) | |
1086 || (lhs == arg1 | |
1087 && jump_function_from_stmt (&arg1, stmt))) | |
1088 emtpy_or_with_defined_p = false; | |
1089 } | |
0 | 1090 |
1091 cond = last_stmt (cond_bb); | |
1092 code = gimple_cond_code (cond); | |
1093 | |
1094 /* This transformation is only valid for equality comparisons. */ | |
1095 if (code != NE_EXPR && code != EQ_EXPR) | |
111 | 1096 return 0; |
0 | 1097 |
1098 /* We need to know which is the true edge and which is the false | |
1099 edge so that we know if have abs or negative abs. */ | |
1100 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); | |
1101 | |
1102 /* At this point we know we have a COND_EXPR with two successors. | |
1103 One successor is BB, the other successor is an empty block which | |
1104 falls through into BB. | |
1105 | |
1106 The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR. | |
1107 | |
1108 There is a single PHI node at the join point (BB) with two arguments. | |
1109 | |
1110 We now need to verify that the two arguments in the PHI node match | |
1111 the two arguments to the equality comparison. */ | |
1112 | |
111 | 1113 if (operand_equal_for_value_replacement (arg0, arg1, &code, cond)) |
0 | 1114 { |
1115 edge e; | |
1116 tree arg; | |
1117 | |
1118 /* For NE_EXPR, we want to build an assignment result = arg where | |
1119 arg is the PHI argument associated with the true edge. For | |
1120 EQ_EXPR we want the PHI argument associated with the false edge. */ | |
1121 e = (code == NE_EXPR ? true_edge : false_edge); | |
1122 | |
1123 /* Unfortunately, E may not reach BB (it may instead have gone to | |
1124 OTHER_BLOCK). If that is the case, then we want the single outgoing | |
1125 edge from OTHER_BLOCK which reaches BB and represents the desired | |
1126 path from COND_BLOCK. */ | |
1127 if (e->dest == middle_bb) | |
1128 e = single_succ_edge (e->dest); | |
1129 | |
1130 /* Now we know the incoming edge to BB that has the argument for the | |
1131 RHS of our new assignment statement. */ | |
1132 if (e0 == e) | |
1133 arg = arg0; | |
1134 else | |
1135 arg = arg1; | |
1136 | |
111 | 1137 /* If the middle basic block was empty or is defining the |
1138 PHI arguments and this is a single phi where the args are different | |
1139 for the edges e0 and e1 then we can remove the middle basic block. */ | |
1140 if (emtpy_or_with_defined_p | |
1141 && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), | |
1142 e0, e1) == phi) | |
1143 { | |
1144 replace_phi_edge_with_variable (cond_bb, e1, phi, arg); | |
1145 /* Note that we optimized this PHI. */ | |
1146 return 2; | |
1147 } | |
1148 else | |
1149 { | |
1150 /* Replace the PHI arguments with arg. */ | |
1151 SET_PHI_ARG_DEF (phi, e0->dest_idx, arg); | |
1152 SET_PHI_ARG_DEF (phi, e1->dest_idx, arg); | |
1153 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1154 { | |
1155 fprintf (dump_file, "PHI "); | |
1156 print_generic_expr (dump_file, gimple_phi_result (phi)); | |
1157 fprintf (dump_file, " reduced for COND_EXPR in block %d to ", | |
1158 cond_bb->index); | |
1159 print_generic_expr (dump_file, arg); | |
1160 fprintf (dump_file, ".\n"); | |
1161 } | |
1162 return 1; | |
1163 } | |
1164 | |
0 | 1165 } |
111 | 1166 |
1167 /* Now optimize (x != 0) ? x + y : y to just x + y. */ | |
1168 gsi = gsi_last_nondebug_bb (middle_bb); | |
1169 if (gsi_end_p (gsi)) | |
1170 return 0; | |
1171 | |
1172 gimple *assign = gsi_stmt (gsi); | |
1173 if (!is_gimple_assign (assign) | |
1174 || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS | |
1175 || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)) | |
1176 && !POINTER_TYPE_P (TREE_TYPE (arg0)))) | |
1177 return 0; | |
1178 | |
1179 /* Punt if there are (degenerate) PHIs in middle_bb, there should not be. */ | |
1180 if (!gimple_seq_empty_p (phi_nodes (middle_bb))) | |
1181 return 0; | |
1182 | |
1183 /* Allow up to 2 cheap preparation statements that prepare argument | |
1184 for assign, e.g.: | |
1185 if (y_4 != 0) | |
1186 goto <bb 3>; | |
1187 else | |
1188 goto <bb 4>; | |
1189 <bb 3>: | |
1190 _1 = (int) y_4; | |
1191 iftmp.0_6 = x_5(D) r<< _1; | |
1192 <bb 4>: | |
1193 # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)> | |
1194 or: | |
1195 if (y_3(D) == 0) | |
1196 goto <bb 4>; | |
1197 else | |
1198 goto <bb 3>; | |
1199 <bb 3>: | |
1200 y_4 = y_3(D) & 31; | |
1201 _1 = (int) y_4; | |
1202 _6 = x_5(D) r<< _1; | |
1203 <bb 4>: | |
1204 # _2 = PHI <x_5(D)(2), _6(3)> */ | |
1205 gimple *prep_stmt[2] = { NULL, NULL }; | |
1206 int prep_cnt; | |
1207 for (prep_cnt = 0; ; prep_cnt++) | |
1208 { | |
1209 gsi_prev_nondebug (&gsi); | |
1210 if (gsi_end_p (gsi)) | |
1211 break; | |
1212 | |
1213 gimple *g = gsi_stmt (gsi); | |
1214 if (gimple_code (g) == GIMPLE_LABEL) | |
1215 break; | |
1216 | |
1217 if (prep_cnt == 2 || !is_gimple_assign (g)) | |
1218 return 0; | |
1219 | |
1220 tree lhs = gimple_assign_lhs (g); | |
1221 tree rhs1 = gimple_assign_rhs1 (g); | |
1222 use_operand_p use_p; | |
1223 gimple *use_stmt; | |
1224 if (TREE_CODE (lhs) != SSA_NAME | |
1225 || TREE_CODE (rhs1) != SSA_NAME | |
1226 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)) | |
1227 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) | |
1228 || !single_imm_use (lhs, &use_p, &use_stmt) | |
1229 || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign)) | |
1230 return 0; | |
1231 switch (gimple_assign_rhs_code (g)) | |
1232 { | |
1233 CASE_CONVERT: | |
1234 break; | |
1235 case PLUS_EXPR: | |
1236 case BIT_AND_EXPR: | |
1237 case BIT_IOR_EXPR: | |
1238 case BIT_XOR_EXPR: | |
1239 if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST) | |
1240 return 0; | |
1241 break; | |
1242 default: | |
1243 return 0; | |
1244 } | |
1245 prep_stmt[prep_cnt] = g; | |
1246 } | |
1247 | |
1248 /* Only transform if it removes the condition. */ | |
1249 if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1)) | |
1250 return 0; | |
1251 | |
1252 /* Size-wise, this is always profitable. */ | |
1253 if (optimize_bb_for_speed_p (cond_bb) | |
1254 /* The special case is useless if it has a low probability. */ | |
1255 && profile_status_for_fn (cfun) != PROFILE_ABSENT | |
1256 && EDGE_PRED (middle_bb, 0)->probability < profile_probability::even () | |
1257 /* If assign is cheap, there is no point avoiding it. */ | |
1258 && estimate_num_insns (bb_seq (middle_bb), &eni_time_weights) | |
1259 >= 3 * estimate_num_insns (cond, &eni_time_weights)) | |
1260 return 0; | |
1261 | |
1262 tree lhs = gimple_assign_lhs (assign); | |
1263 tree rhs1 = gimple_assign_rhs1 (assign); | |
1264 tree rhs2 = gimple_assign_rhs2 (assign); | |
1265 enum tree_code code_def = gimple_assign_rhs_code (assign); | |
1266 tree cond_lhs = gimple_cond_lhs (cond); | |
1267 tree cond_rhs = gimple_cond_rhs (cond); | |
1268 | |
1269 /* Propagate the cond_rhs constant through preparation stmts, | |
1270 make sure UB isn't invoked while doing that. */ | |
1271 for (int i = prep_cnt - 1; i >= 0; --i) | |
1272 { | |
1273 gimple *g = prep_stmt[i]; | |
1274 tree grhs1 = gimple_assign_rhs1 (g); | |
1275 if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1)) | |
1276 return 0; | |
1277 cond_lhs = gimple_assign_lhs (g); | |
1278 cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs); | |
1279 if (TREE_CODE (cond_rhs) != INTEGER_CST | |
1280 || TREE_OVERFLOW (cond_rhs)) | |
1281 return 0; | |
1282 if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS) | |
1283 { | |
1284 cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs, | |
1285 gimple_assign_rhs2 (g)); | |
1286 if (TREE_OVERFLOW (cond_rhs)) | |
1287 return 0; | |
1288 } | |
1289 cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs); | |
1290 if (TREE_CODE (cond_rhs) != INTEGER_CST | |
1291 || TREE_OVERFLOW (cond_rhs)) | |
1292 return 0; | |
1293 } | |
1294 | |
1295 if (((code == NE_EXPR && e1 == false_edge) | |
1296 || (code == EQ_EXPR && e1 == true_edge)) | |
1297 && arg0 == lhs | |
1298 && ((arg1 == rhs1 | |
1299 && operand_equal_for_phi_arg_p (rhs2, cond_lhs) | |
1300 && neutral_element_p (code_def, cond_rhs, true)) | |
1301 || (arg1 == rhs2 | |
1302 && operand_equal_for_phi_arg_p (rhs1, cond_lhs) | |
1303 && neutral_element_p (code_def, cond_rhs, false)) | |
1304 || (operand_equal_for_phi_arg_p (arg1, cond_rhs) | |
1305 && ((operand_equal_for_phi_arg_p (rhs2, cond_lhs) | |
1306 && absorbing_element_p (code_def, cond_rhs, true, rhs2)) | |
1307 || (operand_equal_for_phi_arg_p (rhs1, cond_lhs) | |
1308 && absorbing_element_p (code_def, | |
1309 cond_rhs, false, rhs2)))))) | |
1310 { | |
1311 gsi = gsi_for_stmt (cond); | |
131 | 1312 /* Moving ASSIGN might change VR of lhs, e.g. when moving u_6 |
1313 def-stmt in: | |
1314 if (n_5 != 0) | |
1315 goto <bb 3>; | |
1316 else | |
1317 goto <bb 4>; | |
1318 | |
1319 <bb 3>: | |
1320 # RANGE [0, 4294967294] | |
1321 u_6 = n_5 + 4294967295; | |
1322 | |
1323 <bb 4>: | |
1324 # u_3 = PHI <u_6(3), 4294967295(2)> */ | |
1325 reset_flow_sensitive_info (lhs); | |
111 | 1326 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))) |
1327 { | |
1328 /* If available, we can use VR of phi result at least. */ | |
1329 tree phires = gimple_phi_result (phi); | |
1330 struct range_info_def *phires_range_info | |
1331 = SSA_NAME_RANGE_INFO (phires); | |
1332 if (phires_range_info) | |
1333 duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires), | |
1334 phires_range_info); | |
1335 } | |
1336 gimple_stmt_iterator gsi_from; | |
1337 for (int i = prep_cnt - 1; i >= 0; --i) | |
1338 { | |
1339 tree plhs = gimple_assign_lhs (prep_stmt[i]); | |
131 | 1340 reset_flow_sensitive_info (plhs); |
111 | 1341 gsi_from = gsi_for_stmt (prep_stmt[i]); |
1342 gsi_move_before (&gsi_from, &gsi); | |
1343 } | |
1344 gsi_from = gsi_for_stmt (assign); | |
1345 gsi_move_before (&gsi_from, &gsi); | |
1346 replace_phi_edge_with_variable (cond_bb, e1, phi, lhs); | |
1347 return 2; | |
1348 } | |
1349 | |
1350 return 0; | |
0 | 1351 } |
1352 | |
1353 /* The function minmax_replacement does the main work of doing the minmax | |
1354 replacement. Return true if the replacement is done. Otherwise return | |
1355 false. | |
1356 BB is the basic block where the replacement is going to be done on. ARG0 | |
1357 is argument 0 from the PHI. Likewise for ARG1. */ | |
1358 | |
1359 static bool | |
1360 minmax_replacement (basic_block cond_bb, basic_block middle_bb, | |
111 | 1361 edge e0, edge e1, gimple *phi, |
0 | 1362 tree arg0, tree arg1) |
1363 { | |
145 | 1364 tree result, type, rhs; |
111 | 1365 gcond *cond; |
1366 gassign *new_stmt; | |
0 | 1367 edge true_edge, false_edge; |
1368 enum tree_code cmp, minmax, ass_code; | |
111 | 1369 tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false; |
0 | 1370 gimple_stmt_iterator gsi, gsi_from; |
1371 | |
1372 type = TREE_TYPE (PHI_RESULT (phi)); | |
1373 | |
1374 /* The optimization may be unsafe due to NaNs. */ | |
111 | 1375 if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) |
0 | 1376 return false; |
1377 | |
111 | 1378 cond = as_a <gcond *> (last_stmt (cond_bb)); |
0 | 1379 cmp = gimple_cond_code (cond); |
145 | 1380 rhs = gimple_cond_rhs (cond); |
1381 | |
1382 /* Turn EQ/NE of extreme values to order comparisons. */ | |
1383 if ((cmp == NE_EXPR || cmp == EQ_EXPR) | |
1384 && TREE_CODE (rhs) == INTEGER_CST | |
1385 && INTEGRAL_TYPE_P (TREE_TYPE (rhs))) | |
1386 { | |
1387 if (wi::eq_p (wi::to_wide (rhs), wi::min_value (TREE_TYPE (rhs)))) | |
1388 { | |
1389 cmp = (cmp == EQ_EXPR) ? LT_EXPR : GE_EXPR; | |
1390 rhs = wide_int_to_tree (TREE_TYPE (rhs), | |
1391 wi::min_value (TREE_TYPE (rhs)) + 1); | |
1392 } | |
1393 else if (wi::eq_p (wi::to_wide (rhs), wi::max_value (TREE_TYPE (rhs)))) | |
1394 { | |
1395 cmp = (cmp == EQ_EXPR) ? GT_EXPR : LE_EXPR; | |
1396 rhs = wide_int_to_tree (TREE_TYPE (rhs), | |
1397 wi::max_value (TREE_TYPE (rhs)) - 1); | |
1398 } | |
1399 } | |
0 | 1400 |
1401 /* This transformation is only valid for order comparisons. Record which | |
1402 operand is smaller/larger if the result of the comparison is true. */ | |
111 | 1403 alt_smaller = NULL_TREE; |
1404 alt_larger = NULL_TREE; | |
0 | 1405 if (cmp == LT_EXPR || cmp == LE_EXPR) |
1406 { | |
1407 smaller = gimple_cond_lhs (cond); | |
145 | 1408 larger = rhs; |
111 | 1409 /* If we have smaller < CST it is equivalent to smaller <= CST-1. |
1410 Likewise smaller <= CST is equivalent to smaller < CST+1. */ | |
145 | 1411 if (TREE_CODE (larger) == INTEGER_CST |
1412 && INTEGRAL_TYPE_P (TREE_TYPE (larger))) | |
111 | 1413 { |
1414 if (cmp == LT_EXPR) | |
1415 { | |
131 | 1416 wi::overflow_type overflow; |
111 | 1417 wide_int alt = wi::sub (wi::to_wide (larger), 1, |
1418 TYPE_SIGN (TREE_TYPE (larger)), | |
1419 &overflow); | |
1420 if (! overflow) | |
1421 alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt); | |
1422 } | |
1423 else | |
1424 { | |
131 | 1425 wi::overflow_type overflow; |
111 | 1426 wide_int alt = wi::add (wi::to_wide (larger), 1, |
1427 TYPE_SIGN (TREE_TYPE (larger)), | |
1428 &overflow); | |
1429 if (! overflow) | |
1430 alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt); | |
1431 } | |
1432 } | |
0 | 1433 } |
1434 else if (cmp == GT_EXPR || cmp == GE_EXPR) | |
1435 { | |
145 | 1436 smaller = rhs; |
0 | 1437 larger = gimple_cond_lhs (cond); |
111 | 1438 /* If we have larger > CST it is equivalent to larger >= CST+1. |
1439 Likewise larger >= CST is equivalent to larger > CST-1. */ | |
145 | 1440 if (TREE_CODE (smaller) == INTEGER_CST |
1441 && INTEGRAL_TYPE_P (TREE_TYPE (smaller))) | |
111 | 1442 { |
131 | 1443 wi::overflow_type overflow; |
111 | 1444 if (cmp == GT_EXPR) |
1445 { | |
1446 wide_int alt = wi::add (wi::to_wide (smaller), 1, | |
1447 TYPE_SIGN (TREE_TYPE (smaller)), | |
1448 &overflow); | |
1449 if (! overflow) | |
1450 alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt); | |
1451 } | |
1452 else | |
1453 { | |
1454 wide_int alt = wi::sub (wi::to_wide (smaller), 1, | |
1455 TYPE_SIGN (TREE_TYPE (smaller)), | |
1456 &overflow); | |
1457 if (! overflow) | |
1458 alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt); | |
1459 } | |
1460 } | |
0 | 1461 } |
1462 else | |
1463 return false; | |
1464 | |
1465 /* We need to know which is the true edge and which is the false | |
1466 edge so that we know if have abs or negative abs. */ | |
1467 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); | |
1468 | |
1469 /* Forward the edges over the middle basic block. */ | |
1470 if (true_edge->dest == middle_bb) | |
1471 true_edge = EDGE_SUCC (true_edge->dest, 0); | |
1472 if (false_edge->dest == middle_bb) | |
1473 false_edge = EDGE_SUCC (false_edge->dest, 0); | |
1474 | |
1475 if (true_edge == e0) | |
1476 { | |
1477 gcc_assert (false_edge == e1); | |
1478 arg_true = arg0; | |
1479 arg_false = arg1; | |
1480 } | |
1481 else | |
1482 { | |
1483 gcc_assert (false_edge == e0); | |
1484 gcc_assert (true_edge == e1); | |
1485 arg_true = arg1; | |
1486 arg_false = arg0; | |
1487 } | |
1488 | |
1489 if (empty_block_p (middle_bb)) | |
1490 { | |
111 | 1491 if ((operand_equal_for_phi_arg_p (arg_true, smaller) |
1492 || (alt_smaller | |
1493 && operand_equal_for_phi_arg_p (arg_true, alt_smaller))) | |
1494 && (operand_equal_for_phi_arg_p (arg_false, larger) | |
1495 || (alt_larger | |
1496 && operand_equal_for_phi_arg_p (arg_true, alt_larger)))) | |
0 | 1497 { |
1498 /* Case | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1499 |
0 | 1500 if (smaller < larger) |
1501 rslt = smaller; | |
1502 else | |
1503 rslt = larger; */ | |
1504 minmax = MIN_EXPR; | |
1505 } | |
111 | 1506 else if ((operand_equal_for_phi_arg_p (arg_false, smaller) |
1507 || (alt_smaller | |
1508 && operand_equal_for_phi_arg_p (arg_false, alt_smaller))) | |
1509 && (operand_equal_for_phi_arg_p (arg_true, larger) | |
1510 || (alt_larger | |
1511 && operand_equal_for_phi_arg_p (arg_true, alt_larger)))) | |
0 | 1512 minmax = MAX_EXPR; |
1513 else | |
1514 return false; | |
1515 } | |
1516 else | |
1517 { | |
1518 /* Recognize the following case, assuming d <= u: | |
1519 | |
1520 if (a <= u) | |
1521 b = MAX (a, d); | |
1522 x = PHI <b, u> | |
1523 | |
1524 This is equivalent to | |
1525 | |
1526 b = MAX (a, d); | |
1527 x = MIN (b, u); */ | |
1528 | |
111 | 1529 gimple *assign = last_and_only_stmt (middle_bb); |
0 | 1530 tree lhs, op0, op1, bound; |
1531 | |
1532 if (!assign | |
1533 || gimple_code (assign) != GIMPLE_ASSIGN) | |
1534 return false; | |
1535 | |
1536 lhs = gimple_assign_lhs (assign); | |
1537 ass_code = gimple_assign_rhs_code (assign); | |
1538 if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) | |
1539 return false; | |
1540 op0 = gimple_assign_rhs1 (assign); | |
1541 op1 = gimple_assign_rhs2 (assign); | |
1542 | |
1543 if (true_edge->src == middle_bb) | |
1544 { | |
1545 /* We got here if the condition is true, i.e., SMALLER < LARGER. */ | |
1546 if (!operand_equal_for_phi_arg_p (lhs, arg_true)) | |
1547 return false; | |
1548 | |
111 | 1549 if (operand_equal_for_phi_arg_p (arg_false, larger) |
1550 || (alt_larger | |
1551 && operand_equal_for_phi_arg_p (arg_false, alt_larger))) | |
0 | 1552 { |
1553 /* Case | |
1554 | |
1555 if (smaller < larger) | |
1556 { | |
1557 r' = MAX_EXPR (smaller, bound) | |
1558 } | |
1559 r = PHI <r', larger> --> to be turned to MIN_EXPR. */ | |
1560 if (ass_code != MAX_EXPR) | |
1561 return false; | |
1562 | |
1563 minmax = MIN_EXPR; | |
111 | 1564 if (operand_equal_for_phi_arg_p (op0, smaller) |
1565 || (alt_smaller | |
1566 && operand_equal_for_phi_arg_p (op0, alt_smaller))) | |
0 | 1567 bound = op1; |
111 | 1568 else if (operand_equal_for_phi_arg_p (op1, smaller) |
1569 || (alt_smaller | |
1570 && operand_equal_for_phi_arg_p (op1, alt_smaller))) | |
0 | 1571 bound = op0; |
1572 else | |
1573 return false; | |
1574 | |
1575 /* We need BOUND <= LARGER. */ | |
1576 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, | |
1577 bound, larger))) | |
1578 return false; | |
1579 } | |
111 | 1580 else if (operand_equal_for_phi_arg_p (arg_false, smaller) |
1581 || (alt_smaller | |
1582 && operand_equal_for_phi_arg_p (arg_false, alt_smaller))) | |
0 | 1583 { |
1584 /* Case | |
1585 | |
1586 if (smaller < larger) | |
1587 { | |
1588 r' = MIN_EXPR (larger, bound) | |
1589 } | |
1590 r = PHI <r', smaller> --> to be turned to MAX_EXPR. */ | |
1591 if (ass_code != MIN_EXPR) | |
1592 return false; | |
1593 | |
1594 minmax = MAX_EXPR; | |
111 | 1595 if (operand_equal_for_phi_arg_p (op0, larger) |
1596 || (alt_larger | |
1597 && operand_equal_for_phi_arg_p (op0, alt_larger))) | |
0 | 1598 bound = op1; |
111 | 1599 else if (operand_equal_for_phi_arg_p (op1, larger) |
1600 || (alt_larger | |
1601 && operand_equal_for_phi_arg_p (op1, alt_larger))) | |
0 | 1602 bound = op0; |
1603 else | |
1604 return false; | |
1605 | |
1606 /* We need BOUND >= SMALLER. */ | |
1607 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, | |
1608 bound, smaller))) | |
1609 return false; | |
1610 } | |
1611 else | |
1612 return false; | |
1613 } | |
1614 else | |
1615 { | |
1616 /* We got here if the condition is false, i.e., SMALLER > LARGER. */ | |
1617 if (!operand_equal_for_phi_arg_p (lhs, arg_false)) | |
1618 return false; | |
1619 | |
111 | 1620 if (operand_equal_for_phi_arg_p (arg_true, larger) |
1621 || (alt_larger | |
1622 && operand_equal_for_phi_arg_p (arg_true, alt_larger))) | |
0 | 1623 { |
1624 /* Case | |
1625 | |
1626 if (smaller > larger) | |
1627 { | |
1628 r' = MIN_EXPR (smaller, bound) | |
1629 } | |
1630 r = PHI <r', larger> --> to be turned to MAX_EXPR. */ | |
1631 if (ass_code != MIN_EXPR) | |
1632 return false; | |
1633 | |
1634 minmax = MAX_EXPR; | |
111 | 1635 if (operand_equal_for_phi_arg_p (op0, smaller) |
1636 || (alt_smaller | |
1637 && operand_equal_for_phi_arg_p (op0, alt_smaller))) | |
0 | 1638 bound = op1; |
111 | 1639 else if (operand_equal_for_phi_arg_p (op1, smaller) |
1640 || (alt_smaller | |
1641 && operand_equal_for_phi_arg_p (op1, alt_smaller))) | |
0 | 1642 bound = op0; |
1643 else | |
1644 return false; | |
1645 | |
1646 /* We need BOUND >= LARGER. */ | |
1647 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, | |
1648 bound, larger))) | |
1649 return false; | |
1650 } | |
111 | 1651 else if (operand_equal_for_phi_arg_p (arg_true, smaller) |
1652 || (alt_smaller | |
1653 && operand_equal_for_phi_arg_p (arg_true, alt_smaller))) | |
0 | 1654 { |
1655 /* Case | |
1656 | |
1657 if (smaller > larger) | |
1658 { | |
1659 r' = MAX_EXPR (larger, bound) | |
1660 } | |
1661 r = PHI <r', smaller> --> to be turned to MIN_EXPR. */ | |
1662 if (ass_code != MAX_EXPR) | |
1663 return false; | |
1664 | |
1665 minmax = MIN_EXPR; | |
1666 if (operand_equal_for_phi_arg_p (op0, larger)) | |
1667 bound = op1; | |
1668 else if (operand_equal_for_phi_arg_p (op1, larger)) | |
1669 bound = op0; | |
1670 else | |
1671 return false; | |
1672 | |
1673 /* We need BOUND <= SMALLER. */ | |
1674 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, | |
1675 bound, smaller))) | |
1676 return false; | |
1677 } | |
1678 else | |
1679 return false; | |
1680 } | |
1681 | |
1682 /* Move the statement from the middle block. */ | |
1683 gsi = gsi_last_bb (cond_bb); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1684 gsi_from = gsi_last_nondebug_bb (middle_bb); |
131 | 1685 reset_flow_sensitive_info (SINGLE_SSA_TREE_OPERAND (gsi_stmt (gsi_from), |
1686 SSA_OP_DEF)); | |
0 | 1687 gsi_move_before (&gsi_from, &gsi); |
1688 } | |
1689 | |
111 | 1690 /* Create an SSA var to hold the min/max result. If we're the only |
1691 things setting the target PHI, then we can clone the PHI | |
1692 variable. Otherwise we must create a new one. */ | |
1693 result = PHI_RESULT (phi); | |
1694 if (EDGE_COUNT (gimple_bb (phi)->preds) == 2) | |
1695 result = duplicate_ssa_name (result, NULL); | |
1696 else | |
1697 result = make_ssa_name (TREE_TYPE (result)); | |
1698 | |
0 | 1699 /* Emit the statement to compute min/max. */ |
111 | 1700 new_stmt = gimple_build_assign (result, minmax, arg0, arg1); |
0 | 1701 gsi = gsi_last_bb (cond_bb); |
1702 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); | |
1703 | |
1704 replace_phi_edge_with_variable (cond_bb, e1, phi, result); | |
131 | 1705 |
1706 return true; | |
1707 } | |
1708 | |
/* Convert

   <bb 2>
   if (b_4(D) != 0)
   goto <bb 3>
   else
   goto <bb 4>

   <bb 3>
   _2 = (unsigned long) b_4(D);
   _9 = __builtin_popcountl (_2);
   OR
   _9 = __builtin_popcountl (b_4(D));

   <bb 4>
   c_12 = PHI <0(2), _9(3)>

   Into
   <bb 2>
   _2 = (unsigned long) b_4(D);
   _9 = __builtin_popcountl (_2);
   OR
   _9 = __builtin_popcountl (b_4(D));

   <bb 4>
   c_12 = PHI <_9(2)>

   The transform is valid because popcount (0) == 0, so the zero guard
   around the call is redundant.  Return true if the transformation was
   performed.  COND_BB ends in the condition, MIDDLE_BB holds the
   popcount (and optional cast), E1/E2 are the edges into the PHI block
   and ARG0/ARG1 the corresponding PHI arguments.  */

static bool
cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
				  edge e1, edge e2,
				  gimple *phi, tree arg0, tree arg1)
{
  gimple *cond;
  gimple_stmt_iterator gsi, gsi_from;
  gimple *popcount;
  gimple *cast = NULL;
  tree lhs, arg;

  /* Check that
     _2 = (unsigned long) b_4(D);
     _9 = __builtin_popcountl (_2);
     OR
     _9 = __builtin_popcountl (b_4(D));
     are the only stmts in the middle_bb.  */

  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  if (gsi_end_p (gsi))
    return false;
  cast = gsi_stmt (gsi);
  gsi_next_nondebug (&gsi);
  if (!gsi_end_p (gsi))
    {
      /* Two statements: the first must be the cast, the second the call;
	 anything more disqualifies the block.  */
      popcount = gsi_stmt (gsi);
      gsi_next_nondebug (&gsi);
      if (!gsi_end_p (gsi))
	return false;
    }
  else
    {
      /* Only one statement: it is the popcount call itself, no cast.  */
      popcount = cast;
      cast = NULL;
    }

  /* Check that we have a popcount builtin.  */
  if (!is_gimple_call (popcount))
    return false;
  combined_fn cfn = gimple_call_combined_fn (popcount);
  switch (cfn)
    {
    CASE_CFN_POPCOUNT:
      break;
    default:
      return false;
    }

  arg = gimple_call_arg (popcount, 0);
  lhs = gimple_get_lhs (popcount);

  if (cast)
    {
      /* We have a cast stmt feeding popcount builtin.  */
      /* Check that we have a cast prior to that.  */
      if (gimple_code (cast) != GIMPLE_ASSIGN
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
	return false;
      /* Result of the cast stmt is the argument to the builtin.  */
      if (arg != gimple_assign_lhs (cast))
	return false;
      /* Track the pre-cast value; the condition tests that one.  */
      arg = gimple_assign_rhs1 (cast);
    }

  cond = last_stmt (cond_bb);

  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
     builtin.  */
  if (gimple_code (cond) != GIMPLE_COND
      || (gimple_cond_code (cond) != NE_EXPR
	  && gimple_cond_code (cond) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond))
      || arg != gimple_cond_lhs (cond))
    return false;

  /* Canonicalize so that E1/ARG0 is the edge coming from the popcount
     block and E2/ARG1 the edge carrying the constant zero.  */
  if ((e2->flags & EDGE_TRUE_VALUE
       && gimple_cond_code (cond) == NE_EXPR)
      || (e1->flags & EDGE_TRUE_VALUE
	  && gimple_cond_code (cond) == EQ_EXPR))
    {
      std::swap (arg0, arg1);
      std::swap (e1, e2);
    }

  /* Check PHI arguments: popcount result on one edge, zero on the other.  */
  if (lhs != arg0 || !integer_zerop (arg1))
    return false;

  /* And insert the popcount builtin and cast stmt before the cond_bb.  */
  gsi = gsi_last_bb (cond_bb);
  if (cast)
    {
      gsi_from = gsi_for_stmt (cast);
      gsi_move_before (&gsi_from, &gsi);
      /* The def now executes unconditionally; flow-sensitive info
	 derived from the guarded position is no longer valid.  */
      reset_flow_sensitive_info (gimple_get_lhs (cast));
    }
  gsi_from = gsi_for_stmt (popcount);
  gsi_move_before (&gsi_from, &gsi);
  reset_flow_sensitive_info (gimple_get_lhs (popcount));

  /* Now update the PHI and remove unneeded bbs.  */
  replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
  return true;
}
1842 | |
/* The function absolute_replacement does the main work of doing the absolute
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   COND_BB is the basic block with the condition, MIDDLE_BB the block with
   the negation; ARG0/ARG1 are the two PHI arguments and E1 the edge whose
   PHI argument gets replaced by the result.  */

static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
		 edge e0 ATTRIBUTE_UNUSED, edge e1,
		 gimple *phi, tree arg0, tree arg1)
{
  tree result;
  gassign *new_stmt;
  gimple *cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  gimple *assign;
  edge e;
  tree rhs, lhs;
  bool negate;
  enum tree_code cond_code;

  /* If the type says honor signed zeros we cannot do this
     optimization: ABS (-0.0) would lose the sign.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we cannot
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
     arg1 = -arg0, then we cannot optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  cond = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* Only relationals comparing arg[01] against zero are interesting.  */
  cond_code = gimple_cond_code (cond);
  if (cond_code != GT_EXPR && cond_code != GE_EXPR
      && cond_code != LT_EXPR && cond_code != LE_EXPR)
    return false;

  /* Make sure the conditional is arg[01] OP y.  */
  if (gimple_cond_lhs (cond) != rhs)
    return false;

  /* The comparison must be against (the appropriate flavor of) zero.  */
  if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
	       ? real_zerop (gimple_cond_rhs (cond))
	       : integer_zerop (gimple_cond_rhs (cond)))
    ;
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know if have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
     will need to negate the result.  Similarly for LT_EXPR/LE_EXPR if
     the false edge goes to OTHER_BLOCK.  */
  if (cond_code == GT_EXPR || cond_code == GE_EXPR)
    e = true_edge;
  else
    e = false_edge;

  if (e->dest == middle_bb)
    negate = true;
  else
    negate = false;

  /* If the code negates only iff positive then make sure to not
     introduce undefined behavior when negating or computing the absolute.
     ???  We could use range info if present to check for arg1 == INT_MIN.  */
  if (negate
      && (ANY_INTEGRAL_TYPE_P (TREE_TYPE (arg1))
	  && ! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg1))))
    return false;

  result = duplicate_ssa_name (result, NULL);

  /* When negating, the ABS result goes into a temporary which is then
     negated into RESULT.  */
  if (negate)
    lhs = make_ssa_name (TREE_TYPE (result));
  else
    lhs = result;

  /* Build the modify expression with abs expression.  */
  new_stmt = gimple_build_assign (lhs, ABS_EXPR, rhs);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  if (negate)
    {
      /* Get the right GSI.  We want to insert after the recently
	 added ABS_EXPR statement (which we know is the first statement
	 in the block).  */
      new_stmt = gimple_build_assign (result, NEGATE_EXPR, lhs);

      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);

  /* Note that we optimized this PHI.  */
  return true;
}
1970 | |
/* Auxiliary functions to determine the set of memory accesses which
   can't trap because they are preceded by accesses to the same memory
   portion.  We do that for MEM_REFs, so we only need to track
   the SSA_NAME of the pointer indirectly referenced.  The algorithm
   simply is a walk over all instructions in dominator order.  When
   we see an MEM_REF we determine if we've already seen a same
   ref anywhere up to the root of the dominator tree.  If we do the
   current access can't trap.  If we don't see any dominating access
   the current access might trap, but might also make later accesses
   non-trapping, so we remember it.  We need to be careful with loads
   or stores, for instance a load might not trap, while a store would,
   so if we see a dominating read access this doesn't mean that a later
   write access would not trap.  Hence we also need to differentiate the
   type of access(es) seen.

   ??? We currently are very conservative and assume that a load might
   trap even if a store doesn't (write-only memory).  This probably is
   overly conservative.  */

/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
   through it was seen, which would constitute a no-trap region for
   same accesses.  Keyed on (ssa_name_ver, store, offset, size); the
   PHASE field is used for cheap wholesale invalidation (see
   nt_call_phase below).  */
struct name_to_bb
{
  /* SSA_NAME_VERSION of the base pointer of the access.  */
  unsigned int ssa_name_ver;
  /* Value of nt_call_phase when this entry was (re)recorded; entries
     with phase < nt_call_phase are treated as stale.  */
  unsigned int phase;
  /* True if the recorded access was a store, false for a load.  */
  bool store;
  /* Byte offset and size of the access, to distinguish different
     accesses through the same pointer.  */
  HOST_WIDE_INT offset, size;
  /* Basic block in which the access was seen.  */
  basic_block bb;
};
2001 | |
/* Hashtable helpers: hash/equality traits for name_to_bb entries,
   with free_ptr_hash providing the storage management.  */

struct ssa_names_hasher : free_ptr_hash <name_to_bb>
{
  static inline hashval_t hash (const name_to_bb *);
  static inline bool equal (const name_to_bb *, const name_to_bb *);
};
2009 | |
/* Used for quick clearing of the hash-table when we see calls.
   Hash entries with phase < nt_call_phase are invalid.  */
static unsigned int nt_call_phase;

/* The hash function.  Mixes the SSA name version with the access kind,
   offset and size; PHASE and BB deliberately do not participate.  */

inline hashval_t
ssa_names_hasher::hash (const name_to_bb *n)
{
  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
         ^ (n->offset << 6) ^ (n->size << 3);
}
2022 | |
2023 /* The equality function of *P1 and *P2. */ | |
2024 | |
2025 inline bool | |
2026 ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) | |
2027 { | |
2028 return n1->ssa_name_ver == n2->ssa_name_ver | |
2029 && n1->store == n2->store | |
2030 && n1->offset == n2->offset | |
2031 && n1->size == n2->size; | |
2032 } | |
2033 | |
/* Dominator walker collecting the set of MEM_REF expressions that
   provably cannot trap because a same access dominates them.  Results
   accumulate into the hash_set passed to the constructor.  */
class nontrapping_dom_walker : public dom_walker
{
public:
  nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

  virtual edge before_dom_children (basic_block);
  virtual void after_dom_children (basic_block);

private:

  /* We see the expression EXP in basic block BB.  If it's an interesting
     expression (an MEM_REF through an SSA_NAME) possibly insert the
     expression into the set NONTRAP or the hash table of seen expressions.
     STORE is true if this expression is on the LHS, otherwise it's on
     the RHS.  */
  void add_or_mark_expr (basic_block, tree, bool);

  /* Output set of non-trapping MEM_REFs, owned by the caller.  */
  hash_set<tree> *m_nontrapping;

  /* The hash table for remembering what we've seen.  */
  hash_table<ssa_names_hasher> m_seen_ssa_names;
};
2057 | |
/* Called by walk_dominator_tree, when entering the block BB.
   BB->aux encodes two bits: bit 0 = on the current path to the
   dominator root, bit 1 = visited at least once.  */
edge
nontrapping_dom_walker::before_dom_children (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator gsi;

  /* If we haven't seen all our predecessors, clear the hash-table
     (by bumping the phase counter, which lazily invalidates all
     entries recorded in earlier phases).  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if ((((size_t)e->src->aux) & 2) == 0)
      {
	nt_call_phase++;
	break;
      }

  /* Mark this BB as being on the path to dominator root and as visited.  */
  bb->aux = (void*)(1 | 2);

  /* And walk the statements in order.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      /* Asms with a VDEF and calls that may free memory or act as
	 barriers invalidate everything seen so far.  */
      if ((gimple_code (stmt) == GIMPLE_ASM && gimple_vdef (stmt))
	  || (is_gimple_call (stmt)
	      && (!nonfreeing_call_p (stmt) || !nonbarrier_call_p (stmt))))
	nt_call_phase++;
      else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
	{
	  add_or_mark_expr (bb, gimple_assign_lhs (stmt), true);
	  add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false);
	}
    }
  return NULL;
}
2094 | |
/* Called by walk_dominator_tree, when basic block BB is exited.  */
void
nontrapping_dom_walker::after_dom_children (basic_block bb)
{
  /* This BB isn't on the path to dominator root anymore: clear bit 0
     but keep bit 1 (visited) set.  */
  bb->aux = (void*)2;
}
2102 | |
/* We see the expression EXP in basic block BB.  If it's an interesting
   expression (an MEM_REF through an SSA_NAME) possibly insert the
   expression into the set NONTRAP or the hash table of seen expressions.
   STORE is true if this expression is on the LHS, otherwise it's on
   the RHS.  */
void
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
{
  HOST_WIDE_INT size;

  if (TREE_CODE (exp) == MEM_REF
      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
    {
      tree name = TREE_OPERAND (exp, 0);
      struct name_to_bb map;
      name_to_bb **slot;
      struct name_to_bb *n2bb;
      basic_block found_bb = 0;

      /* Try to find the last seen MEM_REF through the same
         SSA_NAME, which can trap.  Build a key on the stack; PHASE
	 and BB are not part of the hash/equality.  */
      map.ssa_name_ver = SSA_NAME_VERSION (name);
      map.phase = 0;
      map.bb = 0;
      map.store = store;
      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
      map.size = size;

      slot = m_seen_ssa_names.find_slot (&map, INSERT);
      n2bb = *slot;
      /* Only entries recorded in the current phase (no intervening
	 barrier call) are trustworthy.  */
      if (n2bb && n2bb->phase >= nt_call_phase)
	found_bb = n2bb->bb;

      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
	 (it's in a basic block on the path from us to the dominator root)
	 then we can't trap.  */
      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
	{
	  m_nontrapping->add (exp);
	}
      else
	{
	  /* EXP might trap, so insert it into the hash table.  */
	  if (n2bb)
	    {
	      /* Refresh the existing (possibly stale) entry in place.  */
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	    }
	  else
	    {
	      n2bb = XNEW (struct name_to_bb);
	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	      n2bb->store = store;
	      n2bb->offset = map.offset;
	      n2bb->size = size;
	      *slot = n2bb;
	    }
	}
    }
}
2167 | |
2168 /* This is the entry point of gathering non trapping memory accesses. | |
2169 It will do a dominator walk over the whole function, and it will | |
2170 make use of the bb->aux pointers. It returns a set of trees | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2171 (the MEM_REFs itself) which can't trap. */ |
111 | 2172 static hash_set<tree> * |
0 | 2173 get_non_trapping (void) |
2174 { | |
111 | 2175 nt_call_phase = 0; |
2176 hash_set<tree> *nontrap = new hash_set<tree>; | |
0 | 2177 /* We're going to do a dominator walk, so ensure that we have |
2178 dominance information. */ | |
2179 calculate_dominance_info (CDI_DOMINATORS); | |
2180 | |
111 | 2181 nontrapping_dom_walker (CDI_DOMINATORS, nontrap) |
2182 .walk (cfun->cfg->x_entry_block_ptr); | |
2183 | |
2184 clear_aux_for_blocks (); | |
0 | 2185 return nontrap; |
2186 } | |
2187 | |
2188 /* Do the main work of conditional store replacement. We already know | |
2189 that the recognized pattern looks like so: | |
2190 | |
2191 split: | |
2192 if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1) | |
2193 MIDDLE_BB: | |
2194 something | |
2195 fallthrough (edge E0) | |
2196 JOIN_BB: | |
2197 some more | |
2198 | |
2199 We check that MIDDLE_BB contains only one store, that that store | |
2200 doesn't trap (not via NOTRAP, but via checking if an access to the same | |
145 | 2201 memory location dominates us, or the store is to a local addressable |
2202 object) and that the store has a "simple" RHS. */ | |
0 | 2203 |
2204 static bool | |
2205 cond_store_replacement (basic_block middle_bb, basic_block join_bb, | |
111 | 2206 edge e0, edge e1, hash_set<tree> *nontrap) |
0 | 2207 { |
111 | 2208 gimple *assign = last_and_only_stmt (middle_bb); |
2209 tree lhs, rhs, name, name2; | |
2210 gphi *newphi; | |
2211 gassign *new_stmt; | |
0 | 2212 gimple_stmt_iterator gsi; |
145 | 2213 location_t locus; |
0 | 2214 |
2215 /* Check if middle_bb contains of only one store. */ | |
2216 if (!assign | |
111 | 2217 || !gimple_assign_single_p (assign) |
2218 || gimple_has_volatile_ops (assign)) | |
0 | 2219 return false; |
2220 | |
145 | 2221 /* And no PHI nodes so all uses in the single stmt are also |
2222 available where we insert to. */ | |
2223 if (!gimple_seq_empty_p (phi_nodes (middle_bb))) | |
2224 return false; | |
2225 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2226 locus = gimple_location (assign); |
0 | 2227 lhs = gimple_assign_lhs (assign); |
2228 rhs = gimple_assign_rhs1 (assign); | |
145 | 2229 if ((TREE_CODE (lhs) != MEM_REF |
2230 && TREE_CODE (lhs) != ARRAY_REF | |
2231 && TREE_CODE (lhs) != COMPONENT_REF) | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2232 || !is_gimple_reg_type (TREE_TYPE (lhs))) |
0 | 2233 return false; |
2234 | |
2235 /* Prove that we can move the store down. We could also check | |
2236 TREE_THIS_NOTRAP here, but in that case we also could move stores, | |
2237 whose value is not available readily, which we want to avoid. */ | |
111 | 2238 if (!nontrap->contains (lhs)) |
145 | 2239 { |
2240 /* If LHS is a local variable without address-taken, we could | |
2241 always safely move down the store. */ | |
2242 tree base = get_base_address (lhs); | |
2243 if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) | |
2244 return false; | |
2245 } | |
0 | 2246 |
2247 /* Now we've checked the constraints, so do the transformation: | |
2248 1) Remove the single store. */ | |
2249 gsi = gsi_for_stmt (assign); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2250 unlink_stmt_vdef (assign); |
0 | 2251 gsi_remove (&gsi, true); |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2252 release_defs (assign); |
0 | 2253 |
111 | 2254 /* Make both store and load use alias-set zero as we have to |
2255 deal with the case of the store being a conditional change | |
2256 of the dynamic type. */ | |
2257 lhs = unshare_expr (lhs); | |
2258 tree *basep = &lhs; | |
2259 while (handled_component_p (*basep)) | |
2260 basep = &TREE_OPERAND (*basep, 0); | |
2261 if (TREE_CODE (*basep) == MEM_REF | |
2262 || TREE_CODE (*basep) == TARGET_MEM_REF) | |
2263 TREE_OPERAND (*basep, 1) | |
2264 = fold_convert (ptr_type_node, TREE_OPERAND (*basep, 1)); | |
2265 else | |
2266 *basep = build2 (MEM_REF, TREE_TYPE (*basep), | |
2267 build_fold_addr_expr (*basep), | |
2268 build_zero_cst (ptr_type_node)); | |
2269 | |
2270 /* 2) Insert a load from the memory of the store to the temporary | |
0 | 2271 on the edge which did not contain the store. */ |
111 | 2272 name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore"); |
2273 new_stmt = gimple_build_assign (name, lhs); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2274 gimple_set_location (new_stmt, locus); |
145 | 2275 lhs = unshare_expr (lhs); |
2276 /* Set TREE_NO_WARNING on the rhs of the load to avoid uninit | |
2277 warnings. */ | |
2278 TREE_NO_WARNING (gimple_assign_rhs1 (new_stmt)) = 1; | |
0 | 2279 gsi_insert_on_edge (e1, new_stmt); |
2280 | |
111 | 2281 /* 3) Create a PHI node at the join block, with one argument |
0 | 2282 holding the old RHS, and the other holding the temporary |
2283 where we stored the old memory contents. */ | |
111 | 2284 name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore"); |
2285 newphi = create_phi_node (name2, join_bb); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2286 add_phi_arg (newphi, rhs, e0, locus); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2287 add_phi_arg (newphi, name, e1, locus); |
0 | 2288 |
2289 new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi)); | |
2290 | |
111 | 2291 /* 4) Insert that PHI node. */ |
0 | 2292 gsi = gsi_after_labels (join_bb); |
2293 if (gsi_end_p (gsi)) | |
2294 { | |
2295 gsi = gsi_last_bb (join_bb); | |
2296 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); | |
2297 } | |
2298 else | |
2299 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); | |
2300 | |
145 | 2301 if (dump_file && (dump_flags & TDF_DETAILS)) |
2302 { | |
2303 fprintf (dump_file, "\nConditional store replacement happened!"); | |
2304 fprintf (dump_file, "\nReplaced the store with a load."); | |
2305 fprintf (dump_file, "\nInserted a new PHI statement in joint block:\n"); | |
2306 print_gimple_stmt (dump_file, new_stmt, 0, TDF_VOPS|TDF_MEMSYMS); | |
2307 } | |
2308 | |
0 | 2309 return true; |
2310 } | |
2311 | |
111 | 2312 /* Do the main work of conditional store replacement. */ |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2313 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2314 static bool |
111 | 2315 cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb, |
2316 basic_block join_bb, gimple *then_assign, | |
2317 gimple *else_assign) | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2318 { |
111 | 2319 tree lhs_base, lhs, then_rhs, else_rhs, name; |
145 | 2320 location_t then_locus, else_locus; |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2321 gimple_stmt_iterator gsi; |
111 | 2322 gphi *newphi; |
2323 gassign *new_stmt; | |
2324 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2325 if (then_assign == NULL |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2326 || !gimple_assign_single_p (then_assign) |
111 | 2327 || gimple_clobber_p (then_assign) |
2328 || gimple_has_volatile_ops (then_assign) | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2329 || else_assign == NULL |
111 | 2330 || !gimple_assign_single_p (else_assign) |
2331 || gimple_clobber_p (else_assign) | |
2332 || gimple_has_volatile_ops (else_assign)) | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2333 return false; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2334 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2335 lhs = gimple_assign_lhs (then_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2336 if (!is_gimple_reg_type (TREE_TYPE (lhs)) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2337 || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0)) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2338 return false; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2339 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2340 lhs_base = get_base_address (lhs); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2341 if (lhs_base == NULL_TREE |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2342 || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF)) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2343 return false; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2344 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2345 then_rhs = gimple_assign_rhs1 (then_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2346 else_rhs = gimple_assign_rhs1 (else_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2347 then_locus = gimple_location (then_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2348 else_locus = gimple_location (else_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2349 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2350 /* Now we've checked the constraints, so do the transformation: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2351 1) Remove the stores. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2352 gsi = gsi_for_stmt (then_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2353 unlink_stmt_vdef (then_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2354 gsi_remove (&gsi, true); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2355 release_defs (then_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2356 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2357 gsi = gsi_for_stmt (else_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2358 unlink_stmt_vdef (else_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2359 gsi_remove (&gsi, true); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2360 release_defs (else_assign); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2361 |
111 | 2362 /* 2) Create a PHI node at the join block, with one argument |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2363 holding the old RHS, and the other holding the temporary |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2364 where we stored the old memory contents. */ |
111 | 2365 name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore"); |
2366 newphi = create_phi_node (name, join_bb); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2367 add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2368 add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2369 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2370 new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi)); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2371 |
111 | 2372 /* 3) Insert that PHI node. */ |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2373 gsi = gsi_after_labels (join_bb); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2374 if (gsi_end_p (gsi)) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2375 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2376 gsi = gsi_last_bb (join_bb); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2377 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2378 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2379 else |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2380 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2381 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2382 return true; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2383 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2384 |
131 | 2385 /* Return the single store in BB with VDEF or NULL if there are |
2386 other stores in the BB or loads following the store. */ | |
2387 | |
2388 static gimple * | |
2389 single_trailing_store_in_bb (basic_block bb, tree vdef) | |
2390 { | |
2391 if (SSA_NAME_IS_DEFAULT_DEF (vdef)) | |
2392 return NULL; | |
2393 gimple *store = SSA_NAME_DEF_STMT (vdef); | |
2394 if (gimple_bb (store) != bb | |
2395 || gimple_code (store) == GIMPLE_PHI) | |
2396 return NULL; | |
2397 | |
2398 /* Verify there is no other store in this BB. */ | |
2399 if (!SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store)) | |
2400 && gimple_bb (SSA_NAME_DEF_STMT (gimple_vuse (store))) == bb | |
2401 && gimple_code (SSA_NAME_DEF_STMT (gimple_vuse (store))) != GIMPLE_PHI) | |
2402 return NULL; | |
2403 | |
2404 /* Verify there is no load or store after the store. */ | |
2405 use_operand_p use_p; | |
2406 imm_use_iterator imm_iter; | |
2407 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vdef (store)) | |
2408 if (USE_STMT (use_p) != store | |
2409 && gimple_bb (USE_STMT (use_p)) == bb) | |
2410 return NULL; | |
2411 | |
2412 return store; | |
2413 } | |
2414 | |
/* Conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
   THEN_BB:
     ...
     X = Y;
     ...
     goto JOIN_BB;
   ELSE_BB:
     ...
     X = Z;
     ...
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that it is safe to sink the store to JOIN_BB by verifying that
   there are no read-after-write or write-after-write dependencies in
   THEN_BB and ELSE_BB.  Returns true if any store was sunk.  */

static bool
cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
                                basic_block join_bb)
{
  vec<data_reference_p> then_datarefs, else_datarefs;
  vec<ddr_p> then_ddrs, else_ddrs;
  gimple *then_store, *else_store;
  bool found, ok = false, res;
  struct data_dependence_relation *ddr;
  data_reference_p then_dr, else_dr;
  int i, j;
  tree then_lhs, else_lhs;
  basic_block blocks[3];

  /* Handle the case with single store in THEN_BB and ELSE_BB.  That is
     cheap enough to always handle as it allows us to elide dependence
     checking.  */
  /* Locate the virtual PHI in JOIN_BB that merges the memory state
     coming from the two arms; without one there is nothing to do.  */
  gphi *vphi = NULL;
  for (gphi_iterator si = gsi_start_phis (join_bb); !gsi_end_p (si);
       gsi_next (&si))
    if (virtual_operand_p (gimple_phi_result (si.phi ())))
      {
	vphi = si.phi ();
	break;
      }
  if (!vphi)
    return false;
  tree then_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (then_bb));
  tree else_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (else_bb));
  gimple *then_assign = single_trailing_store_in_bb (then_bb, then_vdef);
  if (then_assign)
    {
      gimple *else_assign = single_trailing_store_in_bb (else_bb, else_vdef);
      if (else_assign)
	return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
						 then_assign, else_assign);
    }

  /* If either vectorization or if-conversion is disabled then do
     not sink any stores.  */
  if (param_max_stores_to_sink == 0
      || (!flag_tree_loop_vectorize && !flag_tree_slp_vectorize)
      || !flag_tree_loop_if_convert)
    return false;

  /* Find data references.  */
  then_datarefs.create (1);
  else_datarefs.create (1);
  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
       == chrec_dont_know)
      || !then_datarefs.length ()
      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
	  == chrec_dont_know)
      || !else_datarefs.length ())
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Find pairs of stores with equal LHS.  */
  auto_vec<gimple *, 1> then_stores, else_stores;
  FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
    {
      if (DR_IS_READ (then_dr))
        continue;

      then_store = DR_STMT (then_dr);
      then_lhs = gimple_get_lhs (then_store);
      if (then_lhs == NULL_TREE)
	continue;
      found = false;

      FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
        {
          if (DR_IS_READ (else_dr))
            continue;

          else_store = DR_STMT (else_dr);
          else_lhs = gimple_get_lhs (else_store);
	  if (else_lhs == NULL_TREE)
	    continue;

          if (operand_equal_p (then_lhs, else_lhs, 0))
            {
              found = true;
              break;
            }
        }

      if (!found)
        continue;

      then_stores.safe_push (then_store);
      else_stores.safe_push (else_store);
    }

  /* No pairs of stores found.  */
  if (!then_stores.length ()
      || then_stores.length () > (unsigned) param_max_stores_to_sink)
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Compute and check data dependencies in both basic blocks.  */
  then_ddrs.create (1);
  else_ddrs.create (1);
  if (!compute_all_dependences (then_datarefs, &then_ddrs,
				vNULL, false)
      || !compute_all_dependences (else_datarefs, &else_ddrs,
				   vNULL, false))
    {
      free_dependence_relations (then_ddrs);
      free_dependence_relations (else_ddrs);
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }
  /* Give statements in the three blocks increasing UIDs so relative
     statement order can be compared via gimple_uid below.  */
  blocks[0] = then_bb;
  blocks[1] = else_bb;
  blocks[2] = join_bb;
  renumber_gimple_stmt_uids_in_blocks (blocks, 3);

  /* Check that there are no read-after-write or write-after-write dependencies
     in THEN_BB.  */
  FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
	  && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
	       && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
	      || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
		  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
	      || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
	{
	  free_dependence_relations (then_ddrs);
	  free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
	  return false;
	}
    }

  /* Check that there are no read-after-write or write-after-write dependencies
     in ELSE_BB.  */
  FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
	  && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
	       && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
	      || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
		  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
	      || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
	{
	  free_dependence_relations (then_ddrs);
	  free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
	  return false;
	}
    }

  /* Sink stores with same LHS.  */
  FOR_EACH_VEC_ELT (then_stores, i, then_store)
    {
      else_store = else_stores[i];
      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                              then_store, else_store);
      ok = ok || res;
    }

  free_dependence_relations (then_ddrs);
  free_dependence_relations (else_ddrs);
  free_data_refs (then_datarefs);
  free_data_refs (else_datarefs);

  return ok;
}
2622 | |
2623 /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */ | |
0 | 2624 |
2625 static bool | |
111 | 2626 local_mem_dependence (gimple *stmt, basic_block bb) |
2627 { | |
2628 tree vuse = gimple_vuse (stmt); | |
2629 gimple *def; | |
2630 | |
2631 if (!vuse) | |
2632 return false; | |
2633 | |
2634 def = SSA_NAME_DEF_STMT (vuse); | |
2635 return (def && gimple_bb (def) == bb); | |
2636 } | |
2637 | |
/* Given a "diamond" control-flow pattern where BB0 tests a condition,
   BB1 and BB2 are "then" and "else" blocks dependent on this test,
   and BB3 rejoins control flow following BB1 and BB2, look for
   opportunities to hoist loads as follows.  If BB3 contains a PHI of
   two loads, one each occurring in BB1 and BB2, and the loads are
   provably of adjacent fields in the same structure, then move both
   loads into BB0.  Of course this can only be done if there are no
   dependencies preventing such motion.

   One of the hoisted loads will always be speculative, so the
   transformation is currently conservative:

    - The fields must be strictly adjacent.
    - The two fields must occupy a single memory block that is
      guaranteed to not cross a page boundary.

    The last is difficult to prove, as such memory blocks should be
    aligned on the minimum of the stack alignment boundary and the
    alignment guaranteed by heap allocation interfaces.  Thus we rely
    on a parameter for the alignment value.

    Provided a good value is used for the last case, the first
    restriction could possibly be relaxed.  */

static void
hoist_adjacent_loads (basic_block bb0, basic_block bb1,
		      basic_block bb2, basic_block bb3)
{
  /* Profitability window: both fields must fit within one L1 cache
     line (expressed in bits below).  */
  int param_align = param_l1_cache_line_size;
  unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
  gphi_iterator gsi;

  /* Walk the phis in bb3 looking for an opportunity.  We are looking
     for phis of two SSA names, one each of which is defined in bb1 and
     bb2.  */
  for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi_stmt = gsi.phi ();
      gimple *def1, *def2;
      tree arg1, arg2, ref1, ref2, field1, field2;
      tree tree_offset1, tree_offset2, tree_size2, next;
      int offset1, offset2, size2;
      unsigned align1;
      gimple_stmt_iterator gsi2;
      basic_block bb_for_def1, bb_for_def2;

      if (gimple_phi_num_args (phi_stmt) != 2
	  || virtual_operand_p (gimple_phi_result (phi_stmt)))
	continue;

      arg1 = gimple_phi_arg_def (phi_stmt, 0);
      arg2 = gimple_phi_arg_def (phi_stmt, 1);

      if (TREE_CODE (arg1) != SSA_NAME
	  || TREE_CODE (arg2) != SSA_NAME
	  || SSA_NAME_IS_DEFAULT_DEF (arg1)
	  || SSA_NAME_IS_DEFAULT_DEF (arg2))
	continue;

      def1 = SSA_NAME_DEF_STMT (arg1);
      def2 = SSA_NAME_DEF_STMT (arg2);

      /* The two defining statements must come one from each arm of the
	 diamond (in either order).  */
      if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
	  && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
	continue;

      /* Check the mode of the arguments to be sure a conditional move
	 can be generated for it.  */
      if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
	  == CODE_FOR_nothing)
	continue;

      /* Both statements must be assignments whose RHS is a COMPONENT_REF.  */
      if (!gimple_assign_single_p (def1)
	  || !gimple_assign_single_p (def2)
	  || gimple_has_volatile_ops (def1)
	  || gimple_has_volatile_ops (def2))
	continue;

      ref1 = gimple_assign_rhs1 (def1);
      ref2 = gimple_assign_rhs1 (def2);

      if (TREE_CODE (ref1) != COMPONENT_REF
	  || TREE_CODE (ref2) != COMPONENT_REF)
	continue;

      /* The zeroth operand of the two component references must be
	 identical.  It is not sufficient to compare get_base_address of
	 the two references, because this could allow for different
	 elements of the same array in the two trees.  It is not safe to
	 assume that the existence of one array element implies the
	 existence of a different one.  */
      if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
	continue;

      field1 = TREE_OPERAND (ref1, 1);
      field2 = TREE_OPERAND (ref2, 1);

      /* Check for field adjacency, and ensure field1 comes first.
	 Skip non-FIELD_DECL entries in the DECL_CHAIN when looking
	 for the next field.  */
      for (next = DECL_CHAIN (field1);
	   next && TREE_CODE (next) != FIELD_DECL;
	   next = DECL_CHAIN (next))
	;

      if (next != field2)
	{
	  for (next = DECL_CHAIN (field2);
	       next && TREE_CODE (next) != FIELD_DECL;
	       next = DECL_CHAIN (next))
	    ;

	  if (next != field1)
	    continue;

	  std::swap (field1, field2);
	  std::swap (def1, def2);
	}

      bb_for_def1 = gimple_bb (def1);
      bb_for_def2 = gimple_bb (def2);

      /* Check for proper alignment of the first field.  */
      tree_offset1 = bit_position (field1);
      tree_offset2 = bit_position (field2);
      tree_size2 = DECL_SIZE (field2);

      if (!tree_fits_uhwi_p (tree_offset1)
	  || !tree_fits_uhwi_p (tree_offset2)
	  || !tree_fits_uhwi_p (tree_size2))
	continue;

      offset1 = tree_to_uhwi (tree_offset1);
      offset2 = tree_to_uhwi (tree_offset2);
      size2 = tree_to_uhwi (tree_size2);
      align1 = DECL_ALIGN (field1) % param_align_bits;

      /* The first field must start on a byte boundary.  */
      if (offset1 % BITS_PER_UNIT != 0)
	continue;

      /* For profitability, the two field references should fit within
	 a single cache line.  */
      if (align1 + offset2 - offset1 + size2 > param_align_bits)
	continue;

      /* The two expressions cannot be dependent upon vdefs defined
	 in bb1/bb2.  */
      if (local_mem_dependence (def1, bb_for_def1)
	  || local_mem_dependence (def2, bb_for_def2))
	continue;

      /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
	 bb0.  We hoist the first one first so that a cache miss is handled
	 efficiently regardless of hardware cache-fill policy.  */
      gsi2 = gsi_for_stmt (def1);
      gsi_move_to_bb_end (&gsi2, bb0);
      gsi2 = gsi_for_stmt (def2);
      gsi_move_to_bb_end (&gsi2, bb0);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "\nHoisting adjacent loads from %d and %d into %d: \n",
		   bb_for_def1->index, bb_for_def2->index, bb0->index);
	  print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
	  print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
	}
    }
}
2806 | |
111 | 2807 /* Determine whether we should attempt to hoist adjacent loads out of |
2808 diamond patterns in pass_phiopt. Always hoist loads if | |
2809 -fhoist-adjacent-loads is specified and the target machine has | |
2810 both a conditional move instruction and a defined cache line size. */ | |
2811 | |
2812 static bool | |
2813 gate_hoist_loads (void) | |
0 | 2814 { |
111 | 2815 return (flag_hoist_adjacent_loads == 1 |
145 | 2816 && param_l1_cache_line_size |
111 | 2817 && HAVE_conditional_move); |
2818 } | |
2819 | |
2820 /* This pass tries to replace an if-then-else block with an |
2821 assignment. We have four kinds of transformations. Some of these | |
2822 transformations are also performed by the ifcvt RTL optimizer. | |
2823 | |
2824 Conditional Replacement | |
2825 ----------------------- | |
2826 | |
2827 This transformation, implemented in conditional_replacement, | |
2828 replaces | |
2829 | |
2830 bb0: | |
2831 if (cond) goto bb2; else goto bb1; | |
2832 bb1: | |
2833 bb2: | |
2834 x = PHI <0 (bb1), 1 (bb0), ...>; | |
2835 | |
2836 with | |
2837 | |
2838 bb0: | |
2839 x' = cond; | |
2840 goto bb2; | |
2841 bb2: | |
2842 x = PHI <x' (bb0), ...>; | |
2843 | |
2844 We remove bb1 as it becomes unreachable. This occurs often due to | |
2845 gimplification of conditionals. | |
2846 | |
2847 Value Replacement | |
2848 ----------------- | |
2849 | |
2850 This transformation, implemented in value_replacement, replaces | |
2851 | |
2852 bb0: | |
2853 if (a != b) goto bb2; else goto bb1; | |
2854 bb1: | |
2855 bb2: | |
2856 x = PHI <a (bb1), b (bb0), ...>; | |
2857 | |
2858 with | |
2859 | |
2860 bb0: | |
2861 bb2: | |
2862 x = PHI <b (bb0), ...>; | |
2863 | |
2864 This opportunity can sometimes occur as a result of other | |
2865 optimizations. | |
2866 | |
2867 | |
2868 Another case caught by value replacement looks like this: | |
2869 | |
2870 bb0: | |
2871 t1 = a == CONST; | |
2872 t2 = b > c; | |
2873 t3 = t1 & t2; | |
2874 if (t3 != 0) goto bb1; else goto bb2; | |
2875 bb1: | |
2876 bb2: | |
2877 x = PHI (CONST, a) | |
2878 | |
2879 Gets replaced with: | |
2880 bb0: | |
2881 bb2: | |
2882 t1 = a == CONST; | |
2883 t2 = b > c; | |
2884 t3 = t1 & t2; | |
2885 x = a; | |
2886 | |
2887 ABS Replacement | |
2888 --------------- | |
2889 | |
2890 This transformation, implemented in abs_replacement, replaces | |
2891 | |
2892 bb0: | |
2893 if (a >= 0) goto bb2; else goto bb1; | |
2894 bb1: | |
2895 x = -a; | |
2896 bb2: | |
2897 x = PHI <x (bb1), a (bb0), ...>; | |
2898 | |
2899 with | |
2900 | |
2901 bb0: | |
2902 x' = ABS_EXPR< a >; | |
2903 bb2: | |
2904 x = PHI <x' (bb0), ...>; | |
2905 | |
2906 MIN/MAX Replacement | |
2907 ------------------- | |
2908 | |
2909 This transformation, implemented in minmax_replacement, replaces |
2910 | |
2911 bb0: | |
2912 if (a <= b) goto bb2; else goto bb1; | |
2913 bb1: | |
2914 bb2: | |
2915 x = PHI <b (bb1), a (bb0), ...>; | |
2916 | |
2917 with | |
2918 | |
2919 bb0: | |
2920 x' = MIN_EXPR (a, b) | |
2921 bb2: | |
2922 x = PHI <x' (bb0), ...>; | |
2923 | |
2924 A similar transformation is done for MAX_EXPR. | |
2925 | |
2926 | |
2927 This pass also performs a fifth transformation of a slightly different | |
2928 flavor. | |
2929 | |
2930 Factor conversion in COND_EXPR | |
2931 ------------------------------ | |
2932 | |
2933 This transformation factors the conversion out of COND_EXPR with | |
2934 factor_out_conditional_conversion. | |
2935 | |
2936 For example: | |
2937 if (a <= CST) goto <bb 3>; else goto <bb 4>; | |
2938 <bb 3>: | |
2939 tmp = (int) a; | |
2940 <bb 4>: | |
2941 tmp = PHI <tmp, CST> | |
2942 | |
2943 Into: | |
2944 if (a <= CST) goto <bb 3>; else goto <bb 4>; | |
2945 <bb 3>: | |
2946 <bb 4>: | |
2947 a = PHI <a, CST> | |
2948 tmp = (int) a; | |
2949 | |
2950 Adjacent Load Hoisting | |
2951 ---------------------- | |
2952 | |
2953 This transformation replaces | |
2954 | |
2955 bb0: | |
2956 if (...) goto bb2; else goto bb1; | |
2957 bb1: | |
2958 x1 = (<expr>).field1; | |
2959 goto bb3; | |
2960 bb2: | |
2961 x2 = (<expr>).field2; | |
2962 bb3: | |
2963 # x = PHI <x1, x2>; | |
2964 | |
2965 with | |
2966 | |
2967 bb0: | |
2968 x1 = (<expr>).field1; | |
2969 x2 = (<expr>).field2; | |
2970 if (...) goto bb2; else goto bb1; | |
2971 bb1: | |
2972 goto bb3; | |
2973 bb2: | |
2974 bb3: | |
2975 # x = PHI <x1, x2>; | |
2976 | |
2977 The purpose of this transformation is to enable generation of conditional | |
2978 move instructions such as Intel CMOVE or PowerPC ISEL. Because one of | |
2979 the loads is speculative, the transformation is restricted to very | |
2980 specific cases to avoid introducing a page fault. We are looking for | |
2981 the common idiom: | |
2982 | |
2983 if (...) | |
2984 x = y->left; | |
2985 else | |
2986 x = y->right; | |
2987 | |
2988 where left and right are typically adjacent pointers in a tree structure. */ | |
2989 | |
namespace {

/* Pass descriptor for the phiopt pass; registered under the name
   "phiopt" and timed under TV_TREE_PHIOPT.  Requires CFG and SSA.  */
const pass_data pass_data_phiopt =
{
  GIMPLE_PASS, /* type */
  "phiopt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* The PHI-optimization pass proper.  Gated on -fssa-phiopt; the
   actual work is done by tree_ssa_phiopt_worker.  */
class pass_phiopt : public gimple_opt_pass
{
public:
  pass_phiopt (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_phiopt, ctxt), early_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_phiopt (m_ctxt); }
  /* Record whether this instance of the pass runs early (pass
     parameter 0).  */
  void set_pass_param (unsigned n, bool param)
    {
      gcc_assert (n == 0);
      early_p = param;
    }
  virtual bool gate (function *) { return flag_ssa_phiopt; }
  virtual unsigned int execute (function *)
    {
      /* Adjacent-load hoisting is only considered for the late
	 instance of the pass.  */
      return tree_ssa_phiopt_worker (false,
				     !early_p ? gate_hoist_loads () : false,
				     early_p);
    }

private:
  /* True for the early instance of the pass; set via set_pass_param.  */
  bool early_p;
}; // class pass_phiopt

} // anon namespace
3032 | |
3033 gimple_opt_pass * | |
3034 make_pass_phiopt (gcc::context *ctxt) | |
3035 { | |
3036 return new pass_phiopt (ctxt); | |
3037 } | |
3038 | |
namespace {

/* Pass descriptor for the conditional store elimination pass;
   registered under the name "cselim" and sharing the TV_TREE_PHIOPT
   timer.  Requires CFG and SSA.  */
const pass_data pass_data_cselim =
{
  GIMPLE_PASS, /* type */
  "cselim", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Conditional store elimination pass.  Gated on -ftree-cselim; the
   actual work is done by tree_ssa_cs_elim.  */
class pass_cselim : public gimple_opt_pass
{
public:
  pass_cselim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cselim, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_cselim; }
  virtual unsigned int execute (function *) { return tree_ssa_cs_elim (); }

}; // class pass_cselim

} // anon namespace
3068 | |
3069 gimple_opt_pass * | |
3070 make_pass_cselim (gcc::context *ctxt) | |
3071 { | |
3072 return new pass_cselim (ctxt); | |
3073 } |