annotate gcc/tree-ssa-threadedge.c @ 158:494b0b89df80 (default tip)

author:   Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date:     Mon, 25 May 2020 18:13:55 +0900
parents:  1830386684a0
children: (none)
/* SSA Jump Threading
   Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Jeff Law <law@redhat.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-threadupdate.h"
#include "tree-ssa-scopedtables.h"
#include "tree-ssa-threadedge.h"
#include "tree-ssa-dom.h"
#include "gimple-fold.h"
#include "cfganal.h"
#include "alloc-pool.h"
#include "vr-values.h"
#include "gimple-ssa-evrp-analyze.h"

/* To avoid code explosion due to jump threading, we limit the
   number of statements we are going to copy.  This variable
   holds the number of statements currently seen that we'll have
   to copy as part of the jump threading process.  */
static int stmt_count;
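
/* Editorial note (the example below is hypothetical and not part of
   GCC): at the source level, jump threading rewrites code such as

	if (a > 10)
	  x = 1;
	else
	  x = 0;
	if (x != 0)	<- value known along each incoming edge
	  big ();
	else
	  small ();

   so that the A > 10 arm jumps straight to big () and the other arm
   straight to small (), duplicating the intervening statements onto
   each threaded path.  That duplication is why the statement budget
   above exists.  */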

/* Array to record value-handles per SSA_NAME.  */
vec<tree> ssa_name_values;

typedef tree (pfn_simplify) (gimple *, gimple *,
			     class avail_exprs_stack *,
			     basic_block);

/* Set the value for the SSA name NAME to VALUE.  */

void
set_ssa_name_value (tree name, tree value)
{
  if (SSA_NAME_VERSION (name) >= ssa_name_values.length ())
    ssa_name_values.safe_grow_cleared (SSA_NAME_VERSION (name) + 1);
  if (value && TREE_OVERFLOW_P (value))
    value = drop_tree_overflow (value);
  ssa_name_values[SSA_NAME_VERSION (name)] = value;
}

/* Initialize the per SSA_NAME value-handles array.  */

void
threadedge_initialize_values (void)
{
  gcc_assert (!ssa_name_values.exists ());
  ssa_name_values.create (num_ssa_names);
}

/* Free the per SSA_NAME value-handle array.  */

void
threadedge_finalize_values (void)
{
  ssa_name_values.release ();
}

/* Return TRUE if we may be able to thread an incoming edge into
   BB to an outgoing edge from BB.  Return FALSE otherwise.  */

bool
potentially_threadable_block (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Special case.  We can get blocks that are forwarders, but are
     not optimized away because they forward from outside a loop
     to the loop header.  We want to thread through them as we can
     sometimes thread to the loop exit, which is obviously profitable.
     The interesting case here is when the block has PHIs.  */
  if (gsi_end_p (gsi_start_nondebug_bb (bb))
      && !gsi_end_p (gsi_start_phis (bb)))
    return true;

  /* If BB has a single successor or a single predecessor, then
     there is no threading opportunity.  */
  if (single_succ_p (bb) || single_pred_p (bb))
    return false;

  /* If BB does not end with a conditional, switch or computed goto,
     then there is no threading opportunity.  */
  gsi = gsi_last_bb (bb);
  if (gsi_end_p (gsi)
      || ! gsi_stmt (gsi)
      || (gimple_code (gsi_stmt (gsi)) != GIMPLE_COND
	  && gimple_code (gsi_stmt (gsi)) != GIMPLE_GOTO
	  && gimple_code (gsi_stmt (gsi)) != GIMPLE_SWITCH))
    return false;

  return true;
}
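
/* Editorial example (hypothetical, not part of GCC): in

	if (a)
	  b = 0;	<- predecessor 1 of BB
	else
	  b = 1;	<- predecessor 2 of BB
	if (b)		<- BB: two predecessors, two successors
	  foo ();
	else
	  bar ();

   the block testing B passes the checks above: it has multiple
   predecessors, multiple successors, and ends in a conditional.  */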

/* Record temporary equivalences created by PHIs at the target of the
   edge E.  Record unwind information for the equivalences into
   CONST_AND_COPIES and EVRP_RANGE_DATA.

   If a PHI which prevents threading is encountered, then return FALSE
   indicating we should not thread this edge, else return TRUE.  */

static bool
record_temporary_equivalences_from_phis (edge e,
					 const_and_copies *const_and_copies,
					 evrp_range_analyzer *evrp_range_analyzer)
{
  gphi_iterator gsi;

  /* Each PHI creates a temporary equivalence, record them.
     These are context sensitive equivalences and will be removed
     later.  */
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree src = PHI_ARG_DEF_FROM_EDGE (phi, e);
      tree dst = gimple_phi_result (phi);

      /* If the desired argument is not the same as this PHI's result
	 and it is set by a PHI in E->dest, then we cannot thread
	 through E->dest.  */
      if (src != dst
	  && TREE_CODE (src) == SSA_NAME
	  && gimple_code (SSA_NAME_DEF_STMT (src)) == GIMPLE_PHI
	  && gimple_bb (SSA_NAME_DEF_STMT (src)) == e->dest)
	return false;

      /* We consider any non-virtual PHI as a statement since it
	 could result in a constant assignment or copy operation.  */
      if (!virtual_operand_p (dst))
	stmt_count++;

      const_and_copies->record_const_or_copy (dst, src);

      /* Also update the value range associated with DST, using
	 the range from SRC.

	 Note that even if SRC is a constant we need to set a suitable
	 output range so that VR_UNDEFINED ranges do not leak through.  */
      if (evrp_range_analyzer)
	{
	  /* Get an empty new VR we can pass to update_value_range and save
	     away in the VR stack.  */
	  vr_values *vr_values = evrp_range_analyzer->get_vr_values ();
	  value_range_equiv *new_vr = vr_values->allocate_value_range_equiv ();
	  new (new_vr) value_range_equiv ();

	  /* There are three cases to consider:

	     First if SRC is an SSA_NAME, then we can copy the value
	     range from SRC into NEW_VR.

	     Second if SRC is an INTEGER_CST, then we can just set
	     NEW_VR to a singleton range.

	     Otherwise set NEW_VR to varying.  This may be overly
	     conservative.  */
	  if (TREE_CODE (src) == SSA_NAME)
	    new_vr->deep_copy (vr_values->get_value_range (src));
	  else if (TREE_CODE (src) == INTEGER_CST)
	    new_vr->set (src);
	  else
	    new_vr->set_varying (TREE_TYPE (src));

	  /* This is a temporary range for DST, so push it.  */
	  evrp_range_analyzer->push_value_range (dst, new_vr);
	}
    }
  return true;
}
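
/* Editorial example (hypothetical GIMPLE, not part of GCC): given

	<bb 4>:
	# x_5 = PHI <0(2), y_3(3)>
	if (x_5 != 0) goto <bb 6>; else goto <bb 7>;

   traversing edge (2,4) records the temporary equivalence x_5 == 0
   (and, with EVRP, the singleton range [0, 0] for x_5), which lets
   the conditional collapse to false along that edge.  */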

/* Valueize hook for gimple_fold_stmt_to_constant_1.  */

static tree
threadedge_valueize (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    {
      tree tem = SSA_NAME_VALUE (t);
      if (tem)
	return tem;
    }
  return t;
}

/* Try to simplify each statement in E->dest, ultimately leading to
   a simplification of the COND_EXPR at the end of E->dest.

   Record unwind information for temporary equivalences onto STACK.

   Use SIMPLIFY (a pointer to a callback function) to further simplify
   statements using pass specific information.

   We might consider marking just those statements which ultimately
   feed the COND_EXPR.  It's not clear if the overhead of bookkeeping
   would be recovered by trying to simplify fewer statements.

   If we are able to simplify a statement into the form
   SSA_NAME = (SSA_NAME | gimple invariant), then we can record
   a context sensitive equivalence which may help us simplify
   later statements in E->dest.  */

static gimple *
record_temporary_equivalences_from_stmts_at_dest (edge e,
    const_and_copies *const_and_copies,
    avail_exprs_stack *avail_exprs_stack,
    evrp_range_analyzer *evrp_range_analyzer,
    pfn_simplify simplify)
{
  gimple *stmt = NULL;
  gimple_stmt_iterator gsi;
  int max_stmt_count;

  max_stmt_count = param_max_jump_thread_duplication_stmts;

  /* Walk through each statement in the block recording equivalences
     we discover.  Note any equivalences we discover are context
     sensitive (ie, are dependent on traversing E) and must be unwound
     when we're finished processing E.  */
  for (gsi = gsi_start_bb (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree cached_lhs = NULL;

      stmt = gsi_stmt (gsi);

      /* Ignore empty statements and labels.  */
      if (gimple_code (stmt) == GIMPLE_NOP
	  || gimple_code (stmt) == GIMPLE_LABEL
	  || is_gimple_debug (stmt))
	continue;

      /* If the statement has volatile operands, then we assume we
	 cannot thread through this block.  This is overly
	 conservative in some ways.  */
      if (gimple_code (stmt) == GIMPLE_ASM
	  && gimple_asm_volatile_p (as_a <gasm *> (stmt)))
	return NULL;

      /* If the statement is a unique builtin, we cannot thread
	 through here.  */
      if (gimple_code (stmt) == GIMPLE_CALL
	  && gimple_call_internal_p (stmt)
	  && gimple_call_internal_unique_p (stmt))
	return NULL;

      /* If duplicating this block is going to cause too much code
	 expansion, then do not thread through this block.  */
      stmt_count++;
      if (stmt_count > max_stmt_count)
	{
	  /* If any of the stmts in the PATH's dests are going to be
	     killed due to threading, grow the max count
	     accordingly.  */
	  if (max_stmt_count
	      == param_max_jump_thread_duplication_stmts)
	    {
	      max_stmt_count += estimate_threading_killed_stmts (e->dest);
	      if (dump_file)
		fprintf (dump_file, "threading bb %i up to %i stmts\n",
			 e->dest->index, max_stmt_count);
	    }
	  /* If we're still past the limit, we're done.  */
	  if (stmt_count > max_stmt_count)
	    return NULL;
	}

      /* These are temporary ranges, do not reflect them back into
	 the global range data.  */
      if (evrp_range_analyzer)
	evrp_range_analyzer->record_ranges_from_stmt (stmt, true);

      /* If this is not a statement that sets an SSA_NAME to a new
	 value, then do not try to simplify this statement as it will
	 not simplify in any way that is helpful for jump threading.  */
      if ((gimple_code (stmt) != GIMPLE_ASSIGN
	   || TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
	  && (gimple_code (stmt) != GIMPLE_CALL
	      || gimple_call_lhs (stmt) == NULL_TREE
	      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME))
	continue;

      /* The result of __builtin_object_size depends on all the arguments
	 of a phi node.  Temporarily using only one edge produces invalid
	 results.  For example

	 if (x < 6)
	   goto l;
	 else
	   goto l;

	 l:
	 r = PHI <&w[2].a[1](2), &a.a[6](3)>
	 __builtin_object_size (r, 0)

	 The result of __builtin_object_size is defined to be the maximum of
	 remaining bytes.  If we use only one edge on the phi, the result will
	 change to be the remaining bytes for the corresponding phi argument.

	 Similarly for __builtin_constant_p:

	 r = PHI <1(2), 2(3)>
	 __builtin_constant_p (r)

	 Both PHI arguments are constant, but x ? 1 : 2 is still not
	 constant.  */

      if (is_gimple_call (stmt))
	{
	  tree fndecl = gimple_call_fndecl (stmt);
	  if (fndecl
	      && fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
	      && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_OBJECT_SIZE
		  || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P))
	    continue;
	}

      /* At this point we have a statement which assigns an RHS to an
	 SSA_VAR on the LHS.  We want to try and simplify this statement
	 to expose more context sensitive equivalences which in turn may
	 allow us to simplify the condition at the end of the loop.

	 Handle simple copy operations as well as implied copies from
	 ASSERT_EXPRs.  */
      if (gimple_assign_single_p (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
	cached_lhs = gimple_assign_rhs1 (stmt);
      else if (gimple_assign_single_p (stmt)
	       && TREE_CODE (gimple_assign_rhs1 (stmt)) == ASSERT_EXPR)
	cached_lhs = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
      else
	{
	  /* A statement that is not a trivial copy or ASSERT_EXPR.
	     Try to fold the new expression.  Inserting the
	     expression into the hash table is unlikely to help.  */
	  /* ??? The DOM callback below can be changed to setting
	     the mprts_hook around the call to thread_across_edge,
	     avoiding the use substitution.  The VRP hook should be
	     changed to properly valueize operands itself using
	     SSA_NAME_VALUE in addition to its own lattice.  */
	  cached_lhs = gimple_fold_stmt_to_constant_1 (stmt,
						       threadedge_valueize);
	  if (NUM_SSA_OPERANDS (stmt, SSA_OP_ALL_USES) != 0
	      && (!cached_lhs
		  || (TREE_CODE (cached_lhs) != SSA_NAME
		      && !is_gimple_min_invariant (cached_lhs))))
	    {
	      /* We're going to temporarily copy propagate the operands
		 and see if that allows us to simplify this statement.  */
	      tree *copy;
	      ssa_op_iter iter;
	      use_operand_p use_p;
	      unsigned int num, i = 0;

	      num = NUM_SSA_OPERANDS (stmt, SSA_OP_ALL_USES);
	      copy = XALLOCAVEC (tree, num);

	      /* Make a copy of the uses & vuses into USES_COPY, then cprop into
		 the operands.  */
	      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
		{
		  tree tmp = NULL;
		  tree use = USE_FROM_PTR (use_p);

		  copy[i++] = use;
		  if (TREE_CODE (use) == SSA_NAME)
		    tmp = SSA_NAME_VALUE (use);
		  if (tmp)
		    SET_USE (use_p, tmp);
		}

	      cached_lhs = (*simplify) (stmt, stmt, avail_exprs_stack, e->src);

	      /* Restore the statement's original uses/defs.  */
	      i = 0;
	      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
		SET_USE (use_p, copy[i++]);
	    }
	}

      /* Record the context sensitive equivalence if we were able
	 to simplify this statement.  */
      if (cached_lhs
	  && (TREE_CODE (cached_lhs) == SSA_NAME
	      || is_gimple_min_invariant (cached_lhs)))
	const_and_copies->record_const_or_copy (gimple_get_lhs (stmt),
						cached_lhs);
    }
  return stmt;
}
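
/* Editorial example (hypothetical GIMPLE, not part of GCC): if
   traversing E recorded a_2 == 5, then for

	b_3 = a_2 + 1;

   the fold above yields 6 via threadedge_valueize, and b_3 == 6 is
   recorded as a context sensitive equivalence that may later collapse
   the conditional ending the block.  */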

static tree simplify_control_stmt_condition_1 (edge, gimple *,
					       class avail_exprs_stack *,
					       tree, enum tree_code, tree,
					       gcond *, pfn_simplify,
					       unsigned);

/* Simplify the control statement at the end of the block E->dest.

   To avoid allocating memory unnecessarily, a scratch GIMPLE_COND
   is available to use/clobber in DUMMY_COND.

   Use SIMPLIFY (a pointer to a callback function) to further simplify
   a condition using pass specific information.

   Return the simplified condition or NULL if simplification could
   not be performed.  When simplifying a GIMPLE_SWITCH, we may return
   the CASE_LABEL_EXPR that will be taken.

   The available expression table is referenced via AVAIL_EXPRS_STACK.  */

static tree
simplify_control_stmt_condition (edge e,
				 gimple *stmt,
				 class avail_exprs_stack *avail_exprs_stack,
				 gcond *dummy_cond,
				 pfn_simplify simplify)
{
  tree cond, cached_lhs;
  enum gimple_code code = gimple_code (stmt);

  /* For comparisons, we have to update both operands, then try
     to simplify the comparison.  */
  if (code == GIMPLE_COND)
    {
      tree op0, op1;
      enum tree_code cond_code;

      op0 = gimple_cond_lhs (stmt);
      op1 = gimple_cond_rhs (stmt);
      cond_code = gimple_cond_code (stmt);

      /* Get the current value of both operands.  */
      if (TREE_CODE (op0) == SSA_NAME)
	{
	  for (int i = 0; i < 2; i++)
	    {
	      if (TREE_CODE (op0) == SSA_NAME
		  && SSA_NAME_VALUE (op0))
		op0 = SSA_NAME_VALUE (op0);
	      else
		break;
	    }
	}

      if (TREE_CODE (op1) == SSA_NAME)
	{
	  for (int i = 0; i < 2; i++)
	    {
	      if (TREE_CODE (op1) == SSA_NAME
		  && SSA_NAME_VALUE (op1))
		op1 = SSA_NAME_VALUE (op1);
	      else
		break;
	    }
	}

      const unsigned recursion_limit = 4;

      cached_lhs
	= simplify_control_stmt_condition_1 (e, stmt, avail_exprs_stack,
					     op0, cond_code, op1,
					     dummy_cond, simplify,
					     recursion_limit);

      /* If we were testing an integer/pointer against a constant, then
	 we can use the FSM code to trace the value of the SSA_NAME.  If
	 a value is found, then the condition will collapse to a constant.

	 Return the SSA_NAME we want to trace back rather than the full
	 expression and give the FSM threader a chance to find its value.  */
      if (cached_lhs == NULL)
	{
	  /* Recover the original operands.  They may have been simplified
	     using context sensitive equivalences.  Those context sensitive
	     equivalences may not be valid on paths found by the FSM optimizer.  */
	  tree op0 = gimple_cond_lhs (stmt);
	  tree op1 = gimple_cond_rhs (stmt);

	  if ((INTEGRAL_TYPE_P (TREE_TYPE (op0))
	       || POINTER_TYPE_P (TREE_TYPE (op0)))
	      && TREE_CODE (op0) == SSA_NAME
	      && TREE_CODE (op1) == INTEGER_CST)
	    return op0;
	}

      return cached_lhs;
    }

  if (code == GIMPLE_SWITCH)
    cond = gimple_switch_index (as_a <gswitch *> (stmt));
  else if (code == GIMPLE_GOTO)
    cond = gimple_goto_dest (stmt);
  else
    gcc_unreachable ();

  /* We can have conditionals which just test the state of a variable
     rather than use a relational operator.  These are simpler to handle.  */
  if (TREE_CODE (cond) == SSA_NAME)
    {
      tree original_lhs = cond;
      cached_lhs = cond;

      /* Get the variable's current value from the equivalence chains.

	 It is possible to get loops in the SSA_NAME_VALUE chains
	 (consider threading the backedge of a loop where we have
	 a loop invariant SSA_NAME used in the condition).  */
      if (cached_lhs)
	{
	  for (int i = 0; i < 2; i++)
	    {
	      if (TREE_CODE (cached_lhs) == SSA_NAME
		  && SSA_NAME_VALUE (cached_lhs))
		cached_lhs = SSA_NAME_VALUE (cached_lhs);
	      else
		break;
	    }
	}

      /* If we haven't simplified to an invariant yet, then use the
	 pass specific callback to try and simplify it further.  */
      if (cached_lhs && ! is_gimple_min_invariant (cached_lhs))
	{
	  if (code == GIMPLE_SWITCH)
	    {
	      /* Replace the index operand of the GIMPLE_SWITCH with any LHS
		 we found before handing off to VRP.  If simplification is
		 possible, the simplified value will be a CASE_LABEL_EXPR of
		 the label that is proven to be taken.  */
	      gswitch *dummy_switch = as_a<gswitch *> (gimple_copy (stmt));
	      gimple_switch_set_index (dummy_switch, cached_lhs);
	      cached_lhs = (*simplify) (dummy_switch, stmt,
					avail_exprs_stack, e->src);
	      ggc_free (dummy_switch);
	    }
	  else
	    cached_lhs = (*simplify) (stmt, stmt, avail_exprs_stack, e->src);
	}

      /* We couldn't find an invariant.  But, callers of this
	 function may be able to do something useful with the
	 unmodified destination.  */
      if (!cached_lhs)
	cached_lhs = original_lhs;
    }
  else
    cached_lhs = NULL;

  return cached_lhs;
}
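
/* Editorial note (the chain below is hypothetical, not part of GCC):
   the bounded loops above follow SSA_NAME_VALUE at most twice because
   a recorded value may itself be an SSA name with a recorded value,
   and chains can cycle when threading a loop backedge, e.g.

	a_1 -> b_2 -> 7		two hops reach the invariant 7

   Longer or cyclic chains simply stop early instead of looping.  */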

/* Recursive helper for simplify_control_stmt_condition.  */

static tree
simplify_control_stmt_condition_1 (edge e,
				   gimple *stmt,
				   class avail_exprs_stack *avail_exprs_stack,
				   tree op0,
				   enum tree_code cond_code,
				   tree op1,
				   gcond *dummy_cond,
				   pfn_simplify simplify,
				   unsigned limit)
{
  if (limit == 0)
    return NULL_TREE;

  /* We may need to canonicalize the comparison.  For
     example, op0 might be a constant while op1 is an
     SSA_NAME.  Failure to canonicalize will cause us to
     miss threading opportunities.  */
  if (tree_swap_operands_p (op0, op1))
    {
      cond_code = swap_tree_comparison (cond_code);
      std::swap (op0, op1);
    }

  /* If the condition has the form (A & B) CMP 0 or (A | B) CMP 0 then
     recurse into the LHS to see if there is a dominating ASSERT_EXPR
     of A or of B that makes this condition always true or always false
     along the edge E.  */
  if ((cond_code == EQ_EXPR || cond_code == NE_EXPR)
      && TREE_CODE (op0) == SSA_NAME
      && integer_zerop (op1))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
      if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	;
      else if (gimple_assign_rhs_code (def_stmt) == BIT_AND_EXPR
	       || gimple_assign_rhs_code (def_stmt) == BIT_IOR_EXPR)
	{
	  enum tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
	  const tree rhs1 = gimple_assign_rhs1 (def_stmt);
	  const tree rhs2 = gimple_assign_rhs2 (def_stmt);

	  /* Is A != 0 ?  */
	  const tree res1
	    = simplify_control_stmt_condition_1 (e, def_stmt, avail_exprs_stack,
						 rhs1, NE_EXPR, op1,
						 dummy_cond, simplify,
						 limit - 1);
	  if (res1 == NULL_TREE)
	    ;
	  else if (rhs_code == BIT_AND_EXPR && integer_zerop (res1))
	    {
	      /* If A == 0 then (A & B) != 0 is always false.  */
	      if (cond_code == NE_EXPR)
		return boolean_false_node;
	      /* If A == 0 then (A & B) == 0 is always true.  */
	      if (cond_code == EQ_EXPR)
		return boolean_true_node;
	    }
	  else if (rhs_code == BIT_IOR_EXPR && integer_nonzerop (res1))
	    {
	      /* If A != 0 then (A | B) != 0 is always true.  */
	      if (cond_code == NE_EXPR)
		return boolean_true_node;
	      /* If A != 0 then (A | B) == 0 is always false.  */
	      if (cond_code == EQ_EXPR)
		return boolean_false_node;
	    }

	  /* Is B != 0 ?  */
	  const tree res2
	    = simplify_control_stmt_condition_1 (e, def_stmt, avail_exprs_stack,
						 rhs2, NE_EXPR, op1,
						 dummy_cond, simplify,
						 limit - 1);
	  if (res2 == NULL_TREE)
	    ;
	  else if (rhs_code == BIT_AND_EXPR && integer_zerop (res2))
	    {
	      /* If B == 0 then (A & B) != 0 is always false.  */
	      if (cond_code == NE_EXPR)
		return boolean_false_node;
	      /* If B == 0 then (A & B) == 0 is always true.  */
	      if (cond_code == EQ_EXPR)
		return boolean_true_node;
	    }
	  else if (rhs_code == BIT_IOR_EXPR && integer_nonzerop (res2))
	    {
	      /* If B != 0 then (A | B) != 0 is always true.  */
	      if (cond_code == NE_EXPR)
		return boolean_true_node;
	      /* If B != 0 then (A | B) == 0 is always false.  */
	      if (cond_code == EQ_EXPR)
		return boolean_false_node;
	    }

	  if (res1 != NULL_TREE && res2 != NULL_TREE)
	    {
	      if (rhs_code == BIT_AND_EXPR
		  && TYPE_PRECISION (TREE_TYPE (op0)) == 1
		  && integer_nonzerop (res1)
		  && integer_nonzerop (res2))
		{
		  /* If A != 0 and B != 0 then (bool)(A & B) != 0 is true.  */
		  if (cond_code == NE_EXPR)
		    return boolean_true_node;
		  /* If A != 0 and B != 0 then (bool)(A & B) == 0 is false.  */
		  if (cond_code == EQ_EXPR)
		    return boolean_false_node;
		}

	      if (rhs_code == BIT_IOR_EXPR
		  && integer_zerop (res1)
		  && integer_zerop (res2))
		{
		  /* If A == 0 and B == 0 then (A | B) != 0 is false.  */
		  if (cond_code == NE_EXPR)
		    return boolean_false_node;
		  /* If A == 0 and B == 0 then (A | B) == 0 is true.  */
		  if (cond_code == EQ_EXPR)
		    return boolean_true_node;
		}
	    }
	}
      /* Handle (A CMP B) CMP 0.  */
      else if (TREE_CODE_CLASS (gimple_assign_rhs_code (def_stmt))
	       == tcc_comparison)
	{
	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
	  tree rhs2 = gimple_assign_rhs2 (def_stmt);

	  tree_code new_cond = gimple_assign_rhs_code (def_stmt);
	  if (cond_code == EQ_EXPR)
	    new_cond = invert_tree_comparison (new_cond, false);

	  tree res
	    = simplify_control_stmt_condition_1 (e, def_stmt, avail_exprs_stack,
						 rhs1, new_cond, rhs2,
						 dummy_cond, simplify,
						 limit - 1);
	  if (res != NULL_TREE && is_gimple_min_invariant (res))
	    return res;
	}
    }

  gimple_cond_set_code (dummy_cond, cond_code);
  gimple_cond_set_lhs (dummy_cond, op0);
  gimple_cond_set_rhs (dummy_cond, op1);

  /* We absolutely do not care about any type conversions;
     we only care about a zero/nonzero value.  */
  fold_defer_overflow_warnings ();

  tree res = fold_binary (cond_code, boolean_type_node, op0, op1);
  if (res)
    while (CONVERT_EXPR_P (res))
      res = TREE_OPERAND (res, 0);

  fold_undefer_overflow_warnings ((res && is_gimple_min_invariant (res)),
				  stmt, WARN_STRICT_OVERFLOW_CONDITIONAL);

  /* If we have not simplified the condition down to an invariant,
     then use the pass specific callback to simplify the condition.  */
  if (!res
      || !is_gimple_min_invariant (res))
    res = (*simplify) (dummy_cond, stmt, avail_exprs_stack, e->src);

  return res;
}
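
/* Editorial example (hypothetical GIMPLE, not part of GCC): for

	t_4 = a_2 & b_3;
	if (t_4 != 0) goto <bb 5>; else goto <bb 6>;

   the helper above recurses on a_2 != 0 and b_3 != 0.  If a_2 is
   known to be zero along E, (a_2 & b_3) != 0 folds to false without
   the value of b_3 ever being needed.  */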

/* Copy debug stmts from DEST's chain of single predecessors up to
   SRC, so that we don't lose the bindings as PHI nodes are introduced
   when DEST gains new predecessors.  */
void
propagate_threaded_block_debug_into (basic_block dest, basic_block src)
{
  if (!MAY_HAVE_DEBUG_BIND_STMTS)
    return;

  if (!single_pred_p (dest))
    return;

  gcc_checking_assert (dest != src);

  gimple_stmt_iterator gsi = gsi_after_labels (dest);
  int i = 0;
  const int alloc_count = 16; // ?? Should this be a PARAM?

  /* Estimate the number of debug vars overridden in the beginning of
     DEST, to tell how many we're going to need to begin with.  */
  for (gimple_stmt_iterator si = gsi;
       i * 4 <= alloc_count * 3 && !gsi_end_p (si); gsi_next (&si))
    {
      gimple *stmt = gsi_stmt (si);
      if (!is_gimple_debug (stmt))
	break;
      if (gimple_debug_nonbind_marker_p (stmt))
	continue;
      i++;
    }

  auto_vec<tree, alloc_count> fewvars;
  hash_set<tree> *vars = NULL;

  /* If we're already starting with 3/4 of alloc_count, go for a
     hash_set, otherwise start with an unordered stack-allocated
     VEC.  */
  if (i * 4 > alloc_count * 3)
    vars = new hash_set<tree>;

  /* Now go through the initial debug stmts in DEST again, this time
     actually inserting in VARS or FEWVARS.  Don't bother checking for
     duplicates in FEWVARS.  */
  for (gimple_stmt_iterator si = gsi; !gsi_end_p (si); gsi_next (&si))
    {
      gimple *stmt = gsi_stmt (si);
      if (!is_gimple_debug (stmt))
	break;

      tree var;

      if (gimple_debug_bind_p (stmt))
	var = gimple_debug_bind_get_var (stmt);
      else if (gimple_debug_source_bind_p (stmt))
	var = gimple_debug_source_bind_get_var (stmt);
      else if (gimple_debug_nonbind_marker_p (stmt))
	continue;
      else
	gcc_unreachable ();

      if (vars)
	vars->add (var);
      else
	fewvars.quick_push (var);
    }

  basic_block bb = dest;

  do
    {
      bb = single_pred (bb);
      for (gimple_stmt_iterator si = gsi_last_bb (bb);
	   !gsi_end_p (si); gsi_prev (&si))
	{
	  gimple *stmt = gsi_stmt (si);
	  if (!is_gimple_debug (stmt))
	    continue;

	  tree var;

	  if (gimple_debug_bind_p (stmt))
	    var = gimple_debug_bind_get_var (stmt);
	  else if (gimple_debug_source_bind_p (stmt))
	    var = gimple_debug_source_bind_get_var (stmt);
	  else if (gimple_debug_nonbind_marker_p (stmt))
	    continue;
	  else
	    gcc_unreachable ();

	  /* Discard debug bind overlaps.  Unlike stmts from src,
	     copied into a new block that will precede BB, debug bind
	     stmts in bypassed BBs may actually be discarded if
	     they're overwritten by subsequent debug bind stmts.  We
	     want to copy binds for all modified variables, so that we
	     retain a bind to the shared def if there is one, or to a
	     newly introduced PHI node if there is one.  Our bind will
	     end up reset if the value is dead, but that implies the
	     variable couldn't have survived, so it's fine.  We are
	     not actually running the code that performed the binds at
	     this point, we're just adding binds so that they survive
	     the new confluence, so markers should not be copied.  */
	  if (vars && vars->add (var))
	    continue;
	  else if (!vars)
	    {
	      int i = fewvars.length ();
	      while (i--)
		if (fewvars[i] == var)
		  break;
	      if (i >= 0)
		continue;
	      else if (fewvars.length () < (unsigned) alloc_count)
		fewvars.quick_push (var);
	      else
		{
		  vars = new hash_set<tree>;
		  for (i = 0; i < alloc_count; i++)
		    vars->add (fewvars[i]);
		  fewvars.release ();
		  vars->add (var);
		}
	    }

	  stmt = gimple_copy (stmt);
	  /* ??? Should we drop the location of the copy to denote
	     they're artificial bindings?  */
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	}
    }
  while (bb != src && single_pred_p (bb));

  if (vars)
    delete vars;
  else if (fewvars.exists ())
    fewvars.release ();
}

/* See if TAKEN_EDGE->dest is a threadable block with no side effects (ie, it
   need not be duplicated as part of the CFG/SSA updating process).

   If it is threadable, add it to PATH and VISITED and recurse, ultimately
   returning TRUE from the toplevel call.  Otherwise do nothing and
   return false.

   DUMMY_COND, SIMPLIFY are used to try and simplify the condition at the
   end of TAKEN_EDGE->dest.

   The available expression table is referenced via AVAIL_EXPRS_STACK.  */

static bool
thread_around_empty_blocks (edge taken_edge,
			    gcond *dummy_cond,
			    class avail_exprs_stack *avail_exprs_stack,
			    pfn_simplify simplify,
			    bitmap visited,
			    vec<jump_thread_edge *> *path)
{
  basic_block bb = taken_edge->dest;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  tree cond;

  /* The key property of these blocks is that they need not be duplicated
     when threading.  Thus they cannot have visible side effects such
     as PHI nodes.  */
  if (!gsi_end_p (gsi_start_phis (bb)))
    return false;

  /* Skip over DEBUG statements at the start of the block.  */
  gsi = gsi_start_nondebug_bb (bb);

  /* If the block has no statements, but does have a single successor, then
     it's just a forwarding block and we can thread through it trivially.

     However, note that just threading through empty blocks with single
     successors is not inherently profitable.  For the jump thread to
     be profitable, we must avoid a runtime conditional.

     By taking the return value from the recursive call, we get the
     desired effect of returning TRUE when we found a profitable jump
     threading opportunity and FALSE otherwise.

     This is particularly important when this routine is called after
     processing a joiner block.  Returning TRUE too aggressively in
     that case results in pointless duplication of the joiner block.  */
  if (gsi_end_p (gsi))
    {
      if (single_succ_p (bb))
	{
	  taken_edge = single_succ_edge (bb);

	  if ((taken_edge->flags & EDGE_DFS_BACK) != 0)
	    return false;

	  if (!bitmap_bit_p (visited, taken_edge->dest->index))
	    {
	      jump_thread_edge *x
		= new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
	      path->safe_push (x);
	      bitmap_set_bit (visited, taken_edge->dest->index);
	      return thread_around_empty_blocks (taken_edge,
						 dummy_cond,
						 avail_exprs_stack,
						 simplify,
						 visited,
						 path);
	    }
	}

      /* We have a block with no statements, but multiple successors?  */
      return false;
    }

  /* The only real statement this block can have is a control
     flow altering statement.  Anything else stops the thread.  */
  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_GOTO
      && gimple_code (stmt) != GIMPLE_SWITCH)
    return false;

  /* Extract and simplify the condition.  */
  cond = simplify_control_stmt_condition (taken_edge, stmt,
					  avail_exprs_stack, dummy_cond,
					  simplify);

  /* If the condition can be statically computed and we have not already
     visited the destination edge, then add the taken edge to our thread
     path.  */
  if (cond != NULL_TREE
      && (is_gimple_min_invariant (cond)
	  || TREE_CODE (cond) == CASE_LABEL_EXPR))
    {
      if (TREE_CODE (cond) == CASE_LABEL_EXPR)
	taken_edge = find_edge (bb, label_to_block (cfun, CASE_LABEL (cond)));
      else
	taken_edge = find_taken_edge (bb, cond);

      if (!taken_edge
	  || (taken_edge->flags & EDGE_DFS_BACK) != 0)
	return false;

      if (bitmap_bit_p (visited, taken_edge->dest->index))
	return false;
      bitmap_set_bit (visited, taken_edge->dest->index);

      jump_thread_edge *x
	= new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
      path->safe_push (x);

      thread_around_empty_blocks (taken_edge,
				  dummy_cond,
				  avail_exprs_stack,
				  simplify,
				  visited,
				  path);
      return true;
    }

  return false;
}
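
/* Editorial example (hypothetical GIMPLE, not part of GCC): blocks
   accepted by the routine above either merely forward,

	<bb 5>:			<- no PHIs, no statements
	goto <bb 9>;

   or contain only a control statement whose outcome is already known
   along the path, so extending PATH through them requires no block
   duplication.  */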

/* We are exiting E->src, see if E->dest ends with a conditional
   jump which has a known value when reached via E.

   E->dest can have arbitrary side effects which, if threading is
   successful, will be maintained.

   Special care is necessary if E is a back edge in the CFG as we
   may have already recorded equivalences for E->dest into our
   various tables, including the result of the conditional at
   the end of E->dest.  Threading opportunities are severely
   limited in that case to avoid short-circuiting the loop
   incorrectly.

   DUMMY_COND is a shared cond_expr used by condition simplification as scratch,
   to avoid allocating memory.

   STACK is used to undo temporary equivalences created during the walk of
   E->dest.

   SIMPLIFY is a pass-specific function used to simplify statements.

   Our caller is responsible for restoring the state of the expression
   and const_and_copies stacks.

   Positive return value is success.  Zero return value is failure, but
   the block can still be duplicated as a joiner in a jump thread path,
   negative indicates the block should not be duplicated and thus is not
   suitable for a joiner in a jump threading path.  */

static int
thread_through_normal_block (edge e,
			     gcond *dummy_cond,
			     const_and_copies *const_and_copies,
			     avail_exprs_stack *avail_exprs_stack,
			     evrp_range_analyzer *evrp_range_analyzer,
			     pfn_simplify simplify,
			     vec<jump_thread_edge *> *path,
			     bitmap visited)
{
  /* We want to record any equivalences created by traversing E.  */
  record_temporary_equivalences (e, const_and_copies, avail_exprs_stack);

  /* PHIs create temporary equivalences.
     Note that if we found a PHI that made the block non-threadable, then
     we need to bubble that up to our caller in the same manner we do
     when we prematurely stop processing statements below.  */
  if (!record_temporary_equivalences_from_phis (e, const_and_copies,
						evrp_range_analyzer))
    return -1;

  /* Now walk each statement recording any context sensitive
     temporary equivalences we can detect.  */
  gimple *stmt
    = record_temporary_equivalences_from_stmts_at_dest (e, const_and_copies,
							avail_exprs_stack,
							evrp_range_analyzer,
							simplify);

  /* There are two reasons STMT might be null, and distinguishing
     between them is important.

     First the block may not have had any statements.  For example, it
     might have some PHIs and unconditionally transfer control elsewhere.
     Such blocks are suitable for jump threading, particularly as a
     joiner block.

     The second reason would be if we did not process all the statements
     in the block (because there were too many to make duplicating the
     block profitable).  If we did not look at all the statements, then
     we may not have invalidated everything needing invalidation.  Thus
     we must signal to our caller that this block is not suitable for
     use as a joiner in a threading path.  */
  if (!stmt)
    {
      /* First case.  The block simply doesn't have any instructions, but
	 does have PHIs.  */
      if (gsi_end_p (gsi_start_nondebug_bb (e->dest))
	  && !gsi_end_p (gsi_start_phis (e->dest)))
	return 0;

      /* Second case.  */
      return -1;
    }

  /* If we stopped at a COND_EXPR or SWITCH_EXPR, see if we know which arm
     will be taken.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_GOTO
      || gimple_code (stmt) == GIMPLE_SWITCH)
    {
      tree cond;

      /* Extract and simplify the condition.  */
      cond = simplify_control_stmt_condition (e, stmt, avail_exprs_stack,
					      dummy_cond, simplify);

      if (!cond)
	return 0;

      if (is_gimple_min_invariant (cond)
	  || TREE_CODE (cond) == CASE_LABEL_EXPR)
	{
	  edge taken_edge;
	  if (TREE_CODE (cond) == CASE_LABEL_EXPR)
	    taken_edge = find_edge (e->dest,
				    label_to_block (cfun, CASE_LABEL (cond)));
	  else
	    taken_edge = find_taken_edge (e->dest, cond);

	  basic_block dest = (taken_edge ? taken_edge->dest : NULL);

	  /* DEST could be NULL for a computed jump to an absolute
	     address.  */
	  if (dest == NULL
	      || dest == e->dest
	      || (taken_edge->flags & EDGE_DFS_BACK) != 0
	      || bitmap_bit_p (visited, dest->index))
	    return 0;

	  /* Only push the EDGE_START_JUMP_THREAD marker if this is
	     first edge on the path.  */
	  if (path->length () == 0)
	    {
	      jump_thread_edge *x
		= new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
	      path->safe_push (x);
	    }

	  jump_thread_edge *x
	    = new jump_thread_edge (taken_edge, EDGE_COPY_SRC_BLOCK);
	  path->safe_push (x);

	  /* See if we can thread through DEST as well, this helps capture
	     secondary effects of threading without having to re-run DOM or
	     VRP.

	     We don't want to thread back to a block we have already
	     visited.  This may be overly conservative.  */
	  bitmap_set_bit (visited, dest->index);
	  bitmap_set_bit (visited, e->dest->index);
	  thread_around_empty_blocks (taken_edge,
				      dummy_cond,
				      avail_exprs_stack,
				      simplify,
				      visited,
				      path);
	  return 1;
	}
    }
  return 0;
}
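
/* Editorial sketch (not part of GCC): on success the routine above
   leaves PATH describing the edges to rewire, conceptually

	path[0] = (E,          EDGE_START_JUMP_THREAD)
	path[1] = (taken_edge, EDGE_COPY_SRC_BLOCK)
	path[2] = ...          EDGE_NO_COPY_SRC_BLOCK entries appended
			       by thread_around_empty_blocks

   which the caller hands to register_jump_thread for the actual CFG
   and SSA update in tree-ssa-threadupdate.c.  */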

/* There are basic blocks that look like:

   <P0>
   p0 = a CMP b ; or p0 = (INT) (a CMP b)
   goto <X>;

   <P1>
   p1 = c CMP d
   goto <X>;

   <X>
   # phi = PHI <p0 (P0), p1 (P1)>
   if (phi != 0) goto <Y>; else goto <Z>;

   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD,
   and edges (X,Y) and (X,Z) as EDGE_COPY_SRC_JOINER_BLOCK.

   Return true if E is (P0,X) or (P1,X).  */

bool
edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
{
  /* See if there is only one stmt which is gcond.  */
  gcond *gs;
  if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest))))
    return false;

  /* See if gcond's condition is "(phi !=/== 0/1)" in the basic block.  */
  tree cond = gimple_cond_lhs (gs);
  enum tree_code code = gimple_cond_code (gs);
  tree rhs = gimple_cond_rhs (gs);
  if (TREE_CODE (cond) != SSA_NAME
      || (code != NE_EXPR && code != EQ_EXPR)
      || (!integer_onep (rhs) && !integer_zerop (rhs)))
    return false;
  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
  if (phi == NULL || gimple_bb (phi) != e->dest)
    return false;

  /* Check if phi's incoming value is CMP.  */
  gassign *def;
  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
  if (TREE_CODE (value) != SSA_NAME
      || !has_single_use (value)
      || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
    return false;

  /* Or if it is (INT) (a CMP b).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
    {
      value = gimple_assign_rhs1 (def);
      if (TREE_CODE (value) != SSA_NAME
	  || !has_single_use (value)
	  || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value))))
	return false;
    }

  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
    return false;

  return true;
}

/* We are exiting E->src, see if E->dest ends with a conditional
   jump which has a known value when reached via E.

   DUMMY_COND is a shared cond_expr used by condition simplification as scratch,
   to avoid allocating memory.

   CONST_AND_COPIES is used to undo temporary equivalences created during the
   walk of E->dest.

   The available expression table is referenced via AVAIL_EXPRS_STACK.

   SIMPLIFY is a pass-specific function used to simplify statements.  */

static void
thread_across_edge (gcond *dummy_cond,
		    edge e,
		    class const_and_copies *const_and_copies,
		    class avail_exprs_stack *avail_exprs_stack,
		    class evrp_range_analyzer *evrp_range_analyzer,
		    pfn_simplify simplify)
{
  bitmap visited = BITMAP_ALLOC (NULL);

  const_and_copies->push_marker ();
  avail_exprs_stack->push_marker ();
  if (evrp_range_analyzer)
    evrp_range_analyzer->push_marker ();

  stmt_count = 0;

  vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
  bitmap_clear (visited);
  bitmap_set_bit (visited, e->src->index);
  bitmap_set_bit (visited, e->dest->index);

  int threaded;
  if ((e->flags & EDGE_DFS_BACK) == 0)
    threaded = thread_through_normal_block (e, dummy_cond,
					    const_and_copies,
					    avail_exprs_stack,
					    evrp_range_analyzer,
					    simplify, path,
					    visited);
  else
    threaded = 0;

  if (threaded > 0)
    {
      propagate_threaded_block_debug_into (path->last ()->e->dest,
					   e->dest);
      const_and_copies->pop_to_marker ();
      avail_exprs_stack->pop_to_marker ();
      if (evrp_range_analyzer)
	evrp_range_analyzer->pop_to_marker ();
      BITMAP_FREE (visited);
      register_jump_thread (path);
      return;
    }
  else
    {
      /* Negative and zero return values indicate no threading was possible,
	 thus there should be no edges on the thread path and no need to walk
	 through the vector entries.  */
      gcc_assert (path->length () == 0);
      path->release ();
      delete path;

      /* A negative status indicates the target block was deemed too big to
	 duplicate.  Just quit now rather than trying to use the block as
	 a joiner in a jump threading path.

	 This prevents unnecessary code growth, but more importantly if we
	 do not look at all the statements in the block, then we may have
	 missed some invalidations if we had traversed a backedge!  */
      if (threaded < 0)
	{
	  BITMAP_FREE (visited);
	  const_and_copies->pop_to_marker ();
	  avail_exprs_stack->pop_to_marker ();
	  if (evrp_range_analyzer)
	    evrp_range_analyzer->pop_to_marker ();
	  return;
	}
    }

  /* We were unable to determine what out edge from E->dest is taken.  However,
     we might still be able to thread through successors of E->dest.  This
     often occurs when E->dest is a joiner block which then fans back out
     based on redundant tests.

     If so, we'll copy E->dest and redirect the appropriate predecessor to
     the copy.  Within the copy of E->dest, we'll thread one or more edges
     to points deeper in the CFG.

     This is a stopgap until we have a more structured approach to path
     isolation.  */
  {
    edge taken_edge;
    edge_iterator ei;
    bool found;

    /* If E->dest has abnormal outgoing edges, then there's no guarantee
       we can safely redirect any of the edges.  Just punt those cases.  */
    FOR_EACH_EDGE (taken_edge, ei, e->dest->succs)
      if (taken_edge->flags & EDGE_COMPLEX)
	{
	  const_and_copies->pop_to_marker ();
	  avail_exprs_stack->pop_to_marker ();
	  if (evrp_range_analyzer)
	    evrp_range_analyzer->pop_to_marker ();
	  BITMAP_FREE (visited);
	  return;
	}

    /* Look at each successor of E->dest to see if we can thread through it.  */
    FOR_EACH_EDGE (taken_edge, ei, e->dest->succs)
      {
	if ((e->flags & EDGE_DFS_BACK) != 0
	    || (taken_edge->flags & EDGE_DFS_BACK) != 0)
	  continue;

	/* Push a fresh marker so we can unwind the equivalences created
	   for each of E->dest's successors.  */
	const_and_copies->push_marker ();
	avail_exprs_stack->push_marker ();
	if (evrp_range_analyzer)
	  evrp_range_analyzer->push_marker ();

	/* Avoid threading to any block we have already visited.  */
	bitmap_clear (visited);
	bitmap_set_bit (visited, e->src->index);
	bitmap_set_bit (visited, e->dest->index);
	bitmap_set_bit (visited, taken_edge->dest->index);
	vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();

	/* Record whether or not we were able to thread through a successor
	   of E->dest.  */
	jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
	path->safe_push (x);

	x = new jump_thread_edge (taken_edge, EDGE_COPY_SRC_JOINER_BLOCK);
	path->safe_push (x);
	found = thread_around_empty_blocks (taken_edge,
					    dummy_cond,
					    avail_exprs_stack,
					    simplify,
					    visited,
					    path);

	if (!found)
	  found = thread_through_normal_block (path->last ()->e, dummy_cond,
					       const_and_copies,
					       avail_exprs_stack,
					       evrp_range_analyzer,
					       simplify, path,
					       visited) > 0;

	/* If we were able to thread through a successor of E->dest, then
	   record the jump threading opportunity.  */
	if (found
	    || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e))
	  {
	    if (taken_edge->dest != path->last ()->e->dest)
	      propagate_threaded_block_debug_into (path->last ()->e->dest,
						   taken_edge->dest);
	    register_jump_thread (path);
	  }
	else
	  delete_jump_thread_path (path);

	/* And unwind the equivalence table.  */
	if (evrp_range_analyzer)
	  evrp_range_analyzer->pop_to_marker ();
	avail_exprs_stack->pop_to_marker ();
	const_and_copies->pop_to_marker ();
      }
    BITMAP_FREE (visited);
  }

  if (evrp_range_analyzer)
    evrp_range_analyzer->pop_to_marker ();
  const_and_copies->pop_to_marker ();
  avail_exprs_stack->pop_to_marker ();
}
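
/* Editorial note (the GIMPLE below is hypothetical, not part of GCC):
   the joiner case above arises when E->dest's own conditional cannot
   be resolved, but a block behind one of its successors retests a
   value that is known along E:

	<bb 3>:			<- E->dest, the joiner
	if (a_1 > m_2) goto <bb 4>; else goto <bb 5>;

	<bb 4>:
	if (x_6 != 0) ...	<- x_6 known along E

   E->dest is copied (EDGE_COPY_SRC_JOINER_BLOCK) so the path through
   <bb 4> can be threaded past the second test.  */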

/* Examine the outgoing edges from BB and conditionally
   try to thread them.

   DUMMY_COND is a shared cond_expr used by condition simplification as scratch,
   to avoid allocating memory.

   CONST_AND_COPIES is used to undo temporary equivalences created during the
   walk of E->dest.

   The available expression table is referenced via AVAIL_EXPRS_STACK.

   SIMPLIFY is a pass-specific function used to simplify statements.  */

void
thread_outgoing_edges (basic_block bb, gcond *dummy_cond,
		       class const_and_copies *const_and_copies,
		       class avail_exprs_stack *avail_exprs_stack,
		       class evrp_range_analyzer *evrp_range_analyzer,
		       tree (*simplify) (gimple *, gimple *,
					 class avail_exprs_stack *,
					 basic_block))
{
  int flags = (EDGE_IGNORE | EDGE_COMPLEX | EDGE_ABNORMAL);
  gimple *last;

  /* If we have an outgoing edge to a block with multiple incoming and
     outgoing edges, then we may be able to thread the edge, i.e., we
     may be able to statically determine which of the outgoing edges
     will be traversed when the incoming edge from BB is traversed.  */
  if (single_succ_p (bb)
      && (single_succ_edge (bb)->flags & flags) == 0
      && potentially_threadable_block (single_succ (bb)))
    {
      thread_across_edge (dummy_cond, single_succ_edge (bb),
			  const_and_copies, avail_exprs_stack,
			  evrp_range_analyzer, simplify);
    }
  else if ((last = last_stmt (bb))
	   && gimple_code (last) == GIMPLE_COND
	   && EDGE_COUNT (bb->succs) == 2
	   && (EDGE_SUCC (bb, 0)->flags & flags) == 0
	   && (EDGE_SUCC (bb, 1)->flags & flags) == 0)
    {
      edge true_edge, false_edge;

      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);

      /* Only try to thread the edge if it reaches a target block with
	 more than one predecessor and more than one successor.  */
      if (potentially_threadable_block (true_edge->dest))
	thread_across_edge (dummy_cond, true_edge,
			    const_and_copies, avail_exprs_stack,
			    evrp_range_analyzer, simplify);

      /* Similarly for the ELSE arm.  */
      if (potentially_threadable_block (false_edge->dest))
	thread_across_edge (dummy_cond, false_edge,
			    const_and_copies, avail_exprs_stack,
			    evrp_range_analyzer, simplify);
    }
}
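
/* Editorial sketch (not part of GCC; the driver loop and callback
   name are hypothetical): a client pass such as DOM invokes the entry
   point above roughly as

	gcond *dummy_cond = gimple_build_cond (NE_EXPR,
					       integer_zero_node,
					       integer_zero_node,
					       NULL, NULL);
	basic_block bb;
	FOR_EACH_BB_FN (bb, cfun)
	  thread_outgoing_edges (bb, dummy_cond, const_and_copies,
				 avail_exprs_stack, evrp_range_analyzer,
				 simplify_stmt_for_jump_threading);

   with the equivalence tables pushed and popped around the walk as
   the comments above require.  */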