/* Detect paths through the CFG which can never be executed in a conforming
   program and isolate them.

   Copyright (C) 2013-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-cfg.h"
#include "cfganal.h"
#include "intl.h"

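/* Set by the routines below whenever they modify the CFG; examined at the
   end of the pass to decide whether CFG cleanup and SSA updating TODOs
   need to be returned.  */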
static bool cfg_altered;

/* Callback for walk_stmt_load_store_ops.

   Return TRUE if OP will dereference the tree stored in DATA, FALSE
   otherwise.

   This routine only makes a superficial check for a dereference.  Thus,
   it must only be used if it is safe to return a false negative.  */
static bool
check_loadstore (gimple *stmt, tree op, tree, void *data)
{
  if ((TREE_CODE (op) == MEM_REF || TREE_CODE (op) == TARGET_MEM_REF)
      && operand_equal_p (TREE_OPERAND (op, 0), (tree)data, 0))
    {
      TREE_THIS_VOLATILE (op) = 1;
      TREE_SIDE_EFFECTS (op) = 1;
      update_stmt (stmt);
      return true;
    }
  return false;
}

/* Insert a trap after SI and split the block after the trap.  */

static void
insert_trap (gimple_stmt_iterator *si_p, tree op)
{
  /* We want the NULL pointer dereference to actually occur so that
     code that wishes to catch the signal can do so.

     If the dereference is a load, then there's nothing to do as the
     LHS will be a throw-away SSA_NAME and the RHS is the NULL dereference.

     If the dereference is a store and we can easily transform the RHS,
     then simplify the RHS to enable more DCE.  Note that we require the
     statement to be a GIMPLE_ASSIGN which filters out calls on the RHS.  */
  gimple *stmt = gsi_stmt (*si_p);
  if (walk_stmt_load_store_ops (stmt, (void *)op, NULL, check_loadstore)
      && is_gimple_assign (stmt)
      && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))))
    {
      /* We just need to turn the RHS into zero converted to the proper
         type.  */
      tree type = TREE_TYPE (gimple_assign_lhs (stmt));
      gimple_assign_set_rhs_code (stmt, INTEGER_CST);
      gimple_assign_set_rhs1 (stmt, fold_convert (type, integer_zero_node));
      update_stmt (stmt);
    }

  gcall *new_stmt
    = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gimple_seq seq = NULL;
  gimple_seq_add_stmt (&seq, new_stmt);

  /* If we had a NULL pointer dereference, then we want to insert the
     __builtin_trap after the statement, for the other cases we want
     to insert before the statement.  */
  if (walk_stmt_load_store_ops (stmt, (void *)op,
                                check_loadstore,
                                check_loadstore))
    {
      gsi_insert_after (si_p, seq, GSI_NEW_STMT);
      if (stmt_ends_bb_p (stmt))
        {
          split_block (gimple_bb (stmt), stmt);
          return;
        }
    }
  else
    gsi_insert_before (si_p, seq, GSI_NEW_STMT);

  split_block (gimple_bb (new_stmt), new_stmt);
  *si_p = gsi_for_stmt (stmt);
}

/* BB when reached via incoming edge E will exhibit undefined behavior
   at STMT.  Isolate and optimize the path which exhibits undefined
   behavior.

   Isolation is simple.  Duplicate BB and redirect E to BB'.

   Optimization is simple as well.  Replace STMT in BB' with an
   unconditional trap and remove all outgoing edges from BB'.

   If RET_ZERO, do not trap, only return NULL.

   DUPLICATE is a pre-existing duplicate, use it as BB' if it exists.

   Return BB' (which may be equal to DUPLICATE).  */

ATTRIBUTE_RETURNS_NONNULL basic_block
isolate_path (basic_block bb, basic_block duplicate,
              edge e, gimple *stmt, tree op, bool ret_zero)
{
  gimple_stmt_iterator si, si2;
  edge_iterator ei;
  edge e2;
  bool impossible = true;
  profile_count count = e->count ();

  for (si = gsi_start_bb (bb); gsi_stmt (si) != stmt; gsi_next (&si))
    if (stmt_can_terminate_bb_p (gsi_stmt (si)))
      {
        impossible = false;
        break;
      }
  force_edge_cold (e, impossible);

  /* First duplicate BB if we have not done so already and remove all
     the duplicate's outgoing edges as duplicate is going to unconditionally
     trap.  Removing the outgoing edges is both an optimization and ensures
     we don't need to do any PHI node updates.  */
  if (!duplicate)
    {
      duplicate = duplicate_block (bb, NULL, NULL);
      duplicate->count = profile_count::zero ();
      if (!ret_zero)
        for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
          remove_edge (e2);
    }
  bb->count -= count;

  /* Complete the isolation step by redirecting E to reach DUPLICATE.  */
  e2 = redirect_edge_and_branch (e, duplicate);
  if (e2)
    {
      flush_pending_stmts (e2);

      /* Update profile only when redirection is really processed.  */
      bb->count += e->count ();
    }

  /* There may be more than one statement in DUPLICATE which exhibits
     undefined behavior.  Ultimately we want the first such statement in
     DUPLICATE so that we're able to delete as much code as possible.

     So each time we discover undefined behavior in DUPLICATE, search for
     the statement which triggers undefined behavior.  If found, then
     transform the statement into a trap and delete everything after the
     statement.  If not found, then this particular instance was subsumed by
     an earlier instance of undefined behavior and there's nothing to do.

     This is made more complicated by the fact that we have STMT, which is in
     BB rather than in DUPLICATE.  So we set up two iterators, one for each
     block and walk forward looking for STMT in BB, advancing each iterator at
     each step.

     When we find STMT the second iterator should point to STMT's equivalent in
     DUPLICATE.  If DUPLICATE ends before STMT is found in BB, then there's
     nothing to do.

     Ignore labels and debug statements.  */
  si = gsi_start_nondebug_after_labels_bb (bb);
  si2 = gsi_start_nondebug_after_labels_bb (duplicate);
  while (!gsi_end_p (si) && !gsi_end_p (si2) && gsi_stmt (si) != stmt)
    {
      gsi_next_nondebug (&si);
      gsi_next_nondebug (&si2);
    }

  /* This would be an indicator that we never found STMT in BB, which should
     never happen.  */
  gcc_assert (!gsi_end_p (si));

  /* If we did not run to the end of DUPLICATE, then SI points to STMT and
     SI2 points to the duplicate of STMT in DUPLICATE.  Insert a trap
     before SI2 and remove SI2 and all trailing statements.  */
  if (!gsi_end_p (si2))
    {
      if (ret_zero)
        {
          greturn *ret = as_a <greturn *> (gsi_stmt (si2));
          tree zero = build_zero_cst (TREE_TYPE (gimple_return_retval (ret)));
          gimple_return_set_retval (ret, zero);
          update_stmt (ret);
        }
      else
        insert_trap (&si2, op);
    }

  return duplicate;
}

/* Return TRUE if STMT is a div/mod operation using DIVISOR as the divisor.
   FALSE otherwise.  */

static bool
is_divmod_with_given_divisor (gimple *stmt, tree divisor)
{
  /* Only assignments matter.  */
  if (!is_gimple_assign (stmt))
    return false;

  /* Check for every DIV/MOD expression.  */
  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
  if (rhs_code == TRUNC_DIV_EXPR
      || rhs_code == FLOOR_DIV_EXPR
      || rhs_code == CEIL_DIV_EXPR
      || rhs_code == EXACT_DIV_EXPR
      || rhs_code == ROUND_DIV_EXPR
      || rhs_code == TRUNC_MOD_EXPR
      || rhs_code == FLOOR_MOD_EXPR
      || rhs_code == CEIL_MOD_EXPR
      || rhs_code == ROUND_MOD_EXPR)
    {
      /* Pointer equality is fine when DIVISOR is an SSA_NAME, but
         not sufficient for constants which may have different types.  */
      if (operand_equal_p (gimple_assign_rhs2 (stmt), divisor, 0))
        return true;
    }
  return false;
}

/* NAME is an SSA_NAME that we have already determined has the value 0 or NULL.

   Return TRUE if USE_STMT uses NAME in a way where a 0 or NULL value results
   in undefined behavior, FALSE otherwise.

   LOC is used for issuing diagnostics.  This case represents potential
   undefined behavior exposed by path splitting and that's reflected in
   the diagnostic.  */

bool
stmt_uses_name_in_undefined_way (gimple *use_stmt, tree name, location_t loc)
{
  /* If we are working with a non pointer type, then see
     if this use is a DIV/MOD operation using NAME as the
     divisor.  */
  if (!POINTER_TYPE_P (TREE_TYPE (name)))
    {
      if (!cfun->can_throw_non_call_exceptions)
        return is_divmod_with_given_divisor (use_stmt, name);
      return false;
    }

  /* NAME is a pointer, so see if it's used in a context where it must
     be non-NULL.  */
  bool by_dereference
    = infer_nonnull_range_by_dereference (use_stmt, name);

  if (by_dereference
      || infer_nonnull_range_by_attribute (use_stmt, name))
    {
      if (by_dereference)
        {
          warning_at (loc, OPT_Wnull_dereference,
                      "potential null pointer dereference");
          if (!flag_isolate_erroneous_paths_dereference)
            return false;
        }
      else
        {
          if (!flag_isolate_erroneous_paths_attribute)
            return false;
        }
      return true;
    }
  return false;
}

/* Return TRUE if USE_STMT uses 0 or NULL in a context which results in
   undefined behavior, FALSE otherwise.

   These cases are explicit in the IL.  */

bool
stmt_uses_0_or_null_in_undefined_way (gimple *stmt)
{
  if (!cfun->can_throw_non_call_exceptions
      && is_divmod_with_given_divisor (stmt, integer_zero_node))
    return true;

  /* By passing null_pointer_node, we can use the
     infer_nonnull_range functions to detect explicit NULL
     pointer dereferences and other uses where a non-NULL
     value is required.  */

  bool by_dereference
    = infer_nonnull_range_by_dereference (stmt, null_pointer_node);
  if (by_dereference
      || infer_nonnull_range_by_attribute (stmt, null_pointer_node))
    {
      if (by_dereference)
        {
          location_t loc = gimple_location (stmt);
          warning_at (loc, OPT_Wnull_dereference,
                      "null pointer dereference");
          if (!flag_isolate_erroneous_paths_dereference)
            return false;
        }
      else
        {
          if (!flag_isolate_erroneous_paths_attribute)
            return false;
        }
      return true;
    }
  return false;
}

/* Describes the property of a return statement that may return
   the address of one or more local variables.  The type must
   be safely assignable and copyable so that it can be stored in
   a hash_map.  */
class args_loc_t
{
 public:

  args_loc_t (): nargs (), locvec (), ptr (&ptr)
  {
    locvec.create (4);
  }

  args_loc_t (const args_loc_t &rhs)
    : nargs (rhs.nargs), locvec (rhs.locvec.copy ()), ptr (&ptr) { }

  args_loc_t& operator= (const args_loc_t &rhs)
  {
    nargs = rhs.nargs;
    locvec.release ();
    locvec = rhs.locvec.copy ();
    return *this;
  }

  ~args_loc_t ()
  {
    locvec.release ();
    gcc_assert (ptr == &ptr);
  }

  /* For a PHI in a return statement its number of arguments.  When greater
     than LOCVEC.LENGTH () implies that an address of one of the locals in
     LOCVEC may but need not be returned by the statement.  Otherwise,
     unless both are zero, it implies it definitely is returned.  */
  unsigned nargs;
  /* The locations of local variables/alloca calls returned by the return
     statement.  Avoid using auto_vec here since it's not safe to copy due
     to pr90904.  */
  vec <location_t> locvec;
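  /* Always points at this very member; the destructor asserts PTR == &PTR
     to catch objects copied by means other than the constructors above
     (see the pr90904 note).  */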
  void *ptr;
};

/* A mapping from a return statement to the locations of local variables
   whose addresses it may return.  */
typedef hash_map <gimple *, args_loc_t> locmap_t;

/* Given the LOCMAP mapping, issue diagnostics about returning addresses
   of local variables.  When MAYBE is set, all diagnostics will be of
   the "may return" kind.  Otherwise each will be determined based on
   the equality of the corresponding NARGS and LOCVEC.LENGTH () values.  */

static void
diag_returned_locals (bool maybe, const locmap_t &locmap)
{
  for (locmap_t::iterator it = locmap.begin (); it != locmap.end (); ++it)
    {
      gimple *stmt = (*it).first;
      const args_loc_t &argsloc = (*it).second;
      location_t stmtloc = gimple_location (stmt);

      auto_diagnostic_group d;
      unsigned nargs = argsloc.locvec.length ();
      if (warning_at (stmtloc, OPT_Wreturn_local_addr,
                      (maybe || argsloc.nargs > nargs
                       ? G_("function may return address of local variable")
                       : G_("function returns address of local variable"))))
        {
          for (unsigned i = 0; i != nargs; ++i)
            inform (argsloc.locvec[i], "declared here");
        }
    }
}

/* Return true if EXPR is an expression of pointer type that refers
   to the address of one or more variables with automatic storage
   duration.  If so, add an entry to *PLOCMAP and insert into
   PLOCMAP->LOCVEC the locations of the corresponding local variables
   whose address is returned by the RETURN_STMT (which may be set to
   (gimple*)-1 as a placeholder for such a statement).  VISITED is
   a bitmap of PHI nodes already visited by recursive calls.  When
   null, PHI expressions are not considered.  */

static bool
is_addr_local (gimple *return_stmt, tree exp, locmap_t *plocmap,
               hash_set<gphi *> *visited)
{
  if (TREE_CODE (exp) == ADDR_EXPR)
    {
      tree baseaddr = get_base_address (TREE_OPERAND (exp, 0));
      if (TREE_CODE (baseaddr) == MEM_REF)
        return is_addr_local (return_stmt, TREE_OPERAND (baseaddr, 0),
                              plocmap, visited);

      if ((!VAR_P (baseaddr)
           || is_global_var (baseaddr))
          && TREE_CODE (baseaddr) != PARM_DECL)
        return false;

      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
      argsloc.locvec.safe_push (DECL_SOURCE_LOCATION (baseaddr));
      return true;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (exp)))
    return false;

  if (TREE_CODE (exp) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (exp);
      enum gimple_code code = gimple_code (def_stmt);

      if (is_gimple_assign (def_stmt))
        {
          tree type = TREE_TYPE (gimple_assign_lhs (def_stmt));
          if (POINTER_TYPE_P (type))
            {
              tree_code code = gimple_assign_rhs_code (def_stmt);
              tree ptr1 = NULL_TREE, ptr2 = NULL_TREE;

              /* Set to the number of arguments examined that should
                 be added to ARGSLOC->NARGS to identify expressions
                 only some but not all of whose operands refer to local
                 addresses.  */
              unsigned nargs = 0;
              if (code == COND_EXPR)
                {
                  ptr1 = gimple_assign_rhs2 (def_stmt);
                  ptr2 = gimple_assign_rhs3 (def_stmt);
                  nargs = 2;
                }
              else if (code == MAX_EXPR || code == MIN_EXPR)
                {
                  ptr1 = gimple_assign_rhs1 (def_stmt);
                  ptr2 = gimple_assign_rhs2 (def_stmt);
                  nargs = 2;
                }
              else if (code == ADDR_EXPR
                       || code == NOP_EXPR
                       || code == POINTER_PLUS_EXPR)
                /* Leave NARGS at zero and let the recursive call set it.  */
                ptr1 = gimple_assign_rhs1 (def_stmt);

              /* Avoid short-circuiting the logical OR result in case
                 both operands refer to local variables, in which case
                 both should be considered and identified in the warning.  */
              bool res1 = false, res2 = false;
              if (ptr1)
                res1 = is_addr_local (return_stmt, ptr1, plocmap, visited);
              if (ptr2)
                res2 = is_addr_local (return_stmt, ptr2, plocmap, visited);

              if (nargs)
                if (args_loc_t *argsloc = plocmap->get (return_stmt))
                  argsloc->nargs += nargs;

              return res1 || res2;
            }
          return false;
        }

      if (code == GIMPLE_CALL
          && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL))
        {
          /* Handle alloca and friends that return pointers to automatic
             storage.  */
          tree fn = gimple_call_fndecl (def_stmt);
          int code = DECL_FUNCTION_CODE (fn);
          if (code == BUILT_IN_ALLOCA
              || code == BUILT_IN_ALLOCA_WITH_ALIGN
              || code == BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX)
            {
              args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
              argsloc.locvec.safe_push (gimple_location (def_stmt));
              return true;
            }

          if (gimple_call_num_args (def_stmt) < 1)
            return false;

          /* Recursively examine the first argument of calls to built-ins
             that return it.  */
          switch (code)
            {
            case BUILT_IN_MEMCPY:
            case BUILT_IN_MEMCPY_CHK:
            case BUILT_IN_MEMPCPY:
            case BUILT_IN_MEMPCPY_CHK:
            case BUILT_IN_MEMMOVE:
            case BUILT_IN_MEMMOVE_CHK:
            case BUILT_IN_STPCPY:
            case BUILT_IN_STPCPY_CHK:
            case BUILT_IN_STPNCPY:
            case BUILT_IN_STPNCPY_CHK:
            case BUILT_IN_STRCAT:
            case BUILT_IN_STRCAT_CHK:
            case BUILT_IN_STRCHR:
            case BUILT_IN_STRCPY:
            case BUILT_IN_STRCPY_CHK:
            case BUILT_IN_STRNCAT:
            case BUILT_IN_STRNCAT_CHK:
            case BUILT_IN_STRNCPY:
            case BUILT_IN_STRNCPY_CHK:
            case BUILT_IN_STRRCHR:
            case BUILT_IN_STRSTR:
              return is_addr_local (return_stmt,
                                    gimple_call_arg (def_stmt, 0),
                                    plocmap, visited);
            default:
              return false;
            }
        }

      if (code == GIMPLE_PHI && visited)
        {
          gphi *phi_stmt = as_a <gphi *> (def_stmt);
          if (visited->add (phi_stmt))
            return false;

          unsigned count = 0;
          unsigned nargs = gimple_phi_num_args (phi_stmt);
          args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
          /* Bump up the number of operands examined by the number of
             operands of this PHI.  */
          argsloc.nargs += nargs;
          for (unsigned i = 0; i < gimple_phi_num_args (phi_stmt); ++i)
            {
              tree arg = gimple_phi_arg_def (phi_stmt, i);
              if (is_addr_local (return_stmt, arg, plocmap, visited))
                ++count;
            }
          return count != 0;
        }
    }

  return false;
}

/* Detect returning the address of a local variable in a PHI result LHS
   and argument ARG and PHI edge E in basic block BB.  Add an entry for
   each use to LOCMAP, setting its NARGS member to the NARGS argument
   (the number of PHI operands) plus the number of arguments in binary
   expressions referenced by ARG.  Call isolate_path for each returned
   address and set *ISOLATED to true if called.
   Return either DUPLICATE or the most recent result of isolate_path.  */

static basic_block
handle_return_addr_local_phi_arg (basic_block bb, basic_block duplicate,
                                  tree lhs, tree arg, edge e, locmap_t &locmap,
                                  unsigned nargs, bool *isolated)
{
  /* Use (gimple*)-1 as a temporary placeholder and replace it with
     the return statement below once it is known.  Using a null doesn't
     work because it's used by the hash_map to mean "no-entry."  Pass
     null instead of a visited_phis bitmap to avoid descending into
     PHIs since they are being processed by the caller.  Those that
     remain will be checked again later.  */
  if (!is_addr_local ((gimple*)-1, arg, &locmap, NULL))
    {
      /* Remove the placeholder regardless of success or failure.  */
      locmap.remove ((gimple*)-1);
      return duplicate;
    }

  const args_loc_t* const placeargsloc = locmap.get ((gimple*)-1);
  const unsigned nlocs = placeargsloc->locvec.length ();
  gcc_assert (nlocs);

  /* Add to the number of PHI arguments determined by the caller
     the number of operands of the expressions referenced by ARG.
     This lets the caller determine whether it's dealing with
     a "may return" or "definitely returns."  */
  nargs += placeargsloc->nargs;

  /* Set to true if any expressions referenced by ARG involve
     multiple addresses only some of which are those of locals.  */
  bool maybe = placeargsloc->nargs > placeargsloc->locvec.length ();

  gimple *use_stmt;
  imm_use_iterator iter;

  /* Look for uses of the PHI result LHS in return statements.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      greturn *return_stmt = dyn_cast <greturn *> (use_stmt);
      if (!return_stmt)
        continue;

      if (gimple_return_retval (return_stmt) != lhs)
        continue;

      /* Add an entry for the return statement and the locations
         of the PHI arguments obtained above to the map.  */
      args_loc_t &argsloc = locmap.get_or_insert (use_stmt);
      argsloc.nargs = nargs;
      unsigned nelts = argsloc.locvec.length () + nlocs;
      argsloc.locvec.reserve (nelts);
      argsloc.locvec.splice (placeargsloc->locvec);

      if (!maybe
          && (flag_isolate_erroneous_paths_dereference
              || flag_isolate_erroneous_paths_attribute)
          && gimple_bb (use_stmt) == bb)
        {
          duplicate = isolate_path (bb, duplicate, e,
                                    use_stmt, lhs, true);

          /* Let caller know the path has been isolated.  */
          *isolated = true;
        }
    }

  locmap.remove ((gimple*)-1);

  return duplicate;
}

/* Look for PHI nodes which feed statements in the same block where
   the value of the PHI node implies the statement is erroneous.

   For example, a NULL PHI arg value which then feeds a pointer
   dereference.

   When found isolate and optimize the path associated with the PHI
   argument feeding the erroneous statement.  */
static void
find_implicit_erroneous_behavior (void)
{
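  /* Accumulates "return address of local variable" candidates found while
     processing the PHIs below; the corresponding diagnostics are emitted
     all at once by diag_returned_locals at the end of this function.  */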
  locmap_t locmap;

  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gphi_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
         block where the block has any abnormal outgoing edges.

         We might be able to relax this in the future.  We have to detect
         when we have to split the block with the NULL dereference and
         the trap we insert.  We have to preserve abnormal edges out
         of the isolated block which in turn means updating PHIs at
         the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
        continue;


      /* If BB has an edge to itself, then duplication of BB below
         could result in reallocation of BB's PHI nodes.  If that happens
         then the loop below over the PHIs would use the old PHI and
         thus invalid information.  We don't have a good way to know
         if a PHI has been reallocated, so just avoid isolation in
         this case.  */
      if (find_edge (bb, bb))
        continue;

      /* First look for a PHI which sets a pointer to NULL and which
         is then dereferenced within BB.  This is somewhat overly
         conservative, but probably catches most of the interesting
         cases.  */
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gphi *phi = si.phi ();
          tree lhs = gimple_phi_result (phi);

          /* Initial number of PHI arguments.  The result may change
             from one iteration of the loop below to the next in
             response to changes to the CFG but only the initial
             value is stored below for use by diagnostics.  */
          unsigned nargs = gimple_phi_num_args (phi);

          /* PHI produces a pointer result.  See if any of the PHI's
             arguments are NULL.

             When we remove an edge, we want to reprocess the current
             index since the argument at that index will have been
             removed, hence the ugly way we update I for each iteration.  */
          basic_block duplicate = NULL;
          for (unsigned i = 0, next_i = 0;
               i < gimple_phi_num_args (phi); i = next_i)
            {
              tree arg = gimple_phi_arg_def (phi, i);
              edge e = gimple_phi_arg_edge (phi, i);

              /* Advance the argument index unless a path involving
                 the current argument has been isolated.  */
              next_i = i + 1;
              bool isolated = false;
              duplicate = handle_return_addr_local_phi_arg (bb, duplicate, lhs,
                                                            arg, e, locmap,
                                                            nargs, &isolated);
              if (isolated)
                {
                  cfg_altered = true;
                  next_i = i;
                }

              if (!integer_zerop (arg))
                continue;

              location_t phi_arg_loc = gimple_phi_arg_location (phi, i);

              imm_use_iterator iter;
              gimple *use_stmt;

              /* We've got a NULL PHI argument.  Now see if the
                 PHI's result is dereferenced within BB.  */
              FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
                {
                  /* We only care about uses in BB.  Catching cases in
                     other blocks would require more complex path
                     isolation code.  */
                  if (gimple_bb (use_stmt) != bb)
                    continue;

                  location_t loc = gimple_location (use_stmt)
                    ? gimple_location (use_stmt)
                    : phi_arg_loc;

                  if (stmt_uses_name_in_undefined_way (use_stmt, lhs, loc))
                    {
                      duplicate = isolate_path (bb, duplicate, e,
                                                use_stmt, lhs, false);

                      /* When we remove an incoming edge, we need to
                         reprocess the Ith element.  */
                      next_i = i;
                      cfg_altered = true;
                    }
                }
            }
        }
    }

  diag_returned_locals (false, locmap);
}

/* Detect and diagnose returning the address of a local variable
   in RETURN_STMT in basic block BB.  This only becomes undefined
   behavior if the result is used, so we do not insert a trap and
   only return NULL instead.  */

static void
warn_return_addr_local (basic_block bb, greturn *return_stmt)
{
  tree val = gimple_return_retval (return_stmt);
  if (!val)
    return;

  locmap_t locmap;
  hash_set<gphi *> visited_phis;
  if (!is_addr_local (return_stmt, val, &locmap, &visited_phis))
    return;

  /* We only need it for this particular case.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);

  const args_loc_t *argsloc = locmap.get (return_stmt);
  gcc_assert (argsloc);

  bool maybe = argsloc->nargs > argsloc->locvec.length ();
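  /* If BB does not post-dominate the entry block's successor the return
     statement is not executed on every path through the function, so only
     warn that the address "may" be returned.  */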
  if (!maybe)
    maybe = !dominated_by_p (CDI_POST_DOMINATORS,
                             single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)), bb);

  diag_returned_locals (maybe, locmap);

  /* Bail if the statement isn't certain to return the address
     of a local (e.g., if it involves a conditional expression
     that wasn't transformed into a PHI or if it involves
     a MAX_EXPR or MIN_EXPR only one of whose operands is a local
     (even though such an expression isn't valid in C or has
     defined semantics in C++).  */
  if (maybe)
    return;

  /* Do not modify code if the user only asked for warnings.  */
  if (flag_isolate_erroneous_paths_dereference
      || flag_isolate_erroneous_paths_attribute)
    {
      tree zero = build_zero_cst (TREE_TYPE (val));
      gimple_return_set_retval (return_stmt, zero);
      update_stmt (return_stmt);
    }
}

/* Look for statements which exhibit erroneous behavior.  For example
   a NULL pointer dereference.

   When found, optimize the block containing the erroneous behavior.  */
static void
find_explicit_erroneous_behavior (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
         block where the block has any abnormal outgoing edges.

         We might be able to relax this in the future.  We have to detect
         when we have to split the block with the NULL dereference and
         the trap we insert.  We have to preserve abnormal edges out
         of the isolated block which in turn means updating PHIs at
         the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
        continue;

      /* Now look at the statements in the block and see if any of
         them explicitly dereference a NULL pointer.  This happens
         because of jump threading and constant propagation.  */
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple *stmt = gsi_stmt (si);

          if (stmt_uses_0_or_null_in_undefined_way (stmt))
            {
              insert_trap (&si, null_pointer_node);
              bb = gimple_bb (gsi_stmt (si));

              /* Ignore any more operands on this statement and
                 continue the statement iterator (which should
                 terminate its loop immediately).  */
              cfg_altered = true;
              break;
            }

          /* Look for a return statement that returns the address
             of a local variable or the result of alloca.  */
          if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
            warn_return_addr_local (bb, return_stmt);
        }
    }
}

/* Search the function for statements which, if executed, would cause
   the program to fault such as a dereference of a NULL pointer.

   Such a program can't be valid if such a statement was to execute
   according to ISO standards.

   We detect explicit NULL pointer dereferences as well as those implied
   by a PHI argument having a NULL value which unconditionally flows into
   a dereference in the same block as the PHI.

   In the former case we replace the offending statement with an
   unconditional trap and eliminate the outgoing edges from the statement's
   basic block.  This may expose secondary optimization opportunities.

   In the latter case, we isolate the path(s) with the NULL PHI
   feeding the dereference.  We can then replace the offending statement
   and eliminate the outgoing edges in the duplicate.  Again, this may
   expose secondary optimization opportunities.

   A warning for both cases may be advisable as well.

   Other statically detectable violations of the ISO standard could be
   handled in a similar way, such as out-of-bounds array indexing.  */

static unsigned int
gimple_ssa_isolate_erroneous_paths (void)
{
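  /* isolate_path relies on duplicate_block, which records the original/copy
     block mapping in these tables, so they must be initialized up front.  */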
  initialize_original_copy_tables ();

  /* Search all the blocks for edges which, if traversed, will
     result in undefined behavior.  */
  cfg_altered = false;

  /* First handle cases where traversal of a particular edge
     triggers undefined behavior.  These cases require creating
     duplicate blocks and thus new SSA_NAMEs.

     We want that process complete prior to the phase where we start
     removing edges from the CFG.  Edge removal may ultimately result in
     removal of PHI nodes and thus releasing SSA_NAMEs back to the
     name manager.

     If the two processes run in parallel we could release an SSA_NAME
     back to the manager but we could still have dangling references
     to the released SSA_NAME in unreachable blocks.  Running the two
     phases sequentially ensures that any released names do not have
     dangling references left in the IL.  */
  find_implicit_erroneous_behavior ();
  find_explicit_erroneous_behavior ();

  free_original_copy_tables ();

  /* We scramble the CFG and loop structures a bit, clean up
     appropriately.  We really should incrementally update the
     loop structures, in theory it shouldn't be that hard.  */
  free_dominance_info (CDI_POST_DOMINATORS);
  if (cfg_altered)
    {
      free_dominance_info (CDI_DOMINATORS);
      loops_state_set (LOOPS_NEED_FIXUP);
      return TODO_cleanup_cfg | TODO_update_ssa;
    }
  return 0;
}

namespace {
const pass_data pass_data_isolate_erroneous_paths =
{
  GIMPLE_PASS, /* type */
  "isolate-paths", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_ISOLATE_ERRONEOUS_PATHS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_isolate_erroneous_paths : public gimple_opt_pass
{
public:
  pass_isolate_erroneous_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_isolate_erroneous_paths, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_isolate_erroneous_paths (m_ctxt); }
  virtual bool gate (function *)
  {
    /* If we do not have a suitable builtin function for the trap statement,
       then do not perform the optimization.  */
    return (flag_isolate_erroneous_paths_dereference != 0
            || flag_isolate_erroneous_paths_attribute != 0
            || warn_null_dereference);
  }

  virtual unsigned int execute (function *)
  {
    return gimple_ssa_isolate_erroneous_paths ();
  }

}; // class pass_isolate_erroneous_paths
}

gimple_opt_pass *
make_pass_isolate_erroneous_paths (gcc::context *ctxt)
{
  return new pass_isolate_erroneous_paths (ctxt);
}