145
|
1 /* An experimental state machine, for tracking "taint": unsanitized uses
|
|
2 of data potentially under an attacker's control.
|
|
3
|
|
4 Copyright (C) 2019-2020 Free Software Foundation, Inc.
|
|
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
|
|
6
|
|
7 This file is part of GCC.
|
|
8
|
|
9 GCC is free software; you can redistribute it and/or modify it
|
|
10 under the terms of the GNU General Public License as published by
|
|
11 the Free Software Foundation; either version 3, or (at your option)
|
|
12 any later version.
|
|
13
|
|
14 GCC is distributed in the hope that it will be useful, but
|
|
15 WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
17 General Public License for more details.
|
|
18
|
|
19 You should have received a copy of the GNU General Public License
|
|
20 along with GCC; see the file COPYING3. If not see
|
|
21 <http://www.gnu.org/licenses/>. */
|
|
22
|
|
23 #include "config.h"
|
|
24 #include "system.h"
|
|
25 #include "coretypes.h"
|
|
26 #include "tree.h"
|
|
27 #include "function.h"
|
|
28 #include "basic-block.h"
|
|
29 #include "gimple.h"
|
|
30 #include "options.h"
|
|
31 #include "diagnostic-path.h"
|
|
32 #include "diagnostic-metadata.h"
|
|
33 #include "function.h"
|
|
34 #include "analyzer/analyzer.h"
|
|
35 #include "diagnostic-event-id.h"
|
|
36 #include "analyzer/analyzer-logging.h"
|
|
37 #include "analyzer/sm.h"
|
|
38 #include "analyzer/pending-diagnostic.h"
|
|
39
|
|
40 #if ENABLE_ANALYZER
|
|
41
|
|
42 namespace ana {
|
|
43
|
|
44 namespace {
|
|
45
|
|
46 /* An experimental state machine, for tracking "taint": unsanitized uses
|
|
47 of data potentially under an attacker's control. */
|
|
48
|
|
49 class taint_state_machine : public state_machine
|
|
50 {
|
|
51 public:
|
|
52 taint_state_machine (logger *logger);
|
|
53
|
|
54 bool inherited_state_p () const FINAL OVERRIDE { return true; }
|
|
55
|
|
56 bool on_stmt (sm_context *sm_ctxt,
|
|
57 const supernode *node,
|
|
58 const gimple *stmt) const FINAL OVERRIDE;
|
|
59
|
|
60 void on_condition (sm_context *sm_ctxt,
|
|
61 const supernode *node,
|
|
62 const gimple *stmt,
|
|
63 tree lhs,
|
|
64 enum tree_code op,
|
|
65 tree rhs) const FINAL OVERRIDE;
|
|
66
|
|
67 bool can_purge_p (state_t s) const FINAL OVERRIDE;
|
|
68
|
|
69 /* Start state. */
|
|
70 state_t m_start;
|
|
71
|
|
72 /* State for a "tainted" value: unsanitized data potentially under an
|
|
73 attacker's control. */
|
|
74 state_t m_tainted;
|
|
75
|
|
76 /* State for a "tainted" value that has a lower bound. */
|
|
77 state_t m_has_lb;
|
|
78
|
|
79 /* State for a "tainted" value that has an upper bound. */
|
|
80 state_t m_has_ub;
|
|
81
|
|
82 /* Stop state, for a value we don't want to track any more. */
|
|
83 state_t m_stop;
|
|
84 };
|
|
85
|
|
86 enum bounds
|
|
87 {
|
|
88 BOUNDS_NONE,
|
|
89 BOUNDS_UPPER,
|
|
90 BOUNDS_LOWER
|
|
91 };
|
|
92
|
|
93 class tainted_array_index
|
|
94 : public pending_diagnostic_subclass<tainted_array_index>
|
|
95 {
|
|
96 public:
|
|
97 tainted_array_index (const taint_state_machine &sm, tree arg,
|
|
98 enum bounds has_bounds)
|
|
99 : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds) {}
|
|
100
|
|
101 const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
|
|
102
|
|
103 bool operator== (const tainted_array_index &other) const
|
|
104 {
|
|
105 return same_tree_p (m_arg, other.m_arg);
|
|
106 }
|
|
107
|
|
108 bool emit (rich_location *rich_loc) FINAL OVERRIDE
|
|
109 {
|
|
110 diagnostic_metadata m;
|
|
111 m.add_cwe (129);
|
|
112 switch (m_has_bounds)
|
|
113 {
|
|
114 default:
|
|
115 gcc_unreachable ();
|
|
116 case BOUNDS_NONE:
|
|
117 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
|
|
118 "use of tainted value %qE in array lookup"
|
|
119 " without bounds checking",
|
|
120 m_arg);
|
|
121 break;
|
|
122 case BOUNDS_UPPER:
|
|
123 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
|
|
124 "use of tainted value %qE in array lookup"
|
|
125 " without lower-bounds checking",
|
|
126 m_arg);
|
|
127 break;
|
|
128 case BOUNDS_LOWER:
|
|
129 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
|
|
130 "use of tainted value %qE in array lookup"
|
|
131 " without upper-bounds checking",
|
|
132 m_arg);
|
|
133 break;
|
|
134 }
|
|
135 }
|
|
136
|
|
137 label_text describe_state_change (const evdesc::state_change &change)
|
|
138 FINAL OVERRIDE
|
|
139 {
|
|
140 if (change.m_new_state == m_sm.m_tainted)
|
|
141 {
|
|
142 if (change.m_origin)
|
|
143 return change.formatted_print ("%qE has an unchecked value here"
|
|
144 " (from %qE)",
|
|
145 change.m_expr, change.m_origin);
|
|
146 else
|
|
147 return change.formatted_print ("%qE gets an unchecked value here",
|
|
148 change.m_expr);
|
|
149 }
|
|
150 else if (change.m_new_state == m_sm.m_has_lb)
|
|
151 return change.formatted_print ("%qE has its lower bound checked here",
|
|
152 change.m_expr);
|
|
153 else if (change.m_new_state == m_sm.m_has_ub)
|
|
154 return change.formatted_print ("%qE has its upper bound checked here",
|
|
155 change.m_expr);
|
|
156 return label_text ();
|
|
157 }
|
|
158
|
|
159 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
|
|
160 {
|
|
161 switch (m_has_bounds)
|
|
162 {
|
|
163 default:
|
|
164 gcc_unreachable ();
|
|
165 case BOUNDS_NONE:
|
|
166 return ev.formatted_print ("use of tainted value %qE in array lookup"
|
|
167 " without bounds checking",
|
|
168 m_arg);
|
|
169 case BOUNDS_UPPER:
|
|
170 return ev.formatted_print ("use of tainted value %qE in array lookup"
|
|
171 " without lower-bounds checking",
|
|
172 m_arg);
|
|
173 case BOUNDS_LOWER:
|
|
174 return ev.formatted_print ("use of tainted value %qE in array lookup"
|
|
175 " without upper-bounds checking",
|
|
176 m_arg);
|
|
177 }
|
|
178 }
|
|
179
|
|
180 private:
|
|
181 const taint_state_machine &m_sm;
|
|
182 tree m_arg;
|
|
183 enum bounds m_has_bounds;
|
|
184 };
|
|
185
|
|
186 /* taint_state_machine's ctor. */
|
|
187
|
|
188 taint_state_machine::taint_state_machine (logger *logger)
|
|
189 : state_machine ("taint", logger)
|
|
190 {
|
|
191 m_start = add_state ("start");
|
|
192 m_tainted = add_state ("tainted");
|
|
193 m_has_lb = add_state ("has_lb");
|
|
194 m_has_ub = add_state ("has_ub");
|
|
195 m_stop = add_state ("stop");
|
|
196 }
|
|
197
|
|
198 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
|
|
199
|
|
200 bool
|
|
201 taint_state_machine::on_stmt (sm_context *sm_ctxt,
|
|
202 const supernode *node,
|
|
203 const gimple *stmt) const
|
|
204 {
|
|
205 if (const gcall *call = dyn_cast <const gcall *> (stmt))
|
|
206 if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
|
|
207 {
|
|
208 if (is_named_call_p (callee_fndecl, "fread", call, 4))
|
|
209 {
|
|
210 tree arg = gimple_call_arg (call, 0);
|
|
211 arg = sm_ctxt->get_readable_tree (arg);
|
|
212
|
|
213 sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
|
|
214
|
|
215 /* Dereference an ADDR_EXPR. */
|
|
216 // TODO: should the engine do this?
|
|
217 if (TREE_CODE (arg) == ADDR_EXPR)
|
|
218 sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
|
|
219 m_start, m_tainted);
|
|
220 return true;
|
|
221 }
|
|
222 }
|
|
223 // TODO: ...etc; many other sources of untrusted data
|
|
224
|
|
225 if (const gassign *assign = dyn_cast <const gassign *> (stmt))
|
|
226 {
|
|
227 tree rhs1 = gimple_assign_rhs1 (assign);
|
|
228 enum tree_code op = gimple_assign_rhs_code (assign);
|
|
229
|
|
230 /* Check array accesses. */
|
|
231 if (op == ARRAY_REF)
|
|
232 {
|
|
233 tree arg = TREE_OPERAND (rhs1, 1);
|
|
234 arg = sm_ctxt->get_readable_tree (arg);
|
|
235
|
|
236 /* Unsigned types have an implicit lower bound. */
|
|
237 bool is_unsigned = false;
|
|
238 if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
|
|
239 is_unsigned = TYPE_UNSIGNED (TREE_TYPE (arg));
|
|
240
|
|
241 /* Complain about missing bounds. */
|
|
242 sm_ctxt->warn_for_state
|
|
243 (node, stmt, arg, m_tainted,
|
|
244 new tainted_array_index (*this, arg,
|
|
245 is_unsigned
|
|
246 ? BOUNDS_LOWER : BOUNDS_NONE));
|
|
247 sm_ctxt->on_transition (node, stmt, arg, m_tainted, m_stop);
|
|
248
|
|
249 /* Complain about missing upper bound. */
|
|
250 sm_ctxt->warn_for_state (node, stmt, arg, m_has_lb,
|
|
251 new tainted_array_index (*this, arg,
|
|
252 BOUNDS_LOWER));
|
|
253 sm_ctxt->on_transition (node, stmt, arg, m_has_lb, m_stop);
|
|
254
|
|
255 /* Complain about missing lower bound. */
|
|
256 if (!is_unsigned)
|
|
257 {
|
|
258 sm_ctxt->warn_for_state (node, stmt, arg, m_has_ub,
|
|
259 new tainted_array_index (*this, arg,
|
|
260 BOUNDS_UPPER));
|
|
261 sm_ctxt->on_transition (node, stmt, arg, m_has_ub, m_stop);
|
|
262 }
|
|
263 }
|
|
264 }
|
|
265
|
|
266 return false;
|
|
267 }
|
|
268
|
|
269 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
|
|
270 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
|
|
271 and states 'has_ub' and 'has_lb' to 'stop'. */
|
|
272
|
|
273 void
|
|
274 taint_state_machine::on_condition (sm_context *sm_ctxt,
|
|
275 const supernode *node,
|
|
276 const gimple *stmt,
|
|
277 tree lhs,
|
|
278 enum tree_code op,
|
|
279 tree rhs ATTRIBUTE_UNUSED) const
|
|
280 {
|
|
281 if (stmt == NULL)
|
|
282 return;
|
|
283
|
|
284 // TODO: this doesn't use the RHS; should we make it symmetric?
|
|
285
|
|
286 // TODO
|
|
287 switch (op)
|
|
288 {
|
|
289 //case NE_EXPR:
|
|
290 //case EQ_EXPR:
|
|
291 case GE_EXPR:
|
|
292 case GT_EXPR:
|
|
293 {
|
|
294 sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
|
|
295 m_has_lb);
|
|
296 sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
|
|
297 m_stop);
|
|
298 }
|
|
299 break;
|
|
300 case LE_EXPR:
|
|
301 case LT_EXPR:
|
|
302 {
|
|
303 sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
|
|
304 m_has_ub);
|
|
305 sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
|
|
306 m_stop);
|
|
307 }
|
|
308 break;
|
|
309 default:
|
|
310 break;
|
|
311 }
|
|
312 }
|
|
313
|
|
314 bool
|
|
315 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
|
|
316 {
|
|
317 return true;
|
|
318 }
|
|
319
|
|
320 } // anonymous namespace
|
|
321
|
|
322 /* Internal interface to this file. */
|
|
323
|
|
324 state_machine *
|
|
325 make_taint_state_machine (logger *logger)
|
|
326 {
|
|
327 return new taint_state_machine (logger);
|
|
328 }
|
|
329
|
|
330 } // namespace ana
|
|
331
|
|
332 #endif /* #if ENABLE_ANALYZER */
|