111
|
1 //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
|
|
2 //
|
145
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
111
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8 //
|
|
9 // Scanf/printf implementation for use in *Sanitizer interceptors.
|
|
10 // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
|
|
11 // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
|
|
12 // with a few common GNU extensions.
|
|
13 //
|
|
14 //===----------------------------------------------------------------------===//
|
|
15
|
|
16 #include <stdarg.h>
|
|
17
|
|
18 static const char *parse_number(const char *p, int *out) {
|
|
19 *out = internal_atoll(p);
|
|
20 while (*p >= '0' && *p <= '9')
|
|
21 ++p;
|
|
22 return p;
|
|
23 }
|
|
24
|
|
25 static const char *maybe_parse_param_index(const char *p, int *out) {
|
|
26 // n$
|
|
27 if (*p >= '0' && *p <= '9') {
|
|
28 int number;
|
|
29 const char *q = parse_number(p, &number);
|
|
30 CHECK(q);
|
|
31 if (*q == '$') {
|
|
32 *out = number;
|
|
33 p = q + 1;
|
|
34 }
|
|
35 }
|
|
36
|
|
37 // Otherwise, do not change p. This will be re-parsed later as the field
|
|
38 // width.
|
|
39 return p;
|
|
40 }
|
|
41
|
|
42 static bool char_is_one_of(char c, const char *s) {
|
|
43 return !!internal_strchr(s, c);
|
|
44 }
|
|
45
|
|
// Parses an optional length modifier at p.
// Recognized modifiers are the single letters j, z, t, L, q and the letters
// h and l, each of which may be doubled ("hh", "ll"). The letters found are
// written to ll[0] (and ll[1] when doubled); entries that are not written are
// left untouched, so the caller is expected to pass a zeroed array.
// Returns the pointer past the modifier, or p itself if there is none. The end
// of the string is never treated as a modifier, so p is not advanced past the
// terminating NUL.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
    case 'j':
    case 'z':
    case 't':
    case 'L':
    case 'q':
      ll[0] = *p;
      ++p;
      break;
    case 'h':
    case 'l': {
      const char letter = *p;
      ll[0] = letter;
      ++p;
      // "hh" / "ll": the same letter repeated.
      if (*p == letter) {
        ll[1] = letter;
        ++p;
      }
      break;
    }
    default:
      // Not a length modifier (this includes the terminating NUL).
      break;
  }
  return p;
}
|
|
67
|
|
68 // Returns true if the character is an integer conversion specifier.
|
|
69 static bool format_is_integer_conv(char c) {
|
|
70 return char_is_one_of(c, "diouxXn");
|
|
71 }
|
|
72
|
|
73 // Returns true if the character is an floating point conversion specifier.
|
|
74 static bool format_is_float_conv(char c) {
|
|
75 return char_is_one_of(c, "aAeEfFgG");
|
|
76 }
|
|
77
|
|
78 // Returns string output character size for string-like conversions,
|
|
79 // or 0 if the conversion is invalid.
|
|
80 static int format_get_char_size(char convSpecifier,
|
|
81 const char lengthModifier[2]) {
|
|
82 if (char_is_one_of(convSpecifier, "CS")) {
|
|
83 return sizeof(wchar_t);
|
|
84 }
|
|
85
|
|
86 if (char_is_one_of(convSpecifier, "cs[")) {
|
|
87 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
|
|
88 return sizeof(wchar_t);
|
|
89 else if (lengthModifier[0] == '\0')
|
|
90 return sizeof(char);
|
|
91 }
|
|
92
|
|
93 return 0;
|
|
94 }
|
|
95
|
|
// Special, non-positive results of the *_get_value_size() helpers. Any
// positive result is the actual size in bytes.
enum FormatStoreSize {
  // The conversion specifier is not valid.
  FSS_INVALID = 0,
  // The size is not known in advance; it can be computed as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // The size is not known in advance; it can be computed as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2
};
|
|
106
|
|
107 // Returns the memory size of a format directive (if >0), or a value of
|
|
108 // FormatStoreSize.
|
|
109 static int format_get_value_size(char convSpecifier,
|
|
110 const char lengthModifier[2],
|
|
111 bool promote_float) {
|
|
112 if (format_is_integer_conv(convSpecifier)) {
|
|
113 switch (lengthModifier[0]) {
|
|
114 case 'h':
|
|
115 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
|
|
116 case 'l':
|
|
117 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
|
|
118 case 'q':
|
|
119 return sizeof(long long);
|
|
120 case 'L':
|
|
121 return sizeof(long long);
|
|
122 case 'j':
|
|
123 return sizeof(INTMAX_T);
|
|
124 case 'z':
|
|
125 return sizeof(SIZE_T);
|
|
126 case 't':
|
|
127 return sizeof(PTRDIFF_T);
|
|
128 case 0:
|
|
129 return sizeof(int);
|
|
130 default:
|
|
131 return FSS_INVALID;
|
|
132 }
|
|
133 }
|
|
134
|
|
135 if (format_is_float_conv(convSpecifier)) {
|
|
136 switch (lengthModifier[0]) {
|
|
137 case 'L':
|
|
138 case 'q':
|
|
139 return sizeof(long double);
|
|
140 case 'l':
|
|
141 return lengthModifier[1] == 'l' ? sizeof(long double)
|
|
142 : sizeof(double);
|
|
143 case 0:
|
|
144 // Printf promotes floats to doubles but scanf does not
|
|
145 return promote_float ? sizeof(double) : sizeof(float);
|
|
146 default:
|
|
147 return FSS_INVALID;
|
|
148 }
|
|
149 }
|
|
150
|
|
151 if (convSpecifier == 'p') {
|
|
152 if (lengthModifier[0] != 0)
|
|
153 return FSS_INVALID;
|
|
154 return sizeof(void *);
|
|
155 }
|
|
156
|
|
157 return FSS_INVALID;
|
|
158 }
|
|
159
|
|
// One conversion directive of a scanf format string, as filled in by
// scanf_parse_next().
struct ScanfDirective {
  // Argument index given as "%n$", or -1 if not specified.
  int argIdx;
  // Field width; stays 0 when the directive does not give one.
  int fieldWidth;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the last character of the directive.
  const char *end;
  // Assignment is suppressed ("*").
  bool suppressed;
  // Space is to be allocated for the result ("m").
  bool allocate;
  // Length modifier letters; unused entries are 0.
  char lengthModifier[2];
  // Conversion specifier character.
  char convSpecifier;
  // Set when "%a" is followed by s, S or [...], which may be the old GNU
  // allocating extension rather than a plain %a conversion.
  bool maybeGnuMalloc;
};
|
|
171
|
|
172 // Parse scanf format string. If a valid directive in encountered, it is
|
|
173 // returned in dir. This function returns the pointer to the first
|
|
174 // unprocessed character, or 0 in case of error.
|
|
175 // In case of the end-of-string, a pointer to the closing \0 is returned.
|
|
176 static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
|
|
177 ScanfDirective *dir) {
|
|
178 internal_memset(dir, 0, sizeof(*dir));
|
|
179 dir->argIdx = -1;
|
|
180
|
|
181 while (*p) {
|
|
182 if (*p != '%') {
|
|
183 ++p;
|
|
184 continue;
|
|
185 }
|
|
186 dir->begin = p;
|
|
187 ++p;
|
|
188 // %%
|
|
189 if (*p == '%') {
|
|
190 ++p;
|
|
191 continue;
|
|
192 }
|
|
193 if (*p == '\0') {
|
|
194 return nullptr;
|
|
195 }
|
|
196 // %n$
|
|
197 p = maybe_parse_param_index(p, &dir->argIdx);
|
|
198 CHECK(p);
|
|
199 // *
|
|
200 if (*p == '*') {
|
|
201 dir->suppressed = true;
|
|
202 ++p;
|
|
203 }
|
|
204 // Field width
|
|
205 if (*p >= '0' && *p <= '9') {
|
|
206 p = parse_number(p, &dir->fieldWidth);
|
|
207 CHECK(p);
|
|
208 if (dir->fieldWidth <= 0) // Width if at all must be non-zero
|
|
209 return nullptr;
|
|
210 }
|
|
211 // m
|
|
212 if (*p == 'm') {
|
|
213 dir->allocate = true;
|
|
214 ++p;
|
|
215 }
|
|
216 // Length modifier.
|
|
217 p = maybe_parse_length_modifier(p, dir->lengthModifier);
|
|
218 // Conversion specifier.
|
|
219 dir->convSpecifier = *p++;
|
|
220 // Consume %[...] expression.
|
|
221 if (dir->convSpecifier == '[') {
|
|
222 if (*p == '^')
|
|
223 ++p;
|
|
224 if (*p == ']')
|
|
225 ++p;
|
|
226 while (*p && *p != ']')
|
|
227 ++p;
|
|
228 if (*p == 0)
|
|
229 return nullptr; // unexpected end of string
|
|
230 // Consume the closing ']'.
|
|
231 ++p;
|
|
232 }
|
|
233 // This is unfortunately ambiguous between old GNU extension
|
|
234 // of %as, %aS and %a[...] and newer POSIX %a followed by
|
|
235 // letters s, S or [.
|
|
236 if (allowGnuMalloc && dir->convSpecifier == 'a' &&
|
|
237 !dir->lengthModifier[0]) {
|
|
238 if (*p == 's' || *p == 'S') {
|
|
239 dir->maybeGnuMalloc = true;
|
|
240 ++p;
|
|
241 } else if (*p == '[') {
|
|
242 // Watch for %a[h-j%d], if % appears in the
|
|
243 // [...] range, then we need to give up, we don't know
|
|
244 // if scanf will parse it as POSIX %a [h-j %d ] or
|
|
245 // GNU allocation of string with range dh-j plus %.
|
|
246 const char *q = p + 1;
|
|
247 if (*q == '^')
|
|
248 ++q;
|
|
249 if (*q == ']')
|
|
250 ++q;
|
|
251 while (*q && *q != ']' && *q != '%')
|
|
252 ++q;
|
|
253 if (*q == 0 || *q == '%')
|
|
254 return nullptr;
|
|
255 p = q + 1; // Consume the closing ']'.
|
|
256 dir->maybeGnuMalloc = true;
|
|
257 }
|
|
258 }
|
|
259 dir->end = p;
|
|
260 break;
|
|
261 }
|
|
262 return p;
|
|
263 }
|
|
264
|
|
265 static int scanf_get_value_size(ScanfDirective *dir) {
|
|
266 if (dir->allocate) {
|
|
267 if (!char_is_one_of(dir->convSpecifier, "cCsS["))
|
|
268 return FSS_INVALID;
|
|
269 return sizeof(char *);
|
|
270 }
|
|
271
|
|
272 if (dir->maybeGnuMalloc) {
|
|
273 if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
|
|
274 return FSS_INVALID;
|
|
275 // This is ambiguous, so check the smaller size of char * (if it is
|
|
276 // a GNU extension of %as, %aS or %a[...]) and float (if it is
|
|
277 // POSIX %a followed by s, S or [ letters).
|
|
278 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
|
|
279 }
|
|
280
|
|
281 if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
|
|
282 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
|
|
283 unsigned charSize =
|
|
284 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
|
|
285 if (charSize == 0)
|
|
286 return FSS_INVALID;
|
|
287 if (dir->fieldWidth == 0) {
|
|
288 if (!needsTerminator)
|
|
289 return charSize;
|
|
290 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
|
|
291 }
|
|
292 return (dir->fieldWidth + needsTerminator) * charSize;
|
|
293 }
|
|
294
|
|
295 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
|
|
296 }
|
|
297
|
|
298 // Common part of *scanf interceptors.
|
|
299 // Process format string and va_list, and report all store ranges.
|
|
300 // Stops when "consuming" n_inputs input items.
|
|
301 static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
|
|
302 const char *format, va_list aq) {
|
|
303 CHECK_GT(n_inputs, 0);
|
|
304 const char *p = format;
|
|
305
|
|
306 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
|
|
307
|
|
308 while (*p) {
|
|
309 ScanfDirective dir;
|
|
310 p = scanf_parse_next(p, allowGnuMalloc, &dir);
|
|
311 if (!p)
|
|
312 break;
|
|
313 if (dir.convSpecifier == 0) {
|
|
314 // This can only happen at the end of the format string.
|
|
315 CHECK_EQ(*p, 0);
|
|
316 break;
|
|
317 }
|
|
318 // Here the directive is valid. Do what it says.
|
|
319 if (dir.argIdx != -1) {
|
|
320 // Unsupported.
|
|
321 break;
|
|
322 }
|
|
323 if (dir.suppressed)
|
|
324 continue;
|
|
325 int size = scanf_get_value_size(&dir);
|
|
326 if (size == FSS_INVALID) {
|
|
327 Report("%s: WARNING: unexpected format specifier in scanf interceptor: ",
|
|
328 SanitizerToolName, "%.*s\n", dir.end - dir.begin, dir.begin);
|
|
329 break;
|
|
330 }
|
|
331 void *argp = va_arg(aq, void *);
|
|
332 if (dir.convSpecifier != 'n')
|
|
333 --n_inputs;
|
|
334 if (n_inputs < 0)
|
|
335 break;
|
|
336 if (size == FSS_STRLEN) {
|
|
337 size = internal_strlen((const char *)argp) + 1;
|
|
338 } else if (size == FSS_WCSLEN) {
|
|
339 // FIXME: actually use wcslen() to calculate it.
|
|
340 size = 0;
|
|
341 }
|
|
342 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
|
|
343 }
|
|
344 }
|
|
345
|
|
346 #if SANITIZER_INTERCEPT_PRINTF
|
|
347
|
|
// One conversion directive of a printf format string, as filled in by
// printf_parse_next().
struct PrintfDirective {
  // Literal field width; stays 0 when none is given.
  int fieldWidth;
  // Literal precision; stays 0 when none is given.
  int fieldPrecision;
  // Width argument index, or -1 if not specified ("%*n$").
  int argIdx;
  // Precision argument index, or -1 if not specified (".*n$").
  int precisionIdx;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the conversion specifier.
  const char *end;
  // The field width is given as '*'.
  bool starredWidth;
  // The precision is given as '*'.
  bool starredPrecision;
  // Length modifier letters; unused entries are 0.
  char lengthModifier[2];
  // Conversion specifier character.
  char convSpecifier;
};
|
|
360
|
|
361 static const char *maybe_parse_number(const char *p, int *out) {
|
|
362 if (*p >= '0' && *p <= '9')
|
|
363 p = parse_number(p, out);
|
|
364 return p;
|
|
365 }
|
|
366
|
|
367 static const char *maybe_parse_number_or_star(const char *p, int *out,
|
|
368 bool *star) {
|
|
369 if (*p == '*') {
|
|
370 *star = true;
|
|
371 ++p;
|
|
372 } else {
|
|
373 *star = false;
|
|
374 p = maybe_parse_number(p, out);
|
|
375 }
|
|
376 return p;
|
|
377 }
|
|
378
|
|
379 // Parse printf format string. Same as scanf_parse_next.
|
|
380 static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
|
|
381 internal_memset(dir, 0, sizeof(*dir));
|
|
382 dir->argIdx = -1;
|
|
383 dir->precisionIdx = -1;
|
|
384
|
|
385 while (*p) {
|
|
386 if (*p != '%') {
|
|
387 ++p;
|
|
388 continue;
|
|
389 }
|
|
390 dir->begin = p;
|
|
391 ++p;
|
|
392 // %%
|
|
393 if (*p == '%') {
|
|
394 ++p;
|
|
395 continue;
|
|
396 }
|
|
397 if (*p == '\0') {
|
|
398 return nullptr;
|
|
399 }
|
|
400 // %n$
|
|
401 p = maybe_parse_param_index(p, &dir->precisionIdx);
|
|
402 CHECK(p);
|
|
403 // Flags
|
|
404 while (char_is_one_of(*p, "'-+ #0")) {
|
|
405 ++p;
|
|
406 }
|
|
407 // Field width
|
|
408 p = maybe_parse_number_or_star(p, &dir->fieldWidth,
|
|
409 &dir->starredWidth);
|
|
410 if (!p)
|
|
411 return nullptr;
|
|
412 // Precision
|
|
413 if (*p == '.') {
|
|
414 ++p;
|
|
415 // Actual precision is optional (surprise!)
|
|
416 p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
|
|
417 &dir->starredPrecision);
|
|
418 if (!p)
|
|
419 return nullptr;
|
|
420 // m$
|
|
421 if (dir->starredPrecision) {
|
|
422 p = maybe_parse_param_index(p, &dir->precisionIdx);
|
|
423 CHECK(p);
|
|
424 }
|
|
425 }
|
|
426 // Length modifier.
|
|
427 p = maybe_parse_length_modifier(p, dir->lengthModifier);
|
|
428 // Conversion specifier.
|
|
429 dir->convSpecifier = *p++;
|
|
430 dir->end = p;
|
|
431 break;
|
|
432 }
|
|
433 return p;
|
|
434 }
|
|
435
|
|
436 static int printf_get_value_size(PrintfDirective *dir) {
|
|
437 if (char_is_one_of(dir->convSpecifier, "cCsS")) {
|
|
438 unsigned charSize =
|
|
439 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
|
|
440 if (charSize == 0)
|
|
441 return FSS_INVALID;
|
|
442 if (char_is_one_of(dir->convSpecifier, "sS")) {
|
|
443 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
|
|
444 }
|
|
445 return charSize;
|
|
446 }
|
|
447
|
|
448 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
|
|
449 }
|
|
450
|
|
// Consumes one scalar (non-pointer) argument from the va_list that aq points
// to. convSpecifier selects between the floating-point and the integer
// argument classes, and size is the value size in bytes. Floating-point sizes
// 8, 12 and 16 and integer sizes 1, 2, 4 and 8 are accepted; sizes 1, 2 and 4
// are all fetched as u32. For any other size a warning is reported and the
// macro executes a bare "return;", so it can only be used inside a function
// returning void.
// The arguments are parenthesized in the expansion, and size is converted to
// int for the "%d" in the warning because callers also pass sizeof
// expressions.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (format_is_float_conv(convSpecifier)) {                     \
      switch (size) {                                              \
      case 8:                                                      \
        va_arg(*(aq), double);                                     \
        break;                                                     \
      case 12:                                                     \
        va_arg(*(aq), long double);                                \
        break;                                                     \
      case 16:                                                     \
        va_arg(*(aq), long double);                                \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %d\n", (int)(size));       \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        va_arg(*(aq), u32);                                        \
        break;                                                     \
      case 8:                                                      \
        va_arg(*(aq), u64);                                        \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %d\n", (int)(size));       \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)
|
|
486
|
|
487 // Common part of *printf interceptors.
|
|
488 // Process format string and va_list, and report all load ranges.
|
|
489 static void printf_common(void *ctx, const char *format, va_list aq) {
|
|
490 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
|
|
491
|
|
492 const char *p = format;
|
|
493
|
|
494 while (*p) {
|
|
495 PrintfDirective dir;
|
|
496 p = printf_parse_next(p, &dir);
|
|
497 if (!p)
|
|
498 break;
|
|
499 if (dir.convSpecifier == 0) {
|
|
500 // This can only happen at the end of the format string.
|
|
501 CHECK_EQ(*p, 0);
|
|
502 break;
|
|
503 }
|
|
504 // Here the directive is valid. Do what it says.
|
|
505 if (dir.argIdx != -1 || dir.precisionIdx != -1) {
|
|
506 // Unsupported.
|
|
507 break;
|
|
508 }
|
|
509 if (dir.starredWidth) {
|
|
510 // Dynamic width
|
|
511 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
|
|
512 }
|
|
513 if (dir.starredPrecision) {
|
|
514 // Dynamic precision
|
|
515 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
|
|
516 }
|
|
517 // %m does not require an argument: strlen(errno).
|
|
518 if (dir.convSpecifier == 'm')
|
|
519 continue;
|
|
520 int size = printf_get_value_size(&dir);
|
|
521 if (size == FSS_INVALID) {
|
|
522 static int ReportedOnce;
|
|
523 if (!ReportedOnce++)
|
|
524 Report(
|
|
525 "%s: WARNING: unexpected format specifier in printf "
|
|
526 "interceptor: %.*s (reported once per process)\n",
|
|
527 SanitizerToolName, dir.end - dir.begin, dir.begin);
|
|
528 break;
|
|
529 }
|
|
530 if (dir.convSpecifier == 'n') {
|
|
531 void *argp = va_arg(aq, void *);
|
|
532 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
|
|
533 continue;
|
|
534 } else if (size == FSS_STRLEN) {
|
|
535 if (void *argp = va_arg(aq, void *)) {
|
|
536 if (dir.starredPrecision) {
|
|
537 // FIXME: properly support starred precision for strings.
|
|
538 size = 0;
|
|
539 } else if (dir.fieldPrecision > 0) {
|
|
540 // Won't read more than "precision" symbols.
|
|
541 size = internal_strnlen((const char *)argp, dir.fieldPrecision);
|
|
542 if (size < dir.fieldPrecision) size++;
|
|
543 } else {
|
|
544 // Whole string will be accessed.
|
|
545 size = internal_strlen((const char *)argp) + 1;
|
|
546 }
|
|
547 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
|
|
548 }
|
|
549 } else if (size == FSS_WCSLEN) {
|
|
550 if (void *argp = va_arg(aq, void *)) {
|
|
551 // FIXME: Properly support wide-character strings (via wcsrtombs).
|
|
552 size = 0;
|
|
553 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
|
|
554 }
|
|
555 } else {
|
|
556 // Skip non-pointer args
|
|
557 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
|
|
558 }
|
|
559 }
|
|
560 }
|
|
561
|
|
562 #endif // SANITIZER_INTERCEPT_PRINTF
|