master
  1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
  2//
  3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4// See https://llvm.org/LICENSE.txt for license information.
  5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6//
  7//===----------------------------------------------------------------------===//
  8//
  9// Scanf/printf implementation for use in *Sanitizer interceptors.
 10// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
 11// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
 12// with a few common GNU extensions.
 13//
 14//===----------------------------------------------------------------------===//
 15
 16#include <stdarg.h>
 17
 18static const char *parse_number(const char *p, int *out) {
 19  *out = internal_atoll(p);
 20  while (*p >= '0' && *p <= '9')
 21    ++p;
 22  return p;
 23}
 24
 25static const char *maybe_parse_param_index(const char *p, int *out) {
 26  // n$
 27  if (*p >= '0' && *p <= '9') {
 28    int number;
 29    const char *q = parse_number(p, &number);
 30    CHECK(q);
 31    if (*q == '$') {
 32      *out = number;
 33      p = q + 1;
 34    }
 35  }
 36
 37  // Otherwise, do not change p. This will be re-parsed later as the field
 38  // width.
 39  return p;
 40}
 41
 42static bool char_is_one_of(char c, const char *s) {
 43  return !!internal_strchr(s, c);
 44}
 45
 46static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
 47  if (char_is_one_of(*p, "jztLq")) {
 48    ll[0] = *p;
 49    ++p;
 50  } else if (*p == 'h') {
 51    ll[0] = 'h';
 52    ++p;
 53    if (*p == 'h') {
 54      ll[1] = 'h';
 55      ++p;
 56    }
 57  } else if (*p == 'l') {
 58    ll[0] = 'l';
 59    ++p;
 60    if (*p == 'l') {
 61      ll[1] = 'l';
 62      ++p;
 63    }
 64  }
 65  return p;
 66}
 67
 68// Returns true if the character is an integer conversion specifier.
 69static bool format_is_integer_conv(char c) {
 70#if SANITIZER_GLIBC
 71  if (char_is_one_of(c, "bB"))
 72    return true;
 73#endif
 74  return char_is_one_of(c, "diouxXn");
 75}
 76
 77// Returns true if the character is an floating point conversion specifier.
 78static bool format_is_float_conv(char c) {
 79  return char_is_one_of(c, "aAeEfFgG");
 80}
 81
 82// Returns string output character size for string-like conversions,
 83// or 0 if the conversion is invalid.
 84static int format_get_char_size(char convSpecifier,
 85                                const char lengthModifier[2]) {
 86  if (char_is_one_of(convSpecifier, "CS")) {
 87    return sizeof(wchar_t);
 88  }
 89
 90  if (char_is_one_of(convSpecifier, "cs[")) {
 91    if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
 92      return sizeof(wchar_t);
 93    else if (lengthModifier[0] == '\0')
 94      return sizeof(char);
 95  }
 96
 97  return 0;
 98}
 99
// Special, non-positive results of the *_get_value_size() helpers. Positive
// results are sizes in bytes.
enum FormatStoreSize {
  // Invalid conversion specifier.
  FSS_INVALID = 0,
  // Store size not known in advance; can be calculated as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // Store size not known in advance; can be calculated as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2
};
110
111// Returns the memory size of a format directive (if >0), or a value of
112// FormatStoreSize.
113static int format_get_value_size(char convSpecifier,
114                                 const char lengthModifier[2],
115                                 bool promote_float) {
116  if (format_is_integer_conv(convSpecifier)) {
117    switch (lengthModifier[0]) {
118    case 'h':
119      return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
120    case 'l':
121      return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
122    case 'q':
123      return sizeof(long long);
124    case 'L':
125      return sizeof(long long);
126    case 'j':
127      return sizeof(INTMAX_T);
128    case 'z':
129      return sizeof(SIZE_T);
130    case 't':
131      return sizeof(PTRDIFF_T);
132    case 0:
133      return sizeof(int);
134    default:
135      return FSS_INVALID;
136    }
137  }
138
139  if (format_is_float_conv(convSpecifier)) {
140    switch (lengthModifier[0]) {
141    case 'L':
142    case 'q':
143      return sizeof(long double);
144    case 'l':
145      return lengthModifier[1] == 'l' ? sizeof(long double)
146                                           : sizeof(double);
147    case 0:
148      // Printf promotes floats to doubles but scanf does not
149      return promote_float ? sizeof(double) : sizeof(float);
150    default:
151      return FSS_INVALID;
152    }
153  }
154
155  if (convSpecifier == 'p') {
156    if (lengthModifier[0] != 0)
157      return FSS_INVALID;
158    return sizeof(void *);
159  }
160
161  return FSS_INVALID;
162}
163
// Description of a single scanf directive, as filled in by scanf_parse_next().
struct ScanfDirective {
  // Argument index ("%n$"), or -1 if not specified.
  int argIdx;
  // Explicit maximum field width; 0 if the directive has none.
  int fieldWidth;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the last character of the directive.
  const char *end;
  // Assignment is suppressed ("*").
  bool suppressed;
  // The destination buffer is allocated by scanf ("m").
  bool allocate;
  // Length modifier characters; unused slots are 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 if no directive was parsed.
  char convSpecifier;
  // The directive is "%a" followed by s, S or [...], which may be either the
  // old GNU allocation extension or a POSIX %a followed by literal text.
  bool maybeGnuMalloc;
};
175
176// Parse scanf format string. If a valid directive in encountered, it is
177// returned in dir. This function returns the pointer to the first
178// unprocessed character, or 0 in case of error.
179// In case of the end-of-string, a pointer to the closing \0 is returned.
180static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
181                                    ScanfDirective *dir) {
182  internal_memset(dir, 0, sizeof(*dir));
183  dir->argIdx = -1;
184
185  while (*p) {
186    if (*p != '%') {
187      ++p;
188      continue;
189    }
190    dir->begin = p;
191    ++p;
192    // %%
193    if (*p == '%') {
194      ++p;
195      continue;
196    }
197    if (*p == '\0') {
198      return nullptr;
199    }
200    // %n$
201    p = maybe_parse_param_index(p, &dir->argIdx);
202    CHECK(p);
203    // *
204    if (*p == '*') {
205      dir->suppressed = true;
206      ++p;
207    }
208    // Field width
209    if (*p >= '0' && *p <= '9') {
210      p = parse_number(p, &dir->fieldWidth);
211      CHECK(p);
212      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
213        return nullptr;
214    }
215    // m
216    if (*p == 'm') {
217      dir->allocate = true;
218      ++p;
219    }
220    // Length modifier.
221    p = maybe_parse_length_modifier(p, dir->lengthModifier);
222    // Conversion specifier.
223    dir->convSpecifier = *p++;
224    // Consume %[...] expression.
225    if (dir->convSpecifier == '[') {
226      if (*p == '^')
227        ++p;
228      if (*p == ']')
229        ++p;
230      while (*p && *p != ']')
231        ++p;
232      if (*p == 0)
233        return nullptr; // unexpected end of string
234                        // Consume the closing ']'.
235      ++p;
236    }
237    // This is unfortunately ambiguous between old GNU extension
238    // of %as, %aS and %a[...] and newer POSIX %a followed by
239    // letters s, S or [.
240    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
241        !dir->lengthModifier[0]) {
242      if (*p == 's' || *p == 'S') {
243        dir->maybeGnuMalloc = true;
244        ++p;
245      } else if (*p == '[') {
246        // Watch for %a[h-j%d], if % appears in the
247        // [...] range, then we need to give up, we don't know
248        // if scanf will parse it as POSIX %a [h-j %d ] or
249        // GNU allocation of string with range dh-j plus %.
250        const char *q = p + 1;
251        if (*q == '^')
252          ++q;
253        if (*q == ']')
254          ++q;
255        while (*q && *q != ']' && *q != '%')
256          ++q;
257        if (*q == 0 || *q == '%')
258          return nullptr;
259        p = q + 1; // Consume the closing ']'.
260        dir->maybeGnuMalloc = true;
261      }
262    }
263    dir->end = p;
264    break;
265  }
266  return p;
267}
268
269static int scanf_get_value_size(ScanfDirective *dir) {
270  if (dir->allocate) {
271    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
272      return FSS_INVALID;
273    return sizeof(char *);
274  }
275
276  if (dir->maybeGnuMalloc) {
277    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
278      return FSS_INVALID;
279    // This is ambiguous, so check the smaller size of char * (if it is
280    // a GNU extension of %as, %aS or %a[...]) and float (if it is
281    // POSIX %a followed by s, S or [ letters).
282    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
283  }
284
285  if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
286    bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
287    unsigned charSize =
288        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
289    if (charSize == 0)
290      return FSS_INVALID;
291    if (dir->fieldWidth == 0) {
292      if (!needsTerminator)
293        return charSize;
294      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
295    }
296    return (dir->fieldWidth + needsTerminator) * charSize;
297  }
298
299  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
300}
301
302// Common part of *scanf interceptors.
303// Process format string and va_list, and report all store ranges.
304// Stops when "consuming" n_inputs input items.
305static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
306                         const char *format, va_list aq) {
307  CHECK_GT(n_inputs, 0);
308  const char *p = format;
309
310  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
311
312  while (*p) {
313    ScanfDirective dir;
314    p = scanf_parse_next(p, allowGnuMalloc, &dir);
315    if (!p)
316      break;
317    if (dir.convSpecifier == 0) {
318      // This can only happen at the end of the format string.
319      CHECK_EQ(*p, 0);
320      break;
321    }
322    // Here the directive is valid. Do what it says.
323    if (dir.argIdx != -1) {
324      // Unsupported.
325      break;
326    }
327    if (dir.suppressed)
328      continue;
329    int size = scanf_get_value_size(&dir);
330    if (size == FSS_INVALID) {
331      Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
332             SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
333      break;
334    }
335    void *argp = va_arg(aq, void *);
336    if (dir.convSpecifier != 'n')
337      --n_inputs;
338    if (n_inputs < 0)
339      break;
340    if (size == FSS_STRLEN) {
341      size = internal_strlen((const char *)argp) + 1;
342    } else if (size == FSS_WCSLEN) {
343      // FIXME: actually use wcslen() to calculate it.
344      size = 0;
345    }
346    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
347    // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well.
348    if (dir.allocate) {
349      if (char *buf = *(char **)argp) {
350        if (dir.convSpecifier == 'c')
351          size = 1;
352        else if (dir.convSpecifier == 'C')
353          size = sizeof(wchar_t);
354        else if (dir.convSpecifier == 'S')
355          size = (internal_wcslen((wchar_t *)buf) + 1) * sizeof(wchar_t);
356        else  // 's' or '['
357          size = internal_strlen(buf) + 1;
358        COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
359      }
360    }
361  }
362}
363
364#if SANITIZER_INTERCEPT_PRINTF
365
// Description of a single printf directive, as filled in by
// printf_parse_next().
struct PrintfDirective {
  // Explicit numeric field width; 0 if the directive has none.
  int fieldWidth;
  // Explicit numeric precision; 0 if the directive has none.
  int fieldPrecision;
  // Width argument index ("%*n$"), or -1 if not specified.
  int argIdx;
  // Precision argument index (".*n$"), or -1 if not specified.
  int precisionIdx;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the last character of the directive.
  const char *end;
  // The field width is taken from an argument ("*").
  bool starredWidth;
  // The precision is taken from an argument (".*").
  bool starredPrecision;
  // Length modifier characters; unused slots are 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 if no directive was parsed.
  char convSpecifier;
};
378
379static const char *maybe_parse_number(const char *p, int *out) {
380  if (*p >= '0' && *p <= '9')
381    p = parse_number(p, out);
382  return p;
383}
384
385static const char *maybe_parse_number_or_star(const char *p, int *out,
386                                              bool *star) {
387  if (*p == '*') {
388    *star = true;
389    ++p;
390  } else {
391    *star = false;
392    p = maybe_parse_number(p, out);
393  }
394  return p;
395}
396
397// Parse printf format string. Same as scanf_parse_next.
398static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
399  internal_memset(dir, 0, sizeof(*dir));
400  dir->argIdx = -1;
401  dir->precisionIdx = -1;
402
403  while (*p) {
404    if (*p != '%') {
405      ++p;
406      continue;
407    }
408    dir->begin = p;
409    ++p;
410    // %%
411    if (*p == '%') {
412      ++p;
413      continue;
414    }
415    if (*p == '\0') {
416      return nullptr;
417    }
418    // %n$
419    p = maybe_parse_param_index(p, &dir->precisionIdx);
420    CHECK(p);
421    // Flags
422    while (char_is_one_of(*p, "'-+ #0")) {
423      ++p;
424    }
425    // Field width
426    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
427                                   &dir->starredWidth);
428    if (!p)
429      return nullptr;
430    // Precision
431    if (*p == '.') {
432      ++p;
433      // Actual precision is optional (surprise!)
434      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
435                                     &dir->starredPrecision);
436      if (!p)
437        return nullptr;
438      // m$
439      if (dir->starredPrecision) {
440        p = maybe_parse_param_index(p, &dir->precisionIdx);
441        CHECK(p);
442      }
443    }
444    // Length modifier.
445    p = maybe_parse_length_modifier(p, dir->lengthModifier);
446    // Conversion specifier.
447    dir->convSpecifier = *p++;
448    dir->end = p;
449    break;
450  }
451  return p;
452}
453
454static int printf_get_value_size(PrintfDirective *dir) {
455  if (char_is_one_of(dir->convSpecifier, "cCsS")) {
456    unsigned charSize =
457        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
458    if (charSize == 0)
459      return FSS_INVALID;
460    if (char_is_one_of(dir->convSpecifier, "sS")) {
461      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
462    }
463    return charSize;
464  }
465
466  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
467}
468
// Consumes one scalar (non-pointer) variadic argument from the va_list
// pointed to by |aq|, picking the va_arg type from the conversion specifier
// and the value size in bytes:
//   integer conversions: 1, 2 or 4 bytes -> u32, 8 bytes -> u64
//   floating-point conversions: 8 bytes -> double,
//                               12 or 16 bytes -> long double
// Any other size is reported via Report() and the macro then executes a bare
// "return", so it can only be used inside functions returning void.
// |size| is evaluated once in the switch and once more only when reporting.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                              \
  do {                                                                        \
    if (!format_is_float_conv(convSpecifier)) {                               \
      switch (size) {                                                         \
      case 1:                                                                 \
      case 2:                                                                 \
      case 4:                                                                 \
        va_arg(*aq, u32);                                                     \
        break;                                                                \
      case 8:                                                                 \
        va_arg(*aq, u64);                                                     \
        break;                                                                \
      default:                                                                \
        Report("WARNING: unexpected arg size"                                 \
               " in printf interceptor: %zu\n",                               \
               static_cast<uptr>(size));                                      \
        return;                                                               \
      }                                                                       \
    } else {                                                                  \
      switch (size) {                                                         \
      case 8:                                                                 \
        va_arg(*aq, double);                                                  \
        break;                                                                \
      case 12:                                                                \
      case 16:                                                                \
        va_arg(*aq, long double);                                             \
        break;                                                                \
      default:                                                                \
        Report("WARNING: unexpected floating-point arg size"                  \
               " in printf interceptor: %zu\n",                               \
               static_cast<uptr>(size));                                      \
        return;                                                               \
      }                                                                       \
    }                                                                         \
  } while (0)
504
505// Common part of *printf interceptors.
506// Process format string and va_list, and report all load ranges.
507static void printf_common(void *ctx, const char *format, va_list aq) {
508  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
509
510  const char *p = format;
511
512  while (*p) {
513    PrintfDirective dir;
514    p = printf_parse_next(p, &dir);
515    if (!p)
516      break;
517    if (dir.convSpecifier == 0) {
518      // This can only happen at the end of the format string.
519      CHECK_EQ(*p, 0);
520      break;
521    }
522    // Here the directive is valid. Do what it says.
523    if (dir.argIdx != -1 || dir.precisionIdx != -1) {
524      // Unsupported.
525      break;
526    }
527    if (dir.starredWidth) {
528      // Dynamic width
529      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
530    }
531    if (dir.starredPrecision) {
532      // Dynamic precision
533      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
534    }
535    // %m does not require an argument: strlen(errno).
536    if (dir.convSpecifier == 'm')
537      continue;
538    int size = printf_get_value_size(&dir);
539    if (size == FSS_INVALID) {
540      static int ReportedOnce;
541      if (!ReportedOnce++)
542        Report(
543            "%s: WARNING: unexpected format specifier in printf "
544            "interceptor: %.*s (reported once per process)\n",
545            SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
546      break;
547    }
548    if (dir.convSpecifier == 'n') {
549      void *argp = va_arg(aq, void *);
550      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
551      continue;
552    } else if (size == FSS_STRLEN) {
553      if (void *argp = va_arg(aq, void *)) {
554        uptr len;
555        if (dir.starredPrecision) {
556          // FIXME: properly support starred precision for strings.
557          len = 0;
558        } else if (dir.fieldPrecision > 0) {
559          // Won't read more than "precision" symbols.
560          len = internal_strnlen((const char *)argp, dir.fieldPrecision);
561          if (len < (uptr)dir.fieldPrecision)
562            len++;
563        } else {
564          // Whole string will be accessed.
565          len = internal_strlen((const char *)argp) + 1;
566        }
567        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len);
568      }
569    } else if (size == FSS_WCSLEN) {
570      if (void *argp = va_arg(aq, void *)) {
571        // FIXME: Properly support wide-character strings (via wcsrtombs).
572        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0);
573      }
574    } else {
575      // Skip non-pointer args
576      SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
577    }
578  }
579}
580
581#endif // SANITIZER_INTERCEPT_PRINTF