master
  1//===-- String to integer conversion utils ----------------------*- C++ -*-===//
  2//
  3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4// See https://llvm.org/LICENSE.txt for license information.
  5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6//
  7//===----------------------------------------------------------------------===//
  8
  9// -----------------------------------------------------------------------------
 10//                               **** WARNING ****
 11// This file is shared with libc++. You should also be careful when adding
 12// dependencies to this file, since it needs to build for all libc++ targets.
 13// -----------------------------------------------------------------------------
 14
 15#ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
 16#define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
 17
 18#include "hdr/errno_macros.h" // For ERANGE
 19#include "src/__support/CPP/limits.h"
 20#include "src/__support/CPP/type_traits.h"
 21#include "src/__support/CPP/type_traits/make_unsigned.h"
 22#include "src/__support/big_int.h"
 23#include "src/__support/common.h"
 24#include "src/__support/ctype_utils.h"
 25#include "src/__support/macros/config.h"
 26#include "src/__support/str_to_num_result.h"
 27#include "src/__support/uint128.h"
 28
 29namespace LIBC_NAMESPACE_DECL {
 30namespace internal {
 31
 32// Returns the idx to the first character in src that is not a whitespace
 33// character (as determined by isspace())
 34LIBC_INLINE size_t
 35first_non_whitespace(const char *__restrict src,
 36                     size_t src_len = cpp::numeric_limits<size_t>::max()) {
 37  size_t src_cur = 0;
 38  while (src_cur < src_len && internal::isspace(src[src_cur])) {
 39    ++src_cur;
 40  }
 41  return src_cur;
 42}
 43
 44// checks if the next 3 characters of the string pointer are the start of a
 45// hexadecimal number. Does not advance the string pointer.
 46LIBC_INLINE bool
 47is_hex_start(const char *__restrict src,
 48             size_t src_len = cpp::numeric_limits<size_t>::max()) {
 49  if (src_len < 3)
 50    return false;
 51  return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) &&
 52         b36_char_to_int(*(src + 2)) < 16;
 53}
 54
 55// Takes the address of the string pointer and parses the base from the start of
 56// it.
 57LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
 58  // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
 59  // sequence of the decimal digits and the letters a (or A) through f (or F)
 60  // with values 10 through 15 respectively." (C standard 6.4.4.1)
 61  if (is_hex_start(src, src_len))
 62    return 16;
 63  // An octal number is defined as "the prefix 0 optionally followed by a
 64  // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
 65  // number that starts with 0, including just 0, is an octal number.
 66  if (src_len > 0 && src[0] == '0')
 67    return 8;
 68  // A decimal number is defined as beginning "with a nonzero digit and
 69  // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
 70  return 10;
 71}
 72
 73// -----------------------------------------------------------------------------
 74//                               **** WARNING ****
 75// This interface is shared with libc++, if you change this interface you need
 76// to update it in both libc and libc++.
 77// -----------------------------------------------------------------------------
 78// Takes a pointer to a string and the base to convert to. This function is used
 79// as the backend for all of the string to int functions.
 80template <class T>
 81LIBC_INLINE StrToNumResult<T>
 82strtointeger(const char *__restrict src, int base,
 83             const size_t src_len = cpp::numeric_limits<size_t>::max()) {
 84  using ResultType = make_integral_or_big_int_unsigned_t<T>;
 85
 86  ResultType result = 0;
 87
 88  bool is_number = false;
 89  size_t src_cur = 0;
 90  int error_val = 0;
 91
 92  if (src_len == 0)
 93    return {0, 0, 0};
 94
 95  if (base < 0 || base == 1 || base > 36)
 96    return {0, 0, EINVAL};
 97
 98  src_cur = first_non_whitespace(src, src_len);
 99
100  char result_sign = '+';
101  if (src[src_cur] == '+' || src[src_cur] == '-') {
102    result_sign = src[src_cur];
103    ++src_cur;
104  }
105
106  if (base == 0)
107    base = infer_base(src + src_cur, src_len - src_cur);
108
109  if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
110    src_cur = src_cur + 2;
111
112  constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
113  const bool is_positive = (result_sign == '+');
114
115  ResultType constexpr NEGATIVE_MAX =
116      !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
117                   : cpp::numeric_limits<T>::max();
118  ResultType const abs_max =
119      (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
120  ResultType const abs_max_div_by_base =
121      abs_max / static_cast<ResultType>(base);
122
123  while (src_cur < src_len && isalnum(src[src_cur])) {
124    int cur_digit = b36_char_to_int(src[src_cur]);
125    if (cur_digit >= base)
126      break;
127
128    is_number = true;
129    ++src_cur;
130
131    // If the number has already hit the maximum value for the current type then
132    // the result cannot change, but we still need to advance src to the end of
133    // the number.
134    if (result == abs_max) {
135      error_val = ERANGE;
136      continue;
137    }
138
139    if (result > abs_max_div_by_base) {
140      result = abs_max;
141      error_val = ERANGE;
142    } else {
143      result = result * static_cast<ResultType>(base);
144    }
145    if (result > abs_max - static_cast<ResultType>(cur_digit)) {
146      result = abs_max;
147      error_val = ERANGE;
148    } else {
149      result = result + static_cast<ResultType>(cur_digit);
150    }
151  }
152
153  ptrdiff_t str_len = is_number ? static_cast<ptrdiff_t>(src_cur) : 0;
154
155  if (error_val == ERANGE) {
156    if (is_positive || IS_UNSIGNED)
157      return {cpp::numeric_limits<T>::max(), str_len, error_val};
158    else // T is signed and there is a negative overflow
159      return {cpp::numeric_limits<T>::min(), str_len, error_val};
160  }
161
162  return {static_cast<T>(is_positive ? result : -result), str_len, error_val};
163}
164
165} // namespace internal
166} // namespace LIBC_NAMESPACE_DECL
167
168#endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H