master
  1//===----------------------------------------------------------------------===//
  2//
  3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4// See https://llvm.org/LICENSE.txt for license information.
  5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6//
  7//===----------------------------------------------------------------------===//
  8
  9#ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
 10#define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
 11
 12// These headers are in the shared LLVM-libc header library.
 13#include "shared/fp_bits.h"
 14#include "shared/str_to_float.h"
 15#include "shared/str_to_integer.h"
 16
 17#include <__assert>
 18#include <__config>
 19#include <cctype>
 20#include <charconv>
 21#include <concepts>
 22#include <limits>
 23
 24// Included for the _Floating_type_traits class
 25#include "to_chars_floating_point.h"
 26
 27_LIBCPP_BEGIN_NAMESPACE_STD
 28
 29// Parses an infinity string.
 30// Valid strings are case insensitive and contain INF or INFINITY.
 31//
 32// - __first is the first argument to std::from_chars. When the string is invalid
 33//   this value is returned as ptr in the result.
 34// - __last is the last argument of std::from_chars.
 35// - __value is the value argument of std::from_chars,
 36// - __ptr is the current position is the input string. This is points beyond
 37//   the initial I character.
 38// - __negative whether a valid string represents -inf or +inf.
 39template <floating_point _Fp>
 40__from_chars_result<_Fp>
 41__from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
 42  if (__last - __ptr < 2) [[unlikely]]
 43    return {_Fp{0}, 0, errc::invalid_argument};
 44
 45  if (std::tolower(__ptr[0]) != 'n' || std::tolower(__ptr[1]) != 'f') [[unlikely]]
 46    return {_Fp{0}, 0, errc::invalid_argument};
 47
 48  __ptr += 2;
 49
 50  // At this point the result is valid and contains INF.
 51  // When the remaining part contains INITY this will be consumed. Otherwise
 52  // only INF is consumed. For example INFINITZ will consume INF and ignore
 53  // INITZ.
 54
 55  if (__last - __ptr >= 5              //
 56      && std::tolower(__ptr[0]) == 'i' //
 57      && std::tolower(__ptr[1]) == 'n' //
 58      && std::tolower(__ptr[2]) == 'i' //
 59      && std::tolower(__ptr[3]) == 't' //
 60      && std::tolower(__ptr[4]) == 'y')
 61    __ptr += 5;
 62
 63  if constexpr (numeric_limits<_Fp>::has_infinity) {
 64    if (__negative)
 65      return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
 66
 67    return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
 68  } else {
 69    return {_Fp{0}, __ptr - __first, errc::result_out_of_range};
 70  }
 71}
 72
 73// Parses a nan string.
 74// Valid strings are case insensitive and contain INF or INFINITY.
 75//
 76// - __first is the first argument to std::from_chars. When the string is invalid
 77//   this value is returned as ptr in the result.
 78// - __last is the last argument of std::from_chars.
 79// - __value is the value argument of std::from_chars,
 80// - __ptr is the current position is the input string. This is points beyond
 81//   the initial N character.
 82// - __negative whether a valid string represents -nan or +nan.
 83template <floating_point _Fp>
 84__from_chars_result<_Fp>
 85__from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
 86  if (__last - __ptr < 2) [[unlikely]]
 87    return {_Fp{0}, 0, errc::invalid_argument};
 88
 89  if (std::tolower(__ptr[0]) != 'a' || std::tolower(__ptr[1]) != 'n') [[unlikely]]
 90    return {_Fp{0}, 0, errc::invalid_argument};
 91
 92  __ptr += 2;
 93
 94  // At this point the result is valid and contains NAN. When the remaining
 95  // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise
 96  // only NAN is consumed. For example NAN(abcd will consume NAN and ignore
 97  // (abcd.
 98  if (__last - __ptr >= 2 && __ptr[0] == '(') {
 99    size_t __offset = 1;
100    do {
101      if (__ptr[__offset] == ')') {
102        __ptr += __offset + 1;
103        break;
104      }
105      if (__ptr[__offset] != '_' && !std::isalnum(__ptr[__offset]))
106        break;
107      ++__offset;
108    } while (__ptr + __offset != __last);
109  }
110
111  if (__negative)
112    return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
113
114  return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
115}
116
// The result of parsing the fractional constant (the digits and the optional
// radix point) of a floating-point value.
template <class _Tp>
struct __fractional_constant_result {
  // Index of the first character that is not part of the constant.
  size_t __offset{size_t(-1)};
  // The digits that fit in _Tp, accumulated as an integer.
  _Tp __mantissa{0};
  // The exponent adjustment caused by the position of the radix point and by
  // digits that did not fit in __mantissa.
  int __exponent{0};
  // Whether a non-zero digit was dropped since it did not fit in __mantissa.
  bool __truncated{false};
  // Whether at least one digit was parsed.
  bool __is_valid{false};
};

// Parses the hex constant part of the hexadecimal floating-point value.
// - __input start of buffer given to from_chars
// - __n the number of elements in the buffer
// - __offset where to start parsing. The input can have an optional sign, the
//   offset starts after this sign.
//
// Every hexadecimal digit behind the radix point lowers the exponent by 4.
// Every digit in front of the radix point that no longer fits in the mantissa
// raises the exponent by 4.
template <class _Tp>
__fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) {
  __fractional_constant_result<_Tp> __result;

  const _Tp __mantissa_truncate_threshold = std::numeric_limits<_Tp>::max() / 16;
  bool __fraction                         = false;
  for (; __offset < __n; ++__offset) {
    // The <cctype> functions have undefined behaviour for negative arguments
    // other than EOF, so the character is classified as unsigned char.
    const unsigned char __c = static_cast<unsigned char>(__input[__offset]);
    if (std::isxdigit(__c)) {
      __result.__is_valid = true;

      const uint32_t __digit =
          static_cast<uint32_t>(std::isdigit(__c) ? __c - '0' : std::tolower(__c) - 'a' + 10);

      if (__result.__mantissa < __mantissa_truncate_threshold) {
        __result.__mantissa = (__result.__mantissa * 16) + __digit;
        if (__fraction)
          __result.__exponent -= 4;
      } else {
        // The mantissa is full. The digit is dropped; only record whether
        // information was lost and keep the magnitude correct.
        if (__digit > 0)
          __result.__truncated = true;
        if (!__fraction)
          __result.__exponent += 4;
      }
    } else if (__c == '.') {
      if (__fraction)
        break; // this means that __input[__offset] points to a second radix point, ending the number.

      __fraction = true;
    } else
      break;
  }

  __result.__offset = __offset;
  return __result;
}
185
// The outcome of parsing the exponent part of a floating-point value.
struct __exponent_result {
  // Index of the first character that is not part of the exponent.
  size_t __offset = static_cast<size_t>(-1);
  // The parsed exponent, 0 when no exponent was present.
  int __value = 0;
  // Whether the input contained an exponent.
  bool __present = false;
};
191
192// When the exponent is not present the result of the struct contains
193// __offset, 0, false. This allows using the results unconditionally, the
194// __present is important for the scientific notation, where the value is
195// mandatory.
196__exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) {
197  if (__offset + 1 < __n &&                          // an exponent always needs at least one digit.
198      std::tolower(__input[__offset]) == __marker && //
199      !std::isspace(__input[__offset + 1])           // leading whitespace is not allowed.
200  ) {
201    ++__offset;
202    LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e =
203        LIBC_NAMESPACE::shared::strtointeger<int32_t>(__input + __offset, 10, __n - __offset);
204    // __result.error contains the errno value, 0 or ERANGE these are not interesting.
205    // If the number of characters parsed is 0 it means there was no number.
206    if (__e.parsed_len != 0)
207      return {__offset + __e.parsed_len, __e.value, true};
208    else
209      --__offset; // the assumption of a valid exponent was not true, undo eating the exponent character.
210  }
211
212  return {__offset, 0, false};
213}
214
// Adds the exponent adjustment of the fractional constant to the parsed
// exponent and clamps the sum to
// [-__max_biased_exponent, __max_biased_exponent].
//
// Here we do this operation as int64 to avoid overflow.
//
// The function is inline since it is defined in a header.
inline int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
  const int64_t __sum = __fractional + __exponent;

  if (__sum > __max_biased_exponent)
    return __max_biased_exponent;

  if (__sum < -__max_biased_exponent)
    return -__max_biased_exponent;

  // The sum is within the range of an int, the conversion is lossless.
  return static_cast<int32_t>(__sum);
}
227
228template <class _Fp, class _Tp>
229__from_chars_result<_Fp>
230__calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) {
231  auto __r = LIBC_NAMESPACE::shared::FPBits<_Fp>();
232  __r.set_mantissa(__mantissa);
233  __r.set_biased_exponent(__exponent);
234
235  // C17 7.12.1/6
236  // The result underflows if the magnitude of the mathematical result is so
237  // small that the mathematical result cannot be represented, without
238  // extraordinary roundoff error, in an object of the specified type.237) If
239  // the result underflows, the function returns an implementation-defined
240  // value whose magnitude is no greater than the smallest normalized positive
241  // number in the specified type; if the integer expression math_errhandling
242  // & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is
243  // implementation-defined; if the integer expression math_errhandling &
244  // MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point
245  // exception is raised is implementation-defined.
246  //
247  // LLVM-LIBC sets ERAGNE for subnormal values
248  //
249  // [charconv.from.chars]/1
250  //   ... If the parsed value is not in the range representable by the type of
251  //   value, value is unmodified and the member ec of the return value is
252  //   equal to errc::result_out_of_range. ...
253  //
254  // Undo the ERANGE for subnormal values.
255  if (__result.__ec == errc::result_out_of_range && __r.is_subnormal() && !__r.is_zero())
256    __result.__ec = errc{};
257
258  if (__negative)
259    __result.__value = -__r.get_val();
260  else
261    __result.__value = __r.get_val();
262
263  return __result;
264}
265
266// Implements from_chars for decimal floating-point values.
267// __first forwarded from from_chars
268// __last forwarded from from_chars
269// __value forwarded from from_chars
270// __fmt forwarded from from_chars
271// __ptr the start of the buffer to parse. This is after the optional sign character.
272// __negative should __value be set to a negative value?
273//
274// This function and __from_chars_floating_point_decimal are similar. However
275// the similar parts are all in helper functions. So the amount of code
276// duplication is minimal.
277template <floating_point _Fp>
278__from_chars_result<_Fp>
279__from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
280  size_t __n         = __last - __first;
281  ptrdiff_t __offset = __ptr - __first;
282
283  auto __fractional =
284      std::__parse_fractional_hex_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
285  if (!__fractional.__is_valid)
286    return {_Fp{0}, 0, errc::invalid_argument};
287
288  auto __parsed_exponent = std::__parse_exponent(__first, __n, __fractional.__offset, 'p');
289  __offset               = __parsed_exponent.__offset;
290  int __exponent         = std::__merge_exponents(
291      __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
292
293  __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}};
294  LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
295  if (__fractional.__mantissa != 0) {
296    auto __temp = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>(
297        {__fractional.__mantissa, __exponent},
298        __fractional.__truncated,
299        LIBC_NAMESPACE::shared::RoundDirection::Nearest);
300    __expanded_float = __temp.num;
301    if (__temp.error == ERANGE) {
302      __result.__ec = errc::result_out_of_range;
303    }
304  }
305
306  return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
307}
308
309// Parses the hex constant part of the decimal float value.
310// - input start of buffer given to from_chars
311// - __n the number of elements in the buffer
312// - __offset where to start parsing. The input can have an optional sign, the
313//   offset starts after this sign.
314template <class _Tp>
315__fractional_constant_result<_Tp>
316__parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) {
317  __fractional_constant_result<_Tp> __result;
318
319  const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 10;
320  bool __fraction                         = false;
321  for (; __offset < __n; ++__offset) {
322    if (std::isdigit(__input[__offset])) {
323      __result.__is_valid = true;
324
325      uint32_t __digit = __input[__offset] - '0';
326      if (__result.__mantissa < __mantissa_truncate_threshold) {
327        __result.__mantissa = (__result.__mantissa * 10) + __digit;
328        if (__fraction)
329          --__result.__exponent;
330      } else {
331        if (__digit > 0)
332          __result.__truncated = true;
333        if (!__fraction)
334          ++__result.__exponent;
335      }
336    } else if (__input[__offset] == '.') {
337      if (__fraction)
338        break; // this means that __input[__offset] points to a second decimal point, ending the number.
339
340      __fraction = true;
341    } else
342      break;
343  }
344
345  __result.__offset = __offset;
346  return __result;
347}
348
349// Implements from_chars for decimal floating-point values.
350// __first forwarded from from_chars
351// __last forwarded from from_chars
352// __value forwarded from from_chars
353// __fmt forwarded from from_chars
354// __ptr the start of the buffer to parse. This is after the optional sign character.
355// __negative should __value be set to a negative value?
356template <floating_point _Fp>
357__from_chars_result<_Fp> __from_chars_floating_point_decimal(
358    const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) {
359  ptrdiff_t __n      = __last - __first;
360  ptrdiff_t __offset = __ptr - __first;
361
362  auto __fractional =
363      std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
364  if (!__fractional.__is_valid)
365    return {_Fp{0}, 0, errc::invalid_argument};
366
367  __offset = __fractional.__offset;
368
369  // LWG3456 Pattern used by std::from_chars is underspecified
370  // This changes fixed to ignore a possible exponent instead of making its
371  // existance an error.
372  int __exponent;
373  if (__fmt == chars_format::fixed) {
374    __exponent =
375        std::__merge_exponents(__fractional.__exponent, 0, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
376  } else {
377    auto __parsed_exponent = std::__parse_exponent(__first, __n, __offset, 'e');
378    if (__fmt == chars_format::scientific && !__parsed_exponent.__present) {
379      // [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed,
380      // the otherwise optional exponent part shall appear;
381      return {_Fp{0}, 0, errc::invalid_argument};
382    }
383
384    __offset   = __parsed_exponent.__offset;
385    __exponent = std::__merge_exponents(
386        __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
387  }
388
389  __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}};
390  LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
391  if (__fractional.__mantissa != 0) {
392    // This function expects to parse a positive value. This means it does not
393    // take a __first, __n as arguments, since __first points to '-' for
394    // negative values.
395    auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>(
396        {__fractional.__mantissa, __exponent},
397        __fractional.__truncated,
398        LIBC_NAMESPACE::shared::RoundDirection::Nearest,
399        __ptr,
400        __last - __ptr);
401    __expanded_float = __temp.num;
402    if (__temp.error == ERANGE) {
403      __result.__ec = errc::result_out_of_range;
404    }
405  }
406
407  return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
408}
409
410template <floating_point _Fp>
411__from_chars_result<_Fp>
412__from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) {
413  if (__first == __last) [[unlikely]]
414    return {_Fp{0}, 0, errc::invalid_argument};
415
416  const char* __ptr = __first;
417  bool __negative   = *__ptr == '-';
418  if (__negative) {
419    ++__ptr;
420    if (__ptr == __last) [[unlikely]]
421      return {_Fp{0}, 0, errc::invalid_argument};
422  }
423
424  // [charconv.from.chars]
425  //   [Note 1: If the pattern allows for an optional sign, but the string has
426  //   no digit characters following the sign, no characters match the pattern.
427  //   -- end note]
428  // This is true for integrals, floating point allows -.0
429
430  // [charconv.from.chars]/6.2
431  //   if fmt has chars_format::scientific set but not chars_format::fixed, the
432  //   otherwise optional exponent part shall appear;
433  // Since INF/NAN do not have an exponent this value is not valid.
434  //
435  // LWG3456 Pattern used by std::from_chars is underspecified
436  // Does not address this point, but proposed option B does solve this issue,
437  // Both MSVC STL and libstdc++ implement this this behaviour.
438  switch (std::tolower(*__ptr)) {
439  case 'i':
440    return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + 1, __negative);
441  case 'n':
442    if constexpr (numeric_limits<_Fp>::has_quiet_NaN)
443      // NOTE: The pointer passed here will be parsed in the default C locale.
444      // This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected.
445      return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + 1, __negative);
446    return {_Fp{0}, 0, errc::invalid_argument};
447  }
448
449  if (__fmt == chars_format::hex)
450    return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative);
451
452  return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative);
453}
454
455_LIBCPP_END_NAMESPACE_STD
456
457#endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H