zig/lib/libcxx/include/__format/parser_std_format

   1// -*- C++ -*-
   2//===----------------------------------------------------------------------===//
   3//
   4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   5// See https://llvm.org/LICENSE.txt for license information.
   6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   7//
   8//===----------------------------------------------------------------------===//
   9
  10#ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
  11#define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
  12
  13/// \file Contains the std-format-spec parser.
  14///
  15/// Most of the code can be reused in the chrono-format-spec.
  16/// This header has some support for the chrono-format-spec since it doesn't
  17/// affect the std-format-spec.
  18
  19#include <__algorithm/copy_n.h>
  20#include <__algorithm/min.h>
  21#include <__assert>
  22#include <__concepts/arithmetic.h>
  23#include <__concepts/same_as.h>
  24#include <__config>
  25#include <__format/format_arg.h>
  26#include <__format/format_error.h>
  27#include <__format/format_parse_context.h>
  28#include <__format/format_string.h>
  29#include <__format/unicode.h>
  30#include <__format/width_estimation_table.h>
  31#include <__iterator/concepts.h>
  32#include <__iterator/iterator_traits.h> // iter_value_t
  33#include <__memory/addressof.h>
  34#include <__type_traits/common_type.h>
  35#include <__type_traits/is_constant_evaluated.h>
  36#include <__type_traits/is_trivially_copyable.h>
  37#include <__variant/monostate.h>
  38#include <cstdint>
  39#include <string>
  40#include <string_view>
  41
  42#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
  43#  pragma GCC system_header
  44#endif
  45
  46_LIBCPP_PUSH_MACROS
  47#include <__undef_macros>
  48
  49_LIBCPP_BEGIN_NAMESPACE_STD
  50
  51#if _LIBCPP_STD_VER >= 20
  52
  53namespace __format_spec {
  54
  55[[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void
  56__throw_invalid_option_format_error(const char* __id, const char* __option) {
  57  std::__throw_format_error(
  58      (string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str());
  59}
  60
  61[[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) {
  62  std::__throw_format_error(
  63      (string("The type option contains an invalid value for ") + __id + " formatting argument").c_str());
  64}
  65
  66template <contiguous_iterator _Iterator, class _ParseContext>
  67_LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result<_Iterator>
  68__parse_arg_id(_Iterator __begin, _Iterator __end, _ParseContext& __ctx) {
  69  using _CharT = iter_value_t<_Iterator>;
  70  // This function is a wrapper to call the real parser. But it does the
  71  // validation for the pre-conditions and post-conditions.
  72  if (__begin == __end)
  73    std::__throw_format_error("End of input while parsing an argument index");
  74
  75  __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __ctx);
  76
  77  if (__r.__last == __end || *__r.__last != _CharT('}'))
  78    std::__throw_format_error("The argument index is invalid");
  79
  80  ++__r.__last;
  81  return __r;
  82}
  83
  84template <class _Context>
  85_LIBCPP_HIDE_FROM_ABI constexpr uint32_t __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
  86  // [format.string.std]/8
  87  //   If the corresponding formatting argument is not of integral type...
  88  // This wording allows char and bool too. LWG-3720 changes the wording to
  89  //    If the corresponding formatting argument is not of standard signed or
  90  //    unsigned integer type,
  91  // This means the 128-bit will not be valid anymore.
  92  // TODO FMT Verify this resolution is accepted and add a test to verify
  93  //          128-bit integrals fail and switch to visit_format_arg.
  94  return std::__visit_format_arg(
  95      [](auto __arg) -> uint32_t {
  96        using _Type = decltype(__arg);
  97        if constexpr (same_as<_Type, monostate>)
  98          std::__throw_format_error("The argument index value is too large for the number of arguments supplied");
  99
 100        // [format.string.std]/8
 101        // If { arg-idopt } is used in a width or precision, the value of the
 102        // corresponding formatting argument is used in its place. If the
 103        // corresponding formatting argument is not of standard signed or unsigned
 104        // integer type, or its value is negative for precision or non-positive for
 105        // width, an exception of type format_error is thrown.
 106        //
 107        // When an integral is used in a format function, it is stored as one of
 108        // the types checked below. Other integral types are promoted. For example,
 109        // a signed char is stored as an int.
 110        if constexpr (same_as<_Type, int> || same_as<_Type, unsigned int> || //
 111                      same_as<_Type, long long> || same_as<_Type, unsigned long long>) {
 112          if constexpr (signed_integral<_Type>) {
 113            if (__arg < 0)
 114              std::__throw_format_error("An argument index may not have a negative value");
 115          }
 116
 117          using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
 118          if (static_cast<_CT>(__arg) > static_cast<_CT>(__format::__number_max))
 119            std::__throw_format_error("The value of the argument index exceeds its maximum value");
 120
 121          return __arg;
 122        } else
 123          std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type");
 124      },
 125      __format_arg);
 126}
 127
/// These fields are a filter for which elements to parse.
///
/// They default to false so when a new field is added it needs to be opted in
/// explicitly.
///
/// Every flag occupies a single bit. `__parser::__parse` consults the flags to
/// decide which options it accepts and `__parser::__validate` uses them to
/// reject options that were parsed but are not allowed.
struct _LIBCPP_HIDE_FROM_ABI __fields {
  // Accept the sign option ('-', '+' or ' ').
  uint16_t __sign_                 : 1 {false};
  // Accept the alternate form option ('#').
  uint16_t __alternate_form_       : 1 {false};
  // Accept the zero-padding option ('0').
  uint16_t __zero_padding_         : 1 {false};
  // Accept the precision option ('.' followed by a value or an arg-id).
  uint16_t __precision_            : 1 {false};
  // Accept the locale-specific form option ('L').
  uint16_t __locale_specific_form_ : 1 {false};
  // Parse the type option.
  uint16_t __type_                 : 1 {false};
  // Determines the valid values for fill.
  //
  // Originally the fill could be any character except { and }. Range-based
  // formatters use the colon to mark the beginning of the
  // underlying-format-spec. To avoid parsing ambiguities these formatter
  // specializations prohibit the use of the colon as a fill character.
  uint16_t __use_range_fill_ : 1 {false};
  // Accept the 'n' option.
  uint16_t __clear_brackets_ : 1 {false};
  // When set, `__parser::__parse` reports an error unless parsing stopped at
  // the end of the input or at a '}'.
  uint16_t __consume_all_    : 1 {false};
};
 149
// Predefined field filters for groups of formatters.
//
// By not placing this constant in the formatter class it's not duplicated for
// char and wchar_t.

// bool: locale-specific form and type.
inline constexpr __fields __fields_bool{.__locale_specific_form_ = true, .__type_ = true, .__consume_all_ = true};
// Integral types: everything except precision.
inline constexpr __fields __fields_integral{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__locale_specific_form_ = true,
    .__type_                 = true,
    .__consume_all_          = true};
// Floating-point types: the same as integral plus precision.
inline constexpr __fields __fields_floating_point{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__precision_            = true,
    .__locale_specific_form_ = true,
    .__type_                 = true,
    .__consume_all_          = true};
// Strings: precision and type.
inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true, .__consume_all_ = true};
// Pointers: zero-padding and type.
inline constexpr __fields __fields_pointer{.__zero_padding_ = true, .__type_ = true, .__consume_all_ = true};

#  if _LIBCPP_STD_VER >= 23
// Tuples and ranges: the colon is not a valid fill and the 'n' option is
// accepted. __consume_all_ is not set, so the parser may stop before the end
// of the format-spec.
inline constexpr __fields __fields_tuple{.__use_range_fill_ = true, .__clear_brackets_ = true};
inline constexpr __fields __fields_range{.__use_range_fill_ = true, .__clear_brackets_ = true};
// Only the options that are always parsed: fill, align and width.
inline constexpr __fields __fields_fill_align_width{};
#  endif
 176
/// The alignment requested in the format-spec.
///
/// The values are stored in 3-bit bit-fields, so every enumerator has to fit
/// in 3 bits.
enum class __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default,
  /// Selected by '<'.
  __left,
  /// Selected by '^'.
  __center,
  /// Selected by '>'.
  __right,
  /// Selected by the zero-padding option when no explicit alignment is set.
  __zero_padding
};
 185
/// The sign option requested in the format-spec.
///
/// The values are stored in 2-bit bit-fields, so every enumerator has to fit
/// in 2 bits.
enum class __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default,
  /// Selected by '-'.
  __minus,
  /// Selected by '+'.
  __plus,
  /// Selected by ' '.
  __space
};
 197
/// The display type requested in the format-spec.
///
/// The numeric value of an enumerator is used as a bit position by
/// \ref __create_type_mask, which rejects values above 31. New enumerators
/// therefore have to keep the total number of enumerators at 32 or less.
enum class __type : uint8_t {
  /// No type is set in the format string.
  __default = 0,
  __string,
  __binary_lower_case,
  __binary_upper_case,
  __octal,
  __decimal,
  __hexadecimal_lower_case,
  __hexadecimal_upper_case,
  __pointer_lower_case,
  __pointer_upper_case,
  __char,
  __hexfloat_lower_case,
  __hexfloat_upper_case,
  __scientific_lower_case,
  __scientific_upper_case,
  __fixed_lower_case,
  __fixed_upper_case,
  __general_lower_case,
  __general_upper_case,
  __debug
};
 220
 221_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __create_type_mask(__type __t) {
 222  uint32_t __shift = static_cast<uint32_t>(__t);
 223  if (__shift == 0)
 224    return 1;
 225
 226  if (__shift > 31)
 227    std::__throw_format_error("The type does not fit in the mask");
 228
 229  return 1 << __shift;
 230}
 231
 232inline constexpr uint32_t __type_mask_integer =
 233    __create_type_mask(__type::__binary_lower_case) |      //
 234    __create_type_mask(__type::__binary_upper_case) |      //
 235    __create_type_mask(__type::__decimal) |                //
 236    __create_type_mask(__type::__octal) |                  //
 237    __create_type_mask(__type::__hexadecimal_lower_case) | //
 238    __create_type_mask(__type::__hexadecimal_upper_case);
 239
/// The std-format-spec flags as stored in \ref __parsed_specifications.
///
/// __alignment_ has to remain the first member; __parsed_specifications reads
/// it through its union regardless of the active member.
struct __std {
  __alignment __alignment_     : 3;
  __sign __sign_               : 2;
  bool __alternate_form_       : 1;
  bool __locale_specific_form_ : 1;
  __type __type_;
};
 247
/// The chrono-format-spec flags as stored in \ref __parsed_specifications.
///
/// __alignment_ has to remain the first member; __parsed_specifications reads
/// it through its union regardless of the active member.
struct __chrono {
  __alignment __alignment_     : 3;
  bool __locale_specific_form_ : 1;
  bool __hour_                 : 1;
  bool __weekday_name_         : 1;
  bool __weekday_              : 1;
  bool __day_of_year_          : 1;
  bool __week_of_year_         : 1;
  bool __month_name_           : 1;
};
 258
// The fill UCS scalar value.
//
// This is always an array, with 1, 2, or 4 elements.
// The size of the data structure is always 32 bits.
//
// Only the first element is initialized explicitly (to a space); the
// remaining elements, if any, are value-initialized.
template <class _CharT>
struct __code_point;

// char: four code units.
template <>
struct __code_point<char> {
  char __data[4] = {' '};
};

#  if _LIBCPP_HAS_WIDE_CHARACTERS
// wchar_t: as many code units as fit in four bytes.
template <>
struct __code_point<wchar_t> {
  wchar_t __data[4 / sizeof(wchar_t)] = {L' '};
};
#  endif
 277
/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
/// chrono-format-spec. This results in some unused members for both
/// specifications. However these unused members don't increase the size
/// of the structure.
///
/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
/// kept stable.
template <class _CharT>
struct __parsed_specifications {
  union {
    // The field __alignment_ is the first element in __std_ and __chrono_.
    // This allows the code to always inspect this value regardless of which
    // member of the union is the active member [class.union.general]/2.
    //
    // This is needed since the generic output routines handle the alignment of
    // the output.
    __alignment __alignment_ : 3;
    __std __std_;
    __chrono __chrono_;
  };

  /// The requested width.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __width_;

  /// The requested precision.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __precision_;

  /// The fill character, stored as code units.
  __code_point<_CharT> __fill_;

  /// \returns whether a width is set, that is whether __width_ is positive.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

  /// \returns whether a precision is set, that is whether __precision_ is not
  /// negative.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
};
 319
// Validate the struct is small and cheap to copy since the struct is passed by
// value in formatting functions.
//
// The wchar_t instantiation is only checked when wide characters are
// available.
static_assert(sizeof(__parsed_specifications<char>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
#  if _LIBCPP_HAS_WIDE_CHARACTERS
static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
#  endif
 328
 329/// The parser for the std-format-spec.
 330///
 331/// Note this class is a member of std::formatter specializations. It's
 332/// expected developers will create their own formatter specializations that
 333/// inherit from the std::formatter specializations. This means this class
 334/// must be ABI stable. To aid the stability the unused bits in the class are
 335/// set to zero. That way they can be repurposed if a future revision of the
 336/// Standards adds new fields to std-format-spec.
 337template <class _CharT>
 338class __parser {
 339public:
 340  // Parses the format specification.
 341  //
 342  // Depending on whether the parsing is done compile-time or run-time
 343  // the method slightly differs.
 344  // - Only parses a field when it is in the __fields. Accepting all
 345  //   fields and then validating the valid ones has a performance impact.
 346  //   This is faster but gives slighly worse error messages.
 347  // - At compile-time when a field is not accepted the parser will still
 348  //   parse it and give an error when it's present. This gives a more
 349  //   accurate error.
 350  // The idea is that most times the format instead of the vformat
 351  // functions are used. In that case the error will be detected during
 352  // compilation and there is no need to pay for the run-time overhead.
 353  template <class _ParseContext>
 354  _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator __parse(_ParseContext& __ctx, __fields __fields) {
 355    auto __begin = __ctx.begin();
 356    auto __end   = __ctx.end();
 357    if (__begin == __end || *__begin == _CharT('}') || (__fields.__use_range_fill_ && *__begin == _CharT(':')))
 358      return __begin;
 359
 360    if (__parse_fill_align(__begin, __end) && __begin == __end)
 361      return __begin;
 362
 363    if (__fields.__sign_) {
 364      if (__parse_sign(__begin) && __begin == __end)
 365        return __begin;
 366    } else if (std::is_constant_evaluated() && __parse_sign(__begin)) {
 367      std::__throw_format_error("The format specification does not allow the sign option");
 368    }
 369
 370    if (__fields.__alternate_form_) {
 371      if (__parse_alternate_form(__begin) && __begin == __end)
 372        return __begin;
 373    } else if (std::is_constant_evaluated() && __parse_alternate_form(__begin)) {
 374      std::__throw_format_error("The format specifier does not allow the alternate form option");
 375    }
 376
 377    if (__fields.__zero_padding_) {
 378      if (__parse_zero_padding(__begin) && __begin == __end)
 379        return __begin;
 380    } else if (std::is_constant_evaluated() && __parse_zero_padding(__begin)) {
 381      std::__throw_format_error("The format specifier does not allow the zero-padding option");
 382    }
 383
 384    if (__parse_width(__begin, __end, __ctx) && __begin == __end)
 385      return __begin;
 386
 387    if (__fields.__precision_) {
 388      if (__parse_precision(__begin, __end, __ctx) && __begin == __end)
 389        return __begin;
 390    } else if (std::is_constant_evaluated() && __parse_precision(__begin, __end, __ctx)) {
 391      std::__throw_format_error("The format specifier does not allow the precision option");
 392    }
 393
 394    if (__fields.__locale_specific_form_) {
 395      if (__parse_locale_specific_form(__begin) && __begin == __end)
 396        return __begin;
 397    } else if (std::is_constant_evaluated() && __parse_locale_specific_form(__begin)) {
 398      std::__throw_format_error("The format specifier does not allow the locale-specific form option");
 399    }
 400
 401    if (__fields.__clear_brackets_) {
 402      if (__parse_clear_brackets(__begin) && __begin == __end)
 403        return __begin;
 404    } else if (std::is_constant_evaluated() && __parse_clear_brackets(__begin)) {
 405      std::__throw_format_error("The format specifier does not allow the n option");
 406    }
 407
 408    if (__fields.__type_)
 409      __parse_type(__begin);
 410
 411    if (!__fields.__consume_all_)
 412      return __begin;
 413
 414    if (__begin != __end && *__begin != _CharT('}'))
 415      std::__throw_format_error("The format specifier should consume the input or end with a '}'");
 416
 417    return __begin;
 418  }
 419
  // Validates the parsed data against the selected fields.
  //
  // The valid fields in the parser may depend on the display type
  // selected. But the type is the last optional field, so by the time
  // it's known an option can't be used, it already has been parsed.
  // This does the validation again.
  //
  // For example an integral may have a sign, zero-padding, or alternate
  // form when the type option is not 'c'. So the generic approach is:
  //
  // typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral);
  // if (__parser_.__type_ == __format_spec::__type::__char) {
  //   __parser_.__validate(__format_spec::__fields_bool, "an integer");
  //   ... // more char adjustments
  // } else {
  //   ... // validate an integral type.
  // }
  //
  // For some types all valid options need a second validation run, like
  // boolean types.
  //
  // Depending on whether the validation is done at compile-time or
  // run-time the error differs
  // - run-time the exception is thrown and contains the type of field
  //   being validated.
  // - at compile-time the line with `std::__throw_format_error` is shown
  //   in the output. In that case it's important for the error to be on one
  //   line.
  // Note future versions of C++ may allow better compile-time error
  // reporting.
  //
  // __fields    the options that are allowed.
  // __id        description of the argument, used in the run-time message.
  // __type_mask the allowed display types as a combination of
  //             __create_type_mask values; the default allows every type.
  _LIBCPP_HIDE_FROM_ABI constexpr void
  __validate(__fields __fields, const char* __id, uint32_t __type_mask = -1) const {
    if (!__fields.__sign_ && __sign_ != __sign::__default) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the sign option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "sign");
    }

    if (!__fields.__alternate_form_ && __alternate_form_) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the alternate form option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "alternate form");
    }

    // Zero-padding is recorded in the alignment, see __parse_zero_padding.
    if (!__fields.__zero_padding_ && __alignment_ == __alignment::__zero_padding) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the zero-padding option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "zero-padding");
    }

    if (!__fields.__precision_ && __precision_ != -1) { // Works both when the precision has a value or an arg-id.
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the precision option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "precision");
    }

    if (!__fields.__locale_specific_form_ && __locale_specific_form_) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the locale-specific form option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "locale-specific form");
    }

    // The parsed type has to be one of the types in the mask.
    if ((__create_type_mask(__type_) & __type_mask) == 0) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier uses an invalid value for the type option");
      else
        __format_spec::__throw_invalid_type_format_error(__id);
    }
  }
 494
 495  /// \returns the `__parsed_specifications` with the resolved dynamic sizes..
 496  _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
 497    return __parsed_specifications<_CharT>{
 498        .__std_ = __std{.__alignment_            = __alignment_,
 499                        .__sign_                 = __sign_,
 500                        .__alternate_form_       = __alternate_form_,
 501                        .__locale_specific_form_ = __locale_specific_form_,
 502                        .__type_                 = __type_},
 503        .__width_{__get_width(__ctx)},
 504        .__precision_{__get_precision(__ctx)},
 505        .__fill_{__fill_}};
 506  }
 507
 508  _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const {
 509    return __parsed_specifications<_CharT>{
 510        .__chrono_ =
 511            __chrono{.__alignment_            = __alignment_,
 512                     .__locale_specific_form_ = __locale_specific_form_,
 513                     .__hour_                 = __hour_,
 514                     .__weekday_name_         = __weekday_name_,
 515                     .__weekday_              = __weekday_,
 516                     .__day_of_year_          = __day_of_year_,
 517                     .__week_of_year_         = __week_of_year_,
 518                     .__month_name_           = __month_name_},
 519        .__width_{__get_width(__ctx)},
 520        .__precision_{__get_precision(__ctx)},
 521        .__fill_{__fill_}};
 522  }
 523
  // The parsed state. The members are public and their layout is part of the
  // ABI of the class, so members must not be reordered or resized.

  // The flags of the std-format-spec; the widths of these bit-fields add up
  // to 8 bits.
  __alignment __alignment_     : 3 {__alignment::__default};
  __sign __sign_               : 2 {__sign::__default};
  bool __alternate_form_       : 1 {false};
  bool __locale_specific_form_ : 1 {false};
  bool __clear_brackets_       : 1 {false};
  __type __type_{__type::__default};

  // These flags are only used for formatting chrono. Since the struct has
  // padding space left it's added to this structure.
  bool __hour_ : 1 {false};

  bool __weekday_name_ : 1 {false};
  bool __weekday_      : 1 {false};

  bool __day_of_year_  : 1 {false};
  bool __week_of_year_ : 1 {false};

  bool __month_name_ : 1 {false};

  // Unused bits, kept zero so they can be given a meaning later.
  uint8_t __reserved_0_ : 2 {0};
  uint8_t __reserved_1_ : 6 {0};
  // These two flags are only used internally and not part of the
  // __parsed_specifications. Therefore put them at the end.
  //
  // When set the matching __width_ or __precision_ holds an arg-id instead of
  // a value.
  bool __width_as_arg_     : 1 {false};
  bool __precision_as_arg_ : 1 {false};

  /// The requested width, either the value or the arg-id.
  int32_t __width_{0};

  /// The requested precision, either the value or the arg-id.
  ///
  /// The value -1 means no precision is set.
  int32_t __precision_{-1};

  /// The fill character, a space unless the format-spec sets a fill.
  __code_point<_CharT> __fill_{};
 557
 558private:
 559  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
 560    switch (__c) {
 561    case _CharT('<'):
 562      __alignment_ = __alignment::__left;
 563      return true;
 564
 565    case _CharT('^'):
 566      __alignment_ = __alignment::__center;
 567      return true;
 568
 569    case _CharT('>'):
 570      __alignment_ = __alignment::__right;
 571      return true;
 572    }
 573    return false;
 574  }
 575
 576  _LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill) {
 577    // The forbidden fill characters all code points formed from a single code unit, thus the
 578    // check can be omitted when more code units are used.
 579    if (__fill == _CharT('{'))
 580      std::__throw_format_error("The fill option contains an invalid value");
 581  }
 582
 583#  if _LIBCPP_HAS_UNICODE
  // range-fill and tuple-fill are identical
  //
  // Parses the optional fill-and-align option. This overload is used for char
  // and for a wchar_t of two bytes, where the fill code point can consist of
  // several code units.
  //
  // The accepted forms are a fill code point followed by an alignment
  // character, or only an alignment character. Returns true when either form
  // is found and advances __begin past it; otherwise returns false without
  // advancing __begin.
  //
  // Reports an error through std::__throw_format_error when the input does
  // not start with a well-formed code point or when the fill is '{'.
  //
  // The caller has to make sure the input isn't empty.
  template <contiguous_iterator _Iterator>
    requires same_as<_CharT, char>
#    if _LIBCPP_HAS_WIDE_CHARACTERS
          || (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2)
#    endif
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) {
    _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
        __begin != __end,
        "when called with an empty input the function will cause "
        "undefined behavior by evaluating data not in the input");
    // Consume one code point; afterwards the view is positioned at the
    // candidate alignment character.
    __unicode::__code_point_view<_CharT> __view{__begin, __end};
    __unicode::__consume_result __consumed = __view.__consume();
    if (__consumed.__status != __unicode::__consume_result::__ok)
      std::__throw_format_error("The format specifier contains malformed Unicode characters");

    // First form: the consumed code point is the fill and it's followed by an
    // alignment character.
    if (__view.__position() < __end && __parse_alignment(*__view.__position())) {
      ptrdiff_t __code_units = __view.__position() - __begin;
      if (__code_units == 1)
        // The forbidden fill characters all are code points encoded
        // in one code unit, thus the check can be omitted when more
        // code units are used.
        __validate_fill_character(*__begin);

      std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0]));
      // Skip the fill and the alignment character.
      __begin += __code_units + 1;
      return true;
    }

    // Second form: only an alignment character.
    if (!__parse_alignment(*__begin))
      return false;

    ++__begin;
    return true;
  }
 619
 620#    if _LIBCPP_HAS_WIDE_CHARACTERS
 621  template <contiguous_iterator _Iterator>
 622    requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4)
 623  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) {
 624    _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
 625        __begin != __end,
 626        "when called with an empty input the function will cause "
 627        "undefined behavior by evaluating data not in the input");
 628    if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) {
 629      if (!__unicode::__is_scalar_value(*__begin))
 630        std::__throw_format_error("The fill option contains an invalid value");
 631
 632      __validate_fill_character(*__begin);
 633
 634      __fill_.__data[0] = *__begin;
 635      __begin += 2;
 636      return true;
 637    }
 638
 639    if (!__parse_alignment(*__begin))
 640      return false;
 641
 642    ++__begin;
 643    return true;
 644  }
 645
 646#    endif // _LIBCPP_HAS_WIDE_CHARACTERS
 647
 648#  else // _LIBCPP_HAS_UNICODE
 649  // range-fill and tuple-fill are identical
 650  template <contiguous_iterator _Iterator>
 651  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) {
 652    _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
 653        __begin != __end,
 654        "when called with an empty input the function will cause "
 655        "undefined behavior by evaluating data not in the input");
 656    if (__begin + 1 != __end) {
 657      if (__parse_alignment(*(__begin + 1))) {
 658        __validate_fill_character(*__begin);
 659
 660        __fill_.__data[0] = *__begin;
 661        __begin += 2;
 662        return true;
 663      }
 664    }
 665
 666    if (!__parse_alignment(*__begin))
 667      return false;
 668
 669    ++__begin;
 670    return true;
 671  }
 672
 673#  endif // _LIBCPP_HAS_UNICODE
 674
 675  template <contiguous_iterator _Iterator>
 676  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) {
 677    switch (*__begin) {
 678    case _CharT('-'):
 679      __sign_ = __sign::__minus;
 680      break;
 681    case _CharT('+'):
 682      __sign_ = __sign::__plus;
 683      break;
 684    case _CharT(' '):
 685      __sign_ = __sign::__space;
 686      break;
 687    default:
 688      return false;
 689    }
 690    ++__begin;
 691    return true;
 692  }
 693
 694  template <contiguous_iterator _Iterator>
 695  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(_Iterator& __begin) {
 696    if (*__begin != _CharT('#'))
 697      return false;
 698
 699    __alternate_form_ = true;
 700    ++__begin;
 701    return true;
 702  }
 703
 704  template <contiguous_iterator _Iterator>
 705  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(_Iterator& __begin) {
 706    if (*__begin != _CharT('0'))
 707      return false;
 708
 709    if (__alignment_ == __alignment::__default)
 710      __alignment_ = __alignment::__zero_padding;
 711    ++__begin;
 712    return true;
 713  }
 714
 715  template <contiguous_iterator _Iterator>
 716  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(_Iterator& __begin, _Iterator __end, auto& __ctx) {
 717    if (*__begin == _CharT('0'))
 718      std::__throw_format_error("The width option should not have a leading zero");
 719
 720    if (*__begin == _CharT('{')) {
 721      __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
 722      __width_as_arg_                     = true;
 723      __width_                            = __r.__value;
 724      __begin                             = __r.__last;
 725      return true;
 726    }
 727
 728    if (*__begin < _CharT('0') || *__begin > _CharT('9'))
 729      return false;
 730
 731    __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
 732    __width_                            = __r.__value;
 733    _LIBCPP_ASSERT_INTERNAL(__width_ != 0,
 734                            "A zero value isn't allowed and should be impossible, "
 735                            "due to validations in this function");
 736    __begin = __r.__last;
 737    return true;
 738  }
 739
 740  template <contiguous_iterator _Iterator>
 741  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(_Iterator& __begin, _Iterator __end, auto& __ctx) {
 742    if (*__begin != _CharT('.'))
 743      return false;
 744
 745    ++__begin;
 746    if (__begin == __end)
 747      std::__throw_format_error("End of input while parsing format specifier precision");
 748
 749    if (*__begin == _CharT('{')) {
 750      __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
 751      __precision_as_arg_                      = true;
 752      __precision_                             = __arg_id.__value;
 753      __begin                                  = __arg_id.__last;
 754      return true;
 755    }
 756
 757    if (*__begin < _CharT('0') || *__begin > _CharT('9'))
 758      std::__throw_format_error("The precision option does not contain a value or an argument index");
 759
 760    __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
 761    __precision_                        = __r.__value;
 762    __precision_as_arg_                 = false;
 763    __begin                             = __r.__last;
 764    return true;
 765  }
 766
 767  template <contiguous_iterator _Iterator>
 768  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(_Iterator& __begin) {
 769    if (*__begin != _CharT('L'))
 770      return false;
 771
 772    __locale_specific_form_ = true;
 773    ++__begin;
 774    return true;
 775  }
 776
 777  template <contiguous_iterator _Iterator>
 778  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_clear_brackets(_Iterator& __begin) {
 779    if (*__begin != _CharT('n'))
 780      return false;
 781
 782    __clear_brackets_ = true;
 783    ++__begin;
 784    return true;
 785  }
 786
 787  template <contiguous_iterator _Iterator>
 788  _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(_Iterator& __begin) {
 789    // Determines the type. It does not validate whether the selected type is
 790    // valid. Most formatters have optional fields that are only allowed for
 791    // certain types. These parsers need to do validation after the type has
 792    // been parsed. So its easier to implement the validation for all types in
 793    // the specific parse function.
 794    switch (*__begin) {
 795    case 'A':
 796      __type_ = __type::__hexfloat_upper_case;
 797      break;
 798    case 'B':
 799      __type_ = __type::__binary_upper_case;
 800      break;
 801    case 'E':
 802      __type_ = __type::__scientific_upper_case;
 803      break;
 804    case 'F':
 805      __type_ = __type::__fixed_upper_case;
 806      break;
 807    case 'G':
 808      __type_ = __type::__general_upper_case;
 809      break;
 810    case 'X':
 811      __type_ = __type::__hexadecimal_upper_case;
 812      break;
 813    case 'a':
 814      __type_ = __type::__hexfloat_lower_case;
 815      break;
 816    case 'b':
 817      __type_ = __type::__binary_lower_case;
 818      break;
 819    case 'c':
 820      __type_ = __type::__char;
 821      break;
 822    case 'd':
 823      __type_ = __type::__decimal;
 824      break;
 825    case 'e':
 826      __type_ = __type::__scientific_lower_case;
 827      break;
 828    case 'f':
 829      __type_ = __type::__fixed_lower_case;
 830      break;
 831    case 'g':
 832      __type_ = __type::__general_lower_case;
 833      break;
 834    case 'o':
 835      __type_ = __type::__octal;
 836      break;
 837    case 'p':
 838      __type_ = __type::__pointer_lower_case;
 839      break;
 840    case 'P':
 841      __type_ = __type::__pointer_upper_case;
 842      break;
 843    case 's':
 844      __type_ = __type::__string;
 845      break;
 846    case 'x':
 847      __type_ = __type::__hexadecimal_lower_case;
 848      break;
 849#  if _LIBCPP_STD_VER >= 23
 850    case '?':
 851      __type_ = __type::__debug;
 852      break;
 853#  endif
 854    default:
 855      return;
 856    }
 857    ++__begin;
 858  }
 859
 860  _LIBCPP_HIDE_FROM_ABI int32_t __get_width(auto& __ctx) const {
 861    if (!__width_as_arg_)
 862      return __width_;
 863
 864    return __format_spec::__substitute_arg_id(__ctx.arg(__width_));
 865  }
 866
 867  _LIBCPP_HIDE_FROM_ABI int32_t __get_precision(auto& __ctx) const {
 868    if (!__precision_as_arg_)
 869      return __precision_;
 870
 871    return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
 872  }
 873};
 874
 875// Validates whether the reserved bitfields don't change the size.
 876static_assert(sizeof(__parser<char>) == 16);
 877#  if _LIBCPP_HAS_WIDE_CHARACTERS
 878static_assert(sizeof(__parser<wchar_t>) == 16);
 879#  endif
 880
 881_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
 882  switch (__type) {
 883  case __format_spec::__type::__default:
 884  case __format_spec::__type::__string:
 885  case __format_spec::__type::__debug:
 886    break;
 887
 888  default:
 889    std::__throw_format_error("The type option contains an invalid value for a string formatting argument");
 890  }
 891}
 892
 893template <class _CharT>
 894_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser, const char* __id) {
 895  __parser.__validate(__format_spec::__fields_bool, __id);
 896  if (__parser.__alignment_ == __alignment::__default)
 897    __parser.__alignment_ = __alignment::__left;
 898}
 899
 900template <class _CharT>
 901_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser, const char* __id) {
 902  __format_spec::__process_display_type_bool_string(__parser, __id);
 903}
 904
 905template <class _CharT>
 906_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser, const char* __id) {
 907  switch (__parser.__type_) {
 908  case __format_spec::__type::__default:
 909  case __format_spec::__type::__string:
 910    __format_spec::__process_display_type_bool_string(__parser, __id);
 911    break;
 912
 913  case __format_spec::__type::__binary_lower_case:
 914  case __format_spec::__type::__binary_upper_case:
 915  case __format_spec::__type::__octal:
 916  case __format_spec::__type::__decimal:
 917  case __format_spec::__type::__hexadecimal_lower_case:
 918  case __format_spec::__type::__hexadecimal_upper_case:
 919    break;
 920
 921  default:
 922    __format_spec::__throw_invalid_type_format_error(__id);
 923  }
 924}
 925
 926template <class _CharT>
 927_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser, const char* __id) {
 928  switch (__parser.__type_) {
 929  case __format_spec::__type::__default:
 930  case __format_spec::__type::__char:
 931  case __format_spec::__type::__debug:
 932    __format_spec::__process_display_type_char(__parser, __id);
 933    break;
 934
 935  case __format_spec::__type::__binary_lower_case:
 936  case __format_spec::__type::__binary_upper_case:
 937  case __format_spec::__type::__octal:
 938  case __format_spec::__type::__decimal:
 939  case __format_spec::__type::__hexadecimal_lower_case:
 940  case __format_spec::__type::__hexadecimal_upper_case:
 941    break;
 942
 943  default:
 944    __format_spec::__throw_invalid_type_format_error(__id);
 945  }
 946}
 947
 948template <class _CharT>
 949_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser, const char* __id) {
 950  switch (__parser.__type_) {
 951  case __format_spec::__type::__default:
 952  case __format_spec::__type::__binary_lower_case:
 953  case __format_spec::__type::__binary_upper_case:
 954  case __format_spec::__type::__octal:
 955  case __format_spec::__type::__decimal:
 956  case __format_spec::__type::__hexadecimal_lower_case:
 957  case __format_spec::__type::__hexadecimal_upper_case:
 958    break;
 959
 960  case __format_spec::__type::__char:
 961    __format_spec::__process_display_type_char(__parser, __id);
 962    break;
 963
 964  default:
 965    __format_spec::__throw_invalid_type_format_error(__id);
 966  }
 967}
 968
 969template <class _CharT>
 970_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser, const char* __id) {
 971  switch (__parser.__type_) {
 972  case __format_spec::__type::__default:
 973  case __format_spec::__type::__hexfloat_lower_case:
 974  case __format_spec::__type::__hexfloat_upper_case:
 975    // Precision specific behavior will be handled later.
 976    break;
 977  case __format_spec::__type::__scientific_lower_case:
 978  case __format_spec::__type::__scientific_upper_case:
 979  case __format_spec::__type::__fixed_lower_case:
 980  case __format_spec::__type::__fixed_upper_case:
 981  case __format_spec::__type::__general_lower_case:
 982  case __format_spec::__type::__general_upper_case:
 983    if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
 984      // Set the default precision for the call to to_chars.
 985      __parser.__precision_ = 6;
 986    break;
 987
 988  default:
 989    __format_spec::__throw_invalid_type_format_error(__id);
 990  }
 991}
 992
 993_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type, const char* __id) {
 994  switch (__type) {
 995  case __format_spec::__type::__default:
 996  case __format_spec::__type::__pointer_lower_case:
 997  case __format_spec::__type::__pointer_upper_case:
 998    break;
 999
1000  default:
1001    __format_spec::__throw_invalid_type_format_error(__id);
1002  }
1003}
1004
1005template <contiguous_iterator _Iterator>
1006struct __column_width_result {
1007  /// The number of output columns.
1008  size_t __width_;
1009  /// One beyond the last code unit used in the estimation.
1010  ///
1011  /// This limits the original output to fit in the wanted number of columns.
1012  _Iterator __last_;
1013};
1014
1015template <contiguous_iterator _Iterator>
1016__column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>;
1017
/// Selects how a column width estimation behaves when the requested number of
/// columns can't be matched exactly.
///
/// A single element can have a column width of two, so it's possible that the
/// requested column width can't be achieved. The policy to use depends on the
/// intended usage of the estimation.
enum class __column_width_rounding {
  /// Used for a precision: the maximum width may not be exceeded, so the
  /// result is "rounded down" to the previous boundary.
  __down,
  /// Used for a width: the estimation is done once the minimum is reached and
  /// exceeding it is not an issue. Rounding down would result in writing fill
  /// characters, therefore the result is "rounded up".
  __up
};
1028
1029#  if _LIBCPP_HAS_UNICODE
1030
1031namespace __detail {
1032template <contiguous_iterator _Iterator>
1033_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering(
1034    _Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept {
1035  using _CharT = iter_value_t<_Iterator>;
1036  __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last};
1037
1038  __column_width_result<_Iterator> __result{0, __first};
1039  while (__result.__last_ != __last && __result.__width_ <= __maximum) {
1040    typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume();
1041    int __width = __width_estimation_table::__estimated_width(__cluster.__code_point_);
1042
1043    // When the next entry would exceed the maximum width the previous width
1044    // might be returned. For example when a width of 100 is requested the
1045    // returned width might be 99, since the next code point has an estimated
1046    // column width of 2. This depends on the rounding flag.
1047    // When the maximum is exceeded the loop will abort the next iteration.
1048    if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum)
1049      return __result;
1050
1051    __result.__width_ += __width;
1052    __result.__last_ = __cluster.__last_;
1053  }
1054
1055  return __result;
1056}
1057
1058} // namespace __detail
1059
// Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
// Depending on the format the relation between the number of code units stored
// and the number of output columns differs. The first relation is the number
// of code units forming a code point. (The text assumes the code units are
// unsigned.)
// - UTF-8: The number of code units is between one and four. The first 128
//   Unicode code points match the ASCII character set. When the highest bit is
//   set it means the code point has more than one code unit.
// - UTF-16: The number of code units is between one and two. When the first
//   code unit is in the range [0xd800,0xdbff] it means the code point uses two
//   code units.
// - UTF-32: The number of code units is always one.
//
// The mapping from a code point to its number of columns is specified in
// [format.string.std]/11. This list might change in the future.
//
// Another thing to be taken into account is Grapheme clustering. This means
// that in some cases multiple code points are combined into one element in the
// output. For example:
// - an ASCII character with a combined diacritical mark
// - an emoji with a skin tone modifier
// - a group of combined people emoji to create a family
// - a combination of flag emoji
//
// See also:
// - [format.string.general]/11
// - https://en.wikipedia.org/wiki/UTF-8#Encoding
// - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
1088
1089_LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; }
1090
1091/// Determines the number of output columns needed to render the input.
1092///
1093/// \note When the scanner encounters malformed Unicode it acts as-if every
1094/// code unit is a one column code point. Typically a terminal uses the same
1095/// strategy and replaces every malformed code unit with a one column
1096/// replacement character.
1097///
1098/// \param __first    Points to the first element of the input range.
1099/// \param __last     Points beyond the last element of the input range.
1100/// \param __maximum  The maximum number of output columns. The returned number
1101///                   of estimated output columns will not exceed this value.
1102/// \param __rounding Selects the rounding method.
1103///                   \c __down result.__width_ <= __maximum
1104///                   \c __up result.__width_ <= __maximum + 1
1105template <class _CharT, class _Iterator = typename basic_string_view<_CharT>::const_iterator>
1106_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width(
1107    basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept {
1108  // The width estimation is done in two steps:
1109  // - Quickly process for the ASCII part. ASCII has the following properties
1110  //   - One code unit is one code point
1111  //   - Every code point has an estimated width of one
1112  // - When needed it will a Unicode Grapheme clustering algorithm to find
1113  //   the proper place for truncation.
1114
1115  if (__str.empty() || __maximum == 0)
1116    return {0, __str.begin()};
1117
1118  // ASCII has one caveat; when an ASCII character is followed by a non-ASCII
1119  // character they might be part of an extended grapheme cluster. For example:
1120  //   an ASCII letter and a COMBINING ACUTE ACCENT
1121  // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
1122  // need to scan one code unit beyond the requested precision. When this code
1123  // unit is non-ASCII we omit the current code unit and let the Grapheme
1124  // clustering algorithm do its work.
1125  auto __it = __str.begin();
1126  if (__format_spec::__is_ascii(*__it)) {
1127    do {
1128      --__maximum;
1129      ++__it;
1130      if (__it == __str.end())
1131        return {__str.size(), __str.end()};
1132
1133      if (__maximum == 0) {
1134        if (__format_spec::__is_ascii(*__it))
1135          return {static_cast<size_t>(__it - __str.begin()), __it};
1136
1137        break;
1138      }
1139    } while (__format_spec::__is_ascii(*__it));
1140    --__it;
1141    ++__maximum;
1142  }
1143
1144  ptrdiff_t __ascii_size = __it - __str.begin();
1145  __column_width_result __result =
1146      __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding);
1147
1148  __result.__width_ += __ascii_size;
1149  return __result;
1150}
1151#  else // _LIBCPP_HAS_UNICODE
1152template <class _CharT>
1153_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<typename basic_string_view<_CharT>::const_iterator>
1154__estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept {
1155  // When Unicode isn't supported assume ASCII and every code unit is one code
1156  // point. In ASCII the estimated column width is always one. Thus there's no
1157  // need for rounding.
1158  size_t __width = std::min(__str.size(), __maximum);
1159  return {__width, __str.begin() + __width};
1160}
1161
1162#  endif // _LIBCPP_HAS_UNICODE
1163
1164} // namespace __format_spec
1165
1166#endif // _LIBCPP_STD_VER >= 20
1167
1168_LIBCPP_END_NAMESPACE_STD
1169
1170_LIBCPP_POP_MACROS
1171
1172#endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H