master
  1// -*- C++ -*-
  2//===----------------------------------------------------------------------===//
  3//
  4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  5// See https://llvm.org/LICENSE.txt for license information.
  6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7//
  8//===----------------------------------------------------------------------===//
  9
 10#ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
 11#define _LIBCPP___FORMAT_WRITE_ESCAPED_H
 12
 13#include <__algorithm/ranges_copy.h>
 14#include <__algorithm/ranges_for_each.h>
 15#include <__charconv/to_chars_integral.h>
 16#include <__charconv/to_chars_result.h>
 17#include <__chrono/statically_widen.h>
 18#include <__format/escaped_output_table.h>
 19#include <__format/extended_grapheme_cluster_table.h>
 20#include <__format/formatter_output.h>
 21#include <__format/parser_std_format_spec.h>
 22#include <__format/unicode.h>
 23#include <__iterator/back_insert_iterator.h>
 24#include <__memory/addressof.h>
 25#include <__system_error/errc.h>
 26#include <__type_traits/make_unsigned.h>
 27#include <__utility/move.h>
 28#include <string_view>
 29
 30#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 31#  pragma GCC system_header
 32#endif
 33
 34_LIBCPP_PUSH_MACROS
 35#include <__undef_macros>
 36
 37_LIBCPP_BEGIN_NAMESPACE_STD
 38
 39namespace __formatter {
 40
 41#if _LIBCPP_STD_VER >= 20
 42
 43/// Writes a string using format's width estimation algorithm.
 44///
 45/// \note When \c _LIBCPP_HAS_UNICODE is false the function assumes the input is ASCII.
 46template <class _CharT>
 47_LIBCPP_HIDE_FROM_ABI auto
 48__write_string(basic_string_view<_CharT> __str,
 49               output_iterator<const _CharT&> auto __out_it,
 50               __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
 51  if (!__specs.__has_precision())
 52    return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs);
 53
 54  int __size = __formatter::__truncate(__str, __specs.__precision_);
 55
 56  return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size);
 57}
 58
 59#endif // _LIBCPP_STD_VER >= 20
 60#if _LIBCPP_STD_VER >= 23
 61
 62struct __nul_terminator {};
 63
 64template <class _CharT>
 65_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
 66  return *__cstr == _CharT('\0');
 67}
 68
 69template <class _CharT>
 70_LIBCPP_HIDE_FROM_ABI void
 71__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
 72  back_insert_iterator __out_it{__str};
 73  std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
 74
 75  char __buffer[8];
 76  to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
 77  _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small");
 78  std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
 79
 80  __str += _CharT('}');
 81}
 82
 83// [format.string.escaped]/2.2.1.2
 84// ...
 85// then the sequence \u{hex-digit-sequence} is appended to E, where
 86// hex-digit-sequence is the shortest hexadecimal representation of C using
 87// lower-case hexadecimal digits.
 88template <class _CharT>
 89_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
 90  __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
 91}
 92
 93// [format.string.escaped]/2.2.3
 94// Otherwise (X is a sequence of ill-formed code units), each code unit U is
 95// appended to E in order as the sequence \x{hex-digit-sequence}, where
 96// hex-digit-sequence is the shortest hexadecimal representation of U using
 97// lower-case hexadecimal digits.
 98template <class _CharT>
 99_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
100  __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
101}
102
103template <class _CharT>
104[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
105__is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) {
106#  if !_LIBCPP_HAS_UNICODE
107  // For ASCII assume everything above 127 is printable.
108  if (__value > 127)
109    return false;
110#  endif
111
112  // [format.string.escaped]/2.2.1.2.1
113  //   CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar
114  //   value whose Unicode property General_Category has a value in the groups
115  //   Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard,
116  if (!__escaped_output_table::__needs_escape(__value))
117    // [format.string.escaped]/2.2.1.2.2
118    //   CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar
119    //   value with the Unicode property Grapheme_Extend=Yes as described by UAX
120    //   #44 of the Unicode Standard and C is not immediately preceded in S by a
121    //   character P appended to E without translation to an escape sequence,
122    if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(__value) !=
123                               __extended_grapheme_custer_property_boundary::__property::__Extend)
124      return false;
125
126  __formatter::__write_well_formed_escaped_code_unit(__str, __value);
127  return true;
128}
129
130template <class _CharT>
131[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
132  return static_cast<make_unsigned_t<_CharT>>(__value);
133}
134
135enum class __escape_quotation_mark { __apostrophe, __double_quote };
136
137// [format.string.escaped]/2
138template <class _CharT>
139[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(
140    basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) {
141  // 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
142  switch (__value) {
143  case _CharT('\t'):
144    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
145    return true;
146  case _CharT('\n'):
147    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
148    return true;
149  case _CharT('\r'):
150    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
151    return true;
152  case _CharT('\''):
153    if (__mark == __escape_quotation_mark::__apostrophe)
154      __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
155    else
156      __str += __value;
157    return true;
158  case _CharT('"'):
159    if (__mark == __escape_quotation_mark::__double_quote)
160      __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
161    else
162      __str += __value;
163    return true;
164  case _CharT('\\'):
165    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
166    return true;
167
168  // 2.2.1.2 - Space
169  case _CharT(' '):
170    __str += __value;
171    return true;
172  }
173
174  // 2.2.2
175  //   Otherwise, if X is a shift sequence, the effect on E and further
176  //   decoding of S is unspecified.
177  // For now shift sequences are ignored and treated as Unicode. Other parts
178  // of the format library do the same. It's unknown how ostream treats them.
179  // TODO FMT determine what to do with shift sequences.
180
181  // 2.2.1.2.1 and 2.2.1.2.2 - Escape
182  return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value));
183}
184
185template <class _CharT>
186_LIBCPP_HIDE_FROM_ABI void
187__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
188  __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
189
190  // When the first code unit has the property Grapheme_Extend=Yes it needs to
191  // be escaped. This happens when the previous code unit was also escaped.
192  bool __escape = true;
193  while (!__view.__at_end()) {
194    auto __first                                  = __view.__position();
195    typename __unicode::__consume_result __result = __view.__consume();
196    if (__result.__status == __unicode::__consume_result::__ok) {
197      __escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark);
198      if (!__escape)
199        // 2.2.1.3 - Add the character
200        ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
201    } else {
202      // 2.2.3 sequence of ill-formed code units
203      ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
204        __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
205      });
206    }
207  }
208}
209
210template <class _CharT>
211_LIBCPP_HIDE_FROM_ABI auto
212__format_escaped_char(_CharT __value,
213                      output_iterator<const _CharT&> auto __out_it,
214                      __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
215  basic_string<_CharT> __str;
216  __str += _CharT('\'');
217  __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
218  __str += _CharT('\'');
219  return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size());
220}
221
222template <class _CharT>
223_LIBCPP_HIDE_FROM_ABI auto
224__format_escaped_string(basic_string_view<_CharT> __values,
225                        output_iterator<const _CharT&> auto __out_it,
226                        __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
227  basic_string<_CharT> __str;
228  __str += _CharT('"');
229  __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
230  __str += _CharT('"');
231  return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs);
232}
233
234#endif // _LIBCPP_STD_VER >= 23
235
236} // namespace __formatter
237
238_LIBCPP_END_NAMESPACE_STD
239
240_LIBCPP_POP_MACROS
241
242#endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H