master
  1// -*- C++ -*-
  2//===----------------------------------------------------------------------===//
  3//
  4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  5// See https://llvm.org/LICENSE.txt for license information.
  6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7//
  8//===----------------------------------------------------------------------===//
  9
 10// WARNING, this entire header is generated by
 11// utils/generate_width_estimation_table.py
 12// DO NOT MODIFY!
 13
 14// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
 15//
 16// See Terms of Use <https://www.unicode.org/copyright.html>
 17// for definitions of Unicode Inc.'s Data Files and Software.
 18//
 19// NOTICE TO USER: Carefully read the following legal agreement.
 20// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
 21// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
 22// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
 23// TERMS AND CONDITIONS OF THIS AGREEMENT.
 24// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
 25// THE DATA FILES OR SOFTWARE.
 26//
 27// COPYRIGHT AND PERMISSION NOTICE
 28//
 29// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
 30// Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
 31//
 32// Permission is hereby granted, free of charge, to any person obtaining
 33// a copy of the Unicode data files and any associated documentation
 34// (the "Data Files") or Unicode software and any associated documentation
 35// (the "Software") to deal in the Data Files or Software
 36// without restriction, including without limitation the rights to use,
 37// copy, modify, merge, publish, distribute, and/or sell copies of
 38// the Data Files or Software, and to permit persons to whom the Data Files
 39// or Software are furnished to do so, provided that either
 40// (a) this copyright and permission notice appear with all copies
 41// of the Data Files or Software, or
 42// (b) this copyright and permission notice appear in associated
 43// Documentation.
 44//
 45// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
 46// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 47// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 48// NONINFRINGEMENT OF THIRD PARTY RIGHTS.
 49// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
 50// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
 51// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 52// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 53// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 54// PERFORMANCE OF THE DATA FILES OR SOFTWARE.
 55//
 56// Except as contained in this notice, the name of a copyright holder
 57// shall not be used in advertising or otherwise to promote the sale,
 58// use or other dealings in these Data Files or Software without prior
 59// written authorization of the copyright holder.
 60
 61#ifndef _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H
 62#define _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H
 63
 64#include <__algorithm/ranges_upper_bound.h>
 65#include <__config>
 66#include <__cstddef/ptrdiff_t.h>
 67#include <cstdint>
 68
 69#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 70#  pragma GCC system_header
 71#endif
 72
 73_LIBCPP_BEGIN_NAMESPACE_STD
 74
 75#if _LIBCPP_STD_VER >= 20
 76
 77namespace __width_estimation_table {
 78
/// The entries of the characters with an estimated width of 2.
///
/// Contains the entries for [format.string.std]/12
///  - Any code point with the East_Asian_Width="W" or East_Asian_Width="F"
///    Derived Extracted Property as described by UAX #44
///  - U+4DC0 - U+4DFF (Yijing Hexagram Symbols)
///  - U+1F300 - U+1F5FF (Miscellaneous Symbols and Pictographs)
///  - U+1F900 - U+1F9FF (Supplemental Symbols and Pictographs)
///
/// The data is generated from
///  - https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
///  - The "overrides" in [format.string.std]/12
///
/// The format of EastAsianWidth.txt is two fields separated by a semicolon.
/// Field 0: Unicode code point value or range of code point values
/// Field 1: East_Asian_Width property, consisting of one of the following values:
///         "A", "F", "H", "N", "Na", "W"
///  - All code points, assigned or unassigned, that are not listed
///      explicitly are given the value "N".
///  - The unassigned code points in the following blocks default to "W":
///         CJK Unified Ideographs Extension A: U+3400..U+4DBF
///         CJK Unified Ideographs:             U+4E00..U+9FFF
///         CJK Compatibility Ideographs:       U+F900..U+FAFF
///  - All undesignated code points in Planes 2 and 3, whether inside or
///      outside of allocated blocks, default to "W":
///         Plane 2:                            U+20000..U+2FFFD
///         Plane 3:                            U+30000..U+3FFFD
///
/// The table is similar to the table
///  __extended_grapheme_custer_property_boundary::__entries
/// which explains the details of these classes. The only difference is this
/// table lacks a property, thus having more bits available for the size.
///
/// The maximum code point that has an estimated width of 2 is U+3FFFD. This
/// value can be encoded in 18 bits. Thus the upper 3 bits of the code point
/// are always 0. These 3 bits are used to enlarge the offset range. This
/// optimization reduces the table in Unicode 15 from 184 to 104 entries,
/// saving 320 bytes.
///
/// NOTE(review): the array below is declared with 110 entries, so the entry
/// counts quoted above appear to describe an earlier data set - confirm
/// against the generator script.
///
/// Every entry packs 2 values:
/// - bits [0, 13] The size of the range minus one, in other words the
///   distance between the lower bound and the inclusive upper bound. An entry
///   can therefore describe at most 16384 code points; longer ranges are
///   split over consecutive entries.
/// - bits [14, 31] The lower bound code point of the range. The inclusive
///   upper bound of the range is the lower bound + the value stored in
///   bits [0, 13].
///
/// The human readable comment after every value lists the lower bound, the
/// inclusive upper bound, and the number of code points in the range.
///
/// The entries are sorted by ascending lower bound; the lookup in this header
/// relies on that order to do a binary search.
_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[110] = {
    0x0440005f /* 00001100 - 0000115f [   96] */, //
    0x08c68001 /* 0000231a - 0000231b [    2] */, //
    0x08ca4001 /* 00002329 - 0000232a [    2] */, //
    0x08fa4003 /* 000023e9 - 000023ec [    4] */, //
    0x08fc0000 /* 000023f0 - 000023f0 [    1] */, //
    0x08fcc000 /* 000023f3 - 000023f3 [    1] */, //
    0x097f4001 /* 000025fd - 000025fe [    2] */, //
    0x09850001 /* 00002614 - 00002615 [    2] */, //
    0x098c0007 /* 00002630 - 00002637 [    8] */, //
    0x0992000b /* 00002648 - 00002653 [   12] */, //
    0x099fc000 /* 0000267f - 0000267f [    1] */, //
    0x09a28005 /* 0000268a - 0000268f [    6] */, //
    0x09a4c000 /* 00002693 - 00002693 [    1] */, //
    0x09a84000 /* 000026a1 - 000026a1 [    1] */, //
    0x09aa8001 /* 000026aa - 000026ab [    2] */, //
    0x09af4001 /* 000026bd - 000026be [    2] */, //
    0x09b10001 /* 000026c4 - 000026c5 [    2] */, //
    0x09b38000 /* 000026ce - 000026ce [    1] */, //
    0x09b50000 /* 000026d4 - 000026d4 [    1] */, //
    0x09ba8000 /* 000026ea - 000026ea [    1] */, //
    0x09bc8001 /* 000026f2 - 000026f3 [    2] */, //
    0x09bd4000 /* 000026f5 - 000026f5 [    1] */, //
    0x09be8000 /* 000026fa - 000026fa [    1] */, //
    0x09bf4000 /* 000026fd - 000026fd [    1] */, //
    0x09c14000 /* 00002705 - 00002705 [    1] */, //
    0x09c28001 /* 0000270a - 0000270b [    2] */, //
    0x09ca0000 /* 00002728 - 00002728 [    1] */, //
    0x09d30000 /* 0000274c - 0000274c [    1] */, //
    0x09d38000 /* 0000274e - 0000274e [    1] */, //
    0x09d4c002 /* 00002753 - 00002755 [    3] */, //
    0x09d5c000 /* 00002757 - 00002757 [    1] */, //
    0x09e54002 /* 00002795 - 00002797 [    3] */, //
    0x09ec0000 /* 000027b0 - 000027b0 [    1] */, //
    0x09efc000 /* 000027bf - 000027bf [    1] */, //
    0x0ac6c001 /* 00002b1b - 00002b1c [    2] */, //
    0x0ad40000 /* 00002b50 - 00002b50 [    1] */, //
    0x0ad54000 /* 00002b55 - 00002b55 [    1] */, //
    0x0ba00019 /* 00002e80 - 00002e99 [   26] */, //
    0x0ba6c058 /* 00002e9b - 00002ef3 [   89] */, //
    0x0bc000d5 /* 00002f00 - 00002fd5 [  214] */, //
    0x0bfc004e /* 00002ff0 - 0000303e [   79] */, //
    0x0c104055 /* 00003041 - 00003096 [   86] */, //
    0x0c264066 /* 00003099 - 000030ff [  103] */, //
    0x0c41402a /* 00003105 - 0000312f [   43] */, //
    0x0c4c405d /* 00003131 - 0000318e [   94] */, //
    0x0c640055 /* 00003190 - 000031e5 [   86] */, //
    0x0c7bc02f /* 000031ef - 0000321e [   48] */, //
    0x0c880027 /* 00003220 - 00003247 [   40] */, //
    0x0c943fff /* 00003250 - 0000724f [16384] */, //
    0x1c94323c /* 00007250 - 0000a48c [12861] */, //
    0x29240036 /* 0000a490 - 0000a4c6 [   55] */, //
    0x2a58001c /* 0000a960 - 0000a97c [   29] */, //
    0x2b002ba3 /* 0000ac00 - 0000d7a3 [11172] */, //
    0x3e4001ff /* 0000f900 - 0000faff [  512] */, //
    0x3f840009 /* 0000fe10 - 0000fe19 [   10] */, //
    0x3f8c0022 /* 0000fe30 - 0000fe52 [   35] */, //
    0x3f950012 /* 0000fe54 - 0000fe66 [   19] */, //
    0x3f9a0003 /* 0000fe68 - 0000fe6b [    4] */, //
    0x3fc0405f /* 0000ff01 - 0000ff60 [   96] */, //
    0x3ff80006 /* 0000ffe0 - 0000ffe6 [    7] */, //
    0x5bf80004 /* 00016fe0 - 00016fe4 [    5] */, //
    0x5bfc0001 /* 00016ff0 - 00016ff1 [    2] */, //
    0x5c0017f7 /* 00017000 - 000187f7 [ 6136] */, //
    0x620004d5 /* 00018800 - 00018cd5 [ 1238] */, //
    0x633fc009 /* 00018cff - 00018d08 [   10] */, //
    0x6bfc0003 /* 0001aff0 - 0001aff3 [    4] */, //
    0x6bfd4006 /* 0001aff5 - 0001affb [    7] */, //
    0x6bff4001 /* 0001affd - 0001affe [    2] */, //
    0x6c000122 /* 0001b000 - 0001b122 [  291] */, //
    0x6c4c8000 /* 0001b132 - 0001b132 [    1] */, //
    0x6c540002 /* 0001b150 - 0001b152 [    3] */, //
    0x6c554000 /* 0001b155 - 0001b155 [    1] */, //
    0x6c590003 /* 0001b164 - 0001b167 [    4] */, //
    0x6c5c018b /* 0001b170 - 0001b2fb [  396] */, //
    0x74c00056 /* 0001d300 - 0001d356 [   87] */, //
    0x74d80016 /* 0001d360 - 0001d376 [   23] */, //
    0x7c010000 /* 0001f004 - 0001f004 [    1] */, //
    0x7c33c000 /* 0001f0cf - 0001f0cf [    1] */, //
    0x7c638000 /* 0001f18e - 0001f18e [    1] */, //
    0x7c644009 /* 0001f191 - 0001f19a [   10] */, //
    0x7c800002 /* 0001f200 - 0001f202 [    3] */, //
    0x7c84002b /* 0001f210 - 0001f23b [   44] */, //
    0x7c900008 /* 0001f240 - 0001f248 [    9] */, //
    0x7c940001 /* 0001f250 - 0001f251 [    2] */, //
    0x7c980005 /* 0001f260 - 0001f265 [    6] */, //
    0x7cc0034f /* 0001f300 - 0001f64f [  848] */, //
    0x7da00045 /* 0001f680 - 0001f6c5 [   70] */, //
    0x7db30000 /* 0001f6cc - 0001f6cc [    1] */, //
    0x7db40002 /* 0001f6d0 - 0001f6d2 [    3] */, //
    0x7db54002 /* 0001f6d5 - 0001f6d7 [    3] */, //
    0x7db70003 /* 0001f6dc - 0001f6df [    4] */, //
    0x7dbac001 /* 0001f6eb - 0001f6ec [    2] */, //
    0x7dbd0008 /* 0001f6f4 - 0001f6fc [    9] */, //
    0x7df8000b /* 0001f7e0 - 0001f7eb [   12] */, //
    0x7dfc0000 /* 0001f7f0 - 0001f7f0 [    1] */, //
    0x7e4000ff /* 0001f900 - 0001f9ff [  256] */, //
    0x7e9c000c /* 0001fa70 - 0001fa7c [   13] */, //
    0x7ea00009 /* 0001fa80 - 0001fa89 [   10] */, //
    0x7ea3c037 /* 0001fa8f - 0001fac6 [   56] */, //
    0x7eb3800e /* 0001face - 0001fadc [   15] */, //
    0x7eb7c00a /* 0001fadf - 0001fae9 [   11] */, //
    0x7ebc0008 /* 0001faf0 - 0001faf8 [    9] */, //
    0x80003fff /* 00020000 - 00023fff [16384] */, //
    0x90003fff /* 00024000 - 00027fff [16384] */, //
    0xa0003fff /* 00028000 - 0002bfff [16384] */, //
    0xb0003ffd /* 0002c000 - 0002fffd [16382] */, //
    0xc0003fff /* 00030000 - 00033fff [16384] */, //
    0xd0003fff /* 00034000 - 00037fff [16384] */, //
    0xe0003fff /* 00038000 - 0003bfff [16384] */, //
    0xf0003ffd /* 0003c000 - 0003fffd [16382] */};
233
/// The highest code point listed in EastAsianWidth.txt, U+3FFFD.
///
/// Code points above this bound may need more than 18 significant bits, but
/// their estimated width is always 1. They never have to be stored, which is
/// what allows the table to use its compact form.
inline constexpr uint32_t __table_upper_bound = 0x3fffd;
240
241/// Returns the estimated width of a Unicode code point.
242///
243/// \\pre The code point is a valid Unicode code point.
244[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int __estimated_width(const char32_t __code_point) noexcept {
245  // Since __table_upper_bound contains the unshifted range do the
246  // comparison without shifting.
247  if (__code_point > __table_upper_bound) [[unlikely]]
248    return 1;
249
250  // When the code-point is less than the first element in the table
251  // the lookup is quite expensive. Since quite some scripts are in
252  // that range, it makes sense to validate that first.
253  // The std_format_spec_string_unicode benchmark gives a measurable
254  // improvement.
255  if (__code_point < (__entries[0] >> 14))
256    return 1;
257
258  ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 14) | 0x3fffu) - __entries;
259  if (__i == 0)
260    return 1;
261
262  --__i;
263  uint32_t __upper_bound = (__entries[__i] >> 14) + (__entries[__i] & 0x3fffu);
264  return 1 + (__code_point <= __upper_bound);
265}
266
267} // namespace __width_estimation_table
268
269#endif // _LIBCPP_STD_VER >= 20
270
271_LIBCPP_END_NAMESPACE_STD
272
273#endif // _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H