master
  1// -*- C++ -*-
  2//===----------------------------------------------------------------------===//
  3//
  4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  5// See https://llvm.org/LICENSE.txt for license information.
  6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7//
  8//===----------------------------------------------------------------------===//
  9
 10// WARNING, this entire header is generated by
 11// utils/generate_indic_conjunct_break_table.py
 12// DO NOT MODIFY!
 13
 14// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
 15//
 16// See Terms of Use <https://www.unicode.org/copyright.html>
 17// for definitions of Unicode Inc.'s Data Files and Software.
 18//
 19// NOTICE TO USER: Carefully read the following legal agreement.
 20// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
 21// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
 22// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
 23// TERMS AND CONDITIONS OF THIS AGREEMENT.
 24// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
 25// THE DATA FILES OR SOFTWARE.
 26//
 27// COPYRIGHT AND PERMISSION NOTICE
 28//
 29// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
 30// Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
 31//
 32// Permission is hereby granted, free of charge, to any person obtaining
 33// a copy of the Unicode data files and any associated documentation
 34// (the "Data Files") or Unicode software and any associated documentation
 35// (the "Software") to deal in the Data Files or Software
 36// without restriction, including without limitation the rights to use,
 37// copy, modify, merge, publish, distribute, and/or sell copies of
 38// the Data Files or Software, and to permit persons to whom the Data Files
 39// or Software are furnished to do so, provided that either
 40// (a) this copyright and permission notice appear with all copies
 41// of the Data Files or Software, or
 42// (b) this copyright and permission notice appear in associated
 43// Documentation.
 44//
 45// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
 46// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 47// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 48// NONINFRINGEMENT OF THIRD PARTY RIGHTS.
 49// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
 50// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
 51// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 52// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 53// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 54// PERFORMANCE OF THE DATA FILES OR SOFTWARE.
 55//
 56// Except as contained in this notice, the name of a copyright holder
 57// shall not be used in advertising or otherwise to promote the sale,
 58// use or other dealings in these Data Files or Software without prior
 59// written authorization of the copyright holder.
 60
 61#ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
 62#define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
 63
 64#include <__algorithm/ranges_upper_bound.h>
 65#include <__config>
 66#include <__cstddef/ptrdiff_t.h>
 67#include <__iterator/access.h>
 68#include <cstdint>
 69
 70#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 71#  pragma GCC system_header
 72#endif
 73
 74_LIBCPP_BEGIN_NAMESPACE_STD
 75
 76#if _LIBCPP_STD_VER >= 20
 77
 78namespace __indic_conjunct_break {
 79
 80enum class __property : uint8_t {
 81  // Values generated from the data files.
 82  __Consonant,
 83  __Extend,
 84  __Linker,
 85
 86  // The code unit has none of above properties.
 87  __none
 88};
 89
/// The entries of the indic conjunct break property table.
///
/// The data is generated from
/// -  https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
///
/// Every entry packs 3 values into one 32-bit word
/// - bits [0, 1] The property. One of the values generated from the data files
///   of \ref __property
/// - bits [2, 10] The size of the range.
/// - bits [11, 31] The lower bound code point of the range. The upper bound of
///   the range is lower bound + size.
///
/// For example the first entry, 0x001801bd, decodes to lower bound 0x300,
/// size 0x6f, and property 1 (__Extend). It covers the code points
/// [0x300, 0x36f].
///
/// The lower bound is stored in the most significant bits and the entries are
/// listed in ascending order. The binary search in __get_property depends on
/// that ordering.
///
/// The 9 bits for the size allow a maximum range of 512 elements. Some ranges
/// in the Unicode tables are larger. They are stored in multiple consecutive
/// ranges in the data table. An alternative would be to store the sizes in a
/// separate 16-bit value. The original MSVC STL code had such an approach, but
/// this approach uses less space for the data and is about 4% faster in the
/// following benchmark.
/// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp
// clang-format off
_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[403] = {
    0x001801bd,
    0x00241819,
    0x002c88b1,
    0x002df801,
    0x002e0805,
    0x002e2005,
    0x002e3801,
    0x00308029,
    0x00325851,
    0x00338001,
    0x0036b019,
    0x0036f815,
    0x00373805,
    0x0037500d,
    0x00388801,
    0x00398069,
    0x003d3029,
    0x003f5821,
    0x003fe801,
    0x0040b00d,
    0x0040d821,
    0x00412809,
    0x00414811,
    0x0042c809,
    0x0044b821,
    0x0046505d,
    0x0047187d,
    0x0048a890,
    0x0049d001,
    0x0049e001,
    0x004a081d,
    0x004a6802,
    0x004a8819,
    0x004ac01c,
    0x004b1005,
    0x004bc01c,
    0x004c0801,
    0x004ca84c,
    0x004d5018,
    0x004d9000,
    0x004db00c,
    0x004de001,
    0x004df001,
    0x004e080d,
    0x004e6802,
    0x004eb801,
    0x004ee004,
    0x004ef800,
    0x004f1005,
    0x004f8004,
    0x004ff001,
    0x00500805,
    0x0051e001,
    0x00520805,
    0x00523805,
    0x00525809,
    0x00528801,
    0x00538005,
    0x0053a801,
    0x00540805,
    0x0054a84c,
    0x00555018,
    0x00559004,
    0x0055a810,
    0x0055e001,
    0x00560811,
    0x00563805,
    0x00566802,
    0x00571005,
    0x0057c800,
    0x0057d015,
    0x00580801,
    0x0058a84c,
    0x00595018,
    0x00599004,
    0x0059a810,
    0x0059e001,
    0x0059f005,
    0x005a080d,
    0x005a6802,
    0x005aa809,
    0x005ae004,
    0x005af800,
    0x005b1005,
    0x005b8800,
    0x005c1001,
    0x005df001,
    0x005e0001,
    0x005e6801,
    0x005eb801,
    0x00600001,
    0x00602001,
    0x0060a84c,
    0x0061503c,
    0x0061e001,
    0x0061f009,
    0x00623009,
    0x00625009,
    0x00626802,
    0x0062a805,
    0x0062c008,
    0x00631005,
    0x00640801,
    0x0065e001,
    0x0065f805,
    0x00661001,
    0x00663009,
    0x0066500d,
    0x0066a805,
    0x00671005,
    0x00680005,
    0x0068a894,
    0x0069d805,
    0x0069f001,
    0x006a080d,
    0x006a6802,
    0x006ab801,
    0x006b1005,
    0x006c0801,
    0x006e5001,
    0x006e7801,
    0x006e9009,
    0x006eb001,
    0x006ef801,
    0x00718801,
    0x0071a019,
    0x0072381d,
    0x00758801,
    0x0075a021,
    0x00764019,
    0x0078c005,
    0x0079a801,
    0x0079b801,
    0x0079c801,
    0x007b8835,
    0x007c0011,
    0x007c3005,
    0x007c6829,
    0x007cc88d,
    0x007e3001,
    0x0081680d,
    0x00819015,
    0x0081c805,
    0x0081e805,
    0x0082c005,
    0x0082f009,
    0x0083880d,
    0x00841001,
    0x00842805,
    0x00846801,
    0x0084e801,
    0x009ae809,
    0x00b8900d,
    0x00b99009,
    0x00ba9005,
    0x00bb9005,
    0x00bda005,
    0x00bdb819,
    0x00be3001,
    0x00be4829,
    0x00bee801,
    0x00c05809,
    0x00c07801,
    0x00c42805,
    0x00c54801,
    0x00c90009,
    0x00c93805,
    0x00c99001,
    0x00c9c809,
    0x00d0b805,
    0x00d0d801,
    0x00d2b001,
    0x00d2c019,
    0x00d30001,
    0x00d31001,
    0x00d3281d,
    0x00d39825,
    0x00d3f801,
    0x00d58079,
    0x00d8000d,
    0x00d9a025,
    0x00da1009,
    0x00db5821,
    0x00dc0005,
    0x00dd100d,
    0x00dd4015,
    0x00df3001,
    0x00df4005,
    0x00df6801,
    0x00df7811,
    0x00e1601d,
    0x00e1b005,
    0x00e68009,
    0x00e6a031,
    0x00e71019,
    0x00e76801,
    0x00e7a001,
    0x00e7c005,
    0x00ee00fd,
    0x01006801,
    0x01068081,
    0x01677809,
    0x016bf801,
    0x016f007d,
    0x01815015,
    0x0184c805,
    0x0533780d,
    0x0533a025,
    0x0534f005,
    0x05378005,
    0x05401001,
    0x05403001,
    0x05405801,
    0x05412805,
    0x05416001,
    0x05462005,
    0x05470045,
    0x0547f801,
    0x0549301d,
    0x054a3829,
    0x054a9801,
    0x054c0009,
    0x054d9801,
    0x054db00d,
    0x054de005,
    0x054e0001,
    0x054f2801,
    0x05514815,
    0x05518805,
    0x0551a805,
    0x05521801,
    0x05526001,
    0x0553e001,
    0x05558001,
    0x05559009,
    0x0555b805,
    0x0555f005,
    0x05560801,
    0x05576005,
    0x0557b001,
    0x055f2801,
    0x055f4001,
    0x055f6801,
    0x07d8f001,
    0x07f0003d,
    0x07f1003d,
    0x07fcf005,
    0x080fe801,
    0x08170001,
    0x081bb011,
    0x08500809,
    0x08502805,
    0x0850600d,
    0x0851c009,
    0x0851f801,
    0x08572805,
    0x0869200d,
    0x086b4811,
    0x08755805,
    0x0877e00d,
    0x087a3029,
    0x087c100d,
    0x08800801,
    0x0881c039,
    0x08838001,
    0x08839805,
    0x0883f809,
    0x0885980d,
    0x0885c805,
    0x08861001,
    0x08880009,
    0x08893811,
    0x0889681d,
    0x088b9801,
    0x088c0005,
    0x088db021,
    0x088e0001,
    0x088e480d,
    0x088e7801,
    0x08917809,
    0x0891a00d,
    0x0891f001,
    0x08920801,
    0x0896f801,
    0x0897181d,
    0x08980005,
    0x0899d805,
    0x0899f001,
    0x089a0001,
    0x089a6801,
    0x089ab801,
    0x089b3019,
    0x089b8011,
    0x089dc001,
    0x089dd815,
    0x089e1001,
    0x089e2801,
    0x089e3809,
    0x089e7009,
    0x089e9001,
    0x089f0805,
    0x08a1c01d,
    0x08a21009,
    0x08a23001,
    0x08a2f001,
    0x08a58001,
    0x08a59815,
    0x08a5d001,
    0x08a5e801,
    0x08a5f805,
    0x08a61005,
    0x08ad7801,
    0x08ad900d,
    0x08ade005,
    0x08adf805,
    0x08aee005,
    0x08b1981d,
    0x08b1e801,
    0x08b1f805,
    0x08b55801,
    0x08b56801,
    0x08b5801d,
    0x08b8e801,
    0x08b8f801,
    0x08b9100d,
    0x08b93811,
    0x08c17821,
    0x08c1c805,
    0x08c98001,
    0x08c9d80d,
    0x08ca1801,
    0x08cea00d,
    0x08ced005,
    0x08cf0001,
    0x08d00825,
    0x08d19815,
    0x08d1d80d,
    0x08d23801,
    0x08d28815,
    0x08d2c809,
    0x08d45031,
    0x08d4c005,
    0x08e18019,
    0x08e1c015,
    0x08e1f801,
    0x08e49055,
    0x08e55019,
    0x08e59005,
    0x08e5a805,
    0x08e98815,
    0x08e9d001,
    0x08e9e005,
    0x08e9f819,
    0x08ea3801,
    0x08ec8005,
    0x08eca801,
    0x08ecb801,
    0x08f79805,
    0x08f80005,
    0x08f9b011,
    0x08fa0009,
    0x08fad001,
    0x09a20001,
    0x09a23839,
    0x0b08f02d,
    0x0b096809,
    0x0b578011,
    0x0b598019,
    0x0b7a7801,
    0x0b7c780d,
    0x0b7f2001,
    0x0b7f8005,
    0x0de4e805,
    0x0e7800b5,
    0x0e798059,
    0x0e8b2811,
    0x0e8b6815,
    0x0e8bd81d,
    0x0e8c2819,
    0x0e8d500d,
    0x0e921009,
    0x0ed000d9,
    0x0ed1d8c5,
    0x0ed3a801,
    0x0ed42001,
    0x0ed4d811,
    0x0ed50839,
    0x0f000019,
    0x0f004041,
    0x0f00d819,
    0x0f011805,
    0x0f013011,
    0x0f047801,
    0x0f098019,
    0x0f157001,
    0x0f17600d,
    0x0f27600d,
    0x0f2f7005,
    0x0f468019,
    0x0f4a2019,
    0x0f9fd811,
    0x7001017d,
    0x700803bd};
// clang-format on
515
516/// Returns the indic conjuct break property of a code point.
517[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept {
518  // The algorithm searches for the upper bound of the range and, when found,
519  // steps back one entry. This algorithm is used since the code point can be
520  // anywhere in the range. After a lower bound is found the next step is to
521  // compare whether the code unit is indeed in the range.
522  //
523  // Since the entry contains a code unit, size, and property the code point
524  // being sought needs to be adjusted. Just shifting the code point to the
525  // proper position doesn't work; suppose an entry has property 0, size 1,
526  // and lower bound 3. This results in the entry 0x1810.
527  // When searching for code point 3 it will search for 0x1800, find 0x1810
528  // and moves to the previous entry. Thus the lower bound value will never
529  // be found.
530  // The simple solution is to set the bits belonging to the property and
531  // size. Then the upper bound for code point 3 will return the entry after
532  // 0x1810. After moving to the previous entry the algorithm arrives at the
533  // correct entry.
534  ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries;
535  if (__i == 0)
536    return __property::__none;
537
538  --__i;
539  uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111);
540  if (__code_point <= __upper_bound)
541    return static_cast<__property>(__entries[__i] & 0b11);
542
543  return __property::__none;
544}
545
546} // namespace __indic_conjunct_break
547
548#endif // _LIBCPP_STD_VER >= 20
549
550_LIBCPP_END_NAMESPACE_STD
551
552#endif // _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H