master
   1//===----------------------------------------------------------------------===//
   2//
   3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4// See https://llvm.org/LICENSE.txt for license information.
   5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6//
   7//===----------------------------------------------------------------------===//
   8
   9// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
  10
  11// TODO TZDB look at optimizations
  12//
  13// The current algorithm is correct but not efficient. For example, in a named
  14// rule based continuation finding the next rule does quite a bit of work,
  15// returns the next rule and "forgets" its state. This could be better.
  16//
  17// It would be possible to cache lookups. If a time for a zone is calculated its
  18// sys_info could be kept and the next lookup could test whether the time is in
  19// a "known" sys_info. The wording in the Standard hints at this slowness by
  20// "suggesting" this could be implemented on the user's side.
  21
  22// TODO TZDB look at removing quirks
  23//
  24// The code has some special rules to adjust the timing at the continuation
  25// switches. This works correctly, but some of the places feel odd. It would be
  26// good to investigate this further and see whether all quirks are needed or
  27// that there are better fixes.
  28//
  29// These quirks often use a 12h interval; this is the scan interval of zdump,
  30// which implies there are no sys_info objects with a duration of less than 12h.
  31
  32// Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120502
  33
  34#include <__config>
  35
  36// TODO(LLVM 23): When upgrading to GCC 16 this can be removed
  37#ifdef _LIBCPP_COMPILER_GCC
  38#  pragma GCC optimize("-O0")
  39#endif
  40
  41#include <algorithm>
  42#include <cctype>
  43#include <chrono>
  44#include <expected>
  45#include <map>
  46#include <numeric>
  47#include <ranges>
  48
  49#include "include/tzdb/time_zone_private.h"
  50#include "include/tzdb/tzdb_list_private.h"
  51
  52// TODO TZDB remove debug printing
  53#ifdef PRINT
  54#  include <print>
  55#endif
  56
  57_LIBCPP_BEGIN_NAMESPACE_STD
  58
  59#ifdef PRINT
  60template <>
  61struct formatter<chrono::sys_info, char> {
  62  template <class ParseContext>
  63  constexpr typename ParseContext::iterator parse(ParseContext& ctx) {
  64    return ctx.begin();
  65  }
  66
  67  template <class FormatContext>
  68  typename FormatContext::iterator format(const chrono::sys_info& info, FormatContext& ctx) const {
  69    return std::format_to(
  70        ctx.out(), "[{}, {}) {:%Q%q} {:%Q%q} {}", info.begin, info.end, info.offset, info.save, info.abbrev);
  71  }
  72};
  73#endif
  74
  75namespace chrono {
  76
  77//===----------------------------------------------------------------------===//
  78//                           Details
  79//===----------------------------------------------------------------------===//
  80
// A sys_info together with the information whether it may be merged with the
// sys_info found for the next continuation.
struct __sys_info {
  sys_info __info;
  bool __can_merge; // Can the returned sys_info object be merged with the next result?
};
  85
// Return type for the helper functions that look up a sys_info.
// - The expected result holds the "best" sys_info object. This object can be
//   before the requested time. Sometimes sys_info objects from different
//   continuations share their offset, save, and abbrev; these objects are
//   merged into one sys_info object. The __can_merge flag determines whether
//   the current result can be merged with the next result.
// - The unexpected result means no sys_info object was found; its value is
//   the time to be used for the next search iteration.
using __sys_info_result = expected<__sys_info, sys_seconds>;
  95
  96template <ranges::forward_range _Range,
  97          class _Type,
  98          class _Proj                                                                                  = identity,
  99          indirect_strict_weak_order<const _Type*, projected<ranges::iterator_t<_Range>, _Proj>> _Comp = ranges::less>
 100[[nodiscard]] static ranges::borrowed_iterator_t<_Range>
 101__binary_find(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) {
 102  auto __end = ranges::end(__r);
 103  auto __ret = ranges::lower_bound(ranges::begin(__r), __end, __value, __comp, __proj);
 104  if (__ret == __end)
 105    return __end;
 106
 107  // When the value does not match the predicate it's equal and a valid result
 108  // was found.
 109  return !std::invoke(__comp, __value, std::invoke(__proj, *__ret)) ? __ret : __end;
 110}
 111
 112// Format based on https://data.iana.org/time-zones/tz-how-to.html
 113//
 114// 1  a time zone abbreviation that is a string of three or more characters that
 115//    are either ASCII alphanumerics, "+", or "-"
 116// 2  the string "%z", in which case the "%z" will be replaced by a numeric time
 117//    zone abbreviation
 118// 3  a pair of time zone abbreviations separated by a slash ('/'), in which
 119//    case the first string is the abbreviation for the standard time name and
 120//    the second string is the abbreviation for the daylight saving time name
 121// 4  a string containing "%s", in which case the "%s" will be replaced by the
 122//    text in the appropriate Rule's LETTER column, and the resulting string
 123//    should be a time zone abbreviation
 124//
 125// Rule 1 is not strictly validated since America/Barbados uses a two letter
 126// abbreviation AT.
 127[[nodiscard]] static string
 128__format(const __tz::__continuation& __continuation, const string& __letters, seconds __save) {
 129  bool __shift = false;
 130  string __result;
 131  for (char __c : __continuation.__format) {
 132    if (__shift) {
 133      switch (__c) {
 134      case 's':
 135        std::ranges::copy(__letters, std::back_inserter(__result));
 136        break;
 137
 138      case 'z': {
 139        if (__continuation.__format.size() != 2)
 140          std::__throw_runtime_error(
 141              std::format("corrupt tzdb FORMAT field: %z should be the entire contents, instead contains '{}'",
 142                          __continuation.__format)
 143                  .c_str());
 144        chrono::hh_mm_ss __offset{__continuation.__stdoff + __save};
 145        if (__offset.is_negative()) {
 146          __result += '-';
 147          __offset = chrono::hh_mm_ss{-(__continuation.__stdoff + __save)};
 148        } else
 149          __result += '+';
 150
 151        if (__offset.minutes() != 0min)
 152          std::format_to(std::back_inserter(__result), "{:%H%M}", __offset);
 153        else
 154          std::format_to(std::back_inserter(__result), "{:%H}", __offset);
 155      } break;
 156
 157      default:
 158        std::__throw_runtime_error(
 159            std::format("corrupt tzdb FORMAT field: invalid sequence '%{}' found, expected %s or %z", __c).c_str());
 160      }
 161      __shift = false;
 162
 163    } else if (__c == '/') {
 164      if (__save != 0s)
 165        __result.clear();
 166      else
 167        break;
 168
 169    } else if (__c == '%') {
 170      __shift = true;
 171    } else if (__c == '+' || __c == '-' || std::isalnum(__c)) {
 172      __result.push_back(__c);
 173    } else {
 174      std::__throw_runtime_error(
 175          std::format(
 176              "corrupt tzdb FORMAT field: invalid character '{}' found, expected +, -, or an alphanumeric value", __c)
 177              .c_str());
 178    }
 179  }
 180
 181  if (__shift)
 182    std::__throw_runtime_error("corrupt tzdb FORMAT field: input ended with the start of the escape sequence '%'");
 183
 184  if (__result.empty())
 185    std::__throw_runtime_error("corrupt tzdb FORMAT field: result is empty");
 186
 187  return __result;
 188}
 189
 190[[nodiscard]] static sys_seconds __to_sys_seconds(year_month_day __ymd, seconds __seconds) {
 191  seconds __result = static_cast<sys_days>(__ymd).time_since_epoch() + __seconds;
 192  return sys_seconds{__result};
 193}
 194
 195[[nodiscard]] static seconds __at_to_sys_seconds(const __tz::__continuation& __continuation) {
 196  switch (__continuation.__at.__clock) {
 197  case __tz::__clock::__local:
 198    return __continuation.__at.__time - __continuation.__stdoff -
 199           std::visit(
 200               [](const auto& __value) {
 201                 using _Tp = decay_t<decltype(__value)>;
 202                 if constexpr (same_as<_Tp, monostate>)
 203                   return chrono::seconds{0};
 204                 else if constexpr (same_as<_Tp, __tz::__save>)
 205                   return chrono::duration_cast<seconds>(__value.__time);
 206                 else if constexpr (same_as<_Tp, std::string>)
 207                   // For a named rule based continuation the SAVE depends on the RULE
 208                   // active at the end. This should be determined separately.
 209                   return chrono::seconds{0};
 210                 else
 211                   static_assert(false);
 212
 213                 std::__libcpp_unreachable();
 214               },
 215               __continuation.__rules);
 216
 217  case __tz::__clock::__universal:
 218    return __continuation.__at.__time;
 219
 220  case __tz::__clock::__standard:
 221    return __continuation.__at.__time - __continuation.__stdoff;
 222  }
 223  std::__libcpp_unreachable();
 224}
 225
 226[[nodiscard]] static year_month_day __to_year_month_day(year __year, month __month, __tz::__on __on) {
 227  return std::visit(
 228      [&](const auto& __value) {
 229        using _Tp = decay_t<decltype(__value)>;
 230        if constexpr (same_as<_Tp, chrono::day>)
 231          return year_month_day{__year, __month, __value};
 232        else if constexpr (same_as<_Tp, weekday_last>)
 233          return year_month_day{static_cast<sys_days>(year_month_weekday_last{__year, __month, __value})};
 234        else if constexpr (same_as<_Tp, __tz::__constrained_weekday>)
 235          return __value(__year, __month);
 236        else
 237          static_assert(false);
 238
 239        std::__libcpp_unreachable();
 240      },
 241      __on);
 242}
 243
 244[[nodiscard]] static sys_seconds __until_to_sys_seconds(const __tz::__continuation& __continuation) {
 245  // Does UNTIL contain the magic value for the last continuation?
 246  if (__continuation.__year == chrono::year::min())
 247    return sys_seconds::max();
 248
 249  year_month_day __ymd = chrono::__to_year_month_day(__continuation.__year, __continuation.__in, __continuation.__on);
 250  return chrono::__to_sys_seconds(__ymd, chrono::__at_to_sys_seconds(__continuation));
 251}
 252
 253// Holds the UNTIL time for a continuation with a named rule.
 254//
 255// Unlike continuations with an fixed SAVE named rules have a variable SAVE.
 256// This means when the UNTIL uses the local wall time the actual UNTIL value can
 257// only be determined when the SAVE is known. This class holds that abstraction.
 258class __named_rule_until {
 259public:
 260  explicit __named_rule_until(const __tz::__continuation& __continuation)
 261      : __until_{chrono::__until_to_sys_seconds(__continuation)},
 262        __needs_adjustment_{
 263            // The last continuation of a ZONE has no UNTIL which basically is
 264            // until the end of _local_ time. This value is expressed by
 265            // sys_seconds::max(). Subtracting the SAVE leaves large value.
 266            // However SAVE can be negative, which would add a value to maximum
 267            // leading to undefined behaviour. In practice this often results in
 268            // an overflow to a very small value.
 269            __until_ != sys_seconds::max() && __continuation.__at.__clock == __tz::__clock::__local} {}
 270
 271  // Gives the unadjusted until value, this is useful when the SAVE is not known
 272  // at all.
 273  sys_seconds __until() const noexcept { return __until_; }
 274
 275  bool __needs_adjustment() const noexcept { return __needs_adjustment_; }
 276
 277  // Returns the UNTIL adjusted for SAVE.
 278  sys_seconds operator()(seconds __save) const noexcept { return __until_ - __needs_adjustment_ * __save; }
 279
 280private:
 281  sys_seconds __until_;
 282  bool __needs_adjustment_;
 283};
 284
 285[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, const __tz::__rule& __rule) {
 286  switch (__rule.__at.__clock) {
 287  case __tz::__clock::__local:
 288    // Local time and standard time behave the same. This is not
 289    // correct. Local time needs to adjust for the current saved time.
 290    // To know the saved time the rules need to be known and sorted.
 291    // This needs a time so to avoid the chicken and egg adjust the
 292    // saving of the local time later.
 293    return __rule.__at.__time - __stdoff;
 294
 295  case __tz::__clock::__universal:
 296    return __rule.__at.__time;
 297
 298  case __tz::__clock::__standard:
 299    return __rule.__at.__time - __stdoff;
 300  }
 301  std::__libcpp_unreachable();
 302}
 303
 304[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule, year __year) {
 305  year_month_day __ymd = chrono::__to_year_month_day(__year, __rule.__in, __rule.__on);
 306
 307  seconds __at = chrono::__at_to_seconds(__stdoff, __rule);
 308  return chrono::__to_sys_seconds(__ymd, __at);
 309}
 310
 311[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule) {
 312  return chrono::__from_to_sys_seconds(__stdoff, __rule, __rule.__from);
 313}
 314
 315[[nodiscard]] static const vector<__tz::__rule>&
 316__get_rules(const __tz::__rules_storage_type& __rules_db, const string& __rule_name) {
 317  auto __result = chrono::__binary_find(__rules_db, __rule_name, {}, [](const auto& __p) { return __p.first; });
 318  if (__result == std::end(__rules_db))
 319    std::__throw_runtime_error(("corrupt tzdb: rule '" + __rule_name + " 'does not exist").c_str());
 320
 321  return __result->second;
 322}
 323
 324// Returns the letters field for a time before the first rule.
 325//
 326// Per https://data.iana.org/time-zones/tz-how-to.html
 327// One wrinkle, not fully explained in zic.8.txt, is what happens when switching
 328// to a named rule. To what values should the SAVE and LETTER data be
 329// initialized?
 330//
 331// 1 If at least one transition has happened, use the SAVE and LETTER data from
 332//   the most recent.
 333// 2 If switching to a named rule before any transition has happened, assume
 334//   standard time (SAVE zero), and use the LETTER data from the earliest
 335//   transition with a SAVE of zero.
 336//
 337// This function implements case 2.
 338[[nodiscard]] static string __letters_before_first_rule(const vector<__tz::__rule>& __rules) {
 339  auto __letters =
 340      __rules                                                                                //
 341      | views::filter([](const __tz::__rule& __rule) { return __rule.__save.__time == 0s; }) //
 342      | views::transform([](const __tz::__rule& __rule) { return __rule.__letters; })        //
 343      | views::take(1);
 344
 345  if (__letters.empty())
 346    std::__throw_runtime_error("corrupt tzdb: rule has zero entries");
 347
 348  return __letters.front();
 349}
 350
 351// Determines the information based on the continuation and the rules.
 352//
 353// There are several special cases to take into account
 354//
 355// === Entries before the first rule becomes active ===
 356// Asia/Hong_Kong
 357//   9 - JST 1945 N 18 2        // (1)
 358//   8 HK HK%sT                 // (2)
 359//   R HK 1946 o - Ap 21 0 1 S  // (3)
// There (1) is active until November 18th 1945 at 02:00, after this time
// (2) becomes active. The first rule entry for HK (3) becomes active
// from April 21st 1946 at 01:00. In the period between (2) is active.
 363// This entry has an offset.
 364// This entry has no save, letters, or dst flag. So in the period
 365// after (1) and until (3) no rule entry is associated with the time.
 366
 367[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
 368    sys_seconds __begin,
 369    sys_seconds __end,
 370    const __tz::__continuation& __continuation,
 371    const vector<__tz::__rule>& __rules) {
 372  return sys_info{
 373      __begin,
 374      __end,
 375      __continuation.__stdoff,
 376      chrono::minutes(0),
 377      chrono::__format(__continuation, __letters_before_first_rule(__rules), 0s)};
 378}
 379
 380// Returns the sys_info object for a time before the first rule.
 381// When this first rule has a SAVE of 0s the sys_info for the time before the
 382// first rule and for the first rule are identical and will be merged.
 383[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
 384    sys_seconds __begin,
 385    sys_seconds __rule_end, // The end used when SAVE != 0s
 386    sys_seconds __next_end, // The end used when SAVE == 0s the times are merged
 387    const __tz::__continuation& __continuation,
 388    const vector<__tz::__rule>& __rules,
 389    vector<__tz::__rule>::const_iterator __rule) {
 390  if (__rule->__save.__time != 0s)
 391    return __get_sys_info_before_first_rule(__begin, __rule_end, __continuation, __rules);
 392
 393  return sys_info{
 394      __begin, __next_end, __continuation.__stdoff, 0min, chrono::__format(__continuation, __rule->__letters, 0s)};
 395}
 396
 397[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule) {
 398  switch (__rule.__at.__clock) {
 399  case __tz::__clock::__local:
 400    return __rule.__at.__time - __stdoff - __save;
 401
 402  case __tz::__clock::__universal:
 403    return __rule.__at.__time;
 404
 405  case __tz::__clock::__standard:
 406    return __rule.__at.__time - __stdoff;
 407  }
 408  std::__libcpp_unreachable();
 409}
 410
 411[[nodiscard]] static sys_seconds
 412__rule_to_sys_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule, year __year) {
 413  year_month_day __ymd = chrono::__to_year_month_day(__year, __rule.__in, __rule.__on);
 414
 415  seconds __at = chrono::__at_to_seconds(__stdoff, __save, __rule);
 416  return chrono::__to_sys_seconds(__ymd, __at);
 417}
 418
 419// Returns the first rule after __time.
 420// Note that a rule can be "active" in multiple years, this may result in an
 421// infinite loop where the same rule is returned every time, use __current to
 422// guard against that.
 423//
 424// When no next rule exists the returned time will be sys_seconds::max(). This
 425// can happen in practice. For example,
 426//
 427//   R So 1945 o - May 24 2 2 M
 428//   R So 1945 o - S 24 3 1 S
 429//   R So 1945 o - N 18 2s 0 -
 430//
 431// Has 3 rules that are all only active in 1945.
 432[[nodiscard]] static pair<sys_seconds, vector<__tz::__rule>::const_iterator>
 433__next_rule(sys_seconds __time,
 434            seconds __stdoff,
 435            seconds __save,
 436            const vector<__tz::__rule>& __rules,
 437            vector<__tz::__rule>::const_iterator __current) {
 438  year __year = year_month_day{chrono::floor<days>(__time)}.year();
 439
 440  // Note it would probably be better to store the pairs in a vector and then
 441  // use min() to get the smallest element
 442  map<sys_seconds, vector<__tz::__rule>::const_iterator> __candidates;
 443  // Note this evaluates all rules which is a waste of effort; when the entries
 444  // are beyond the current year's "next year" (where "next year" is not always
 445  // year + 1) the algorithm should end.
 446  for (auto __it = __rules.begin(); __it != __rules.end(); ++__it) {
 447    for (year __y = __it->__from; __y <= __it->__to; ++__y) {
 448      // Adding the current entry for the current year may lead to infinite
 449      // loops due to the SAVE adjustment. Skip these entries.
 450      if (__y == __year && __it == __current)
 451        continue;
 452
 453      sys_seconds __t = chrono::__rule_to_sys_seconds(__stdoff, __save, *__it, __y);
 454      if (__t <= __time)
 455        continue;
 456
 457      _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__candidates.contains(__t), "duplicated rule");
 458      __candidates[__t] = __it;
 459      break;
 460    }
 461  }
 462
 463  if (!__candidates.empty()) [[likely]] {
 464    auto __it = __candidates.begin();
 465
 466    // When no rule is selected the time before the first rule and the first rule
 467    // should not be merged.
 468    if (__time == sys_seconds::min())
 469      return *__it;
 470
 471    // There can be two constitutive rules that are the same. For example,
 472    // Hong Kong
 473    //
 474    // R HK 1973 o - D 30 3:30 1 S          (R1)
 475    // R HK 1965 1976 - Ap Su>=16 3:30 1 S  (R2)
 476    //
 477    // 1973-12-29 19:30:00 R1 becomes active.
 478    // 1974-04-20 18:30:00 R2 becomes active.
 479    // Both rules have a SAVE of 1 hour and LETTERS are S for both of them.
 480    while (__it != __candidates.end()) {
 481      if (__current->__save.__time != __it->second->__save.__time || __current->__letters != __it->second->__letters)
 482        return *__it;
 483
 484      ++__it;
 485    }
 486  }
 487
 488  return {sys_seconds::max(), __rules.end()};
 489}
 490
 491// Returns the first rule of a set of rules.
 492// This is not always the first of the listed rules. For example
 493//   R Sa 2008 2009 - Mar Su>=8 0 0 -
 494//   R Sa 2007 2008 - O Su>=8 0 1 -
 495// The transition in October 2007 happens before the transition in March 2008.
 496[[nodiscard]] static vector<__tz::__rule>::const_iterator
 497__first_rule(seconds __stdoff, const vector<__tz::__rule>& __rules) {
 498  return chrono::__next_rule(sys_seconds::min(), __stdoff, 0s, __rules, __rules.end()).second;
 499}
 500
// Returns the sys_info for __time in a continuation that uses a named rule.
//
// __time                The time point sought; a value before
//                       __continuation_begin is replaced by
//                       __continuation_begin.
// __continuation_begin  The start of the continuation.
// __continuation        The continuation; provides the STDOFF, FORMAT, and
//                       UNTIL.
// __rules               The rules of the named rule of the continuation.
//
// Every return path yields a __sys_info, never an unexpected value. Its
// __can_merge flag is true when the returned sys_info ends at the, for SAVE
// adjusted, end of the continuation. It is false when the sys_info ends at a
// rule transition.
[[nodiscard]] static __sys_info_result __get_sys_info_rule(
    sys_seconds __time,
    sys_seconds __continuation_begin,
    const __tz::__continuation& __continuation,
    const vector<__tz::__rule>& __rules) {
  auto __rule = chrono::__first_rule(__continuation.__stdoff, __rules);
  _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__rule != __rules.end(), "the set of rules has no first rule");

  // Avoid selecting a time before the start of the continuation
  __time = std::max(__time, __continuation_begin);

  sys_seconds __rule_begin = chrono::__from_to_sys_seconds(__continuation.__stdoff, *__rule);

  // The time sought is very likely inside the current rule.
  // When the continuation's UNTIL uses the local clock there are edge cases
  // where this is not true.
  //
  // Start to walk the rules to find the proper one.
  //
  // For now we just walk all the rules TODO TZDB investigate whether a smarter
  // algorithm would work.
  auto __next = chrono::__next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);

  // Ignore small steps, this happens with America/Punta_Arenas for the
  // transition
  // -4:42:46 - SMT 1927 S
  // -5 x -05/-04 1932 S
  // ...
  //
  // R x 1927 1931 - S 1 0 1 -
  // R x 1928 1932 - Ap 1 0 0 -
  //
  // America/Punta_Arenas  Thu Sep  1 04:42:45 1927 UT = Thu Sep  1 00:42:45 1927 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas  Sun Apr  1 03:59:59 1928 UT = Sat Mar 31 23:59:59 1928 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas  Sun Apr  1 04:00:00 1928 UT = Sat Mar 31 23:00:00 1928 -05 isdst=0 gmtoff=-18000
  //
  // Without this there will be a transition
  //   [1927-09-01 04:42:45, 1927-09-01 05:00:00) -05:00:00 0min -05

  // __begin is the first time point that is not covered by the period before
  // the first rule. When the first rule has no SAVE that period is merged
  // with the first rule and therefore lasts until the next rule.
  if (sys_seconds __begin = __rule->__save.__time != 0s ? __rule_begin : __next.first; __time < __begin) {
    if (__continuation_begin == sys_seconds::min() || __begin - __continuation_begin > 12h)
      return __sys_info{__get_sys_info_before_first_rule(
                            __continuation_begin, __rule_begin, __next.first, __continuation, __rules, __rule),
                        false};

    // Europe/Berlin
    // 1 c CE%sT 1945 May 24 2          (C1)
    // 1 So CE%sT 1946                  (C2)
    //
    // R c 1944 1945 - Ap M>=1 2s 1 S   (R1)
    //
    // R So 1945 o - May 24 2 2 M       (R2)
    //
    // When C2 becomes active the time would be before the first rule R2,
    // giving a 1 hour sys_info.
    seconds __save = __rule->__save.__time;
    __named_rule_until __continuation_end{__continuation};
    sys_seconds __sys_info_end = std::min(__continuation_end(__save), __next.first);

    return __sys_info{
        sys_info{__continuation_begin,
                 __sys_info_end,
                 __continuation.__stdoff + __save,
                 chrono::duration_cast<minutes>(__save),
                 chrono::__format(__continuation, __rule->__letters, __save)},
        __sys_info_end == __continuation_end(__save)};
  }

  // See above for America/Asuncion
  // NOTE(review): the example above refers to America/Punta_Arenas; confirm
  // which zone is meant here.
  if (__rule->__save.__time == 0s && __time < __next.first) {
    return __sys_info{
        sys_info{__continuation_begin,
                 __next.first,
                 __continuation.__stdoff,
                 0min,
                 chrono::__format(__continuation, __rule->__letters, 0s)},
        false};
  }

  if (__rule->__save.__time != 0s) {
    // another fix for America/Punta_Arenas when not at the start of the
    // sys_info object.
    seconds __save = __rule->__save.__time;
    if (__continuation_begin >= __rule_begin - __save && __time < __next.first) {
      return __sys_info{
          sys_info{__continuation_begin,
                   __next.first,
                   __continuation.__stdoff + __save,
                   chrono::duration_cast<minutes>(__save),
                   chrono::__format(__continuation, __rule->__letters, __save)},
          false};
    }
  }

  // Walk the rules in order of activation. In every iteration __rule is
  // active in [__rule_begin, __next.first).
  __named_rule_until __continuation_end{__continuation};
  while (__next.second != __rules.end()) {
#ifdef PRINT
    std::print(
        stderr,
        "Rule for {}: [{}, {}) off={} save={} duration={}\n",
        __time,
        __rule_begin,
        __next.first,
        __continuation.__stdoff,
        __rule->__save.__time,
        __next.first - __rule_begin);
#endif

    sys_seconds __end = __continuation_end(__rule->__save.__time);

    sys_seconds __sys_info_begin = std::max(__continuation_begin, __rule_begin);
    sys_seconds __sys_info_end   = std::min(__end, __next.first);
    seconds __diff               = chrono::abs(__sys_info_end - __sys_info_begin);

    if (__diff < 12h) {
      // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
      // -4:16:48 - CMT 1920 May
      // -4 - -04 1930 D
      // -4 A -04/-03 1969 O 5
      // -3 A -03/-02 1999 O 3
      // -4 A -04/-03 2000 Mar 3
      // ...
      //
      // ...
      // R A 1989 1992 - O Su>=15 0 1 -
      // R A 1999 o - O Su>=1 0 1 -
      // R A 2000 o - Mar 3 0 0 -
      // R A 2007 o - D 30 0 1 -
      // ...

      // The 1999 switch uses the same rule, but with a different stdoff.
      //   R A 1999 o - O Su>=1 0 1 -
      //     stdoff -3 -> 1999-10-03 03:00:00
      //     stdoff -4 -> 1999-10-03 04:00:00
      // This generates an invalid entry and this is evaluated as a transition.
      // Looking at the zdump like output in libc++ this generates jumps in
      // the UTC time.

      // Advance to the next rule while keeping __rule_begin, so the short
      // period becomes part of the period of the next rule.
      __rule         = __next.second;
      __next         = __next_rule(__next.first, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
      __end          = __continuation_end(__rule->__save.__time);
      __sys_info_end = std::min(__end, __next.first);
    }

    // The rule is the result when it contains __time or when the next rule
    // does not start before the end of the continuation.
    if ((__time >= __rule_begin && __time < __next.first) || __next.first >= __end) {
      __sys_info_begin = std::max(__continuation_begin, __rule_begin);
      __sys_info_end   = std::min(__end, __next.first);

      return __sys_info{
          sys_info{__sys_info_begin,
                   __sys_info_end,
                   __continuation.__stdoff + __rule->__save.__time,
                   chrono::duration_cast<minutes>(__rule->__save.__time),
                   chrono::__format(__continuation, __rule->__letters, __rule->__save.__time)},
          __sys_info_end == __end};
    }

    __rule_begin = __next.first;
    __rule       = __next.second;
    __next       = __next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
  }

  // There is no next rule; the current rule remains active until the end of
  // the continuation.
  return __sys_info{
      sys_info{std::max(__continuation_begin, __rule_begin),
               __continuation_end(__rule->__save.__time),
               __continuation.__stdoff + __rule->__save.__time,
               chrono::duration_cast<minutes>(__rule->__save.__time),
               chrono::__format(__continuation, __rule->__letters, __rule->__save.__time)},
      true};
}
 671
 672[[nodiscard]] static __sys_info_result __get_sys_info_basic(
 673    sys_seconds __time, sys_seconds __continuation_begin, const __tz::__continuation& __continuation, seconds __save) {
 674  sys_seconds __continuation_end = chrono::__until_to_sys_seconds(__continuation);
 675  return __sys_info{
 676      sys_info{__continuation_begin,
 677               __continuation_end,
 678               __continuation.__stdoff + __save,
 679               chrono::duration_cast<minutes>(__save),
 680               chrono::__format(__continuation, __continuation.__format, __save)},
 681      true};
 682}
 683
 684[[nodiscard]] static __sys_info_result
 685__get_sys_info(sys_seconds __time,
 686               sys_seconds __continuation_begin,
 687               const __tz::__continuation& __continuation,
 688               const __tz::__rules_storage_type& __rules_db) {
 689  return std::visit(
 690      [&](const auto& __value) {
 691        using _Tp = decay_t<decltype(__value)>;
 692        if constexpr (same_as<_Tp, std::string>)
 693          return chrono::__get_sys_info_rule(
 694              __time, __continuation_begin, __continuation, __get_rules(__rules_db, __value));
 695        else if constexpr (same_as<_Tp, monostate>)
 696          return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, chrono::seconds(0));
 697        else if constexpr (same_as<_Tp, __tz::__save>)
 698          return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, __value.__time);
 699        else
 700          static_assert(false);
 701
 702        std::__libcpp_unreachable();
 703      },
 704      __continuation.__rules);
 705}
 706
// The transition from one continuation to the next continuation may result in
// two consecutive continuations with the same "offset" information.
 709// [time.zone.info.sys]/3
 710//   The begin and end data members indicate that, for the associated time_zone
 711//   and time_point, the offset and abbrev are in effect in the range
 712//   [begin, end). This information can be used to efficiently iterate the
 713//   transitions of a time_zone.
 714//
// Note that this wording does not consider a change in the SAVE field to
// result in a different sys_info, whereas zdump does consider this different.
//   LWG XXXX The sys_info range should be affected by save
// aligns the behaviour of the Standard with zdump. The function below follows
// zdump and also compares the save field.
 719//
 720// Iff the "offsets" are the same '__current.__end' is replaced with
 721// '__next.__end', which effectively merges the two objects in one object. The
 722// function returns true if a merge occurred.
 723[[nodiscard]] bool __merge_continuation(sys_info& __current, const sys_info& __next) {
 724  if (__current.end != __next.begin)
 725    return false;
 726
 727  if (__current.offset != __next.offset || __current.abbrev != __next.abbrev || __current.save != __next.save)
 728    return false;
 729
 730  __current.end = __next.end;
 731  return true;
 732}
 733
 734//===----------------------------------------------------------------------===//
 735//                           Public API
 736//===----------------------------------------------------------------------===//
 737
 738[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI time_zone time_zone::__create(unique_ptr<time_zone::__impl>&& __p) {
 739  _LIBCPP_ASSERT_NON_NULL(__p != nullptr, "initialized time_zone without a valid pimpl object");
 740  time_zone result;
 741  result.__impl_ = std::move(__p);
 742  return result;
 743}
 744
 745_LIBCPP_EXPORTED_FROM_ABI time_zone::~time_zone() = default; // Defaulted out of line, in this source file.
 746
 747[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI string_view time_zone::__name() const noexcept { return __impl_->__name(); }
 748
 749[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI sys_info
 750time_zone::__get_info(sys_seconds __time) const {
      // Returns the sys_info whose range [begin, end) contains __time. When the
      // continuations do not yield such a range std::__throw_runtime_error is
      // called with "tzdb: corrupt db".
 751  optional<sys_info> __result;
 752  bool __valid_result = false; // true iff __result.has_value() is true and
 753                               // __result->begin <= __time < __result->end is true.
      // The __can_merge value that belongs to the entry last stored in or merged
      // into __result.
 754  bool __can_merge                 = false;
      // The begin time handed to the next continuation. It is updated after every
      // continuation that has been processed.
 755  sys_seconds __continuation_begin = sys_seconds::min();
 756  // Iterates over the Zone entry and its continuations. Internally the Zone
 757  // entry is split in a Zone information and the first continuation. The last
 758  // continuation has no UNTIL field. This means the loop should always find a
 759  // continuation.
 760  //
 761  // For more information on background of zone information please consult the
 762  // following information
 763  //   [zic manual](https://www.man7.org/linux/man-pages/man8/zic.8.html)
 764  //   [tz source info](https://data.iana.org/time-zones/tz-how-to.html)
 765  //   On POSIX systems the zdump tool can be useful:
 766  //     zdump -v Asia/Hong_Kong
 767  //   Gives all transitions in the Hong Kong time zone.
 768  //
 769  // During iteration the result for the current continuation is returned. If
 770  // no continuation is applicable it will return the end time as "error". When
 771  // two continuations are contiguous and contain the "same" information these
 772  // ranges are merged as one range.
 773  // The merging requires keeping any result that occurs before __time,
 774  // likewise when a valid result is found the algorithm needs to test the next
 775  // continuation to see whether it can be merged. For example, Africa/Ceuta
 776  // Continuations
 777  //  0 s WE%sT 1929                   (C1)
 778  //  0 - WET 1967                     (C2)
 779  //  0 Sp WE%sT 1984 Mar 16           (C3)
 780  //
 781  // Rules
 782  //  R s 1926 1929 - O Sa>=1 24s 0 -  (R1)
 783  //
 784  //  R Sp 1967 o - Jun 3 12 1 S       (R2)
 785  //
 786  // The rule R1 is the last rule used in C1. The rule R2 is the first rule in
 787  // C3. Since R2 is the first rule this means when a continuation uses this
 788  // rule its value prior to R2 will be SAVE 0 LETTERS of the first entry with a
 789  // SAVE of 0, in this case WET.
 790  // This gives the following changes in the information.
 791  //   1928-10-07 00:00:00 C1 R1 becomes active: offset 0 save 0 abbrev WET
 792  //   1929-01-01 00:00:00 C2    becomes active: offset 0 save 0 abbrev WET
 793  //   1967-01-01 00:00:00 C3    becomes active: offset 0 save 0 abbrev WET
 794  //   1967-06-03 12:00:00 C3 R2 becomes active: offset 0 save 1 abbrev WEST
 795  //
 796  // The first 3 entries are contiguous and contain the same information, this
 797  // means the period [1928-10-07 00:00:00, 1967-06-03 12:00:00) should be
 798  // returned in one sys_info object.
 799
 800  const auto& __continuations                  = __impl_->__continuations();
 801  const __tz::__rules_storage_type& __rules_db = __impl_->__rules_db();
 802  for (auto __it = __continuations.begin(); __it != __continuations.end(); ++__it) {
 803    const auto& __continuation   = *__it;
 804    __sys_info_result __sys_info = chrono::__get_sys_info(__time, __continuation_begin, __continuation, __rules_db);
 805
 806    if (__sys_info) {
 807      _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
 808          __sys_info->__info.begin < __sys_info->__info.end, "invalid sys_info range");
 809
 810      // Filters out dummy entries
 811      // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
 812      // ...
 813      // -4 A -04/-03 2000 Mar 3 (C1)
 814      // -3 A -03/-02            (C2)
 815      //
 816      // ...
 817      // R A 2000 o - Mar 3 0 0 -
 818      // R A 2007 o - D 30 0 1 -
 819      // ...
 820      //
 821      // This results in an entry
 822      //   [2000-03-03 03:00:00, 2000-03-03 04:00:00) -10800s 60min -03
 823      // for [C1 & R1, C1 & R2) which due to the end of the continuation is a
 824      // one hour "sys_info". Instead the entry should be ignored and replaced
 825      // by [C2 & R1, C2 & R2) which is the proper range
 826      //   [2000-03-03 03:00:00, 2007-12-30 03:00:00) -02:00:00 60min -02
 827
          // A result is treated as such a dummy entry when the continuation uses
          // a named rule, the result can be merged, and it spans less than 12h.
          // The next continuation then starts at the begin of the skipped entry.
 828      if (std::holds_alternative<string>(__continuation.__rules) && __sys_info->__can_merge &&
 829          __sys_info->__info.begin + 12h > __sys_info->__info.end) {
 830        __continuation_begin = __sys_info->__info.begin;
 831        continue;
 832      }
 833
 834      if (!__result) {
 835        // First entry found, always keep it.
 836        __result = __sys_info->__info;
 837
 838        __valid_result = __time >= __result->begin && __time < __result->end;
 839        __can_merge    = __sys_info->__can_merge;
 840      } else if (__can_merge && chrono::__merge_continuation(*__result, __sys_info->__info)) {
 841        // The results are merged, update the result state. This may
 842        // "overwrite" a valid sys_info object with another valid sys_info
 843        // object.
 844        __valid_result = __time >= __result->begin && __time < __result->end;
 845        __can_merge    = __sys_info->__can_merge;
 846      } else {
 847        // Here things get interesting:
 848        // For example, America/Argentina/San_Luis
 849        //
 850        //   -3 A -03/-02 2008 Ja 21           (C1)
 851        //   -4 Sa -04/-03 2009 O 11           (C2)
 852        //
 853        //   R A 2007 o - D 30 0 1 -           (R1)
 854        //
 855        //   R Sa 2007 2008 - O Su>=8 0 1 -    (R2)
 856        //
 857        // Based on C1 & R1 the end time of C1 is 2008-01-21 03:00:00
 858        // Based on C2 & R2 the end time of C1 is 2008-01-21 02:00:00
 859        // In this case the earlier time is the real time of the transition.
 860        // However the algorithm used gives 2008-01-21 03:00:00.
 861        //
 862        // So we need to calculate the previous UNTIL in the current context and
 863        // see whether it's earlier.
 864
 865        // The results could not be merged.
 866        // - When we have a valid result that result is the final result.
 867        // - Otherwise the result we had is before __time and the result we got
 868        //   is at a later time (possibly valid). This result is always better
 869        //   than the previous result.
 870        if (__valid_result) {
 871          return *__result;
 872        } else {
 873          _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
 874              __it != __continuations.begin(), "the first rule should always seed the result");
 875          const auto& __last = *(__it - 1);
 876          if (std::holds_alternative<string>(__last.__rules)) {
 877            // Europe/Berlin
 878            // 1 c CE%sT 1945 May 24 2          (C1)
 879            // 1 So CE%sT 1946                  (C2)
 880            //
 881            // R c 1944 1945 - Ap M>=1 2s 1 S   (R1)
 882            //
 883            // R So 1945 o - May 24 2 2 M       (R2)
 884            //
 885            // When C2 becomes active the time would be before the first rule R2,
 886            // giving a 1 hour sys_info. This is not valid and the results need
 887            // merging.
 888
 889            if (__result->end != __sys_info->__info.begin) {
 890              // When the UTC gap between the rules is due to the change of
 891              // offsets adjust the new time to remove the gap.
 892              sys_seconds __end   = __result->end - __result->offset;
 893              sys_seconds __begin = __sys_info->__info.begin - __sys_info->__info.offset;
 894              if (__end == __begin) {
 895                __sys_info->__info.begin = __result->end;
 896              }
 897            }
 898          }
 899
 900          __result       = __sys_info->__info;
 901          __valid_result = __time >= __result->begin && __time < __result->end;
 902          __can_merge    = __sys_info->__can_merge;
 903        }
 904      }
          // The next continuation starts where the current result ends.
 905      __continuation_begin = __result->end;
 906    } else {
          // This continuation gave no result; the error value is used as the
          // begin time of the next continuation.
 907      __continuation_begin = __sys_info.error();
 908    }
 909  }
      // All continuations are processed; only a result whose range contains
      // __time may be returned.
 910  if (__valid_result)
 911    return *__result;
 912
 913  std::__throw_runtime_error("tzdb: corrupt db");
 914}
 915
 916// Is the "__local_time" present in "__first" and "__second". If so the
 917// local_info has an ambiguous result.
 918[[nodiscard]] static bool
 919__is_ambiguous(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
 920  std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
 921  std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
 922
 923  return __local_time < __end_first && __local_time >= __begin_second;
 924}
 925
 926// Determines the result of the "__local_time". This expects the object
 927// "__first" to be earlier in time than "__second".
 928[[nodiscard]] static local_info
 929__get_info(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
 930  std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
 931  std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
 932
 933  if (__local_time < __end_first) {
 934    if (__local_time >= __begin_second)
 935      // |--------|
 936      //        |------|
 937      //         ^
 938      return {local_info::ambiguous, __first, __second};
 939
 940    // |--------|
 941    //          |------|
 942    //         ^
 943    return {local_info::unique, __first, sys_info{}};
 944  }
 945
 946  if (__local_time < __begin_second)
 947    // |--------|
 948    //             |------|
 949    //           ^
 950    return {local_info::nonexistent, __first, __second};
 951
 952  // |--------|
 953  //          |------|
 954  //           ^
 955  return {local_info::unique, __second, sys_info{}};
 956}
 957
 958[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI local_info
 959time_zone::__get_info(local_seconds __local_time) const {
 960  seconds __local_seconds = __local_time.time_since_epoch();
 961
 962  /* An example of a typical year with a DST switch displayed in local time.
 963   *
 964   * At the first of April the time goes forward one hour. This means the
 965   * time marked with ~~ is not a valid local time. This is represented by the
 966   * nonexistent value in local_info.result.
 967   *
 968   * At the first of November the time goes backward one hour. This means the
 969   * time marked with ^^ happens twice. This is represented by the ambiguous
 970   * value in local_info.result.
 971   *
 972   * 2020.11.01                  2021.04.01              2021.11.01
 973   * offset +05                  offset +05              offset +05
 974   * save    0s                  save    1h              save    0s
 975   * |------------//----------|
 976   *                             |---------//--------------|
 977   *                                                    |-------------
 978   *                           ~~                        ^^
 979   *
 980   * These shifts can happen due to changes in the current time zone for a
 981   * location. For example, Indian/Kerguelen switched only once. In 1950 from an
 982   * offset of 0 hours to an offset of +05 hours.
 983   *
 984   * During all these shifts the UTC time will not have gaps.
 985   */
 986
 987  // The code needs to determine the system time for the local time. There is no
 988  // information available. Assume the offset between system time and local time
 989  // is 0s. This gives an initial estimate.
 990  sys_seconds __guess{__local_seconds};
 991  sys_info __info = __get_info(__guess);
 992
 993  // At this point the offset can be used to determine an estimate for the local
 994  // time. Before doing that, determine the offset and validate whether the
 995  // local time is the range [chrono::local_seconds::min(),
 996  // chrono::local_seconds::max()).
 997  if (__local_seconds < 0s && __info.offset > 0s)
 998    if (__local_seconds - chrono::local_seconds::min().time_since_epoch() < __info.offset)
 999      return {-1, __info, {}};
1000
1001  if (__local_seconds > 0s && __info.offset < 0s)
1002    if (chrono::local_seconds::max().time_since_epoch() - __local_seconds < -__info.offset)
1003      return {-2, __info, {}};
1004
1005  // Based on the information found in the sys_info, the local time can be
1006  // converted to a system time. This resulting time can be in the following
1007  // locations of the sys_info:
1008  //
1009  //                             |---------//--------------|
1010  //                           1   2.1      2.2         2.3  3
1011  //
1012  // 1. The estimate is before the returned sys_info object.
1013  //    The result is either non-existent or unique in the previous sys_info.
1014  // 2. The estimate is in the sys_info object
1015  //    - If the sys_info begin is not sys_seconds::min(), then it might be at
1016  //      2.1 and could be ambiguous with the previous or unique.
1017  //    - If sys_info end is not sys_seconds::max(), then it might be at 2.3
1018  //      and could be ambiguous with the next or unique.
1019  //    - Else it is at 2.2 and always unique. This case happens when a
1020  //      time zone has no transitions. For example, UTC or GMT+1.
1021  // 3. The estimate is after the returned sys_info object.
1022  //    The result is either non-existent or unique in the next sys_info.
1023  //
1024  // There is no specification where the "middle" starts. Similar issues can
1025  // happen when sys_info objects are "short", then "unique in the next" could
1026  // become "ambiguous in the next and the one following". Theoretically there
1027  // is the option of the following time-line
1028  //
1029  // |------------|
1030  //           |----|
1031  //       |-----------------|
1032  //
1033  // However the local_info object only has 2 sys_info objects, so this option
1034  // is not tested.
1035
1036  sys_seconds __sys_time{__local_seconds - __info.offset};
1037  if (__sys_time < __info.begin)
1038    // Case 1 before __info
1039    return chrono::__get_info(__local_time, __get_info(__info.begin - 1s), __info);
1040
1041  if (__sys_time >= __info.end)
1042    // Case 3 after __info
1043    return chrono::__get_info(__local_time, __info, __get_info(__info.end));
1044
1045  // Case 2 in __info
1046  if (__info.begin != sys_seconds::min()) {
1047    // Case 2.1 Not at the beginning, when not ambiguous the result should test
1048    // case 2.3.
1049    sys_info __prev = __get_info(__info.begin - 1s);
1050    if (__is_ambiguous(__local_time, __prev, __info))
1051      return {local_info::ambiguous, __prev, __info};
1052  }
1053
1054  if (__info.end == sys_seconds::max())
1055    // At the end so it's case 2.2
1056    return {local_info::unique, __info, sys_info{}};
1057
1058  // This tests case 2.2 or case 2.3.
1059  return chrono::__get_info(__local_time, __info, __get_info(__info.end));
1060}
1061
1062} // namespace chrono
1063
1064_LIBCPP_END_NAMESPACE_STD