//! The Time Zone Information Format (TZif)
//! https://datatracker.ietf.org/doc/html/rfc8536
  3
  4const builtin = @import("builtin");
  5
  6const std = @import("std.zig");
  7const Reader = std.Io.Reader;
  8const Allocator = std.mem.Allocator;
  9
 10pub const Transition = struct {
 11    ts: i64,
 12    timetype: *Timetype,
 13};
 14
 15pub const Timetype = struct {
 16    offset: i32,
 17    flags: u8,
 18    name_data: [6:0]u8,
 19
 20    pub fn name(self: *const Timetype) [:0]const u8 {
 21        return std.mem.sliceTo(self.name_data[0..], 0);
 22    }
 23
 24    pub fn isDst(self: Timetype) bool {
 25        return (self.flags & 0x01) > 0;
 26    }
 27
 28    pub fn standardTimeIndicator(self: Timetype) bool {
 29        return (self.flags & 0x02) > 0;
 30    }
 31
 32    pub fn utIndicator(self: Timetype) bool {
 33        return (self.flags & 0x04) > 0;
 34    }
 35};
 36
 37pub const Leapsecond = struct {
 38    occurrence: i48,
 39    correction: i16,
 40};
 41
 42pub const Tz = struct {
 43    allocator: Allocator,
 44    transitions: []const Transition,
 45    timetypes: []const Timetype,
 46    leapseconds: []const Leapsecond,
 47    footer: ?[]const u8,
 48
 49    const Header = extern struct {
 50        magic: [4]u8,
 51        version: u8,
 52        reserved: [15]u8,
 53        counts: extern struct {
 54            isutcnt: u32,
 55            isstdcnt: u32,
 56            leapcnt: u32,
 57            timecnt: u32,
 58            typecnt: u32,
 59            charcnt: u32,
 60        },
 61    };
 62
 63    pub fn parse(allocator: Allocator, reader: *Reader) !Tz {
 64        const legacy_header = try reader.takeStruct(Header, .big);
 65        if (!std.mem.eql(u8, &legacy_header.magic, "TZif")) return error.BadHeader;
 66        if (legacy_header.version != 0 and legacy_header.version != '2' and legacy_header.version != '3')
 67            return error.BadVersion;
 68
 69        if (legacy_header.version == 0)
 70            return parseBlock(allocator, reader, legacy_header, true);
 71
 72        // If the format is modern, just skip over the legacy data
 73        const skip_n = legacy_header.counts.timecnt * 5 +
 74            legacy_header.counts.typecnt * 6 +
 75            legacy_header.counts.charcnt + legacy_header.counts.leapcnt * 8 +
 76            legacy_header.counts.isstdcnt + legacy_header.counts.isutcnt;
 77        try reader.discardAll(skip_n);
 78
 79        var header = try reader.takeStruct(Header, .big);
 80        if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader;
 81        if (header.version != '2' and header.version != '3') return error.BadVersion;
 82
 83        return parseBlock(allocator, reader, header, false);
 84    }
 85
 86    fn parseBlock(allocator: Allocator, reader: *Reader, header: Header, legacy: bool) !Tz {
 87        if (header.counts.isstdcnt != 0 and header.counts.isstdcnt != header.counts.typecnt) return error.Malformed; // rfc8536: isstdcnt [...] MUST either be zero or equal to "typecnt"
 88        if (header.counts.isutcnt != 0 and header.counts.isutcnt != header.counts.typecnt) return error.Malformed; // rfc8536: isutcnt [...] MUST either be zero or equal to "typecnt"
 89        if (header.counts.typecnt == 0) return error.Malformed; // rfc8536: typecnt [...] MUST NOT be zero
 90        if (header.counts.charcnt == 0) return error.Malformed; // rfc8536: charcnt [...] MUST NOT be zero
 91        if (header.counts.charcnt > 256 + 6) return error.Malformed; // Not explicitly banned by rfc8536 but nonsensical
 92
 93        var leapseconds = try allocator.alloc(Leapsecond, header.counts.leapcnt);
 94        errdefer allocator.free(leapseconds);
 95        var transitions = try allocator.alloc(Transition, header.counts.timecnt);
 96        errdefer allocator.free(transitions);
 97        var timetypes = try allocator.alloc(Timetype, header.counts.typecnt);
 98        errdefer allocator.free(timetypes);
 99
100        // Parse transition types
101        var i: usize = 0;
102        while (i < header.counts.timecnt) : (i += 1) {
103            transitions[i].ts = if (legacy) try reader.takeInt(i32, .big) else try reader.takeInt(i64, .big);
104        }
105
106        i = 0;
107        while (i < header.counts.timecnt) : (i += 1) {
108            const tt = try reader.takeByte();
109            if (tt >= timetypes.len) return error.Malformed; // rfc8536: Each type index MUST be in the range [0, "typecnt" - 1]
110            transitions[i].timetype = &timetypes[tt];
111        }
112
113        // Parse time types
114        i = 0;
115        while (i < header.counts.typecnt) : (i += 1) {
116            const offset = try reader.takeInt(i32, .big);
117            if (offset < -2147483648) return error.Malformed; // rfc8536: utoff [...] MUST NOT be -2**31
118            const dst = try reader.takeByte();
119            if (dst != 0 and dst != 1) return error.Malformed; // rfc8536: (is)dst [...] The value MUST be 0 or 1.
120            const idx = try reader.takeByte();
121            if (idx > header.counts.charcnt - 1) return error.Malformed; // rfc8536: (desig)idx [...] Each index MUST be in the range [0, "charcnt" - 1]
122            timetypes[i] = .{
123                .offset = offset,
124                .flags = dst,
125                .name_data = undefined,
126            };
127
128            // Temporarily cache idx in name_data to be processed after we've read the designator names below
129            timetypes[i].name_data[0] = idx;
130        }
131
132        var designators_data: [256 + 6]u8 = undefined;
133        try reader.readSliceAll(designators_data[0..header.counts.charcnt]);
134        const designators = designators_data[0..header.counts.charcnt];
135        if (designators[designators.len - 1] != 0) return error.Malformed; // rfc8536: charcnt [...] includes the trailing NUL (0x00) octet
136
137        // Iterate through the timetypes again, setting the designator names
138        for (timetypes) |*tt| {
139            const name = std.mem.sliceTo(designators[tt.name_data[0]..], 0);
140            // We are mandating the "SHOULD" 6-character limit so we can pack the struct better, and to conform to POSIX.
141            if (name.len > 6) return error.Malformed; // rfc8536: Time zone designations SHOULD consist of at least three (3) and no more than six (6) ASCII characters.
142            @memcpy(tt.name_data[0..name.len], name);
143            tt.name_data[name.len] = 0;
144        }
145
146        // Parse leap seconds
147        i = 0;
148        while (i < header.counts.leapcnt) : (i += 1) {
149            const occur: i64 = if (legacy) try reader.takeInt(i32, .big) else try reader.takeInt(i64, .big);
150            if (occur < 0) return error.Malformed; // rfc8536: occur [...] MUST be nonnegative
151            if (i > 0 and leapseconds[i - 1].occurrence + 2419199 > occur) return error.Malformed; // rfc8536: occur [...] each later value MUST be at least 2419199 greater than the previous value
152            if (occur > std.math.maxInt(i48)) return error.Malformed; // Unreasonably far into the future
153
154            const corr = try reader.takeInt(i32, .big);
155            if (i == 0 and corr != -1 and corr != 1) return error.Malformed; // rfc8536: The correction value in the first leap-second record, if present, MUST be either one (1) or minus one (-1)
156            if (i > 0 and leapseconds[i - 1].correction != corr + 1 and leapseconds[i - 1].correction != corr - 1) return error.Malformed; // rfc8536: The correction values in adjacent leap-second records MUST differ by exactly one (1)
157            if (corr > std.math.maxInt(i16)) return error.Malformed; // Unreasonably large correction
158
159            leapseconds[i] = .{
160                .occurrence = @as(i48, @intCast(occur)),
161                .correction = @as(i16, @intCast(corr)),
162            };
163        }
164
165        // Parse standard/wall indicators
166        i = 0;
167        while (i < header.counts.isstdcnt) : (i += 1) {
168            const stdtime = try reader.takeByte();
169            if (stdtime == 1) {
170                timetypes[i].flags |= 0x02;
171            }
172        }
173
174        // Parse UT/local indicators
175        i = 0;
176        while (i < header.counts.isutcnt) : (i += 1) {
177            const ut = try reader.takeByte();
178            if (ut == 1) {
179                timetypes[i].flags |= 0x04;
180                if (!timetypes[i].standardTimeIndicator()) return error.Malformed; // rfc8536: standard/wall value MUST be one (1) if the UT/local value is one (1)
181            }
182        }
183
184        // Footer
185        var footer: ?[]u8 = null;
186        if (!legacy) {
187            if ((try reader.takeByte()) != '\n') return error.Malformed; // An rfc8536 footer must start with a newline
188            const footer_mem = reader.takeSentinel('\n') catch |err| switch (err) {
189                error.StreamTooLong => return error.OverlargeFooter, // Read more than 128 bytes, much larger than any reasonable POSIX TZ string
190                else => return err,
191            };
192            if (footer_mem.len != 0) {
193                footer = try allocator.dupe(u8, footer_mem);
194            }
195        }
196        errdefer if (footer) |ft| allocator.free(ft);
197
198        return .{
199            .allocator = allocator,
200            .transitions = transitions,
201            .timetypes = timetypes,
202            .leapseconds = leapseconds,
203            .footer = footer,
204        };
205    }
206
207    pub fn deinit(self: *Tz) void {
208        if (self.footer) |footer| {
209            self.allocator.free(footer);
210        }
211        self.allocator.free(self.leapseconds);
212        self.allocator.free(self.transitions);
213        self.allocator.free(self.timetypes);
214    }
215};
216
test "slim" {
    // Parses the embedded `tz/asia_tokyo.tzif` fixture and spot-checks the result.
    const data = @embedFile("tz/asia_tokyo.tzif");
    var in_stream: Reader = .fixed(data);

    var tz = try std.Tz.parse(std.testing.allocator, &in_stream);
    defer tz.deinit();

    // expectEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    try std.testing.expectEqual(9, tz.transitions.len);
    try std.testing.expectEqualStrings("JDT", tz.transitions[3].timetype.name());
    try std.testing.expectEqual(-620298000, tz.transitions[5].ts); // 1950-05-06 15:00:00 UTC
    try std.testing.expectEqual(567993613, tz.leapseconds[13].occurrence); // 1988-01-01 00:00:00 UTC (+23s in TAI, and +13 in the data since it doesn't store the initial 10 second offset)
}
229
test "fat" {
    // Parses the embedded `tz/antarctica_davis.tzif` fixture and spot-checks the result.
    const data = @embedFile("tz/antarctica_davis.tzif");
    var in_stream: Reader = .fixed(data);

    var tz = try std.Tz.parse(std.testing.allocator, &in_stream);
    defer tz.deinit();

    // expectEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    try std.testing.expectEqual(8, tz.transitions.len);
    try std.testing.expectEqualStrings("+05", tz.transitions[3].timetype.name());
    // 2010-03-10 20:00:00 UTC is 1268251200; the stored value is 24 s later,
    // presumably because the fixture includes leap seconds - TODO confirm.
    try std.testing.expectEqual(1268251224, tz.transitions[4].ts);
}
241
test "legacy" {
    // Parses a version 1 file, exercising the 32-bit (legacy) code path.
    // Taken from Slackware 8.0, from 2001
    const data = @embedFile("tz/europe_vatican.tzif");
    var in_stream: Reader = .fixed(data);

    var tz = try std.Tz.parse(std.testing.allocator, &in_stream);
    defer tz.deinit();

    // expectEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    try std.testing.expectEqual(170, tz.transitions.len);
    try std.testing.expectEqualStrings("CET", tz.transitions[69].timetype.name());
    try std.testing.expectEqual(1414285200, tz.transitions[123].ts); // 2014-10-26 01:00:00 UTC
}