zig/lib/std/compress/flate.zig at master

  1const std = @import("../std.zig");
  2
  3/// When compressing and decompressing, the provided buffer is used as the
  4/// history window, so it must be at least this size.
  5pub const max_window_len = history_len * 2;
  6
  7pub const history_len = 32768;
  8
  9/// Deflate is a lossless data compression file format that uses a combination
 10/// of LZ77 and Huffman coding.
 11pub const Compress = @import("flate/Compress.zig");
 12
 13/// Inflate is the decoding process that consumes a Deflate bitstream and
 14/// produces the original full-size data.
 15pub const Decompress = @import("flate/Decompress.zig");
 16
/// Container of the deflate bit stream body. A container adds a header before
/// the deflate bit stream and a footer after it. It can be gzip, zlib or raw
/// (no header, no footer, just the raw bit stream).
///
/// Zlib format is defined in RFC 1950. The header has 2 bytes and the footer
/// is a 4 byte Adler-32 checksum.
///
/// Gzip format is defined in RFC 1952. The header has 10+ bytes and the footer
/// is a 4 byte CRC-32 checksum followed by 4 bytes of uncompressed data length.
///
/// RFC 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
/// RFC 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
 29pub const Container = enum {
 30    raw, // no header or footer
 31    gzip, // gzip header and footer
 32    zlib, // zlib header and footer
 33
 34    pub fn size(w: Container) usize {
 35        return headerSize(w) + footerSize(w);
 36    }
 37
 38    pub fn headerSize(w: Container) usize {
 39        return header(w).len;
 40    }
 41
 42    pub fn footerSize(w: Container) usize {
 43        return switch (w) {
 44            .gzip => 8,
 45            .zlib => 4,
 46            .raw => 0,
 47        };
 48    }
 49
 50    pub const list = [_]Container{ .raw, .gzip, .zlib };
 51
 52    pub const Error = error{
 53        BadGzipHeader,
 54        BadZlibHeader,
 55        WrongGzipChecksum,
 56        WrongGzipSize,
 57        WrongZlibChecksum,
 58    };
 59
 60    pub fn header(container: Container) []const u8 {
 61        return switch (container) {
 62            // GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5):
 63            //  - ID1 (IDentification 1), always 0x1f
 64            //  - ID2 (IDentification 2), always 0x8b
 65            //  - CM (Compression Method), always 8 = deflate
 66            //  - FLG (Flags), all set to 0
 67            //  - 4 bytes, MTIME (Modification time), not used, all set to zero
 68            //  - XFL (eXtra FLags), all set to zero
 69            //  - OS (Operating System), 03 = Unix
 70            .gzip => &[_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 },
 71            // ZLIB has a two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4):
 72            // 1st byte:
 73            //  - First four bits is the CINFO (compression info), which is 7 for the default deflate window size.
 74            //  - The next four bits is the CM (compression method), which is 8 for deflate.
 75            // 2nd byte:
 76            //  - Two bits is the FLEVEL (compression level). Values are: 0=fastest, 1=fast, 2=default, 3=best.
 77            //  - The next bit, FDICT, is set if a dictionary is given.
 78            //  - The final five FCHECK bits form a mod-31 checksum.
 79            //
 80            // CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
 81            .zlib => &[_]u8{ 0x78, 0b10_0_11100 },
 82            .raw => &.{},
 83        };
 84    }
 85
 86    pub const Hasher = union(Container) {
 87        raw: void,
 88        gzip: struct {
 89            crc: std.hash.Crc32 = .init(),
 90            count: u32 = 0,
 91        },
 92        zlib: std.hash.Adler32,
 93
 94        pub fn init(containter: Container) Hasher {
 95            return switch (containter) {
 96                .gzip => .{ .gzip = .{} },
 97                .zlib => .{ .zlib = .{} },
 98                .raw => .raw,
 99            };
100        }
101
102        pub fn container(h: Hasher) Container {
103            return h;
104        }
105
106        pub fn update(h: *Hasher, buf: []const u8) void {
107            switch (h.*) {
108                .raw => {},
109                .gzip => |*gzip| {
110                    gzip.crc.update(buf);
111                    gzip.count +%= @truncate(buf.len);
112                },
113                .zlib => |*zlib| {
114                    zlib.update(buf);
115                },
116            }
117        }
118
119        pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
120            switch (hasher.*) {
121                .gzip => |*gzip| {
122                    // GZIP 8 bytes footer
123                    //  - 4 bytes, CRC32 (CRC-32)
124                    //  - 4 bytes, ISIZE (Input SIZE) - size of the original
125                    //  (uncompressed) input data modulo 2^32
126                    try writer.writeInt(u32, gzip.crc.final(), .little);
127                    try writer.writeInt(u32, gzip.count, .little);
128                },
129                .zlib => |*zlib| {
130                    // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
131                    // 4 bytes of ADLER32 (Adler-32 checksum)
132                    // Checksum value of the uncompressed data (excluding any
133                    // dictionary data) computed according to Adler-32
134                    // algorithm.
135                    try writer.writeInt(u32, zlib.adler, .big);
136                },
137                .raw => {},
138            }
139        }
140    };
141
142    pub const Metadata = union(Container) {
143        raw: void,
144        gzip: struct {
145            crc: u32 = 0,
146            count: u32 = 0,
147        },
148        zlib: struct {
149            adler: u32 = 0,
150        },
151
152        pub fn init(containter: Container) Metadata {
153            return switch (containter) {
154                .gzip => .{ .gzip = .{} },
155                .zlib => .{ .zlib = .{} },
156                .raw => .raw,
157            };
158        }
159
160        pub fn container(m: Metadata) Container {
161            return m;
162        }
163    };
164};
165
test {
    // Reference both implementation files from a test block so that they are
    // analyzed when this file's tests are built.
    _ = Decompress;
    _ = Compress;
}