master
1const std = @import("../std.zig");
2
/// Base length of the history window: 32768 (2^15). `max_window_len` is
/// defined as twice this value.
pub const history_len = 32768;

/// When compressing and decompressing, the provided buffer is used as the
/// history window, so it must be at least this size.
pub const max_window_len = 2 * history_len;
8
9/// Deflate is a lossless data compression file format that uses a combination
10/// of LZ77 and Huffman coding.
11pub const Compress = @import("flate/Compress.zig");
12
13/// Inflate is the decoding process that consumes a Deflate bitstream and
14/// produces the original full-size data.
15pub const Decompress = @import("flate/Decompress.zig");
16
/// Framing placed around a deflate bit stream: a header is emitted before the
/// stream and a footer after it. The framing can be gzip, zlib or raw (bare
/// bit stream with neither header nor footer).
///
/// Zlib format is defined in rfc 1950. The header has 2 bytes and the footer
/// is a 4 byte Adler-32 checksum.
///
/// Gzip format is defined in rfc 1952. The header has 10+ bytes and the footer
/// is a 4 byte CRC-32 checksum followed by 4 bytes of uncompressed data length.
///
/// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
/// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
pub const Container = enum {
    /// Bare deflate stream: no header, no footer.
    raw,
    /// Deflate stream wrapped in a gzip header and footer.
    gzip,
    /// Deflate stream wrapped in a zlib header and footer.
    zlib,

    /// Every container kind, in declaration order.
    pub const list = [_]Container{ .raw, .gzip, .zlib };

    /// Header, checksum and size failures for the gzip and zlib containers.
    pub const Error = error{
        BadGzipHeader,
        BadZlibHeader,
        WrongGzipChecksum,
        WrongGzipSize,
        WrongZlibChecksum,
    };

    /// Returns the fixed header bytes written before the deflate stream.
    /// The slice is empty for `.raw`.
    pub fn header(container: Container) []const u8 {
        return switch (container) {
            .raw => &.{},
            // ZLIB two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4).
            // 1st byte, 0x78:
            //   - CINFO (compression info) = 7, the default deflate window size
            //   - CM (compression method) = 8, deflate
            // 2nd byte, 0x9c = 0b10_0_11100:
            //   - FLEVEL (compression level) = 0b10, default
            //     (0=fastest, 1=fast, 2=default, 3=best)
            //   - FDICT = 0, no preset dictionary
            //   - FCHECK = 0b11100, the five bits forming the mod-31 checksum
            .zlib => &[_]u8{ 0x78, 0x9c },
            // GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5).
            //   - ID1 (IDentification 1), always 0x1f
            //   - ID2 (IDentification 2), always 0x8b
            //   - CM (Compression Method), always 8 = deflate
            //   - FLG (Flags), all set to 0
            //   - MTIME (Modification time), 4 bytes, not used, all zero
            //   - XFL (eXtra FLags), all set to zero
            //   - OS (Operating System), 03 = Unix
            .gzip => &[_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 },
        };
    }

    /// Number of header bytes, i.e. the length of `header`.
    pub fn headerSize(w: Container) usize {
        return w.header().len;
    }

    /// Number of footer bytes: 8 for gzip, 4 for zlib, 0 for raw.
    pub fn footerSize(w: Container) usize {
        return switch (w) {
            .raw => 0,
            .zlib => 4,
            .gzip => 8,
        };
    }

    /// Total framing overhead in bytes: header plus footer.
    pub fn size(w: Container) usize {
        return w.headerSize() + w.footerSize();
    }

    /// Running checksum state for one container kind: CRC-32 plus a byte
    /// count for gzip, Adler-32 for zlib, nothing for raw.
    pub const Hasher = union(Container) {
        raw: void,
        gzip: struct {
            crc: std.hash.Crc32 = .init(),
            count: u32 = 0,
        },
        zlib: std.hash.Adler32,

        /// Creates a hasher in its initial state for the given container kind.
        pub fn init(kind: Container) Hasher {
            return switch (kind) {
                .raw => .raw,
                .gzip => .{ .gzip = .{} },
                .zlib => .{ .zlib = .{} },
            };
        }

        /// Returns the container kind this hasher was created for.
        pub fn container(h: Hasher) Container {
            return @as(Container, h);
        }

        /// Feeds `buf` into the checksum. For gzip the byte count is also
        /// advanced, wrapping modulo 2^32. Does nothing for raw.
        pub fn update(h: *Hasher, buf: []const u8) void {
            switch (h.*) {
                .raw => {},
                .zlib => |*sum| sum.update(buf),
                .gzip => |*state| {
                    // Only the low 32 bits of the length are tracked.
                    const added: u32 = @truncate(buf.len);
                    state.count +%= added;
                    state.crc.update(buf);
                },
            }
        }

        /// Writes the container footer derived from the current hasher state
        /// to `writer`. Nothing is written for raw. Errors from `writer` are
        /// returned to the caller.
        pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
            switch (hasher.*) {
                .raw => {},
                .zlib => |*sum| {
                    // ZLIB (RFC 1950) footer: 4 bytes of ADLER32, the Adler-32
                    // checksum of the uncompressed data (excluding any
                    // dictionary data). Big-endian, unlike GZIP (RFC 1952).
                    try writer.writeInt(u32, sum.adler, .big);
                },
                .gzip => |*state| {
                    // GZIP 8 byte footer, both fields little-endian:
                    //   - CRC32, 4 bytes
                    //   - ISIZE (Input SIZE), 4 bytes: size of the original
                    //     (uncompressed) input data modulo 2^32
                    const checksum = state.crc.final();
                    const input_size = state.count;
                    try writer.writeInt(u32, checksum, .little);
                    try writer.writeInt(u32, input_size, .little);
                },
            }
        }
    };

    /// Plain footer values per container kind: CRC-32 and count for gzip,
    /// Adler-32 for zlib, nothing for raw. All values default to zero.
    /// NOTE(review): presumably filled in by the decompressor when it parses
    /// a footer; confirm against `Decompress`.
    pub const Metadata = union(Container) {
        raw: void,
        gzip: struct {
            crc: u32 = 0,
            count: u32 = 0,
        },
        zlib: struct {
            adler: u32 = 0,
        },

        /// Creates zero-initialized metadata for the given container kind.
        pub fn init(kind: Container) Metadata {
            return switch (kind) {
                .raw => .raw,
                .gzip => .{ .gzip = .{} },
                .zlib => .{ .zlib = .{} },
            };
        }

        /// Returns the container kind this metadata belongs to.
        pub fn container(m: Metadata) Container {
            return @as(Container, m);
        }
    };
};
165
test {
    // Reference both submodules so the tests they contain are picked up when
    // this file is tested.
    _ = Decompress;
    _ = Compress;
}