zig/lib/std/compress/lzma2.zig at master

  1const std = @import("../std.zig");
  2const Allocator = std.mem.Allocator;
  3const ArrayList = std.ArrayList;
  4const lzma = std.compress.lzma;
  5const Writer = std.Io.Writer;
  6const Reader = std.Io.Reader;
  7
  8/// An accumulating buffer for LZ sequences
  9pub const AccumBuffer = struct {
 10    /// Buffer
 11    buf: ArrayList(u8),
 12    /// Buffer memory limit
 13    memlimit: usize,
 14    /// Total number of bytes sent through the buffer
 15    len: usize,
 16
 17    pub fn init(memlimit: usize) AccumBuffer {
 18        return .{
 19            .buf = .{},
 20            .memlimit = memlimit,
 21            .len = 0,
 22        };
 23    }
 24
 25    pub fn appendByte(self: *AccumBuffer, allocator: Allocator, byte: u8) !void {
 26        try self.buf.append(allocator, byte);
 27        self.len += 1;
 28    }
 29
 30    /// Reset the internal dictionary
 31    pub fn reset(self: *AccumBuffer, writer: *Writer) !void {
 32        try writer.writeAll(self.buf.items);
 33        self.buf.clearRetainingCapacity();
 34        self.len = 0;
 35    }
 36
 37    /// Retrieve the last byte or return a default
 38    pub fn lastOr(self: AccumBuffer, lit: u8) u8 {
 39        const buf_len = self.buf.items.len;
 40        return if (buf_len == 0)
 41            lit
 42        else
 43            self.buf.items[buf_len - 1];
 44    }
 45
 46    /// Retrieve the n-th last byte
 47    pub fn lastN(self: AccumBuffer, dist: usize) !u8 {
 48        const buf_len = self.buf.items.len;
 49        if (dist > buf_len) {
 50            return error.CorruptInput;
 51        }
 52
 53        return self.buf.items[buf_len - dist];
 54    }
 55
 56    /// Append a literal
 57    pub fn appendLiteral(
 58        self: *AccumBuffer,
 59        allocator: Allocator,
 60        lit: u8,
 61        writer: *Writer,
 62    ) !void {
 63        _ = writer;
 64        if (self.len >= self.memlimit) {
 65            return error.CorruptInput;
 66        }
 67        try self.buf.append(allocator, lit);
 68        self.len += 1;
 69    }
 70
 71    /// Fetch an LZ sequence (length, distance) from inside the buffer
 72    pub fn appendLz(
 73        self: *AccumBuffer,
 74        allocator: Allocator,
 75        len: usize,
 76        dist: usize,
 77        writer: *Writer,
 78    ) !void {
 79        _ = writer;
 80
 81        const buf_len = self.buf.items.len;
 82        if (dist > buf_len) return error.CorruptInput;
 83
 84        try self.buf.ensureUnusedCapacity(allocator, len);
 85        const buffer = self.buf.allocatedSlice();
 86        const src = buffer[buf_len - dist ..][0..len];
 87        const dst = buffer[buf_len..][0..len];
 88
 89        // This is not a @memmove; it intentionally repeats patterns caused by
 90        // iterating one byte at a time.
 91        for (dst, src) |*d, s| d.* = s;
 92
 93        self.buf.items.len = buf_len + len;
 94        self.len += len;
 95    }
 96
 97    pub fn finish(self: *AccumBuffer, writer: *Writer) !void {
 98        try writer.writeAll(self.buf.items);
 99        self.buf.clearRetainingCapacity();
100    }
101
102    pub fn deinit(self: *AccumBuffer, allocator: Allocator) void {
103        self.buf.deinit(allocator);
104        self.* = undefined;
105    }
106};
107
108pub const Decode = struct {
109    lzma_decode: lzma.Decode,
110
111    pub fn init(gpa: Allocator) !Decode {
112        return .{ .lzma_decode = try lzma.Decode.init(gpa, .{ .lc = 0, .lp = 0, .pb = 0 }) };
113    }
114
115    pub fn deinit(self: *Decode, gpa: Allocator) void {
116        self.lzma_decode.deinit(gpa);
117        self.* = undefined;
118    }
119
120    /// Returns how many compressed bytes were consumed.
121    pub fn decompress(d: *Decode, reader: *Reader, allocating: *Writer.Allocating) !u64 {
122        const gpa = allocating.allocator;
123
124        var accum = AccumBuffer.init(std.math.maxInt(usize));
125        defer accum.deinit(gpa);
126
127        var n_read: u64 = 0;
128
129        while (true) {
130            const status = try reader.takeByte();
131            n_read += 1;
132
133            switch (status) {
134                0 => break,
135                1 => n_read += try parseUncompressed(reader, allocating, &accum, true),
136                2 => n_read += try parseUncompressed(reader, allocating, &accum, false),
137                else => n_read += try d.parseLzma(reader, allocating, &accum, status),
138            }
139        }
140
141        try accum.finish(&allocating.writer);
142        return n_read;
143    }
144
145    fn parseLzma(
146        d: *Decode,
147        reader: *Reader,
148        allocating: *Writer.Allocating,
149        accum: *AccumBuffer,
150        status: u8,
151    ) !u64 {
152        if (status & 0x80 == 0) return error.CorruptInput;
153
154        const Reset = struct {
155            dict: bool,
156            state: bool,
157            props: bool,
158        };
159
160        const reset: Reset = switch ((status >> 5) & 0x3) {
161            0 => .{
162                .dict = false,
163                .state = false,
164                .props = false,
165            },
166            1 => .{
167                .dict = false,
168                .state = true,
169                .props = false,
170            },
171            2 => .{
172                .dict = false,
173                .state = true,
174                .props = true,
175            },
176            3 => .{
177                .dict = true,
178                .state = true,
179                .props = true,
180            },
181            else => unreachable,
182        };
183
184        var n_read: u64 = 0;
185
186        const unpacked_size = blk: {
187            var tmp: u64 = status & 0x1F;
188            tmp <<= 16;
189            tmp |= try reader.takeInt(u16, .big);
190            n_read += 2;
191            break :blk tmp + 1;
192        };
193
194        const packed_size = blk: {
195            const tmp: u17 = try reader.takeInt(u16, .big);
196            n_read += 2;
197            break :blk tmp + 1;
198        };
199
200        if (reset.dict) try accum.reset(&allocating.writer);
201
202        const ld = &d.lzma_decode;
203
204        if (reset.state) {
205            var new_props = ld.properties;
206
207            if (reset.props) {
208                var props = try reader.takeByte();
209                n_read += 1;
210                if (props >= 225) {
211                    return error.CorruptInput;
212                }
213
214                const lc = @as(u4, @intCast(props % 9));
215                props /= 9;
216                const lp = @as(u3, @intCast(props % 5));
217                props /= 5;
218                const pb = @as(u3, @intCast(props));
219
220                if (lc + lp > 4) {
221                    return error.CorruptInput;
222                }
223
224                new_props = .{ .lc = lc, .lp = lp, .pb = pb };
225            }
226
227            try ld.resetState(allocating.allocator, new_props);
228        }
229
230        const expected_unpacked_size = accum.len + unpacked_size;
231        const start_count = n_read;
232        var range_decoder = try lzma.RangeDecoder.initCounting(reader, &n_read);
233
234        while (true) {
235            if (accum.len >= expected_unpacked_size) break;
236            switch (try ld.process(reader, allocating, accum, &range_decoder, &n_read)) {
237                .more => continue,
238                .finished => break,
239            }
240        }
241        if (accum.len != expected_unpacked_size) return error.DecompressedSizeMismatch;
242        if (n_read - start_count != packed_size) return error.CompressedSizeMismatch;
243
244        return n_read;
245    }
246
247    fn parseUncompressed(
248        reader: *Reader,
249        allocating: *Writer.Allocating,
250        accum: *AccumBuffer,
251        reset_dict: bool,
252    ) !usize {
253        const unpacked_size = @as(u17, try reader.takeInt(u16, .big)) + 1;
254
255        if (reset_dict) try accum.reset(&allocating.writer);
256
257        const gpa = allocating.allocator;
258
259        for (0..unpacked_size) |_| {
260            try accum.appendByte(gpa, try reader.takeByte());
261        }
262        return 2 + unpacked_size;
263    }
264};
265
266test "decompress hello world stream" {
267    const expected = "Hello\nWorld!\n";
268    const compressed = &[_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00 };
269
270    const gpa = std.testing.allocator;
271
272    var decode = try Decode.init(gpa);
273    defer decode.deinit(gpa);
274
275    var stream: std.Io.Reader = .fixed(compressed);
276    var result: std.Io.Writer.Allocating = .init(gpa);
277    defer result.deinit();
278
279    const n_read = try decode.decompress(&stream, &result);
280    try std.testing.expectEqual(compressed.len, n_read);
281    try std.testing.expectEqualStrings(expected, result.written());
282}