Commit 8e2af21cd9

fn ⌃ ⌥ <70830482+FnControlOption@users.noreply.github.com>
2023-02-02 20:59:56
Add LZMA decoder
1 parent 6f13a72
lib/std/compress/lzma/decode/lzbuffer.zig
@@ -0,0 +1,226 @@
+const std = @import("../../../std.zig");
+const math = std.math;
+const mem = std.mem;
+const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+
+/// An accumulating buffer for LZ sequences
+pub const LzAccumBuffer = struct {
+    /// Buffer
+    buf: ArrayListUnmanaged(u8),
+
+    /// Buffer memory limit
+    memlimit: usize,
+
+    /// Total number of bytes sent through the buffer
+    len: usize,
+
+    const Self = @This();
+
+    pub fn init(memlimit: usize) Self {
+        return Self{
+            .buf = .{},
+            .memlimit = memlimit,
+            .len = 0,
+        };
+    }
+
+    pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void {
+        try self.buf.append(allocator, byte);
+        self.len += 1;
+    }
+
+    /// Reset the internal dictionary
+    pub fn reset(self: *Self, writer: anytype) !void {
+        try writer.writeAll(self.buf.items);
+        self.buf.clearRetainingCapacity();
+        self.len = 0;
+    }
+
+    /// Retrieve the last byte or return a default
+    pub fn lastOr(self: Self, lit: u8) u8 {
+        const buf_len = self.buf.items.len;
+        return if (buf_len == 0)
+            lit
+        else
+            self.buf.items[buf_len - 1];
+    }
+
+    /// Retrieve the n-th last byte
+    pub fn lastN(self: Self, dist: usize) !u8 {
+        const buf_len = self.buf.items.len;
+        if (dist > buf_len) {
+            return error.CorruptInput;
+        }
+
+        return self.buf.items[buf_len - dist];
+    }
+
+    /// Append a literal
+    pub fn appendLiteral(
+        self: *Self,
+        allocator: Allocator,
+        lit: u8,
+        writer: anytype,
+    ) !void {
+        _ = writer;
+        if (self.len >= self.memlimit) {
+            return error.CorruptInput;
+        }
+        try self.buf.append(allocator, lit);
+        self.len += 1;
+    }
+
+    /// Fetch an LZ sequence (length, distance) from inside the buffer
+    pub fn appendLz(
+        self: *Self,
+        allocator: Allocator,
+        len: usize,
+        dist: usize,
+        writer: anytype,
+    ) !void {
+        _ = writer;
+
+        const buf_len = self.buf.items.len;
+        if (dist > buf_len) {
+            return error.CorruptInput;
+        }
+
+        var offset = buf_len - dist;
+        var i: usize = 0;
+        while (i < len) : (i += 1) {
+            const x = self.buf.items[offset];
+            try self.buf.append(allocator, x);
+            offset += 1;
+        }
+        self.len += len;
+    }
+
+    pub fn finish(self: *Self, writer: anytype) !void {
+        try writer.writeAll(self.buf.items);
+    }
+
+    pub fn deinit(self: *Self, allocator: Allocator) void {
+        self.buf.deinit(allocator);
+        self.* = undefined;
+    }
+};
+
+/// A circular buffer for LZ sequences
+pub const LzCircularBuffer = struct {
+    /// Circular buffer
+    buf: ArrayListUnmanaged(u8),
+
+    /// Length of the buffer
+    dict_size: usize,
+
+    /// Buffer memory limit
+    memlimit: usize,
+
+    /// Current position
+    cursor: usize,
+
+    /// Total number of bytes sent through the buffer
+    len: usize,
+
+    const Self = @This();
+
+    pub fn init(dict_size: usize, memlimit: usize) Self {
+        return Self{
+            .buf = .{},
+            .dict_size = dict_size,
+            .memlimit = memlimit,
+            .cursor = 0,
+            .len = 0,
+        };
+    }
+
+    pub fn get(self: Self, index: usize) u8 {
+        return if (0 <= index and index < self.buf.items.len)
+            self.buf.items[index]
+        else
+            0;
+    }
+
+    pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void {
+        if (index >= self.memlimit) {
+            return error.CorruptInput;
+        }
+        try self.buf.ensureTotalCapacity(allocator, index + 1);
+        while (self.buf.items.len < index) {
+            self.buf.appendAssumeCapacity(0);
+        }
+        self.buf.appendAssumeCapacity(value);
+    }
+
+    /// Retrieve the last byte or return a default
+    pub fn lastOr(self: Self, lit: u8) u8 {
+        return if (self.len == 0)
+            lit
+        else
+            self.get((self.dict_size + self.cursor - 1) % self.dict_size);
+    }
+
+    /// Retrieve the n-th last byte
+    pub fn lastN(self: Self, dist: usize) !u8 {
+        if (dist > self.dict_size or dist > self.len) {
+            return error.CorruptInput;
+        }
+
+        const offset = (self.dict_size + self.cursor - dist) % self.dict_size;
+        return self.get(offset);
+    }
+
+    /// Append a literal
+    pub fn appendLiteral(
+        self: *Self,
+        allocator: Allocator,
+        lit: u8,
+        writer: anytype,
+    ) !void {
+        try self.set(allocator, self.cursor, lit);
+        self.cursor += 1;
+        self.len += 1;
+
+        // Flush the circular buffer to the output
+        if (self.cursor == self.dict_size) {
+            try writer.writeAll(self.buf.items);
+            self.cursor = 0;
+        }
+    }
+
+    /// Fetch an LZ sequence (length, distance) from inside the buffer
+    pub fn appendLz(
+        self: *Self,
+        allocator: Allocator,
+        len: usize,
+        dist: usize,
+        writer: anytype,
+    ) !void {
+        if (dist > self.dict_size or dist > self.len) {
+            return error.CorruptInput;
+        }
+
+        var offset = (self.dict_size + self.cursor - dist) % self.dict_size;
+        var i: usize = 0;
+        while (i < len) : (i += 1) {
+            const x = self.get(offset);
+            try self.appendLiteral(allocator, x, writer);
+            offset += 1;
+            if (offset == self.dict_size) {
+                offset = 0;
+            }
+        }
+    }
+
+    pub fn finish(self: *Self, writer: anytype) !void {
+        if (self.cursor > 0) {
+            try writer.writeAll(self.buf.items[0..self.cursor]);
+        }
+    }
+
+    pub fn deinit(self: *Self, allocator: Allocator) void {
+        self.buf.deinit(allocator);
+        self.* = undefined;
+    }
+};
lib/std/compress/lzma/decode/lzma.zig
@@ -0,0 +1,398 @@
+const std = @import("../../../std.zig");
+const assert = std.debug.assert;
+const math = std.math;
+const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+const FixedBufferStream = std.io.FixedBufferStream;
+
+const LzCircularBuffer = @import("lzbuffer.zig").LzCircularBuffer;
+const Options = @import("../decode.zig").Options;
+const Vec2D = @import("../vec2d.zig").Vec2D;
+const rangecoder = @import("rangecoder.zig");
+const BitTree = rangecoder.BitTree;
+const LenDecoder = rangecoder.LenDecoder;
+const RangeDecoder = rangecoder.RangeDecoder;
+
+const ProcessingStatus = enum {
+    continue_,
+    finished,
+};
+
+pub const LzmaProperties = struct {
+    lc: u4,
+    lp: u3,
+    pb: u3,
+
+    fn validate(self: LzmaProperties) void {
+        assert(self.lc <= 8);
+        assert(self.lp <= 4);
+        assert(self.pb <= 4);
+    }
+};
+
+pub const LzmaParams = struct {
+    properties: LzmaProperties,
+    dict_size: u32,
+    unpacked_size: ?u64,
+
+    pub fn readHeader(reader: anytype, options: Options) !LzmaParams {
+        var props = try reader.readByte();
+        if (props >= 225) {
+            return error.CorruptInput;
+        }
+
+        const lc = @intCast(u4, props % 9);
+        props /= 9;
+        const lp = @intCast(u3, props % 5);
+        props /= 5;
+        const pb = @intCast(u3, props);
+
+        const dict_size_provided = try reader.readIntLittle(u32);
+        const dict_size = math.max(0x1000, dict_size_provided);
+
+        const unpacked_size = switch (options.unpacked_size) {
+            .read_from_header => blk: {
+                const unpacked_size_provided = try reader.readIntLittle(u64);
+                const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
+                break :blk if (marker_mandatory)
+                    null
+                else
+                    unpacked_size_provided;
+            },
+            .read_header_but_use_provided => |x| blk: {
+                _ = try reader.readIntLittle(u64);
+                break :blk x;
+            },
+            .use_provided => |x| x,
+        };
+
+        return LzmaParams{
+            .properties = LzmaProperties{ .lc = lc, .lp = lp, .pb = pb },
+            .dict_size = dict_size,
+            .unpacked_size = unpacked_size,
+        };
+    }
+};
+
+pub const DecoderState = struct {
+    lzma_props: LzmaProperties,
+    unpacked_size: ?u64,
+    literal_probs: Vec2D(u16),
+    pos_slot_decoder: [4]BitTree(6),
+    align_decoder: BitTree(4),
+    pos_decoders: [115]u16,
+    is_match: [192]u16,
+    is_rep: [12]u16,
+    is_rep_g0: [12]u16,
+    is_rep_g1: [12]u16,
+    is_rep_g2: [12]u16,
+    is_rep_0long: [192]u16,
+    state: usize,
+    rep: [4]usize,
+    len_decoder: LenDecoder,
+    rep_len_decoder: LenDecoder,
+
+    pub fn init(
+        allocator: Allocator,
+        lzma_props: LzmaProperties,
+        unpacked_size: ?u64,
+    ) !DecoderState {
+        return .{
+            .lzma_props = lzma_props,
+            .unpacked_size = unpacked_size,
+            .literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }),
+            .pos_slot_decoder = .{.{}} ** 4,
+            .align_decoder = .{},
+            .pos_decoders = .{0x400} ** 115,
+            .is_match = .{0x400} ** 192,
+            .is_rep = .{0x400} ** 12,
+            .is_rep_g0 = .{0x400} ** 12,
+            .is_rep_g1 = .{0x400} ** 12,
+            .is_rep_g2 = .{0x400} ** 12,
+            .is_rep_0long = .{0x400} ** 192,
+            .state = 0,
+            .rep = .{0} ** 4,
+            .len_decoder = .{},
+            .rep_len_decoder = .{},
+        };
+    }
+
+    pub fn deinit(self: *DecoderState, allocator: Allocator) void {
+        self.literal_probs.deinit(allocator);
+        self.* = undefined;
+    }
+
+    pub fn resetState(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void {
+        new_props.validate();
+        if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
+            self.literal_probs.fill(0x400);
+        } else {
+            self.literal_probs.deinit(allocator);
+            self.literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
+        }
+
+        self.lzma_props = new_props;
+        for (self.pos_slot_decoder) |*t| t.reset();
+        self.align_decoder.reset();
+        self.pos_decoders = .{0x400} ** 115;
+        self.is_match = .{0x400} ** 192;
+        self.is_rep = .{0x400} ** 12;
+        self.is_rep_g0 = .{0x400} ** 12;
+        self.is_rep_g1 = .{0x400} ** 12;
+        self.is_rep_g2 = .{0x400} ** 12;
+        self.is_rep_0long = .{0x400} ** 192;
+        self.state = 0;
+        self.rep = .{0} ** 4;
+        self.len_decoder.reset();
+        self.rep_len_decoder.reset();
+    }
+
+    fn processNextInner(
+        self: *DecoderState,
+        allocator: Allocator,
+        reader: anytype,
+        writer: anytype,
+        buffer: anytype,
+        decoder: *RangeDecoder,
+        update: bool,
+    ) !ProcessingStatus {
+        const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1);
+
+        if (!try decoder.decodeBit(
+            reader,
+            &self.is_match[(self.state << 4) + pos_state],
+            update,
+        )) {
+            const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update);
+
+            if (update) {
+                try buffer.appendLiteral(allocator, byte, writer);
+
+                self.state = if (self.state < 4)
+                    0
+                else if (self.state < 10)
+                    self.state - 3
+                else
+                    self.state - 6;
+            }
+            return .continue_;
+        }
+
+        var len: usize = undefined;
+        if (try decoder.decodeBit(reader, &self.is_rep[self.state], update)) {
+            if (!try decoder.decodeBit(reader, &self.is_rep_g0[self.state], update)) {
+                if (!try decoder.decodeBit(
+                    reader,
+                    &self.is_rep_0long[(self.state << 4) + pos_state],
+                    update,
+                )) {
+                    if (update) {
+                        self.state = if (self.state < 7) 9 else 11;
+                        const dist = self.rep[0] + 1;
+                        try buffer.appendLz(allocator, 1, dist, writer);
+                    }
+                    return .continue_;
+                }
+            } else {
+                const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update))
+                    1
+                else if (!try decoder.decodeBit(reader, &self.is_rep_g2[self.state], update))
+                    2
+                else
+                    3;
+                if (update) {
+                    const dist = self.rep[idx];
+                    var i = idx;
+                    while (i > 0) : (i -= 1) {
+                        self.rep[i] = self.rep[i - 1];
+                    }
+                    self.rep[0] = dist;
+                }
+            }
+
+            len = try self.rep_len_decoder.decode(reader, decoder, pos_state, update);
+
+            if (update) {
+                self.state = if (self.state < 7) 8 else 11;
+            }
+        } else {
+            if (update) {
+                self.rep[3] = self.rep[2];
+                self.rep[2] = self.rep[1];
+                self.rep[1] = self.rep[0];
+            }
+
+            len = try self.len_decoder.decode(reader, decoder, pos_state, update);
+
+            if (update) {
+                self.state = if (self.state < 7) 7 else 10;
+            }
+
+            const rep_0 = try self.decodeDistance(reader, decoder, len, update);
+
+            if (update) {
+                self.rep[0] = rep_0;
+                if (self.rep[0] == 0xFFFF_FFFF) {
+                    if (decoder.isFinished()) {
+                        return .finished;
+                    }
+                    return error.CorruptInput;
+                }
+            }
+        }
+
+        if (update) {
+            len += 2;
+
+            const dist = self.rep[0] + 1;
+            try buffer.appendLz(allocator, len, dist, writer);
+        }
+
+        return .continue_;
+    }
+
+    fn processNext(
+        self: *DecoderState,
+        allocator: Allocator,
+        reader: anytype,
+        writer: anytype,
+        buffer: anytype,
+        decoder: *RangeDecoder,
+    ) !ProcessingStatus {
+        return self.processNextInner(allocator, reader, writer, buffer, decoder, true);
+    }
+
+    pub fn process(
+        self: *DecoderState,
+        allocator: Allocator,
+        reader: anytype,
+        writer: anytype,
+        buffer: anytype,
+        decoder: *RangeDecoder,
+    ) !void {
+        while (true) {
+            if (self.unpacked_size) |unpacked_size| {
+                if (buffer.len >= unpacked_size) {
+                    break;
+                }
+            } else if (decoder.isFinished()) {
+                break;
+            }
+
+            if (try self.processNext(allocator, reader, writer, buffer, decoder) == .finished) {
+                break;
+            }
+        }
+
+        if (self.unpacked_size) |len| {
+            if (len != buffer.len) {
+                return error.CorruptInput;
+            }
+        }
+    }
+
+    fn decodeLiteral(
+        self: *DecoderState,
+        reader: anytype,
+        buffer: anytype,
+        decoder: *RangeDecoder,
+        update: bool,
+    ) !u8 {
+        const def_prev_byte = 0;
+        const prev_byte = @as(usize, buffer.lastOr(def_prev_byte));
+
+        var result: usize = 1;
+        const lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
+            (prev_byte >> (8 - self.lzma_props.lc));
+        const probs = try self.literal_probs.getMut(lit_state);
+
+        if (self.state >= 7) {
+            var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1));
+
+            while (result < 0x100) {
+                const match_bit = (match_byte >> 7) & 1;
+                match_byte <<= 1;
+                const bit = @boolToInt(try decoder.decodeBit(
+                    reader,
+                    &probs[((@as(usize, 1) + match_bit) << 8) + result],
+                    update,
+                ));
+                result = (result << 1) ^ bit;
+                if (match_bit != bit) {
+                    break;
+                }
+            }
+        }
+
+        while (result < 0x100) {
+            result = (result << 1) ^ @boolToInt(try decoder.decodeBit(reader, &probs[result], update));
+        }
+
+        return @truncate(u8, result - 0x100);
+    }
+
+    fn decodeDistance(
+        self: *DecoderState,
+        reader: anytype,
+        decoder: *RangeDecoder,
+        length: usize,
+        update: bool,
+    ) !usize {
+        const len_state = if (length > 3) 3 else length;
+
+        const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(reader, decoder, update));
+        if (pos_slot < 4)
+            return pos_slot;
+
+        const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1);
+        var result = (2 ^ (pos_slot & 1)) << num_direct_bits;
+
+        if (pos_slot < 14) {
+            result += try decoder.parseReverseBitTree(
+                reader,
+                num_direct_bits,
+                &self.pos_decoders,
+                result - pos_slot,
+                update,
+            );
+        } else {
+            result += @as(usize, try decoder.get(reader, num_direct_bits - 4)) << 4;
+            result += try self.align_decoder.parseReverse(reader, decoder, update);
+        }
+
+        return result;
+    }
+};
+
+pub const LzmaDecoder = struct {
+    params: LzmaParams,
+    memlimit: usize,
+    state: DecoderState,
+
+    pub fn init(allocator: Allocator, params: LzmaParams, memlimit: ?usize) !LzmaDecoder {
+        return LzmaDecoder{
+            .params = params,
+            .memlimit = memlimit orelse math.maxInt(usize),
+            .state = try DecoderState.init(allocator, params.properties, params.unpacked_size),
+        };
+    }
+
+    pub fn deinit(self: *LzmaDecoder, allocator: Allocator) void {
+        self.state.deinit(allocator);
+        self.* = undefined;
+    }
+
+    pub fn decompress(
+        self: *LzmaDecoder,
+        allocator: Allocator,
+        reader: anytype,
+        writer: anytype,
+    ) !void {
+        var buffer = LzCircularBuffer.init(self.params.dict_size, self.memlimit);
+        defer buffer.deinit(allocator);
+
+        var decoder = try RangeDecoder.init(reader);
+        try self.state.process(allocator, reader, writer, &buffer, &decoder);
+        try buffer.finish(writer);
+    }
+};
lib/std/compress/lzma/decode/lzma2.zig
@@ -0,0 +1,169 @@
+const std = @import("../../../std.zig");
+const Allocator = std.mem.Allocator;
+
+const lzma = @import("lzma.zig");
+const DecoderState = lzma.DecoderState;
+const LzmaProperties = lzma.LzmaProperties;
+const LzAccumBuffer = @import("lzbuffer.zig").LzAccumBuffer;
+const RangeDecoder = @import("rangecoder.zig").RangeDecoder;
+
+pub const Lzma2Decoder = struct {
+    lzma_state: DecoderState,
+
+    pub fn init(allocator: Allocator) !Lzma2Decoder {
+        return Lzma2Decoder{
+            .lzma_state = try DecoderState.init(
+                allocator,
+                LzmaProperties{
+                    .lc = 0,
+                    .lp = 0,
+                    .pb = 0,
+                },
+                null,
+            ),
+        };
+    }
+
+    pub fn deinit(self: *Lzma2Decoder, allocator: Allocator) void {
+        self.lzma_state.deinit(allocator);
+        self.* = undefined;
+    }
+
+    pub fn decompress(
+        self: *Lzma2Decoder,
+        allocator: Allocator,
+        reader: anytype,
+        writer: anytype,
+    ) !void {
+        var accum = LzAccumBuffer.init(std.math.maxInt(usize));
+        defer accum.deinit(allocator);
+
+        while (true) {
+            const status = try reader.readByte();
+
+            switch (status) {
+                0 => break,
+                1 => try parseUncompressed(allocator, reader, writer, &accum, true),
+                2 => try parseUncompressed(allocator, reader, writer, &accum, false),
+                else => try self.parseLzma(allocator, reader, writer, &accum, status),
+            }
+        }
+
+        try accum.finish(writer);
+    }
+
+    fn parseLzma(
+        self: *Lzma2Decoder,
+        allocator: Allocator,
+        reader: anytype,
+        writer: anytype,
+        accum: *LzAccumBuffer,
+        status: u8,
+    ) !void {
+        if (status & 0x80 == 0) {
+            return error.CorruptInput;
+        }
+
+        const Reset = struct {
+            dict: bool,
+            state: bool,
+            props: bool,
+        };
+
+        const reset = switch ((status >> 5) & 0x3) {
+            0 => Reset{
+                .dict = false,
+                .state = false,
+                .props = false,
+            },
+            1 => Reset{
+                .dict = false,
+                .state = true,
+                .props = false,
+            },
+            2 => Reset{
+                .dict = false,
+                .state = true,
+                .props = true,
+            },
+            3 => Reset{
+                .dict = true,
+                .state = true,
+                .props = true,
+            },
+            else => unreachable,
+        };
+
+        const unpacked_size = blk: {
+            var tmp: u64 = status & 0x1F;
+            tmp <<= 16;
+            tmp |= try reader.readIntBig(u16);
+            break :blk tmp + 1;
+        };
+
+        const packed_size = blk: {
+            const tmp: u17 = try reader.readIntBig(u16);
+            break :blk tmp + 1;
+        };
+
+        if (reset.dict) {
+            try accum.reset(writer);
+        }
+
+        if (reset.state) {
+            var new_props = self.lzma_state.lzma_props;
+
+            if (reset.props) {
+                var props = try reader.readByte();
+                if (props >= 225) {
+                    return error.CorruptInput;
+                }
+
+                const lc = @intCast(u4, props % 9);
+                props /= 9;
+                const lp = @intCast(u3, props % 5);
+                props /= 5;
+                const pb = @intCast(u3, props);
+
+                if (lc + lp > 4) {
+                    return error.CorruptInput;
+                }
+
+                new_props = LzmaProperties{ .lc = lc, .lp = lp, .pb = pb };
+            }
+
+            try self.lzma_state.resetState(allocator, new_props);
+        }
+
+        self.lzma_state.unpacked_size = unpacked_size + accum.len;
+
+        var counter = std.io.countingReader(reader);
+        const counter_reader = counter.reader();
+
+        var rangecoder = try RangeDecoder.init(counter_reader);
+        try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder);
+
+        if (counter.bytes_read != packed_size) {
+            return error.CorruptInput;
+        }
+    }
+
+    fn parseUncompressed(
+        allocator: Allocator,
+        reader: anytype,
+        writer: anytype,
+        accum: *LzAccumBuffer,
+        reset_dict: bool,
+    ) !void {
+        const unpacked_size = try reader.readIntBig(u16) + 1; // TODO: overflow
+
+        if (reset_dict) {
+            try accum.reset(writer);
+        }
+
+        var i: @TypeOf(unpacked_size) = 0;
+        while (i < unpacked_size) : (i += 1) {
+            try accum.appendByte(allocator, try reader.readByte());
+        }
+    }
+};
lib/std/compress/lzma/decode/rangecoder.zig
@@ -0,0 +1,184 @@
+const std = @import("../../../std.zig");
+const mem = std.mem;
+const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+const FixedBufferStream = std.io.FixedBufferStream;
+
+pub const RangeDecoder = struct {
+    range: u32,
+    code: u32,
+
+    pub fn init(reader: anytype) !RangeDecoder {
+        const reserved = try reader.readByte();
+        if (reserved != 0) {
+            return error.CorruptInput;
+        }
+        return RangeDecoder{
+            .range = 0xFFFF_FFFF,
+            .code = try reader.readIntBig(u32),
+        };
+    }
+
+    pub fn fromParts(
+        range: u32,
+        code: u32,
+    ) RangeDecoder {
+        return .{
+            .range = range,
+            .code = code,
+        };
+    }
+
+    pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
+        self.range = range;
+        self.code = code;
+    }
+
+    pub inline fn isFinished(self: RangeDecoder) bool {
+        return self.code == 0;
+    }
+
+    inline fn normalize(self: *RangeDecoder, reader: anytype) !void {
+        if (self.range < 0x0100_0000) {
+            self.range <<= 8;
+            self.code = (self.code << 8) ^ @as(u32, try reader.readByte());
+        }
+    }
+
+    inline fn getBit(self: *RangeDecoder, reader: anytype) !bool {
+        self.range >>= 1;
+
+        const bit = self.code >= self.range;
+        if (bit)
+            self.code -= self.range;
+
+        try self.normalize(reader);
+        return bit;
+    }
+
+    pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 {
+        var result: u32 = 0;
+        var i: usize = 0;
+        while (i < count) : (i += 1)
+            result = (result << 1) ^ @boolToInt(try self.getBit(reader));
+        return result;
+    }
+
+    pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool {
+        const bound = (self.range >> 11) * prob.*;
+
+        if (self.code < bound) {
+            if (update)
+                prob.* += (0x800 - prob.*) >> 5;
+            self.range = bound;
+
+            try self.normalize(reader);
+            return false;
+        } else {
+            if (update)
+                prob.* -= prob.* >> 5;
+            self.code -= bound;
+            self.range -= bound;
+
+            try self.normalize(reader);
+            return true;
+        }
+    }
+
+    fn parseBitTree(
+        self: *RangeDecoder,
+        reader: anytype,
+        num_bits: u5,
+        probs: []u16,
+        update: bool,
+    ) !u32 {
+        var tmp: u32 = 1;
+        var i: @TypeOf(num_bits) = 0;
+        while (i < num_bits) : (i += 1) {
+            const bit = try self.decodeBit(reader, &probs[tmp], update);
+            tmp = (tmp << 1) ^ @boolToInt(bit);
+        }
+        return tmp - (@as(u32, 1) << num_bits);
+    }
+
+    pub fn parseReverseBitTree(
+        self: *RangeDecoder,
+        reader: anytype,
+        num_bits: u5,
+        probs: []u16,
+        offset: usize,
+        update: bool,
+    ) !u32 {
+        var result: u32 = 0;
+        var tmp: usize = 1;
+        var i: @TypeOf(num_bits) = 0;
+        while (i < num_bits) : (i += 1) {
+            const bit = @boolToInt(try self.decodeBit(reader, &probs[offset + tmp], update));
+            tmp = (tmp << 1) ^ bit;
+            result ^= @as(u32, bit) << i;
+        }
+        return result;
+    }
+};
+
+pub fn BitTree(comptime num_bits: usize) type {
+    return struct {
+        probs: [1 << num_bits]u16 = .{0x400} ** (1 << num_bits),
+
+        const Self = @This();
+
+        pub fn parse(
+            self: *Self,
+            reader: anytype,
+            decoder: *RangeDecoder,
+            update: bool,
+        ) !u32 {
+            return decoder.parseBitTree(reader, num_bits, &self.probs, update);
+        }
+
+        pub fn parseReverse(
+            self: *Self,
+            reader: anytype,
+            decoder: *RangeDecoder,
+            update: bool,
+        ) !u32 {
+            return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update);
+        }
+
+        pub fn reset(self: *Self) void {
+            mem.set(u16, &self.probs, 0x400);
+        }
+    };
+}
+
+pub const LenDecoder = struct {
+    choice: u16 = 0x400,
+    choice2: u16 = 0x400,
+    low_coder: [16]BitTree(3) = .{.{}} ** 16,
+    mid_coder: [16]BitTree(3) = .{.{}} ** 16,
+    high_coder: BitTree(8) = .{},
+
+    pub fn decode(
+        self: *LenDecoder,
+        reader: anytype,
+        decoder: *RangeDecoder,
+        pos_state: usize,
+        update: bool,
+    ) !usize {
+        if (!try decoder.decodeBit(reader, &self.choice, update)) {
+            return @as(usize, try self.low_coder[pos_state].parse(reader, decoder, update));
+        } else if (!try decoder.decodeBit(reader, &self.choice2, update)) {
+            return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, update)) + 8;
+        } else {
+            return @as(usize, try self.high_coder.parse(reader, decoder, update)) + 16;
+        }
+    }
+
+    pub fn reset(self: *LenDecoder) void {
+        self.choice = 0x400;
+        self.choice2 = 0x400;
+        for (self.low_coder) |*t| t.reset();
+        for (self.mid_coder) |*t| t.reset();
+        self.high_coder.reset();
+    }
+};
lib/std/compress/lzma/testdata/bad-too_big_size-with_eopm.lzma
Binary file
lib/std/compress/lzma/testdata/bad-too_small_size-without_eopm-3.lzma
Binary file
lib/std/compress/lzma/testdata/good-known_size-with_eopm.lzma
Binary file
lib/std/compress/lzma/testdata/good-known_size-without_eopm.lzma
Binary file
lib/std/compress/lzma/testdata/good-unknown_size-with_eopm.lzma
Binary file
lib/std/compress/lzma/decode.zig
@@ -0,0 +1,16 @@
+pub const lzbuffer = @import("decode/lzbuffer.zig");
+pub const lzma = @import("decode/lzma.zig");
+pub const lzma2 = @import("decode/lzma2.zig");
+pub const rangecoder = @import("decode/rangecoder.zig");
+
+pub const Options = struct {
+    unpacked_size: UnpackedSize = .read_from_header,
+    memlimit: ?usize = null,
+    allow_incomplete: bool = false,
+};
+
+pub const UnpackedSize = union(enum) {
+    read_from_header,
+    read_header_but_use_provided: ?u64,
+    use_provided: ?u64,
+};
lib/std/compress/lzma/lzma2_test.zig
@@ -0,0 +1,27 @@
+const std = @import("../../std.zig");
+const lzma = @import("../lzma.zig");
+
+fn testDecompress(compressed: []const u8, writer: anytype) !void {
+    const allocator = std.testing.allocator;
+    var stream = std.io.fixedBufferStream(compressed);
+    try lzma.lzma2Decompress(allocator, stream.reader(), writer);
+}
+
+fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void {
+    const allocator = std.testing.allocator;
+    var decomp = std.ArrayList(u8).init(allocator);
+    defer decomp.deinit();
+    try testDecompress(compressed, decomp.writer());
+    try std.testing.expectEqualSlices(u8, expected, decomp.items);
+}
+
+fn testDecompressError(expected: anyerror, compressed: []const u8) !void {
+    return std.testing.expectError(expected, testDecompress(compressed, std.io.null_writer));
+}
+
+test {
+    try testDecompressEqual(
+        "Hello\nWorld!\n",
+        &[_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00 },
+    );
+}
lib/std/compress/lzma/lzma_test.zig
@@ -0,0 +1,87 @@
+const std = @import("../../std.zig");
+const lzma = @import("../lzma.zig");
+
+fn testDecompress(compressed: []const u8, writer: anytype) !void {
+    const allocator = std.testing.allocator;
+    var stream = std.io.fixedBufferStream(compressed);
+    try lzma.lzmaDecompress(allocator, stream.reader(), writer, .{});
+}
+
+fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void {
+    const allocator = std.testing.allocator;
+    var decomp = std.ArrayList(u8).init(allocator);
+    defer decomp.deinit();
+    try testDecompress(compressed, decomp.writer());
+    try std.testing.expectEqualSlices(u8, expected, decomp.items);
+}
+
+fn testDecompressError(expected: anyerror, compressed: []const u8) !void {
+    return std.testing.expectError(expected, testDecompress(compressed, std.io.null_writer));
+}
+
+test "decompress empty world" {
+    try testDecompressEqual(
+        "",
+        &[_]u8{
+            0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x83, 0xff,
+            0xfb, 0xff, 0xff, 0xc0, 0x00, 0x00, 0x00,
+        },
+    );
+}
+
+test "decompress hello world" {
+    try testDecompressEqual(
+        "Hello world\n",
+        &[_]u8{
+            0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19,
+            0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe,
+            0xa5, 0xb0, 0x00,
+        },
+    );
+}
+
+test "decompress huge dict" {
+    try testDecompressEqual(
+        "Hello world\n",
+        &[_]u8{
+            0x5d, 0x7f, 0x7f, 0x7f, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19,
+            0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe,
+            0xa5, 0xb0, 0x00,
+        },
+    );
+}
+
+test "unknown size with end of payload marker" {
+    try testDecompressEqual(
+        "Hello\nWorld!\n",
+        @embedFile("testdata/good-unknown_size-with_eopm.lzma"),
+    );
+}
+
+test "known size without end of payload marker" {
+    try testDecompressEqual(
+        "Hello\nWorld!\n",
+        @embedFile("testdata/good-known_size-without_eopm.lzma"),
+    );
+}
+
+test "known size with end of payload marker" {
+    try testDecompressEqual(
+        "Hello\nWorld!\n",
+        @embedFile("testdata/good-known_size-with_eopm.lzma"),
+    );
+}
+
+test "too big uncompressed size in header" {
+    try testDecompressError(
+        error.CorruptInput,
+        @embedFile("testdata/bad-too_big_size-with_eopm.lzma"),
+    );
+}
+
+test "too small uncompressed size in header" {
+    try testDecompressError(
+        error.CorruptInput,
+        @embedFile("testdata/bad-too_small_size-without_eopm-3.lzma"),
+    );
+}
lib/std/compress/lzma/vec2d.zig
@@ -0,0 +1,128 @@
+const std = @import("../../std.zig");
+const math = std.math;
+const mem = std.mem;
+const Allocator = std.mem.Allocator;
+
+pub fn Vec2D(comptime T: type) type {
+    return struct {
+        data: []T,
+        cols: usize,
+
+        const Self = @This();
+
+        pub fn init(allocator: Allocator, value: T, size: struct { usize, usize }) !Self {
+            const len = try math.mul(usize, size[0], size[1]);
+            const data = try allocator.alloc(T, len);
+            mem.set(T, data, value);
+            return Self{
+                .data = data,
+                .cols = size[1],
+            };
+        }
+
+        pub fn deinit(self: *Self, allocator: Allocator) void {
+            allocator.free(self.data);
+            self.* = undefined;
+        }
+
+        pub fn fill(self: *Self, value: T) void {
+            mem.set(T, self.data, value);
+        }
+
+        inline fn _get(self: Self, row: usize) ![]T {
+            const start_row = try math.mul(usize, row, self.cols);
+            const end_row = try math.add(usize, start_row, self.cols);
+            return self.data[start_row..end_row];
+        }
+
+        pub fn get(self: Self, row: usize) ![]const T {
+            return self._get(row);
+        }
+
+        pub fn getMut(self: *Self, row: usize) ![]T {
+            return self._get(row);
+        }
+    };
+}
+
+const testing = std.testing;
+const expectEqualSlices = std.testing.expectEqualSlices;
+const expectError = std.testing.expectError;
+
+test "Vec2D.init" {
+    const allocator = testing.allocator;
+    var vec2d = try Vec2D(i32).init(allocator, 1, .{ 2, 3 });
+    defer vec2d.deinit(allocator);
+
+    try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(0));
+    try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(1));
+}
+
+test "Vec2D.init overflow" {
+    const allocator = testing.allocator;
+    try expectError(
+        error.Overflow,
+        Vec2D(i32).init(allocator, 1, .{ math.maxInt(usize), math.maxInt(usize) }),
+    );
+}
+
+test "Vec2D.fill" {
+    const allocator = testing.allocator;
+    var vec2d = try Vec2D(i32).init(allocator, 0, .{ 2, 3 });
+    defer vec2d.deinit(allocator);
+
+    vec2d.fill(7);
+
+    try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(0));
+    try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(1));
+}
+
+test "Vec2D.get" {
+    var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 };
+    const vec2d = Vec2D(i32){
+        .data = &data,
+        .cols = 2,
+    };
+
+    try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0));
+    try expectEqualSlices(i32, &.{ 2, 3 }, try vec2d.get(1));
+    try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2));
+    try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3));
+}
+
+test "Vec2D.getMut" {
+    var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 };
+    var vec2d = Vec2D(i32){
+        .data = &data,
+        .cols = 2,
+    };
+
+    const row = try vec2d.getMut(1);
+    row[1] = 9;
+
+    try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0));
+    // (1, 1) should be 9.
+    try expectEqualSlices(i32, &.{ 2, 9 }, try vec2d.get(1));
+    try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2));
+    try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3));
+}
+
+test "Vec2D.get multiplication overflow" {
+    const allocator = testing.allocator;
+    var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 4 });
+    defer matrix.deinit(allocator);
+
+    const row = (math.maxInt(usize) / 4) + 1;
+    try expectError(error.Overflow, matrix.get(row));
+    try expectError(error.Overflow, matrix.getMut(row));
+}
+
+test "Vec2D.get addition overflow" {
+    const allocator = testing.allocator;
+    var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 5 });
+    defer matrix.deinit(allocator);
+
+    const row = math.maxInt(usize) / 5;
+    try expectError(error.Overflow, matrix.get(row));
+    try expectError(error.Overflow, matrix.getMut(row));
+}
lib/std/compress/xz/block.zig
@@ -1,6 +1,7 @@
 const std = @import("../../std.zig");
-const lzma = @import("lzma.zig");
+const lzma = std.compress.lzma;
 const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
 const Crc32 = std.hash.Crc32;
 const Crc64 = std.hash.crc.Crc64Xz;
 const Sha256 = std.crypto.hash.sha2.Sha256;
@@ -32,8 +33,7 @@ pub fn Decoder(comptime ReaderType: type) type {
         inner_reader: ReaderType,
         check: xz.Check,
         err: ?Error,
-        accum: lzma.LzAccumBuffer,
-        lzma_state: lzma.DecoderState,
+        to_read: ArrayListUnmanaged(u8),
         block_count: usize,
 
         fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self {
@@ -42,15 +42,13 @@ pub fn Decoder(comptime ReaderType: type) type {
                 .inner_reader = in_reader,
                 .check = check,
                 .err = null,
-                .accum = .{},
-                .lzma_state = try lzma.DecoderState.init(allocator),
+                .to_read = .{},
                 .block_count = 0,
             };
         }
 
         pub fn deinit(self: *Self) void {
-            self.accum.deinit(self.allocator);
-            self.lzma_state.deinit(self.allocator);
+            self.to_read.deinit(self.allocator);
         }
 
         pub fn reader(self: *Self) Reader {
@@ -59,9 +57,13 @@ pub fn Decoder(comptime ReaderType: type) type {
 
         pub fn read(self: *Self, output: []u8) Error!usize {
             while (true) {
-                if (self.accum.to_read.items.len > 0) {
-                    const n = self.accum.read(output);
-                    if (self.accum.to_read.items.len == 0 and self.err != null) {
+                if (self.to_read.items.len > 0) {
+                    const input = self.to_read.items;
+                    const n = std.math.min(input.len, output.len);
+                    std.mem.copy(u8, output[0..n], input[0..n]);
+                    std.mem.copy(u8, input, input[n..]);
+                    self.to_read.shrinkRetainingCapacity(input.len - n);
+                    if (self.to_read.items.len == 0 and self.err != null) {
                         if (self.err.? == DecodeError.EndOfStreamWithNoError) {
                             return n;
                         }
@@ -77,15 +79,12 @@ pub fn Decoder(comptime ReaderType: type) type {
                 }
                 self.readBlock() catch |e| {
                     self.err = e;
-                    if (self.accum.to_read.items.len == 0) {
-                        try self.accum.reset(self.allocator);
-                    }
                 };
             }
         }
 
         fn readBlock(self: *Self) Error!void {
-            const unpacked_pos = self.accum.to_read.items.len;
+            const unpacked_pos = self.to_read.items.len;
 
             var block_counter = std.io.countingReader(self.inner_reader);
             const block_reader = block_counter.reader();
@@ -156,15 +155,18 @@ pub fn Decoder(comptime ReaderType: type) type {
 
             // Compressed Data
             var packed_counter = std.io.countingReader(block_reader);
-            const packed_reader = packed_counter.reader();
-            while (try self.readLzma2Chunk(packed_reader)) {}
+            try lzma.lzma2Decompress(
+                self.allocator,
+                packed_counter.reader(),
+                self.to_read.writer(self.allocator),
+            );
 
             if (packed_size) |s| {
                 if (s != packed_counter.bytes_read)
                     return error.CorruptInput;
             }
 
-            const unpacked_bytes = self.accum.to_read.items[unpacked_pos..];
+            const unpacked_bytes = self.to_read.items[unpacked_pos..];
             if (unpacked_size) |s| {
                 if (s != unpacked_bytes.len)
                     return error.CorruptInput;
@@ -205,113 +207,5 @@ pub fn Decoder(comptime ReaderType: type) type {
 
             self.block_count += 1;
         }
-
-        fn readLzma2Chunk(self: *Self, packed_reader: anytype) Error!bool {
-            const status = try packed_reader.readByte();
-            switch (status) {
-                0 => {
-                    try self.accum.reset(self.allocator);
-                    return false;
-                },
-                1, 2 => {
-                    if (status == 1)
-                        try self.accum.reset(self.allocator);
-
-                    const size = try packed_reader.readIntBig(u16) + 1;
-                    try self.accum.ensureUnusedCapacity(self.allocator, size);
-
-                    var i: usize = 0;
-                    while (i < size) : (i += 1)
-                        self.accum.appendAssumeCapacity(try packed_reader.readByte());
-
-                    return true;
-                },
-                else => {
-                    if (status & 0x80 == 0)
-                        return error.CorruptInput;
-
-                    const Reset = struct {
-                        dict: bool,
-                        state: bool,
-                        props: bool,
-                    };
-
-                    const reset = switch ((status >> 5) & 0x3) {
-                        0 => Reset{
-                            .dict = false,
-                            .state = false,
-                            .props = false,
-                        },
-                        1 => Reset{
-                            .dict = false,
-                            .state = true,
-                            .props = false,
-                        },
-                        2 => Reset{
-                            .dict = false,
-                            .state = true,
-                            .props = true,
-                        },
-                        3 => Reset{
-                            .dict = true,
-                            .state = true,
-                            .props = true,
-                        },
-                        else => unreachable,
-                    };
-
-                    const unpacked_size = blk: {
-                        var tmp: u64 = status & 0x1F;
-                        tmp <<= 16;
-                        tmp |= try packed_reader.readIntBig(u16);
-                        break :blk tmp + 1;
-                    };
-
-                    const packed_size = blk: {
-                        const tmp: u17 = try packed_reader.readIntBig(u16);
-                        break :blk tmp + 1;
-                    };
-
-                    if (reset.dict)
-                        try self.accum.reset(self.allocator);
-
-                    if (reset.state) {
-                        var new_props = self.lzma_state.lzma_props;
-
-                        if (reset.props) {
-                            var props = try packed_reader.readByte();
-                            if (props >= 225)
-                                return error.CorruptInput;
-
-                            const lc = @intCast(u4, props % 9);
-                            props /= 9;
-                            const lp = @intCast(u3, props % 5);
-                            props /= 5;
-                            const pb = @intCast(u3, props);
-
-                            if (lc + lp > 4)
-                                return error.CorruptInput;
-
-                            new_props = .{ .lc = lc, .lp = lp, .pb = pb };
-                        }
-
-                        try self.lzma_state.reset_state(self.allocator, new_props);
-                    }
-
-                    self.lzma_state.unpacked_size = unpacked_size + self.accum.len();
-
-                    const buffer = try self.allocator.alloc(u8, packed_size);
-                    defer self.allocator.free(buffer);
-
-                    for (buffer) |*b|
-                        b.* = try packed_reader.readByte();
-
-                    var rangecoder = try lzma.RangeDecoder.init(buffer);
-                    try self.lzma_state.process(self.allocator, &self.accum, &rangecoder);
-
-                    return true;
-                },
-            }
-        }
     };
 }
lib/std/compress/xz/lzma.zig
@@ -1,658 +0,0 @@
-// Ported from https://github.com/gendx/lzma-rs
-
-const std = @import("../../std.zig");
-const assert = std.debug.assert;
-const Allocator = std.mem.Allocator;
-const ArrayListUnmanaged = std.ArrayListUnmanaged;
-
-const LzmaProperties = struct {
-    lc: u4,
-    lp: u3,
-    pb: u3,
-
-    fn validate(self: LzmaProperties) void {
-        assert(self.lc <= 8);
-        assert(self.lp <= 4);
-        assert(self.pb <= 4);
-    }
-};
-
-pub const DecoderState = struct {
-    lzma_props: LzmaProperties,
-    unpacked_size: ?u64,
-    literal_probs: Vec2D(u16),
-    pos_slot_decoder: [4]BitTree,
-    align_decoder: BitTree,
-    pos_decoders: [115]u16,
-    is_match: [192]u16,
-    is_rep: [12]u16,
-    is_rep_g0: [12]u16,
-    is_rep_g1: [12]u16,
-    is_rep_g2: [12]u16,
-    is_rep_0long: [192]u16,
-    state: usize,
-    rep: [4]usize,
-    len_decoder: LenDecoder,
-    rep_len_decoder: LenDecoder,
-
-    pub fn init(allocator: Allocator) !DecoderState {
-        return .{
-            .lzma_props = LzmaProperties{ .lc = 0, .lp = 0, .pb = 0 },
-            .unpacked_size = null,
-            .literal_probs = try Vec2D(u16).init(allocator, 0x400, 1, 0x300),
-            .pos_slot_decoder = .{
-                try BitTree.init(allocator, 6),
-                try BitTree.init(allocator, 6),
-                try BitTree.init(allocator, 6),
-                try BitTree.init(allocator, 6),
-            },
-            .align_decoder = try BitTree.init(allocator, 4),
-            .pos_decoders = .{0x400} ** 115,
-            .is_match = .{0x400} ** 192,
-            .is_rep = .{0x400} ** 12,
-            .is_rep_g0 = .{0x400} ** 12,
-            .is_rep_g1 = .{0x400} ** 12,
-            .is_rep_g2 = .{0x400} ** 12,
-            .is_rep_0long = .{0x400} ** 192,
-            .state = 0,
-            .rep = .{0} ** 4,
-            .len_decoder = try LenDecoder.init(allocator),
-            .rep_len_decoder = try LenDecoder.init(allocator),
-        };
-    }
-
-    pub fn deinit(self: *DecoderState, allocator: Allocator) void {
-        self.literal_probs.deinit(allocator);
-        for (self.pos_slot_decoder) |*t| t.deinit(allocator);
-        self.align_decoder.deinit(allocator);
-        self.len_decoder.deinit(allocator);
-        self.rep_len_decoder.deinit(allocator);
-    }
-
-    pub fn reset_state(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void {
-        new_props.validate();
-        if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
-            self.literal_probs.fill(0x400);
-        } else {
-            self.literal_probs.deinit(allocator);
-            self.literal_probs = try Vec2D(u16).init(allocator, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300);
-        }
-
-        self.lzma_props = new_props;
-        for (self.pos_slot_decoder) |*t| t.reset();
-        self.align_decoder.reset();
-        self.pos_decoders = .{0x400} ** 115;
-        self.is_match = .{0x400} ** 192;
-        self.is_rep = .{0x400} ** 12;
-        self.is_rep_g0 = .{0x400} ** 12;
-        self.is_rep_g1 = .{0x400} ** 12;
-        self.is_rep_g2 = .{0x400} ** 12;
-        self.is_rep_0long = .{0x400} ** 192;
-        self.state = 0;
-        self.rep = .{0} ** 4;
-        self.len_decoder.reset();
-        self.rep_len_decoder.reset();
-    }
-
-    fn processNextInner(
-        self: *DecoderState,
-        allocator: Allocator,
-        output: *LzAccumBuffer,
-        rangecoder: *RangeDecoder,
-        update: bool,
-    ) !ProcessingStatus {
-        const pos_state = output.len() & ((@as(usize, 1) << self.lzma_props.pb) - 1);
-
-        if (!try rangecoder.decodeBit(
-            &self.is_match[(self.state << 4) + pos_state],
-            update,
-        )) {
-            const byte: u8 = try self.decodeLiteral(output, rangecoder, update);
-
-            if (update) {
-                try output.appendLiteral(allocator, byte);
-
-                self.state = if (self.state < 4)
-                    0
-                else if (self.state < 10)
-                    self.state - 3
-                else
-                    self.state - 6;
-            }
-            return .continue_;
-        }
-
-        var len: usize = undefined;
-        if (try rangecoder.decodeBit(&self.is_rep[self.state], update)) {
-            if (!try rangecoder.decodeBit(&self.is_rep_g0[self.state], update)) {
-                if (!try rangecoder.decodeBit(
-                    &self.is_rep_0long[(self.state << 4) + pos_state],
-                    update,
-                )) {
-                    if (update) {
-                        self.state = if (self.state < 7) 9 else 11;
-                        const dist = self.rep[0] + 1;
-                        try output.appendLz(allocator, 1, dist);
-                    }
-                    return .continue_;
-                }
-            } else {
-                const idx: usize = if (!try rangecoder.decodeBit(&self.is_rep_g1[self.state], update))
-                    1
-                else if (!try rangecoder.decodeBit(&self.is_rep_g2[self.state], update))
-                    2
-                else
-                    3;
-                if (update) {
-                    const dist = self.rep[idx];
-                    var i = idx;
-                    while (i > 0) : (i -= 1) {
-                        self.rep[i] = self.rep[i - 1];
-                    }
-                    self.rep[0] = dist;
-                }
-            }
-
-            len = try self.rep_len_decoder.decode(rangecoder, pos_state, update);
-
-            if (update) {
-                self.state = if (self.state < 7) 8 else 11;
-            }
-        } else {
-            if (update) {
-                self.rep[3] = self.rep[2];
-                self.rep[2] = self.rep[1];
-                self.rep[1] = self.rep[0];
-            }
-
-            len = try self.len_decoder.decode(rangecoder, pos_state, update);
-
-            if (update) {
-                self.state = if (self.state < 7) 7 else 10;
-            }
-
-            const rep_0 = try self.decodeDistance(rangecoder, len, update);
-
-            if (update) {
-                self.rep[0] = rep_0;
-                if (self.rep[0] == 0xFFFF_FFFF) {
-                    if (rangecoder.isFinished()) {
-                        return .finished;
-                    }
-                    return error.CorruptInput;
-                }
-            }
-        }
-
-        if (update) {
-            len += 2;
-
-            const dist = self.rep[0] + 1;
-            try output.appendLz(allocator, len, dist);
-        }
-
-        return .continue_;
-    }
-
-    fn processNext(
-        self: *DecoderState,
-        allocator: Allocator,
-        output: *LzAccumBuffer,
-        rangecoder: *RangeDecoder,
-    ) !ProcessingStatus {
-        return self.processNextInner(allocator, output, rangecoder, true);
-    }
-
-    pub fn process(
-        self: *DecoderState,
-        allocator: Allocator,
-        output: *LzAccumBuffer,
-        rangecoder: *RangeDecoder,
-    ) !void {
-        while (true) {
-            if (self.unpacked_size) |unpacked_size| {
-                if (output.len() >= unpacked_size) {
-                    break;
-                }
-            } else if (rangecoder.isFinished()) {
-                break;
-            }
-
-            if (try self.processNext(allocator, output, rangecoder) == .finished) {
-                break;
-            }
-        }
-
-        if (self.unpacked_size) |len| {
-            if (len != output.len()) {
-                return error.CorruptInput;
-            }
-        }
-    }
-
-    fn decodeLiteral(
-        self: *DecoderState,
-        output: *LzAccumBuffer,
-        rangecoder: *RangeDecoder,
-        update: bool,
-    ) !u8 {
-        const def_prev_byte = 0;
-        const prev_byte = @as(usize, output.lastOr(def_prev_byte));
-
-        var result: usize = 1;
-        const lit_state = ((output.len() & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
-            (prev_byte >> (8 - self.lzma_props.lc));
-        const probs = try self.literal_probs.get(lit_state);
-
-        if (self.state >= 7) {
-            var match_byte = @as(usize, try output.lastN(self.rep[0] + 1));
-
-            while (result < 0x100) {
-                const match_bit = (match_byte >> 7) & 1;
-                match_byte <<= 1;
-                const bit = @boolToInt(try rangecoder.decodeBit(
-                    &probs[((@as(usize, 1) + match_bit) << 8) + result],
-                    update,
-                ));
-                result = (result << 1) ^ bit;
-                if (match_bit != bit) {
-                    break;
-                }
-            }
-        }
-
-        while (result < 0x100) {
-            result = (result << 1) ^ @boolToInt(try rangecoder.decodeBit(&probs[result], update));
-        }
-
-        return @truncate(u8, result - 0x100);
-    }
-
-    fn decodeDistance(
-        self: *DecoderState,
-        rangecoder: *RangeDecoder,
-        length: usize,
-        update: bool,
-    ) !usize {
-        const len_state = if (length > 3) 3 else length;
-
-        const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(rangecoder, update));
-        if (pos_slot < 4)
-            return pos_slot;
-
-        const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1);
-        var result = (2 ^ (pos_slot & 1)) << num_direct_bits;
-
-        if (pos_slot < 14) {
-            result += try rangecoder.parseReverseBitTree(
-                num_direct_bits,
-                &self.pos_decoders,
-                result - pos_slot,
-                update,
-            );
-        } else {
-            result += @as(usize, try rangecoder.get(num_direct_bits - 4)) << 4;
-            result += try self.align_decoder.parseReverse(rangecoder, update);
-        }
-
-        return result;
-    }
-};
-
-const ProcessingStatus = enum {
-    continue_,
-    finished,
-};
-
-pub const LzAccumBuffer = struct {
-    to_read: ArrayListUnmanaged(u8) = .{},
-    buf: ArrayListUnmanaged(u8) = .{},
-
-    pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
-        self.to_read.deinit(allocator);
-        self.buf.deinit(allocator);
-    }
-
-    pub fn read(self: *LzAccumBuffer, output: []u8) usize {
-        const input = self.to_read.items;
-        const n = std.math.min(input.len, output.len);
-        std.mem.copy(u8, output[0..n], input[0..n]);
-        std.mem.copy(u8, input, input[n..]);
-        self.to_read.shrinkRetainingCapacity(input.len - n);
-        return n;
-    }
-
-    pub fn ensureUnusedCapacity(
-        self: *LzAccumBuffer,
-        allocator: Allocator,
-        additional_count: usize,
-    ) !void {
-        try self.buf.ensureUnusedCapacity(allocator, additional_count);
-    }
-
-    pub fn appendAssumeCapacity(self: *LzAccumBuffer, byte: u8) void {
-        self.buf.appendAssumeCapacity(byte);
-    }
-
-    pub fn reset(self: *LzAccumBuffer, allocator: Allocator) !void {
-        try self.to_read.appendSlice(allocator, self.buf.items);
-        self.buf.clearRetainingCapacity();
-    }
-
-    pub fn len(self: *const LzAccumBuffer) usize {
-        return self.buf.items.len;
-    }
-
-    pub fn lastOr(self: *const LzAccumBuffer, lit: u8) u8 {
-        const buf_len = self.buf.items.len;
-        return if (buf_len == 0)
-            lit
-        else
-            self.buf.items[buf_len - 1];
-    }
-
-    pub fn lastN(self: *const LzAccumBuffer, dist: usize) !u8 {
-        const buf_len = self.buf.items.len;
-        if (dist > buf_len) {
-            return error.CorruptInput;
-        }
-
-        return self.buf.items[buf_len - dist];
-    }
-
-    pub fn appendLiteral(self: *LzAccumBuffer, allocator: Allocator, lit: u8) !void {
-        try self.buf.append(allocator, lit);
-    }
-
-    pub fn appendLz(self: *LzAccumBuffer, allocator: Allocator, length: usize, dist: usize) !void {
-        const buf_len = self.buf.items.len;
-        if (dist > buf_len) {
-            return error.CorruptInput;
-        }
-
-        var offset = buf_len - dist;
-        var i: usize = 0;
-        while (i < length) : (i += 1) {
-            const x = self.buf.items[offset];
-            try self.buf.append(allocator, x);
-            offset += 1;
-        }
-    }
-};
-
-pub const RangeDecoder = struct {
-    stream: std.io.FixedBufferStream([]const u8),
-    range: u32,
-    code: u32,
-
-    pub fn init(buffer: []const u8) !RangeDecoder {
-        var dec = RangeDecoder{
-            .stream = std.io.fixedBufferStream(buffer),
-            .range = 0xFFFF_FFFF,
-            .code = 0,
-        };
-        const reader = dec.stream.reader();
-        _ = try reader.readByte();
-        dec.code = try reader.readIntBig(u32);
-        return dec;
-    }
-
-    pub fn fromParts(
-        buffer: []const u8,
-        range: u32,
-        code: u32,
-    ) RangeDecoder {
-        return .{
-            .stream = std.io.fixedBufferStream(buffer),
-            .range = range,
-            .code = code,
-        };
-    }
-
-    pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
-        self.range = range;
-        self.code = code;
-    }
-
-    pub fn readInto(self: *RangeDecoder, dest: []u8) !usize {
-        return self.stream.read(dest);
-    }
-
-    pub inline fn isFinished(self: *const RangeDecoder) bool {
-        return self.code == 0 and self.isEof();
-    }
-
-    pub inline fn isEof(self: *const RangeDecoder) bool {
-        return self.stream.pos == self.stream.buffer.len;
-    }
-
-    inline fn normalize(self: *RangeDecoder) !void {
-        if (self.range < 0x0100_0000) {
-            self.range <<= 8;
-            self.code = (self.code << 8) ^ @as(u32, try self.stream.reader().readByte());
-        }
-    }
-
-    inline fn getBit(self: *RangeDecoder) !bool {
-        self.range >>= 1;
-
-        const bit = self.code >= self.range;
-        if (bit)
-            self.code -= self.range;
-
-        try self.normalize();
-        return bit;
-    }
-
-    fn get(self: *RangeDecoder, count: usize) !u32 {
-        var result: u32 = 0;
-        var i: usize = 0;
-        while (i < count) : (i += 1)
-            result = (result << 1) ^ @boolToInt(try self.getBit());
-        return result;
-    }
-
-    pub inline fn decodeBit(self: *RangeDecoder, prob: *u16, update: bool) !bool {
-        const bound = (self.range >> 11) * prob.*;
-
-        if (self.code < bound) {
-            if (update)
-                prob.* += (0x800 - prob.*) >> 5;
-            self.range = bound;
-
-            try self.normalize();
-            return false;
-        } else {
-            if (update)
-                prob.* -= prob.* >> 5;
-            self.code -= bound;
-            self.range -= bound;
-
-            try self.normalize();
-            return true;
-        }
-    }
-
-    fn parseBitTree(
-        self: *RangeDecoder,
-        num_bits: u5,
-        probs: []u16,
-        update: bool,
-    ) !u32 {
-        var tmp: u32 = 1;
-        var i: u5 = 0;
-        while (i < num_bits) : (i += 1) {
-            const bit = try self.decodeBit(&probs[tmp], update);
-            tmp = (tmp << 1) ^ @boolToInt(bit);
-        }
-        return tmp - (@as(u32, 1) << num_bits);
-    }
-
-    pub fn parseReverseBitTree(
-        self: *RangeDecoder,
-        num_bits: u5,
-        probs: []u16,
-        offset: usize,
-        update: bool,
-    ) !u32 {
-        var result: u32 = 0;
-        var tmp: usize = 1;
-        var i: u5 = 0;
-        while (i < num_bits) : (i += 1) {
-            const bit = @boolToInt(try self.decodeBit(&probs[offset + tmp], update));
-            tmp = (tmp << 1) ^ bit;
-            result ^= @as(u32, bit) << i;
-        }
-        return result;
-    }
-};
-
-fn Vec2D(comptime T: type) type {
-    return struct {
-        data: []T,
-        cols: usize,
-
-        const Self = @This();
-
-        pub fn init(allocator: Allocator, data: T, rows: usize, cols: usize) !Self {
-            const len = try std.math.mul(usize, rows, cols);
-            var vec2d = Self{
-                .data = try allocator.alloc(T, len),
-                .cols = cols,
-            };
-            vec2d.fill(data);
-            return vec2d;
-        }
-
-        pub fn deinit(self: *Self, allocator: Allocator) void {
-            allocator.free(self.data);
-        }
-
-        pub fn fill(self: *Self, value: T) void {
-            std.mem.set(T, self.data, value);
-        }
-
-        pub fn get(self: *Self, row: usize) ![]T {
-            const start_row = try std.math.mul(usize, row, self.cols);
-            return self.data[start_row .. start_row + self.cols];
-        }
-    };
-}
-
-const BitTree = struct {
-    num_bits: u5,
-    probs: ArrayListUnmanaged(u16),
-
-    pub fn init(allocator: Allocator, num_bits: u5) !BitTree {
-        var probs_len = @as(usize, 1) << num_bits;
-        var probs = try ArrayListUnmanaged(u16).initCapacity(allocator, probs_len);
-        while (probs_len > 0) : (probs_len -= 1)
-            probs.appendAssumeCapacity(0x400);
-        return .{ .num_bits = num_bits, .probs = probs };
-    }
-
-    pub fn deinit(self: *BitTree, allocator: Allocator) void {
-        self.probs.deinit(allocator);
-    }
-
-    pub fn parse(
-        self: *BitTree,
-        rangecoder: *RangeDecoder,
-        update: bool,
-    ) !u32 {
-        return rangecoder.parseBitTree(self.num_bits, self.probs.items, update);
-    }
-
-    pub fn parseReverse(
-        self: *BitTree,
-        rangecoder: *RangeDecoder,
-        update: bool,
-    ) !u32 {
-        return rangecoder.parseReverseBitTree(self.num_bits, self.probs.items, 0, update);
-    }
-
-    pub fn reset(self: *BitTree) void {
-        std.mem.set(u16, self.probs.items, 0x400);
-    }
-};
-
-const LenDecoder = struct {
-    choice: u16,
-    choice2: u16,
-    low_coder: [16]BitTree,
-    mid_coder: [16]BitTree,
-    high_coder: BitTree,
-
-    pub fn init(allocator: Allocator) !LenDecoder {
-        return .{
-            .choice = 0x400,
-            .choice2 = 0x400,
-            .low_coder = .{
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-            },
-            .mid_coder = .{
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-                try BitTree.init(allocator, 3),
-            },
-            .high_coder = try BitTree.init(allocator, 8),
-        };
-    }
-
-    pub fn deinit(self: *LenDecoder, allocator: Allocator) void {
-        for (self.low_coder) |*t| t.deinit(allocator);
-        for (self.mid_coder) |*t| t.deinit(allocator);
-        self.high_coder.deinit(allocator);
-    }
-
-    pub fn decode(
-        self: *LenDecoder,
-        rangecoder: *RangeDecoder,
-        pos_state: usize,
-        update: bool,
-    ) !usize {
-        if (!try rangecoder.decodeBit(&self.choice, update)) {
-            return @as(usize, try self.low_coder[pos_state].parse(rangecoder, update));
-        } else if (!try rangecoder.decodeBit(&self.choice2, update)) {
-            return @as(usize, try self.mid_coder[pos_state].parse(rangecoder, update)) + 8;
-        } else {
-            return @as(usize, try self.high_coder.parse(rangecoder, update)) + 16;
-        }
-    }
-
-    pub fn reset(self: *LenDecoder) void {
-        self.choice = 0x400;
-        self.choice2 = 0x400;
-        for (self.low_coder) |*t| t.reset();
-        for (self.mid_coder) |*t| t.reset();
-        self.high_coder.reset();
-    }
-};
lib/std/compress/lzma.zig
@@ -0,0 +1,36 @@
+const std = @import("../std.zig");
+const Allocator = std.mem.Allocator;
+const FixedBufferStream = std.io.FixedBufferStream;
+
+pub const decode = @import("lzma/decode.zig");
+pub const LzmaParams = decode.lzma.LzmaParams;
+pub const LzmaDecoder = decode.lzma.LzmaDecoder;
+pub const Lzma2Decoder = decode.lzma2.Lzma2Decoder;
+
+pub fn lzmaDecompress(
+    allocator: Allocator,
+    reader: anytype,
+    writer: anytype,
+    options: decode.Options,
+) !void {
+    const params = try LzmaParams.readHeader(reader, options);
+    var decoder = try LzmaDecoder.init(allocator, params, options.memlimit);
+    defer decoder.deinit(allocator);
+    return decoder.decompress(allocator, reader, writer);
+}
+
+pub fn lzma2Decompress(
+    allocator: Allocator,
+    reader: anytype,
+    writer: anytype,
+) !void {
+    var decoder = try Lzma2Decoder.init(allocator);
+    defer decoder.deinit(allocator);
+    return decoder.decompress(allocator, reader, writer);
+}
+
+test {
+    _ = @import("lzma/lzma_test.zig");
+    _ = @import("lzma/lzma2_test.zig");
+    _ = @import("lzma/vec2d.zig");
+}
lib/std/compress.zig
@@ -2,8 +2,9 @@ const std = @import("std.zig");
 
 pub const deflate = @import("compress/deflate.zig");
 pub const gzip = @import("compress/gzip.zig");
-pub const zlib = @import("compress/zlib.zig");
+pub const lzma = @import("compress/lzma.zig");
 pub const xz = @import("compress/xz.zig");
+pub const zlib = @import("compress/zlib.zig");
 
 pub fn HashedReader(
     comptime ReaderType: anytype,
@@ -38,6 +39,7 @@ pub fn hashedReader(
 test {
     _ = deflate;
     _ = gzip;
-    _ = zlib;
+    _ = lzma;
     _ = xz;
+    _ = zlib;
 }
build.zig
@@ -122,6 +122,8 @@ pub fn build(b: *Builder) !void {
                 "compress-gettysburg.txt",
                 "compress-pi.txt",
                 "rfc1951.txt",
+                // exclude files from lib/std/compress/lzma/testdata
+                ".lzma",
                 // exclude files from lib/std/compress/xz/testdata
                 ".xz",
                 // exclude files from lib/std/tz/