Commit 8e2af21cd9
Changed files (18)
lib
std
compress
lzma
lib/std/compress/lzma/decode/lzbuffer.zig
@@ -0,0 +1,226 @@
+const std = @import("../../../std.zig");
+const math = std.math;
+const mem = std.mem;
+const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+
+/// An accumulating buffer for LZ sequences
+pub const LzAccumBuffer = struct {
+ /// Buffer
+ buf: ArrayListUnmanaged(u8),
+
+ /// Buffer memory limit
+ memlimit: usize,
+
+ /// Total number of bytes sent through the buffer
+ len: usize,
+
+ const Self = @This();
+
+ pub fn init(memlimit: usize) Self {
+ return Self{
+ .buf = .{},
+ .memlimit = memlimit,
+ .len = 0,
+ };
+ }
+
+ pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void {
+ try self.buf.append(allocator, byte);
+ self.len += 1;
+ }
+
+ /// Reset the internal dictionary
+ pub fn reset(self: *Self, writer: anytype) !void {
+ try writer.writeAll(self.buf.items);
+ self.buf.clearRetainingCapacity();
+ self.len = 0;
+ }
+
+ /// Retrieve the last byte or return a default
+ pub fn lastOr(self: Self, lit: u8) u8 {
+ const buf_len = self.buf.items.len;
+ return if (buf_len == 0)
+ lit
+ else
+ self.buf.items[buf_len - 1];
+ }
+
+ /// Retrieve the n-th last byte
+ pub fn lastN(self: Self, dist: usize) !u8 {
+ const buf_len = self.buf.items.len;
+ if (dist > buf_len) {
+ return error.CorruptInput;
+ }
+
+ return self.buf.items[buf_len - dist];
+ }
+
+ /// Append a literal
+ pub fn appendLiteral(
+ self: *Self,
+ allocator: Allocator,
+ lit: u8,
+ writer: anytype,
+ ) !void {
+ _ = writer;
+ if (self.len >= self.memlimit) {
+ return error.CorruptInput;
+ }
+ try self.buf.append(allocator, lit);
+ self.len += 1;
+ }
+
+ /// Fetch an LZ sequence (length, distance) from inside the buffer
+ pub fn appendLz(
+ self: *Self,
+ allocator: Allocator,
+ len: usize,
+ dist: usize,
+ writer: anytype,
+ ) !void {
+ _ = writer;
+
+ const buf_len = self.buf.items.len;
+ if (dist > buf_len) {
+ return error.CorruptInput;
+ }
+
+ var offset = buf_len - dist;
+ var i: usize = 0;
+ while (i < len) : (i += 1) {
+ const x = self.buf.items[offset];
+ try self.buf.append(allocator, x);
+ offset += 1;
+ }
+ self.len += len;
+ }
+
+ pub fn finish(self: *Self, writer: anytype) !void {
+ try writer.writeAll(self.buf.items);
+ }
+
+ pub fn deinit(self: *Self, allocator: Allocator) void {
+ self.buf.deinit(allocator);
+ self.* = undefined;
+ }
+};
+
+/// A circular buffer for LZ sequences
+pub const LzCircularBuffer = struct {
+ /// Circular buffer
+ buf: ArrayListUnmanaged(u8),
+
+ /// Length of the buffer
+ dict_size: usize,
+
+ /// Buffer memory limit
+ memlimit: usize,
+
+ /// Current position
+ cursor: usize,
+
+ /// Total number of bytes sent through the buffer
+ len: usize,
+
+ const Self = @This();
+
+ pub fn init(dict_size: usize, memlimit: usize) Self {
+ return Self{
+ .buf = .{},
+ .dict_size = dict_size,
+ .memlimit = memlimit,
+ .cursor = 0,
+ .len = 0,
+ };
+ }
+
+ pub fn get(self: Self, index: usize) u8 {
+ return if (0 <= index and index < self.buf.items.len)
+ self.buf.items[index]
+ else
+ 0;
+ }
+
+ pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void {
+ if (index >= self.memlimit) {
+ return error.CorruptInput;
+ }
+ try self.buf.ensureTotalCapacity(allocator, index + 1);
+ while (self.buf.items.len < index) {
+ self.buf.appendAssumeCapacity(0);
+ }
+ self.buf.appendAssumeCapacity(value);
+ }
+
+ /// Retrieve the last byte or return a default
+ pub fn lastOr(self: Self, lit: u8) u8 {
+ return if (self.len == 0)
+ lit
+ else
+ self.get((self.dict_size + self.cursor - 1) % self.dict_size);
+ }
+
+ /// Retrieve the n-th last byte
+ pub fn lastN(self: Self, dist: usize) !u8 {
+ if (dist > self.dict_size or dist > self.len) {
+ return error.CorruptInput;
+ }
+
+ const offset = (self.dict_size + self.cursor - dist) % self.dict_size;
+ return self.get(offset);
+ }
+
+ /// Append a literal
+ pub fn appendLiteral(
+ self: *Self,
+ allocator: Allocator,
+ lit: u8,
+ writer: anytype,
+ ) !void {
+ try self.set(allocator, self.cursor, lit);
+ self.cursor += 1;
+ self.len += 1;
+
+ // Flush the circular buffer to the output
+ if (self.cursor == self.dict_size) {
+ try writer.writeAll(self.buf.items);
+ self.cursor = 0;
+ }
+ }
+
+ /// Fetch an LZ sequence (length, distance) from inside the buffer
+ pub fn appendLz(
+ self: *Self,
+ allocator: Allocator,
+ len: usize,
+ dist: usize,
+ writer: anytype,
+ ) !void {
+ if (dist > self.dict_size or dist > self.len) {
+ return error.CorruptInput;
+ }
+
+ var offset = (self.dict_size + self.cursor - dist) % self.dict_size;
+ var i: usize = 0;
+ while (i < len) : (i += 1) {
+ const x = self.get(offset);
+ try self.appendLiteral(allocator, x, writer);
+ offset += 1;
+ if (offset == self.dict_size) {
+ offset = 0;
+ }
+ }
+ }
+
+ pub fn finish(self: *Self, writer: anytype) !void {
+ if (self.cursor > 0) {
+ try writer.writeAll(self.buf.items[0..self.cursor]);
+ }
+ }
+
+ pub fn deinit(self: *Self, allocator: Allocator) void {
+ self.buf.deinit(allocator);
+ self.* = undefined;
+ }
+};
lib/std/compress/lzma/decode/lzma.zig
@@ -0,0 +1,398 @@
+const std = @import("../../../std.zig");
+const assert = std.debug.assert;
+const math = std.math;
+const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+const FixedBufferStream = std.io.FixedBufferStream;
+
+const LzCircularBuffer = @import("lzbuffer.zig").LzCircularBuffer;
+const Options = @import("../decode.zig").Options;
+const Vec2D = @import("../vec2d.zig").Vec2D;
+const rangecoder = @import("rangecoder.zig");
+const BitTree = rangecoder.BitTree;
+const LenDecoder = rangecoder.LenDecoder;
+const RangeDecoder = rangecoder.RangeDecoder;
+
+const ProcessingStatus = enum {
+ continue_,
+ finished,
+};
+
+pub const LzmaProperties = struct {
+ lc: u4,
+ lp: u3,
+ pb: u3,
+
+ fn validate(self: LzmaProperties) void {
+ assert(self.lc <= 8);
+ assert(self.lp <= 4);
+ assert(self.pb <= 4);
+ }
+};
+
+pub const LzmaParams = struct {
+ properties: LzmaProperties,
+ dict_size: u32,
+ unpacked_size: ?u64,
+
+ pub fn readHeader(reader: anytype, options: Options) !LzmaParams {
+ var props = try reader.readByte();
+ if (props >= 225) {
+ return error.CorruptInput;
+ }
+
+ const lc = @intCast(u4, props % 9);
+ props /= 9;
+ const lp = @intCast(u3, props % 5);
+ props /= 5;
+ const pb = @intCast(u3, props);
+
+ const dict_size_provided = try reader.readIntLittle(u32);
+ const dict_size = math.max(0x1000, dict_size_provided);
+
+ const unpacked_size = switch (options.unpacked_size) {
+ .read_from_header => blk: {
+ const unpacked_size_provided = try reader.readIntLittle(u64);
+ const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
+ break :blk if (marker_mandatory)
+ null
+ else
+ unpacked_size_provided;
+ },
+ .read_header_but_use_provided => |x| blk: {
+ _ = try reader.readIntLittle(u64);
+ break :blk x;
+ },
+ .use_provided => |x| x,
+ };
+
+ return LzmaParams{
+ .properties = LzmaProperties{ .lc = lc, .lp = lp, .pb = pb },
+ .dict_size = dict_size,
+ .unpacked_size = unpacked_size,
+ };
+ }
+};
+
+pub const DecoderState = struct {
+ lzma_props: LzmaProperties,
+ unpacked_size: ?u64,
+ literal_probs: Vec2D(u16),
+ pos_slot_decoder: [4]BitTree(6),
+ align_decoder: BitTree(4),
+ pos_decoders: [115]u16,
+ is_match: [192]u16,
+ is_rep: [12]u16,
+ is_rep_g0: [12]u16,
+ is_rep_g1: [12]u16,
+ is_rep_g2: [12]u16,
+ is_rep_0long: [192]u16,
+ state: usize,
+ rep: [4]usize,
+ len_decoder: LenDecoder,
+ rep_len_decoder: LenDecoder,
+
+ pub fn init(
+ allocator: Allocator,
+ lzma_props: LzmaProperties,
+ unpacked_size: ?u64,
+ ) !DecoderState {
+ return .{
+ .lzma_props = lzma_props,
+ .unpacked_size = unpacked_size,
+ .literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }),
+ .pos_slot_decoder = .{.{}} ** 4,
+ .align_decoder = .{},
+ .pos_decoders = .{0x400} ** 115,
+ .is_match = .{0x400} ** 192,
+ .is_rep = .{0x400} ** 12,
+ .is_rep_g0 = .{0x400} ** 12,
+ .is_rep_g1 = .{0x400} ** 12,
+ .is_rep_g2 = .{0x400} ** 12,
+ .is_rep_0long = .{0x400} ** 192,
+ .state = 0,
+ .rep = .{0} ** 4,
+ .len_decoder = .{},
+ .rep_len_decoder = .{},
+ };
+ }
+
+ pub fn deinit(self: *DecoderState, allocator: Allocator) void {
+ self.literal_probs.deinit(allocator);
+ self.* = undefined;
+ }
+
+ pub fn resetState(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void {
+ new_props.validate();
+ if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
+ self.literal_probs.fill(0x400);
+ } else {
+ self.literal_probs.deinit(allocator);
+ self.literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
+ }
+
+ self.lzma_props = new_props;
+ for (self.pos_slot_decoder) |*t| t.reset();
+ self.align_decoder.reset();
+ self.pos_decoders = .{0x400} ** 115;
+ self.is_match = .{0x400} ** 192;
+ self.is_rep = .{0x400} ** 12;
+ self.is_rep_g0 = .{0x400} ** 12;
+ self.is_rep_g1 = .{0x400} ** 12;
+ self.is_rep_g2 = .{0x400} ** 12;
+ self.is_rep_0long = .{0x400} ** 192;
+ self.state = 0;
+ self.rep = .{0} ** 4;
+ self.len_decoder.reset();
+ self.rep_len_decoder.reset();
+ }
+
+ fn processNextInner(
+ self: *DecoderState,
+ allocator: Allocator,
+ reader: anytype,
+ writer: anytype,
+ buffer: anytype,
+ decoder: *RangeDecoder,
+ update: bool,
+ ) !ProcessingStatus {
+ const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1);
+
+ if (!try decoder.decodeBit(
+ reader,
+ &self.is_match[(self.state << 4) + pos_state],
+ update,
+ )) {
+ const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update);
+
+ if (update) {
+ try buffer.appendLiteral(allocator, byte, writer);
+
+ self.state = if (self.state < 4)
+ 0
+ else if (self.state < 10)
+ self.state - 3
+ else
+ self.state - 6;
+ }
+ return .continue_;
+ }
+
+ var len: usize = undefined;
+ if (try decoder.decodeBit(reader, &self.is_rep[self.state], update)) {
+ if (!try decoder.decodeBit(reader, &self.is_rep_g0[self.state], update)) {
+ if (!try decoder.decodeBit(
+ reader,
+ &self.is_rep_0long[(self.state << 4) + pos_state],
+ update,
+ )) {
+ if (update) {
+ self.state = if (self.state < 7) 9 else 11;
+ const dist = self.rep[0] + 1;
+ try buffer.appendLz(allocator, 1, dist, writer);
+ }
+ return .continue_;
+ }
+ } else {
+ const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update))
+ 1
+ else if (!try decoder.decodeBit(reader, &self.is_rep_g2[self.state], update))
+ 2
+ else
+ 3;
+ if (update) {
+ const dist = self.rep[idx];
+ var i = idx;
+ while (i > 0) : (i -= 1) {
+ self.rep[i] = self.rep[i - 1];
+ }
+ self.rep[0] = dist;
+ }
+ }
+
+ len = try self.rep_len_decoder.decode(reader, decoder, pos_state, update);
+
+ if (update) {
+ self.state = if (self.state < 7) 8 else 11;
+ }
+ } else {
+ if (update) {
+ self.rep[3] = self.rep[2];
+ self.rep[2] = self.rep[1];
+ self.rep[1] = self.rep[0];
+ }
+
+ len = try self.len_decoder.decode(reader, decoder, pos_state, update);
+
+ if (update) {
+ self.state = if (self.state < 7) 7 else 10;
+ }
+
+ const rep_0 = try self.decodeDistance(reader, decoder, len, update);
+
+ if (update) {
+ self.rep[0] = rep_0;
+ if (self.rep[0] == 0xFFFF_FFFF) {
+ if (decoder.isFinished()) {
+ return .finished;
+ }
+ return error.CorruptInput;
+ }
+ }
+ }
+
+ if (update) {
+ len += 2;
+
+ const dist = self.rep[0] + 1;
+ try buffer.appendLz(allocator, len, dist, writer);
+ }
+
+ return .continue_;
+ }
+
+ fn processNext(
+ self: *DecoderState,
+ allocator: Allocator,
+ reader: anytype,
+ writer: anytype,
+ buffer: anytype,
+ decoder: *RangeDecoder,
+ ) !ProcessingStatus {
+ return self.processNextInner(allocator, reader, writer, buffer, decoder, true);
+ }
+
+ pub fn process(
+ self: *DecoderState,
+ allocator: Allocator,
+ reader: anytype,
+ writer: anytype,
+ buffer: anytype,
+ decoder: *RangeDecoder,
+ ) !void {
+ while (true) {
+ if (self.unpacked_size) |unpacked_size| {
+ if (buffer.len >= unpacked_size) {
+ break;
+ }
+ } else if (decoder.isFinished()) {
+ break;
+ }
+
+ if (try self.processNext(allocator, reader, writer, buffer, decoder) == .finished) {
+ break;
+ }
+ }
+
+ if (self.unpacked_size) |len| {
+ if (len != buffer.len) {
+ return error.CorruptInput;
+ }
+ }
+ }
+
+ fn decodeLiteral(
+ self: *DecoderState,
+ reader: anytype,
+ buffer: anytype,
+ decoder: *RangeDecoder,
+ update: bool,
+ ) !u8 {
+ const def_prev_byte = 0;
+ const prev_byte = @as(usize, buffer.lastOr(def_prev_byte));
+
+ var result: usize = 1;
+ const lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
+ (prev_byte >> (8 - self.lzma_props.lc));
+ const probs = try self.literal_probs.getMut(lit_state);
+
+ if (self.state >= 7) {
+ var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1));
+
+ while (result < 0x100) {
+ const match_bit = (match_byte >> 7) & 1;
+ match_byte <<= 1;
+ const bit = @boolToInt(try decoder.decodeBit(
+ reader,
+ &probs[((@as(usize, 1) + match_bit) << 8) + result],
+ update,
+ ));
+ result = (result << 1) ^ bit;
+ if (match_bit != bit) {
+ break;
+ }
+ }
+ }
+
+ while (result < 0x100) {
+ result = (result << 1) ^ @boolToInt(try decoder.decodeBit(reader, &probs[result], update));
+ }
+
+ return @truncate(u8, result - 0x100);
+ }
+
+ fn decodeDistance(
+ self: *DecoderState,
+ reader: anytype,
+ decoder: *RangeDecoder,
+ length: usize,
+ update: bool,
+ ) !usize {
+ const len_state = if (length > 3) 3 else length;
+
+ const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(reader, decoder, update));
+ if (pos_slot < 4)
+ return pos_slot;
+
+ const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1);
+ var result = (2 ^ (pos_slot & 1)) << num_direct_bits;
+
+ if (pos_slot < 14) {
+ result += try decoder.parseReverseBitTree(
+ reader,
+ num_direct_bits,
+ &self.pos_decoders,
+ result - pos_slot,
+ update,
+ );
+ } else {
+ result += @as(usize, try decoder.get(reader, num_direct_bits - 4)) << 4;
+ result += try self.align_decoder.parseReverse(reader, decoder, update);
+ }
+
+ return result;
+ }
+};
+
+pub const LzmaDecoder = struct {
+ params: LzmaParams,
+ memlimit: usize,
+ state: DecoderState,
+
+ pub fn init(allocator: Allocator, params: LzmaParams, memlimit: ?usize) !LzmaDecoder {
+ return LzmaDecoder{
+ .params = params,
+ .memlimit = memlimit orelse math.maxInt(usize),
+ .state = try DecoderState.init(allocator, params.properties, params.unpacked_size),
+ };
+ }
+
+ pub fn deinit(self: *LzmaDecoder, allocator: Allocator) void {
+ self.state.deinit(allocator);
+ self.* = undefined;
+ }
+
+ pub fn decompress(
+ self: *LzmaDecoder,
+ allocator: Allocator,
+ reader: anytype,
+ writer: anytype,
+ ) !void {
+ var buffer = LzCircularBuffer.init(self.params.dict_size, self.memlimit);
+ defer buffer.deinit(allocator);
+
+ var decoder = try RangeDecoder.init(reader);
+ try self.state.process(allocator, reader, writer, &buffer, &decoder);
+ try buffer.finish(writer);
+ }
+};
lib/std/compress/lzma/decode/lzma2.zig
@@ -0,0 +1,169 @@
+const std = @import("../../../std.zig");
+const Allocator = std.mem.Allocator;
+
+const lzma = @import("lzma.zig");
+const DecoderState = lzma.DecoderState;
+const LzmaProperties = lzma.LzmaProperties;
+const LzAccumBuffer = @import("lzbuffer.zig").LzAccumBuffer;
+const RangeDecoder = @import("rangecoder.zig").RangeDecoder;
+
+pub const Lzma2Decoder = struct {
+ lzma_state: DecoderState,
+
+ pub fn init(allocator: Allocator) !Lzma2Decoder {
+ return Lzma2Decoder{
+ .lzma_state = try DecoderState.init(
+ allocator,
+ LzmaProperties{
+ .lc = 0,
+ .lp = 0,
+ .pb = 0,
+ },
+ null,
+ ),
+ };
+ }
+
+ pub fn deinit(self: *Lzma2Decoder, allocator: Allocator) void {
+ self.lzma_state.deinit(allocator);
+ self.* = undefined;
+ }
+
+ pub fn decompress(
+ self: *Lzma2Decoder,
+ allocator: Allocator,
+ reader: anytype,
+ writer: anytype,
+ ) !void {
+ var accum = LzAccumBuffer.init(std.math.maxInt(usize));
+ defer accum.deinit(allocator);
+
+ while (true) {
+ const status = try reader.readByte();
+
+ switch (status) {
+ 0 => break,
+ 1 => try parseUncompressed(allocator, reader, writer, &accum, true),
+ 2 => try parseUncompressed(allocator, reader, writer, &accum, false),
+ else => try self.parseLzma(allocator, reader, writer, &accum, status),
+ }
+ }
+
+ try accum.finish(writer);
+ }
+
+ fn parseLzma(
+ self: *Lzma2Decoder,
+ allocator: Allocator,
+ reader: anytype,
+ writer: anytype,
+ accum: *LzAccumBuffer,
+ status: u8,
+ ) !void {
+ if (status & 0x80 == 0) {
+ return error.CorruptInput;
+ }
+
+ const Reset = struct {
+ dict: bool,
+ state: bool,
+ props: bool,
+ };
+
+ const reset = switch ((status >> 5) & 0x3) {
+ 0 => Reset{
+ .dict = false,
+ .state = false,
+ .props = false,
+ },
+ 1 => Reset{
+ .dict = false,
+ .state = true,
+ .props = false,
+ },
+ 2 => Reset{
+ .dict = false,
+ .state = true,
+ .props = true,
+ },
+ 3 => Reset{
+ .dict = true,
+ .state = true,
+ .props = true,
+ },
+ else => unreachable,
+ };
+
+ const unpacked_size = blk: {
+ var tmp: u64 = status & 0x1F;
+ tmp <<= 16;
+ tmp |= try reader.readIntBig(u16);
+ break :blk tmp + 1;
+ };
+
+ const packed_size = blk: {
+ const tmp: u17 = try reader.readIntBig(u16);
+ break :blk tmp + 1;
+ };
+
+ if (reset.dict) {
+ try accum.reset(writer);
+ }
+
+ if (reset.state) {
+ var new_props = self.lzma_state.lzma_props;
+
+ if (reset.props) {
+ var props = try reader.readByte();
+ if (props >= 225) {
+ return error.CorruptInput;
+ }
+
+ const lc = @intCast(u4, props % 9);
+ props /= 9;
+ const lp = @intCast(u3, props % 5);
+ props /= 5;
+ const pb = @intCast(u3, props);
+
+ if (lc + lp > 4) {
+ return error.CorruptInput;
+ }
+
+ new_props = LzmaProperties{ .lc = lc, .lp = lp, .pb = pb };
+ }
+
+ try self.lzma_state.resetState(allocator, new_props);
+ }
+
+ self.lzma_state.unpacked_size = unpacked_size + accum.len;
+
+ var counter = std.io.countingReader(reader);
+ const counter_reader = counter.reader();
+
+ var rangecoder = try RangeDecoder.init(counter_reader);
+ try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder);
+
+ if (counter.bytes_read != packed_size) {
+ return error.CorruptInput;
+ }
+ }
+
+ fn parseUncompressed(
+ allocator: Allocator,
+ reader: anytype,
+ writer: anytype,
+ accum: *LzAccumBuffer,
+ reset_dict: bool,
+ ) !void {
+ const unpacked_size = try reader.readIntBig(u16) + 1; // TODO: overflow
+
+ if (reset_dict) {
+ try accum.reset(writer);
+ }
+
+ var i: @TypeOf(unpacked_size) = 0;
+ while (i < unpacked_size) : (i += 1) {
+ try accum.appendByte(allocator, try reader.readByte());
+ }
+ }
+};
lib/std/compress/lzma/decode/rangecoder.zig
@@ -0,0 +1,184 @@
+const std = @import("../../../std.zig");
+const mem = std.mem;
+const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+const FixedBufferStream = std.io.FixedBufferStream;
+
+pub const RangeDecoder = struct {
+ range: u32,
+ code: u32,
+
+ pub fn init(reader: anytype) !RangeDecoder {
+ const reserved = try reader.readByte();
+ if (reserved != 0) {
+ return error.CorruptInput;
+ }
+ return RangeDecoder{
+ .range = 0xFFFF_FFFF,
+ .code = try reader.readIntBig(u32),
+ };
+ }
+
+ pub fn fromParts(
+ range: u32,
+ code: u32,
+ ) RangeDecoder {
+ return .{
+ .range = range,
+ .code = code,
+ };
+ }
+
+ pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
+ self.range = range;
+ self.code = code;
+ }
+
+ pub inline fn isFinished(self: RangeDecoder) bool {
+ return self.code == 0;
+ }
+
+ inline fn normalize(self: *RangeDecoder, reader: anytype) !void {
+ if (self.range < 0x0100_0000) {
+ self.range <<= 8;
+ self.code = (self.code << 8) ^ @as(u32, try reader.readByte());
+ }
+ }
+
+ inline fn getBit(self: *RangeDecoder, reader: anytype) !bool {
+ self.range >>= 1;
+
+ const bit = self.code >= self.range;
+ if (bit)
+ self.code -= self.range;
+
+ try self.normalize(reader);
+ return bit;
+ }
+
+ pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 {
+ var result: u32 = 0;
+ var i: usize = 0;
+ while (i < count) : (i += 1)
+ result = (result << 1) ^ @boolToInt(try self.getBit(reader));
+ return result;
+ }
+
+ pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool {
+ const bound = (self.range >> 11) * prob.*;
+
+ if (self.code < bound) {
+ if (update)
+ prob.* += (0x800 - prob.*) >> 5;
+ self.range = bound;
+
+ try self.normalize(reader);
+ return false;
+ } else {
+ if (update)
+ prob.* -= prob.* >> 5;
+ self.code -= bound;
+ self.range -= bound;
+
+ try self.normalize(reader);
+ return true;
+ }
+ }
+
+ fn parseBitTree(
+ self: *RangeDecoder,
+ reader: anytype,
+ num_bits: u5,
+ probs: []u16,
+ update: bool,
+ ) !u32 {
+ var tmp: u32 = 1;
+ var i: @TypeOf(num_bits) = 0;
+ while (i < num_bits) : (i += 1) {
+ const bit = try self.decodeBit(reader, &probs[tmp], update);
+ tmp = (tmp << 1) ^ @boolToInt(bit);
+ }
+ return tmp - (@as(u32, 1) << num_bits);
+ }
+
+ pub fn parseReverseBitTree(
+ self: *RangeDecoder,
+ reader: anytype,
+ num_bits: u5,
+ probs: []u16,
+ offset: usize,
+ update: bool,
+ ) !u32 {
+ var result: u32 = 0;
+ var tmp: usize = 1;
+ var i: @TypeOf(num_bits) = 0;
+ while (i < num_bits) : (i += 1) {
+ const bit = @boolToInt(try self.decodeBit(reader, &probs[offset + tmp], update));
+ tmp = (tmp << 1) ^ bit;
+ result ^= @as(u32, bit) << i;
+ }
+ return result;
+ }
+};
+
+pub fn BitTree(comptime num_bits: usize) type {
+ return struct {
+ probs: [1 << num_bits]u16 = .{0x400} ** (1 << num_bits),
+
+ const Self = @This();
+
+ pub fn parse(
+ self: *Self,
+ reader: anytype,
+ decoder: *RangeDecoder,
+ update: bool,
+ ) !u32 {
+ return decoder.parseBitTree(reader, num_bits, &self.probs, update);
+ }
+
+ pub fn parseReverse(
+ self: *Self,
+ reader: anytype,
+ decoder: *RangeDecoder,
+ update: bool,
+ ) !u32 {
+ return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update);
+ }
+
+ pub fn reset(self: *Self) void {
+ mem.set(u16, &self.probs, 0x400);
+ }
+ };
+}
+
+pub const LenDecoder = struct {
+ choice: u16 = 0x400,
+ choice2: u16 = 0x400,
+ low_coder: [16]BitTree(3) = .{.{}} ** 16,
+ mid_coder: [16]BitTree(3) = .{.{}} ** 16,
+ high_coder: BitTree(8) = .{},
+
+ pub fn decode(
+ self: *LenDecoder,
+ reader: anytype,
+ decoder: *RangeDecoder,
+ pos_state: usize,
+ update: bool,
+ ) !usize {
+ if (!try decoder.decodeBit(reader, &self.choice, update)) {
+ return @as(usize, try self.low_coder[pos_state].parse(reader, decoder, update));
+ } else if (!try decoder.decodeBit(reader, &self.choice2, update)) {
+ return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, update)) + 8;
+ } else {
+ return @as(usize, try self.high_coder.parse(reader, decoder, update)) + 16;
+ }
+ }
+
+ pub fn reset(self: *LenDecoder) void {
+ self.choice = 0x400;
+ self.choice2 = 0x400;
+ for (self.low_coder) |*t| t.reset();
+ for (self.mid_coder) |*t| t.reset();
+ self.high_coder.reset();
+ }
+};
lib/std/compress/lzma/testdata/bad-too_big_size-with_eopm.lzma
Binary file
lib/std/compress/lzma/testdata/bad-too_small_size-without_eopm-3.lzma
Binary file
lib/std/compress/lzma/testdata/good-known_size-with_eopm.lzma
Binary file
lib/std/compress/lzma/testdata/good-known_size-without_eopm.lzma
Binary file
lib/std/compress/lzma/testdata/good-unknown_size-with_eopm.lzma
Binary file
lib/std/compress/lzma/decode.zig
@@ -0,0 +1,16 @@
+pub const lzbuffer = @import("decode/lzbuffer.zig");
+pub const lzma = @import("decode/lzma.zig");
+pub const lzma2 = @import("decode/lzma2.zig");
+pub const rangecoder = @import("decode/rangecoder.zig");
+
+pub const Options = struct {
+ unpacked_size: UnpackedSize = .read_from_header,
+ memlimit: ?usize = null,
+ allow_incomplete: bool = false,
+};
+
+pub const UnpackedSize = union(enum) {
+ read_from_header,
+ read_header_but_use_provided: ?u64,
+ use_provided: ?u64,
+};
lib/std/compress/lzma/lzma2_test.zig
@@ -0,0 +1,27 @@
+const std = @import("../../std.zig");
+const lzma = @import("../lzma.zig");
+
+fn testDecompress(compressed: []const u8, writer: anytype) !void {
+ const allocator = std.testing.allocator;
+ var stream = std.io.fixedBufferStream(compressed);
+ try lzma.lzma2Decompress(allocator, stream.reader(), writer);
+}
+
+fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void {
+ const allocator = std.testing.allocator;
+ var decomp = std.ArrayList(u8).init(allocator);
+ defer decomp.deinit();
+ try testDecompress(compressed, decomp.writer());
+ try std.testing.expectEqualSlices(u8, expected, decomp.items);
+}
+
+fn testDecompressError(expected: anyerror, compressed: []const u8) !void {
+ return std.testing.expectError(expected, testDecompress(compressed, std.io.null_writer));
+}
+
+test {
+ try testDecompressEqual(
+ "Hello\nWorld!\n",
+ &[_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00 },
+ );
+}
lib/std/compress/lzma/lzma_test.zig
@@ -0,0 +1,87 @@
+const std = @import("../../std.zig");
+const lzma = @import("../lzma.zig");
+
+fn testDecompress(compressed: []const u8, writer: anytype) !void {
+ const allocator = std.testing.allocator;
+ var stream = std.io.fixedBufferStream(compressed);
+ try lzma.lzmaDecompress(allocator, stream.reader(), writer, .{});
+}
+
+fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void {
+ const allocator = std.testing.allocator;
+ var decomp = std.ArrayList(u8).init(allocator);
+ defer decomp.deinit();
+ try testDecompress(compressed, decomp.writer());
+ try std.testing.expectEqualSlices(u8, expected, decomp.items);
+}
+
+fn testDecompressError(expected: anyerror, compressed: []const u8) !void {
+ return std.testing.expectError(expected, testDecompress(compressed, std.io.null_writer));
+}
+
+test "decompress empty world" {
+ try testDecompressEqual(
+ "",
+ &[_]u8{
+ 0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x83, 0xff,
+ 0xfb, 0xff, 0xff, 0xc0, 0x00, 0x00, 0x00,
+ },
+ );
+}
+
+test "decompress hello world" {
+ try testDecompressEqual(
+ "Hello world\n",
+ &[_]u8{
+ 0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19,
+ 0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe,
+ 0xa5, 0xb0, 0x00,
+ },
+ );
+}
+
+test "decompress huge dict" {
+ try testDecompressEqual(
+ "Hello world\n",
+ &[_]u8{
+ 0x5d, 0x7f, 0x7f, 0x7f, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19,
+ 0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe,
+ 0xa5, 0xb0, 0x00,
+ },
+ );
+}
+
+test "unknown size with end of payload marker" {
+ try testDecompressEqual(
+ "Hello\nWorld!\n",
+ @embedFile("testdata/good-unknown_size-with_eopm.lzma"),
+ );
+}
+
+test "known size without end of payload marker" {
+ try testDecompressEqual(
+ "Hello\nWorld!\n",
+ @embedFile("testdata/good-known_size-without_eopm.lzma"),
+ );
+}
+
+test "known size with end of payload marker" {
+ try testDecompressEqual(
+ "Hello\nWorld!\n",
+ @embedFile("testdata/good-known_size-with_eopm.lzma"),
+ );
+}
+
+test "too big uncompressed size in header" {
+ try testDecompressError(
+ error.CorruptInput,
+ @embedFile("testdata/bad-too_big_size-with_eopm.lzma"),
+ );
+}
+
+test "too small uncompressed size in header" {
+ try testDecompressError(
+ error.CorruptInput,
+ @embedFile("testdata/bad-too_small_size-without_eopm-3.lzma"),
+ );
+}
lib/std/compress/lzma/vec2d.zig
@@ -0,0 +1,128 @@
+const std = @import("../../std.zig");
+const math = std.math;
+const mem = std.mem;
+const Allocator = std.mem.Allocator;
+
+pub fn Vec2D(comptime T: type) type {
+ return struct {
+ data: []T,
+ cols: usize,
+
+ const Self = @This();
+
+ pub fn init(allocator: Allocator, value: T, size: struct { usize, usize }) !Self {
+ const len = try math.mul(usize, size[0], size[1]);
+ const data = try allocator.alloc(T, len);
+ mem.set(T, data, value);
+ return Self{
+ .data = data,
+ .cols = size[1],
+ };
+ }
+
+ pub fn deinit(self: *Self, allocator: Allocator) void {
+ allocator.free(self.data);
+ self.* = undefined;
+ }
+
+ pub fn fill(self: *Self, value: T) void {
+ mem.set(T, self.data, value);
+ }
+
+ inline fn _get(self: Self, row: usize) ![]T {
+ const start_row = try math.mul(usize, row, self.cols);
+ const end_row = try math.add(usize, start_row, self.cols);
+ return self.data[start_row..end_row];
+ }
+
+ pub fn get(self: Self, row: usize) ![]const T {
+ return self._get(row);
+ }
+
+ pub fn getMut(self: *Self, row: usize) ![]T {
+ return self._get(row);
+ }
+ };
+}
+
+const testing = std.testing;
+const expectEqualSlices = std.testing.expectEqualSlices;
+const expectError = std.testing.expectError;
+
+test "Vec2D.init" {
+ const allocator = testing.allocator;
+ var vec2d = try Vec2D(i32).init(allocator, 1, .{ 2, 3 });
+ defer vec2d.deinit(allocator);
+
+ try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(0));
+ try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(1));
+}
+
+test "Vec2D.init overflow" {
+ const allocator = testing.allocator;
+ try expectError(
+ error.Overflow,
+ Vec2D(i32).init(allocator, 1, .{ math.maxInt(usize), math.maxInt(usize) }),
+ );
+}
+
+test "Vec2D.fill" {
+ const allocator = testing.allocator;
+ var vec2d = try Vec2D(i32).init(allocator, 0, .{ 2, 3 });
+ defer vec2d.deinit(allocator);
+
+ vec2d.fill(7);
+
+ try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(0));
+ try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(1));
+}
+
+test "Vec2D.get" {
+ var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 };
+ const vec2d = Vec2D(i32){
+ .data = &data,
+ .cols = 2,
+ };
+
+ try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0));
+ try expectEqualSlices(i32, &.{ 2, 3 }, try vec2d.get(1));
+ try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2));
+ try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3));
+}
+
+test "Vec2D.getMut" {
+ var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 };
+ var vec2d = Vec2D(i32){
+ .data = &data,
+ .cols = 2,
+ };
+
+ const row = try vec2d.getMut(1);
+ row[1] = 9;
+
+ try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0));
+ // (1, 1) should be 9.
+ try expectEqualSlices(i32, &.{ 2, 9 }, try vec2d.get(1));
+ try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2));
+ try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3));
+}
+
+test "Vec2D.get multiplication overflow" {
+ const allocator = testing.allocator;
+ var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 4 });
+ defer matrix.deinit(allocator);
+
+ const row = (math.maxInt(usize) / 4) + 1;
+ try expectError(error.Overflow, matrix.get(row));
+ try expectError(error.Overflow, matrix.getMut(row));
+}
+
+test "Vec2D.get addition overflow" {
+ const allocator = testing.allocator;
+ var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 5 });
+ defer matrix.deinit(allocator);
+
+ const row = math.maxInt(usize) / 5;
+ try expectError(error.Overflow, matrix.get(row));
+ try expectError(error.Overflow, matrix.getMut(row));
+}
lib/std/compress/xz/block.zig
@@ -1,6 +1,7 @@
const std = @import("../../std.zig");
-const lzma = @import("lzma.zig");
+const lzma = std.compress.lzma;
const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
const Crc32 = std.hash.Crc32;
const Crc64 = std.hash.crc.Crc64Xz;
const Sha256 = std.crypto.hash.sha2.Sha256;
@@ -32,8 +33,7 @@ pub fn Decoder(comptime ReaderType: type) type {
inner_reader: ReaderType,
check: xz.Check,
err: ?Error,
- accum: lzma.LzAccumBuffer,
- lzma_state: lzma.DecoderState,
+ to_read: ArrayListUnmanaged(u8),
block_count: usize,
fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self {
@@ -42,15 +42,13 @@ pub fn Decoder(comptime ReaderType: type) type {
.inner_reader = in_reader,
.check = check,
.err = null,
- .accum = .{},
- .lzma_state = try lzma.DecoderState.init(allocator),
+ .to_read = .{},
.block_count = 0,
};
}
pub fn deinit(self: *Self) void {
- self.accum.deinit(self.allocator);
- self.lzma_state.deinit(self.allocator);
+ self.to_read.deinit(self.allocator);
}
pub fn reader(self: *Self) Reader {
@@ -59,9 +57,13 @@ pub fn Decoder(comptime ReaderType: type) type {
pub fn read(self: *Self, output: []u8) Error!usize {
while (true) {
- if (self.accum.to_read.items.len > 0) {
- const n = self.accum.read(output);
- if (self.accum.to_read.items.len == 0 and self.err != null) {
+ if (self.to_read.items.len > 0) {
+ const input = self.to_read.items;
+ const n = std.math.min(input.len, output.len);
+ std.mem.copy(u8, output[0..n], input[0..n]);
+ std.mem.copy(u8, input, input[n..]);
+ self.to_read.shrinkRetainingCapacity(input.len - n);
+ if (self.to_read.items.len == 0 and self.err != null) {
if (self.err.? == DecodeError.EndOfStreamWithNoError) {
return n;
}
@@ -77,15 +79,12 @@ pub fn Decoder(comptime ReaderType: type) type {
}
self.readBlock() catch |e| {
self.err = e;
- if (self.accum.to_read.items.len == 0) {
- try self.accum.reset(self.allocator);
- }
};
}
}
fn readBlock(self: *Self) Error!void {
- const unpacked_pos = self.accum.to_read.items.len;
+ const unpacked_pos = self.to_read.items.len;
var block_counter = std.io.countingReader(self.inner_reader);
const block_reader = block_counter.reader();
@@ -156,15 +155,18 @@ pub fn Decoder(comptime ReaderType: type) type {
// Compressed Data
var packed_counter = std.io.countingReader(block_reader);
- const packed_reader = packed_counter.reader();
- while (try self.readLzma2Chunk(packed_reader)) {}
+ try lzma.lzma2Decompress(
+ self.allocator,
+ packed_counter.reader(),
+ self.to_read.writer(self.allocator),
+ );
if (packed_size) |s| {
if (s != packed_counter.bytes_read)
return error.CorruptInput;
}
- const unpacked_bytes = self.accum.to_read.items[unpacked_pos..];
+ const unpacked_bytes = self.to_read.items[unpacked_pos..];
if (unpacked_size) |s| {
if (s != unpacked_bytes.len)
return error.CorruptInput;
@@ -205,113 +207,5 @@ pub fn Decoder(comptime ReaderType: type) type {
self.block_count += 1;
}
-
- fn readLzma2Chunk(self: *Self, packed_reader: anytype) Error!bool {
- const status = try packed_reader.readByte();
- switch (status) {
- 0 => {
- try self.accum.reset(self.allocator);
- return false;
- },
- 1, 2 => {
- if (status == 1)
- try self.accum.reset(self.allocator);
-
- const size = try packed_reader.readIntBig(u16) + 1;
- try self.accum.ensureUnusedCapacity(self.allocator, size);
-
- var i: usize = 0;
- while (i < size) : (i += 1)
- self.accum.appendAssumeCapacity(try packed_reader.readByte());
-
- return true;
- },
- else => {
- if (status & 0x80 == 0)
- return error.CorruptInput;
-
- const Reset = struct {
- dict: bool,
- state: bool,
- props: bool,
- };
-
- const reset = switch ((status >> 5) & 0x3) {
- 0 => Reset{
- .dict = false,
- .state = false,
- .props = false,
- },
- 1 => Reset{
- .dict = false,
- .state = true,
- .props = false,
- },
- 2 => Reset{
- .dict = false,
- .state = true,
- .props = true,
- },
- 3 => Reset{
- .dict = true,
- .state = true,
- .props = true,
- },
- else => unreachable,
- };
-
- const unpacked_size = blk: {
- var tmp: u64 = status & 0x1F;
- tmp <<= 16;
- tmp |= try packed_reader.readIntBig(u16);
- break :blk tmp + 1;
- };
-
- const packed_size = blk: {
- const tmp: u17 = try packed_reader.readIntBig(u16);
- break :blk tmp + 1;
- };
-
- if (reset.dict)
- try self.accum.reset(self.allocator);
-
- if (reset.state) {
- var new_props = self.lzma_state.lzma_props;
-
- if (reset.props) {
- var props = try packed_reader.readByte();
- if (props >= 225)
- return error.CorruptInput;
-
- const lc = @intCast(u4, props % 9);
- props /= 9;
- const lp = @intCast(u3, props % 5);
- props /= 5;
- const pb = @intCast(u3, props);
-
- if (lc + lp > 4)
- return error.CorruptInput;
-
- new_props = .{ .lc = lc, .lp = lp, .pb = pb };
- }
-
- try self.lzma_state.reset_state(self.allocator, new_props);
- }
-
- self.lzma_state.unpacked_size = unpacked_size + self.accum.len();
-
- const buffer = try self.allocator.alloc(u8, packed_size);
- defer self.allocator.free(buffer);
-
- for (buffer) |*b|
- b.* = try packed_reader.readByte();
-
- var rangecoder = try lzma.RangeDecoder.init(buffer);
- try self.lzma_state.process(self.allocator, &self.accum, &rangecoder);
-
- return true;
- },
- }
- }
};
}
lib/std/compress/xz/lzma.zig
@@ -1,658 +0,0 @@
-// Ported from https://github.com/gendx/lzma-rs
-
-const std = @import("../../std.zig");
-const assert = std.debug.assert;
-const Allocator = std.mem.Allocator;
-const ArrayListUnmanaged = std.ArrayListUnmanaged;
-
-const LzmaProperties = struct {
- lc: u4,
- lp: u3,
- pb: u3,
-
- fn validate(self: LzmaProperties) void {
- assert(self.lc <= 8);
- assert(self.lp <= 4);
- assert(self.pb <= 4);
- }
-};
-
-pub const DecoderState = struct {
- lzma_props: LzmaProperties,
- unpacked_size: ?u64,
- literal_probs: Vec2D(u16),
- pos_slot_decoder: [4]BitTree,
- align_decoder: BitTree,
- pos_decoders: [115]u16,
- is_match: [192]u16,
- is_rep: [12]u16,
- is_rep_g0: [12]u16,
- is_rep_g1: [12]u16,
- is_rep_g2: [12]u16,
- is_rep_0long: [192]u16,
- state: usize,
- rep: [4]usize,
- len_decoder: LenDecoder,
- rep_len_decoder: LenDecoder,
-
- pub fn init(allocator: Allocator) !DecoderState {
- return .{
- .lzma_props = LzmaProperties{ .lc = 0, .lp = 0, .pb = 0 },
- .unpacked_size = null,
- .literal_probs = try Vec2D(u16).init(allocator, 0x400, 1, 0x300),
- .pos_slot_decoder = .{
- try BitTree.init(allocator, 6),
- try BitTree.init(allocator, 6),
- try BitTree.init(allocator, 6),
- try BitTree.init(allocator, 6),
- },
- .align_decoder = try BitTree.init(allocator, 4),
- .pos_decoders = .{0x400} ** 115,
- .is_match = .{0x400} ** 192,
- .is_rep = .{0x400} ** 12,
- .is_rep_g0 = .{0x400} ** 12,
- .is_rep_g1 = .{0x400} ** 12,
- .is_rep_g2 = .{0x400} ** 12,
- .is_rep_0long = .{0x400} ** 192,
- .state = 0,
- .rep = .{0} ** 4,
- .len_decoder = try LenDecoder.init(allocator),
- .rep_len_decoder = try LenDecoder.init(allocator),
- };
- }
-
- pub fn deinit(self: *DecoderState, allocator: Allocator) void {
- self.literal_probs.deinit(allocator);
- for (self.pos_slot_decoder) |*t| t.deinit(allocator);
- self.align_decoder.deinit(allocator);
- self.len_decoder.deinit(allocator);
- self.rep_len_decoder.deinit(allocator);
- }
-
- pub fn reset_state(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void {
- new_props.validate();
- if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
- self.literal_probs.fill(0x400);
- } else {
- self.literal_probs.deinit(allocator);
- self.literal_probs = try Vec2D(u16).init(allocator, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300);
- }
-
- self.lzma_props = new_props;
- for (self.pos_slot_decoder) |*t| t.reset();
- self.align_decoder.reset();
- self.pos_decoders = .{0x400} ** 115;
- self.is_match = .{0x400} ** 192;
- self.is_rep = .{0x400} ** 12;
- self.is_rep_g0 = .{0x400} ** 12;
- self.is_rep_g1 = .{0x400} ** 12;
- self.is_rep_g2 = .{0x400} ** 12;
- self.is_rep_0long = .{0x400} ** 192;
- self.state = 0;
- self.rep = .{0} ** 4;
- self.len_decoder.reset();
- self.rep_len_decoder.reset();
- }
-
- fn processNextInner(
- self: *DecoderState,
- allocator: Allocator,
- output: *LzAccumBuffer,
- rangecoder: *RangeDecoder,
- update: bool,
- ) !ProcessingStatus {
- const pos_state = output.len() & ((@as(usize, 1) << self.lzma_props.pb) - 1);
-
- if (!try rangecoder.decodeBit(
- &self.is_match[(self.state << 4) + pos_state],
- update,
- )) {
- const byte: u8 = try self.decodeLiteral(output, rangecoder, update);
-
- if (update) {
- try output.appendLiteral(allocator, byte);
-
- self.state = if (self.state < 4)
- 0
- else if (self.state < 10)
- self.state - 3
- else
- self.state - 6;
- }
- return .continue_;
- }
-
- var len: usize = undefined;
- if (try rangecoder.decodeBit(&self.is_rep[self.state], update)) {
- if (!try rangecoder.decodeBit(&self.is_rep_g0[self.state], update)) {
- if (!try rangecoder.decodeBit(
- &self.is_rep_0long[(self.state << 4) + pos_state],
- update,
- )) {
- if (update) {
- self.state = if (self.state < 7) 9 else 11;
- const dist = self.rep[0] + 1;
- try output.appendLz(allocator, 1, dist);
- }
- return .continue_;
- }
- } else {
- const idx: usize = if (!try rangecoder.decodeBit(&self.is_rep_g1[self.state], update))
- 1
- else if (!try rangecoder.decodeBit(&self.is_rep_g2[self.state], update))
- 2
- else
- 3;
- if (update) {
- const dist = self.rep[idx];
- var i = idx;
- while (i > 0) : (i -= 1) {
- self.rep[i] = self.rep[i - 1];
- }
- self.rep[0] = dist;
- }
- }
-
- len = try self.rep_len_decoder.decode(rangecoder, pos_state, update);
-
- if (update) {
- self.state = if (self.state < 7) 8 else 11;
- }
- } else {
- if (update) {
- self.rep[3] = self.rep[2];
- self.rep[2] = self.rep[1];
- self.rep[1] = self.rep[0];
- }
-
- len = try self.len_decoder.decode(rangecoder, pos_state, update);
-
- if (update) {
- self.state = if (self.state < 7) 7 else 10;
- }
-
- const rep_0 = try self.decodeDistance(rangecoder, len, update);
-
- if (update) {
- self.rep[0] = rep_0;
- if (self.rep[0] == 0xFFFF_FFFF) {
- if (rangecoder.isFinished()) {
- return .finished;
- }
- return error.CorruptInput;
- }
- }
- }
-
- if (update) {
- len += 2;
-
- const dist = self.rep[0] + 1;
- try output.appendLz(allocator, len, dist);
- }
-
- return .continue_;
- }
-
- fn processNext(
- self: *DecoderState,
- allocator: Allocator,
- output: *LzAccumBuffer,
- rangecoder: *RangeDecoder,
- ) !ProcessingStatus {
- return self.processNextInner(allocator, output, rangecoder, true);
- }
-
- pub fn process(
- self: *DecoderState,
- allocator: Allocator,
- output: *LzAccumBuffer,
- rangecoder: *RangeDecoder,
- ) !void {
- while (true) {
- if (self.unpacked_size) |unpacked_size| {
- if (output.len() >= unpacked_size) {
- break;
- }
- } else if (rangecoder.isFinished()) {
- break;
- }
-
- if (try self.processNext(allocator, output, rangecoder) == .finished) {
- break;
- }
- }
-
- if (self.unpacked_size) |len| {
- if (len != output.len()) {
- return error.CorruptInput;
- }
- }
- }
-
- fn decodeLiteral(
- self: *DecoderState,
- output: *LzAccumBuffer,
- rangecoder: *RangeDecoder,
- update: bool,
- ) !u8 {
- const def_prev_byte = 0;
- const prev_byte = @as(usize, output.lastOr(def_prev_byte));
-
- var result: usize = 1;
- const lit_state = ((output.len() & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
- (prev_byte >> (8 - self.lzma_props.lc));
- const probs = try self.literal_probs.get(lit_state);
-
- if (self.state >= 7) {
- var match_byte = @as(usize, try output.lastN(self.rep[0] + 1));
-
- while (result < 0x100) {
- const match_bit = (match_byte >> 7) & 1;
- match_byte <<= 1;
- const bit = @boolToInt(try rangecoder.decodeBit(
- &probs[((@as(usize, 1) + match_bit) << 8) + result],
- update,
- ));
- result = (result << 1) ^ bit;
- if (match_bit != bit) {
- break;
- }
- }
- }
-
- while (result < 0x100) {
- result = (result << 1) ^ @boolToInt(try rangecoder.decodeBit(&probs[result], update));
- }
-
- return @truncate(u8, result - 0x100);
- }
-
- fn decodeDistance(
- self: *DecoderState,
- rangecoder: *RangeDecoder,
- length: usize,
- update: bool,
- ) !usize {
- const len_state = if (length > 3) 3 else length;
-
- const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(rangecoder, update));
- if (pos_slot < 4)
- return pos_slot;
-
- const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1);
- var result = (2 ^ (pos_slot & 1)) << num_direct_bits;
-
- if (pos_slot < 14) {
- result += try rangecoder.parseReverseBitTree(
- num_direct_bits,
- &self.pos_decoders,
- result - pos_slot,
- update,
- );
- } else {
- result += @as(usize, try rangecoder.get(num_direct_bits - 4)) << 4;
- result += try self.align_decoder.parseReverse(rangecoder, update);
- }
-
- return result;
- }
-};
-
-const ProcessingStatus = enum {
- continue_,
- finished,
-};
-
-pub const LzAccumBuffer = struct {
- to_read: ArrayListUnmanaged(u8) = .{},
- buf: ArrayListUnmanaged(u8) = .{},
-
- pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
- self.to_read.deinit(allocator);
- self.buf.deinit(allocator);
- }
-
- pub fn read(self: *LzAccumBuffer, output: []u8) usize {
- const input = self.to_read.items;
- const n = std.math.min(input.len, output.len);
- std.mem.copy(u8, output[0..n], input[0..n]);
- std.mem.copy(u8, input, input[n..]);
- self.to_read.shrinkRetainingCapacity(input.len - n);
- return n;
- }
-
- pub fn ensureUnusedCapacity(
- self: *LzAccumBuffer,
- allocator: Allocator,
- additional_count: usize,
- ) !void {
- try self.buf.ensureUnusedCapacity(allocator, additional_count);
- }
-
- pub fn appendAssumeCapacity(self: *LzAccumBuffer, byte: u8) void {
- self.buf.appendAssumeCapacity(byte);
- }
-
- pub fn reset(self: *LzAccumBuffer, allocator: Allocator) !void {
- try self.to_read.appendSlice(allocator, self.buf.items);
- self.buf.clearRetainingCapacity();
- }
-
- pub fn len(self: *const LzAccumBuffer) usize {
- return self.buf.items.len;
- }
-
- pub fn lastOr(self: *const LzAccumBuffer, lit: u8) u8 {
- const buf_len = self.buf.items.len;
- return if (buf_len == 0)
- lit
- else
- self.buf.items[buf_len - 1];
- }
-
- pub fn lastN(self: *const LzAccumBuffer, dist: usize) !u8 {
- const buf_len = self.buf.items.len;
- if (dist > buf_len) {
- return error.CorruptInput;
- }
-
- return self.buf.items[buf_len - dist];
- }
-
- pub fn appendLiteral(self: *LzAccumBuffer, allocator: Allocator, lit: u8) !void {
- try self.buf.append(allocator, lit);
- }
-
- pub fn appendLz(self: *LzAccumBuffer, allocator: Allocator, length: usize, dist: usize) !void {
- const buf_len = self.buf.items.len;
- if (dist > buf_len) {
- return error.CorruptInput;
- }
-
- var offset = buf_len - dist;
- var i: usize = 0;
- while (i < length) : (i += 1) {
- const x = self.buf.items[offset];
- try self.buf.append(allocator, x);
- offset += 1;
- }
- }
-};
-
-pub const RangeDecoder = struct {
- stream: std.io.FixedBufferStream([]const u8),
- range: u32,
- code: u32,
-
- pub fn init(buffer: []const u8) !RangeDecoder {
- var dec = RangeDecoder{
- .stream = std.io.fixedBufferStream(buffer),
- .range = 0xFFFF_FFFF,
- .code = 0,
- };
- const reader = dec.stream.reader();
- _ = try reader.readByte();
- dec.code = try reader.readIntBig(u32);
- return dec;
- }
-
- pub fn fromParts(
- buffer: []const u8,
- range: u32,
- code: u32,
- ) RangeDecoder {
- return .{
- .stream = std.io.fixedBufferStream(buffer),
- .range = range,
- .code = code,
- };
- }
-
- pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
- self.range = range;
- self.code = code;
- }
-
- pub fn readInto(self: *RangeDecoder, dest: []u8) !usize {
- return self.stream.read(dest);
- }
-
- pub inline fn isFinished(self: *const RangeDecoder) bool {
- return self.code == 0 and self.isEof();
- }
-
- pub inline fn isEof(self: *const RangeDecoder) bool {
- return self.stream.pos == self.stream.buffer.len;
- }
-
- inline fn normalize(self: *RangeDecoder) !void {
- if (self.range < 0x0100_0000) {
- self.range <<= 8;
- self.code = (self.code << 8) ^ @as(u32, try self.stream.reader().readByte());
- }
- }
-
- inline fn getBit(self: *RangeDecoder) !bool {
- self.range >>= 1;
-
- const bit = self.code >= self.range;
- if (bit)
- self.code -= self.range;
-
- try self.normalize();
- return bit;
- }
-
- fn get(self: *RangeDecoder, count: usize) !u32 {
- var result: u32 = 0;
- var i: usize = 0;
- while (i < count) : (i += 1)
- result = (result << 1) ^ @boolToInt(try self.getBit());
- return result;
- }
-
- pub inline fn decodeBit(self: *RangeDecoder, prob: *u16, update: bool) !bool {
- const bound = (self.range >> 11) * prob.*;
-
- if (self.code < bound) {
- if (update)
- prob.* += (0x800 - prob.*) >> 5;
- self.range = bound;
-
- try self.normalize();
- return false;
- } else {
- if (update)
- prob.* -= prob.* >> 5;
- self.code -= bound;
- self.range -= bound;
-
- try self.normalize();
- return true;
- }
- }
-
- fn parseBitTree(
- self: *RangeDecoder,
- num_bits: u5,
- probs: []u16,
- update: bool,
- ) !u32 {
- var tmp: u32 = 1;
- var i: u5 = 0;
- while (i < num_bits) : (i += 1) {
- const bit = try self.decodeBit(&probs[tmp], update);
- tmp = (tmp << 1) ^ @boolToInt(bit);
- }
- return tmp - (@as(u32, 1) << num_bits);
- }
-
- pub fn parseReverseBitTree(
- self: *RangeDecoder,
- num_bits: u5,
- probs: []u16,
- offset: usize,
- update: bool,
- ) !u32 {
- var result: u32 = 0;
- var tmp: usize = 1;
- var i: u5 = 0;
- while (i < num_bits) : (i += 1) {
- const bit = @boolToInt(try self.decodeBit(&probs[offset + tmp], update));
- tmp = (tmp << 1) ^ bit;
- result ^= @as(u32, bit) << i;
- }
- return result;
- }
-};
-
-fn Vec2D(comptime T: type) type {
- return struct {
- data: []T,
- cols: usize,
-
- const Self = @This();
-
- pub fn init(allocator: Allocator, data: T, rows: usize, cols: usize) !Self {
- const len = try std.math.mul(usize, rows, cols);
- var vec2d = Self{
- .data = try allocator.alloc(T, len),
- .cols = cols,
- };
- vec2d.fill(data);
- return vec2d;
- }
-
- pub fn deinit(self: *Self, allocator: Allocator) void {
- allocator.free(self.data);
- }
-
- pub fn fill(self: *Self, value: T) void {
- std.mem.set(T, self.data, value);
- }
-
- pub fn get(self: *Self, row: usize) ![]T {
- const start_row = try std.math.mul(usize, row, self.cols);
- return self.data[start_row .. start_row + self.cols];
- }
- };
-}
-
-const BitTree = struct {
- num_bits: u5,
- probs: ArrayListUnmanaged(u16),
-
- pub fn init(allocator: Allocator, num_bits: u5) !BitTree {
- var probs_len = @as(usize, 1) << num_bits;
- var probs = try ArrayListUnmanaged(u16).initCapacity(allocator, probs_len);
- while (probs_len > 0) : (probs_len -= 1)
- probs.appendAssumeCapacity(0x400);
- return .{ .num_bits = num_bits, .probs = probs };
- }
-
- pub fn deinit(self: *BitTree, allocator: Allocator) void {
- self.probs.deinit(allocator);
- }
-
- pub fn parse(
- self: *BitTree,
- rangecoder: *RangeDecoder,
- update: bool,
- ) !u32 {
- return rangecoder.parseBitTree(self.num_bits, self.probs.items, update);
- }
-
- pub fn parseReverse(
- self: *BitTree,
- rangecoder: *RangeDecoder,
- update: bool,
- ) !u32 {
- return rangecoder.parseReverseBitTree(self.num_bits, self.probs.items, 0, update);
- }
-
- pub fn reset(self: *BitTree) void {
- std.mem.set(u16, self.probs.items, 0x400);
- }
-};
-
-const LenDecoder = struct {
- choice: u16,
- choice2: u16,
- low_coder: [16]BitTree,
- mid_coder: [16]BitTree,
- high_coder: BitTree,
-
- pub fn init(allocator: Allocator) !LenDecoder {
- return .{
- .choice = 0x400,
- .choice2 = 0x400,
- .low_coder = .{
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- },
- .mid_coder = .{
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- try BitTree.init(allocator, 3),
- },
- .high_coder = try BitTree.init(allocator, 8),
- };
- }
-
- pub fn deinit(self: *LenDecoder, allocator: Allocator) void {
- for (self.low_coder) |*t| t.deinit(allocator);
- for (self.mid_coder) |*t| t.deinit(allocator);
- self.high_coder.deinit(allocator);
- }
-
- pub fn decode(
- self: *LenDecoder,
- rangecoder: *RangeDecoder,
- pos_state: usize,
- update: bool,
- ) !usize {
- if (!try rangecoder.decodeBit(&self.choice, update)) {
- return @as(usize, try self.low_coder[pos_state].parse(rangecoder, update));
- } else if (!try rangecoder.decodeBit(&self.choice2, update)) {
- return @as(usize, try self.mid_coder[pos_state].parse(rangecoder, update)) + 8;
- } else {
- return @as(usize, try self.high_coder.parse(rangecoder, update)) + 16;
- }
- }
-
- pub fn reset(self: *LenDecoder) void {
- self.choice = 0x400;
- self.choice2 = 0x400;
- for (self.low_coder) |*t| t.reset();
- for (self.mid_coder) |*t| t.reset();
- self.high_coder.reset();
- }
-};
lib/std/compress/lzma.zig
@@ -0,0 +1,36 @@
+const std = @import("../std.zig");
+const Allocator = std.mem.Allocator;
+const FixedBufferStream = std.io.FixedBufferStream;
+
+pub const decode = @import("lzma/decode.zig");
+pub const LzmaParams = decode.lzma.LzmaParams;
+pub const LzmaDecoder = decode.lzma.LzmaDecoder;
+pub const Lzma2Decoder = decode.lzma2.Lzma2Decoder;
+
+pub fn lzmaDecompress(
+ allocator: Allocator,
+ reader: anytype,
+ writer: anytype,
+ options: decode.Options,
+) !void {
+ const params = try LzmaParams.readHeader(reader, options);
+ var decoder = try LzmaDecoder.init(allocator, params, options.memlimit);
+ defer decoder.deinit(allocator);
+ return decoder.decompress(allocator, reader, writer);
+}
+
+pub fn lzma2Decompress(
+ allocator: Allocator,
+ reader: anytype,
+ writer: anytype,
+) !void {
+ var decoder = try Lzma2Decoder.init(allocator);
+ defer decoder.deinit(allocator);
+ return decoder.decompress(allocator, reader, writer);
+}
+
+test {
+ _ = @import("lzma/lzma_test.zig");
+ _ = @import("lzma/lzma2_test.zig");
+ _ = @import("lzma/vec2d.zig");
+}
lib/std/compress.zig
@@ -2,8 +2,9 @@ const std = @import("std.zig");
pub const deflate = @import("compress/deflate.zig");
pub const gzip = @import("compress/gzip.zig");
-pub const zlib = @import("compress/zlib.zig");
+pub const lzma = @import("compress/lzma.zig");
pub const xz = @import("compress/xz.zig");
+pub const zlib = @import("compress/zlib.zig");
pub fn HashedReader(
comptime ReaderType: anytype,
@@ -38,6 +39,7 @@ pub fn hashedReader(
test {
_ = deflate;
_ = gzip;
- _ = zlib;
+ _ = lzma;
_ = xz;
+ _ = zlib;
}
build.zig
@@ -122,6 +122,8 @@ pub fn build(b: *Builder) !void {
"compress-gettysburg.txt",
"compress-pi.txt",
"rfc1951.txt",
+ // exclude files from lib/std/compress/lzma/testdata
+ ".lzma",
// exclude files from lib/std/compress/xz/testdata
".xz",
// exclude files from lib/std/tz/