Commit 06ce15e8f7
Changed files (32)
lib
std
compress
xz
testdata
lib/std/compress/xz/testdata/good-0-empty.xz
Binary file
lib/std/compress/xz/testdata/good-0cat-empty.xz
Binary file
lib/std/compress/xz/testdata/good-0catpad-empty.xz
Binary file
lib/std/compress/xz/testdata/good-0pad-empty.xz
Binary file
lib/std/compress/xz/testdata/good-1-3delta-lzma2.xz
Binary file
lib/std/compress/xz/testdata/good-1-arm64-lzma2-1.xz
Binary file
lib/std/compress/xz/testdata/good-1-arm64-lzma2-2.xz
Binary file
lib/std/compress/xz/testdata/good-1-block_header-1.xz
Binary file
lib/std/compress/xz/testdata/good-1-block_header-2.xz
Binary file
lib/std/compress/xz/testdata/good-1-block_header-3.xz
Binary file
lib/std/compress/xz/testdata/good-1-check-crc32.xz
Binary file
lib/std/compress/xz/testdata/good-1-check-crc64.xz
Binary file
lib/std/compress/xz/testdata/good-1-check-none.xz
Binary file
lib/std/compress/xz/testdata/good-1-check-sha256.xz
Binary file
lib/std/compress/xz/testdata/good-1-delta-lzma2.tiff.xz
Binary file
lib/std/compress/xz/testdata/good-1-empty-bcj-lzma2.xz
Binary file
lib/std/compress/xz/testdata/good-1-lzma2-1.xz
Binary file
lib/std/compress/xz/testdata/good-1-lzma2-2.xz
Binary file
lib/std/compress/xz/testdata/good-1-lzma2-3.xz
Binary file
lib/std/compress/xz/testdata/good-1-lzma2-4.xz
Binary file
lib/std/compress/xz/testdata/good-1-lzma2-5.xz
Binary file
lib/std/compress/xz/testdata/good-1-sparc-lzma2.xz
Binary file
lib/std/compress/xz/testdata/good-1-x86-lzma2.xz
Binary file
lib/std/compress/xz/testdata/good-2-lzma2.xz
Binary file
lib/std/compress/xz/block.zig
@@ -0,0 +1,319 @@
+const std = @import("std");
+const check = @import("check.zig");
+const lzma = @import("lzma.zig");
+const multibyte = @import("multibyte.zig");
+const Allocator = std.mem.Allocator;
+const Crc32 = std.hash.Crc32;
+const Crc64 = std.hash.crc.Crc64Xz;
+const Sha256 = std.crypto.hash.sha2.Sha256;
+
+const DecodeError = error{
+ CorruptInput,
+ EndOfStream,
+ EndOfStreamWithNoError,
+ WrongChecksum,
+ Unsupported,
+ Overflow,
+};
+
+pub fn decoder(allocator: Allocator, reader: anytype, check_kind: check.Kind) !Decoder(@TypeOf(reader)) {
+ return Decoder(@TypeOf(reader)).init(allocator, reader, check_kind);
+}
+
+pub fn Decoder(comptime ReaderType: type) type {
+ return struct {
+ const Self = @This();
+ pub const Error =
+ ReaderType.Error ||
+ DecodeError ||
+ Allocator.Error;
+ pub const Reader = std.io.Reader(*Self, Error, read);
+
+ allocator: Allocator,
+ inner_reader: ReaderType,
+ check_kind: check.Kind,
+ err: ?Error,
+ accum: lzma.LzAccumBuffer,
+ lzma_state: lzma.DecoderState,
+ block_count: usize,
+
+ fn init(allocator: Allocator, in_reader: ReaderType, check_kind: check.Kind) !Self {
+ return Self{
+ .allocator = allocator,
+ .inner_reader = in_reader,
+ .check_kind = check_kind,
+ .err = null,
+ .accum = .{},
+ .lzma_state = try lzma.DecoderState.init(allocator),
+ .block_count = 0,
+ };
+ }
+
+ pub fn deinit(self: *Self) void {
+ self.accum.deinit(self.allocator);
+ self.lzma_state.deinit(self.allocator);
+ }
+
+ pub fn reader(self: *Self) Reader {
+ return .{ .context = self };
+ }
+
+ pub fn read(self: *Self, output: []u8) Error!usize {
+ while (true) {
+ if (self.accum.to_read.items.len > 0) {
+ const n = self.accum.read(output);
+ if (self.accum.to_read.items.len == 0 and self.err != null) {
+ if (self.err.? == DecodeError.EndOfStreamWithNoError) {
+ return n;
+ }
+ return self.err.?;
+ }
+ return n;
+ }
+ if (self.err != null) {
+ if (self.err.? == DecodeError.EndOfStreamWithNoError) {
+ return 0;
+ }
+ return self.err.?;
+ }
+ self.readBlock() catch |e| {
+ self.err = e;
+ if (self.accum.to_read.items.len == 0) {
+ try self.accum.reset(self.allocator);
+ }
+ };
+ }
+ }
+
+ fn readBlock(self: *Self) Error!void {
+ const unpacked_pos = self.accum.to_read.items.len;
+
+ var block_counter = std.io.countingReader(self.inner_reader);
+ const block_reader = block_counter.reader();
+
+ var packed_size: ?u64 = null;
+ var unpacked_size: ?u64 = null;
+
+ // Block Header
+ {
+ var header_hasher = std.compress.hashedReader(block_reader, Crc32.init());
+ const header_reader = header_hasher.reader();
+
+ const header_size = try header_reader.readByte() * 4;
+ if (header_size == 0)
+ return error.EndOfStreamWithNoError;
+
+ const Flags = packed struct(u8) {
+ last_filter_index: u2,
+ reserved: u4,
+ has_packed_size: bool,
+ has_unpacked_size: bool,
+ };
+
+ const flags = try header_reader.readStruct(Flags);
+ const filter_count = @as(u3, flags.last_filter_index) + 1;
+ if (filter_count > 1)
+ return error.Unsupported;
+
+ if (flags.has_packed_size)
+ packed_size = try multibyte.readInt(header_reader);
+
+ if (flags.has_unpacked_size)
+ unpacked_size = try multibyte.readInt(header_reader);
+
+ const FilterId = enum(u64) {
+ lzma2 = 0x21,
+ _,
+ };
+
+ const filter_id = @intToEnum(
+ FilterId,
+ try multibyte.readInt(header_reader),
+ );
+
+ if (@enumToInt(filter_id) >= 0x4000_0000_0000_0000)
+ return error.CorruptInput;
+
+ if (filter_id != .lzma2)
+ return error.Unsupported;
+
+ const properties_size = try multibyte.readInt(header_reader);
+ if (properties_size != 1)
+ return error.CorruptInput;
+
+ // TODO: use filter properties
+ _ = try header_reader.readByte();
+
+ while (block_counter.bytes_read != header_size) {
+ if (try header_reader.readByte() != 0)
+ return error.CorruptInput;
+ }
+
+ const hash_a = header_hasher.hasher.final();
+ const hash_b = try header_reader.readIntLittle(u32);
+ if (hash_a != hash_b)
+ return error.WrongChecksum;
+ }
+
+ // Compressed Data
+ var packed_counter = std.io.countingReader(block_reader);
+ const packed_reader = packed_counter.reader();
+ while (try self.readLzma2Chunk(packed_reader)) {}
+
+ if (packed_size) |s| {
+ if (s != packed_counter.bytes_read)
+ return error.CorruptInput;
+ }
+
+ const unpacked_bytes = self.accum.to_read.items[unpacked_pos..];
+ if (unpacked_size) |s| {
+ if (s != unpacked_bytes.len)
+ return error.CorruptInput;
+ }
+
+ // Block Padding
+ while (block_counter.bytes_read % 4 != 0) {
+ if (try block_reader.readByte() != 0)
+ return error.CorruptInput;
+ }
+
+ // Check
+ switch (self.check_kind) {
+ .none => {},
+ .crc32 => {
+ const hash_a = Crc32.hash(unpacked_bytes);
+ const hash_b = try self.inner_reader.readIntLittle(u32);
+ if (hash_a != hash_b)
+ return error.WrongChecksum;
+ },
+ .crc64 => {
+ const hash_a = Crc64.hash(unpacked_bytes);
+ const hash_b = try self.inner_reader.readIntLittle(u64);
+ if (hash_a != hash_b)
+ return error.WrongChecksum;
+ },
+ .sha256 => {
+ var hash_a: [Sha256.digest_length]u8 = undefined;
+ Sha256.hash(unpacked_bytes, &hash_a, .{});
+
+ var hash_b: [Sha256.digest_length]u8 = undefined;
+ try self.inner_reader.readNoEof(&hash_b);
+
+ if (!std.mem.eql(u8, &hash_a, &hash_b))
+ return error.WrongChecksum;
+ },
+ else => return error.Unsupported,
+ }
+
+ self.block_count += 1;
+ }
+
+ fn readLzma2Chunk(self: *Self, packed_reader: anytype) Error!bool {
+ const status = try packed_reader.readByte();
+ switch (status) {
+ 0 => {
+ try self.accum.reset(self.allocator);
+ return false;
+ },
+ 1, 2 => {
+ if (status == 1)
+ try self.accum.reset(self.allocator);
+
+ const size = try packed_reader.readIntBig(u16) + 1;
+ try self.accum.ensureUnusedCapacity(self.allocator, size);
+
+ var i: usize = 0;
+ while (i < size) : (i += 1)
+ self.accum.appendAssumeCapacity(try packed_reader.readByte());
+
+ return true;
+ },
+ else => {
+ if (status & 0x80 == 0)
+ return error.CorruptInput;
+
+ const Reset = struct {
+ dict: bool,
+ state: bool,
+ props: bool,
+ };
+
+ const reset = switch ((status >> 5) & 0x3) {
+ 0 => Reset{
+ .dict = false,
+ .state = false,
+ .props = false,
+ },
+ 1 => Reset{
+ .dict = false,
+ .state = true,
+ .props = false,
+ },
+ 2 => Reset{
+ .dict = false,
+ .state = true,
+ .props = true,
+ },
+ 3 => Reset{
+ .dict = true,
+ .state = true,
+ .props = true,
+ },
+ else => unreachable,
+ };
+
+ const unpacked_size = blk: {
+ var tmp: u64 = status & 0x1F;
+ tmp <<= 16;
+ tmp |= try packed_reader.readIntBig(u16);
+ break :blk tmp + 1;
+ };
+
+ const packed_size = blk: {
+ var tmp: u64 = try packed_reader.readIntBig(u16);
+ break :blk tmp + 1;
+ };
+
+ if (reset.dict)
+ try self.accum.reset(self.allocator);
+
+ if (reset.state) {
+ var new_props = self.lzma_state.lzma_props;
+
+ if (reset.props) {
+ var props = try packed_reader.readByte();
+ if (props >= 225)
+ return error.CorruptInput;
+
+ const lc = @intCast(u4, props % 9);
+ props /= 9;
+ const lp = @intCast(u3, props % 5);
+ props /= 5;
+ const pb = @intCast(u3, props);
+
+ if (lc + lp > 4)
+ return error.CorruptInput;
+
+ new_props = .{ .lc = lc, .lp = lp, .pb = pb };
+ }
+
+ try self.lzma_state.reset_state(self.allocator, new_props);
+ }
+
+ self.lzma_state.unpacked_size = unpacked_size + self.accum.len();
+
+ const buffer = try self.allocator.alloc(u8, packed_size);
+ defer self.allocator.free(buffer);
+
+ for (buffer) |*b|
+ b.* = try packed_reader.readByte();
+
+ var rangecoder = try lzma.RangeDecoder.init(buffer);
+ try self.lzma_state.process(self.allocator, &self.accum, &rangecoder);
+
+ return true;
+ },
+ }
+ }
+ };
+}
lib/std/compress/xz/check.zig
@@ -0,0 +1,7 @@
+pub const Kind = enum(u4) {
+ none = 0x00,
+ crc32 = 0x01,
+ crc64 = 0x04,
+ sha256 = 0x0A,
+ _,
+};
lib/std/compress/xz/lzma.zig
@@ -0,0 +1,658 @@
+// Ported from https://github.com/gendx/lzma-rs
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+
+const LzmaProperties = struct {
+ lc: u4,
+ lp: u3,
+ pb: u3,
+
+ fn validate(self: LzmaProperties) void {
+ assert(self.lc <= 8);
+ assert(self.lp <= 4);
+ assert(self.pb <= 4);
+ }
+};
+
+pub const DecoderState = struct {
+ lzma_props: LzmaProperties,
+ unpacked_size: ?u64,
+ literal_probs: Vec2D(u16),
+ pos_slot_decoder: [4]BitTree,
+ align_decoder: BitTree,
+ pos_decoders: [115]u16,
+ is_match: [192]u16,
+ is_rep: [12]u16,
+ is_rep_g0: [12]u16,
+ is_rep_g1: [12]u16,
+ is_rep_g2: [12]u16,
+ is_rep_0long: [192]u16,
+ state: usize,
+ rep: [4]usize,
+ len_decoder: LenDecoder,
+ rep_len_decoder: LenDecoder,
+
+ pub fn init(allocator: Allocator) !DecoderState {
+ return .{
+ .lzma_props = LzmaProperties{ .lc = 0, .lp = 0, .pb = 0 },
+ .unpacked_size = null,
+ .literal_probs = try Vec2D(u16).init(allocator, 0x400, 1, 0x300),
+ .pos_slot_decoder = .{
+ try BitTree.init(allocator, 6),
+ try BitTree.init(allocator, 6),
+ try BitTree.init(allocator, 6),
+ try BitTree.init(allocator, 6),
+ },
+ .align_decoder = try BitTree.init(allocator, 4),
+ .pos_decoders = .{0x400} ** 115,
+ .is_match = .{0x400} ** 192,
+ .is_rep = .{0x400} ** 12,
+ .is_rep_g0 = .{0x400} ** 12,
+ .is_rep_g1 = .{0x400} ** 12,
+ .is_rep_g2 = .{0x400} ** 12,
+ .is_rep_0long = .{0x400} ** 192,
+ .state = 0,
+ .rep = .{0} ** 4,
+ .len_decoder = try LenDecoder.init(allocator),
+ .rep_len_decoder = try LenDecoder.init(allocator),
+ };
+ }
+
+ pub fn deinit(self: *DecoderState, allocator: Allocator) void {
+ self.literal_probs.deinit(allocator);
+ for (self.pos_slot_decoder) |*t| t.deinit(allocator);
+ self.align_decoder.deinit(allocator);
+ self.len_decoder.deinit(allocator);
+ self.rep_len_decoder.deinit(allocator);
+ }
+
+ pub fn reset_state(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void {
+ new_props.validate();
+ if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
+ self.literal_probs.fill(0x400);
+ } else {
+ self.literal_probs.deinit(allocator);
+ self.literal_probs = try Vec2D(u16).init(allocator, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300);
+ }
+
+ self.lzma_props = new_props;
+ for (self.pos_slot_decoder) |*t| t.reset();
+ self.align_decoder.reset();
+ self.pos_decoders = .{0x400} ** 115;
+ self.is_match = .{0x400} ** 192;
+ self.is_rep = .{0x400} ** 12;
+ self.is_rep_g0 = .{0x400} ** 12;
+ self.is_rep_g1 = .{0x400} ** 12;
+ self.is_rep_g2 = .{0x400} ** 12;
+ self.is_rep_0long = .{0x400} ** 192;
+ self.state = 0;
+ self.rep = .{0} ** 4;
+ self.len_decoder.reset();
+ self.rep_len_decoder.reset();
+ }
+
+ fn processNextInner(
+ self: *DecoderState,
+ allocator: Allocator,
+ output: *LzAccumBuffer,
+ rangecoder: *RangeDecoder,
+ update: bool,
+ ) !ProcessingStatus {
+ const pos_state = output.len() & ((@as(usize, 1) << self.lzma_props.pb) - 1);
+
+ if (!try rangecoder.decodeBit(
+ &self.is_match[(self.state << 4) + pos_state],
+ update,
+ )) {
+ const byte: u8 = try self.decodeLiteral(output, rangecoder, update);
+
+ if (update) {
+ try output.appendLiteral(allocator, byte);
+
+ self.state = if (self.state < 4)
+ 0
+ else if (self.state < 10)
+ self.state - 3
+ else
+ self.state - 6;
+ }
+ return .continue_;
+ }
+
+ var len: usize = undefined;
+ if (try rangecoder.decodeBit(&self.is_rep[self.state], update)) {
+ if (!try rangecoder.decodeBit(&self.is_rep_g0[self.state], update)) {
+ if (!try rangecoder.decodeBit(
+ &self.is_rep_0long[(self.state << 4) + pos_state],
+ update,
+ )) {
+ if (update) {
+ self.state = if (self.state < 7) 9 else 11;
+ const dist = self.rep[0] + 1;
+ try output.appendLz(allocator, 1, dist);
+ }
+ return .continue_;
+ }
+ } else {
+ const idx: usize = if (!try rangecoder.decodeBit(&self.is_rep_g1[self.state], update))
+ 1
+ else if (!try rangecoder.decodeBit(&self.is_rep_g2[self.state], update))
+ 2
+ else
+ 3;
+ if (update) {
+ const dist = self.rep[idx];
+ var i = idx;
+ while (i > 0) : (i -= 1) {
+ self.rep[i] = self.rep[i - 1];
+ }
+ self.rep[0] = dist;
+ }
+ }
+
+ len = try self.rep_len_decoder.decode(rangecoder, pos_state, update);
+
+ if (update) {
+ self.state = if (self.state < 7) 8 else 11;
+ }
+ } else {
+ if (update) {
+ self.rep[3] = self.rep[2];
+ self.rep[2] = self.rep[1];
+ self.rep[1] = self.rep[0];
+ }
+
+ len = try self.len_decoder.decode(rangecoder, pos_state, update);
+
+ if (update) {
+ self.state = if (self.state < 7) 7 else 10;
+ }
+
+ const rep_0 = try self.decodeDistance(rangecoder, len, update);
+
+ if (update) {
+ self.rep[0] = rep_0;
+ if (self.rep[0] == 0xFFFF_FFFF) {
+ if (rangecoder.isFinished()) {
+ return .finished;
+ }
+ return error.CorruptInput;
+ }
+ }
+ }
+
+ if (update) {
+ len += 2;
+
+ const dist = self.rep[0] + 1;
+ try output.appendLz(allocator, len, dist);
+ }
+
+ return .continue_;
+ }
+
+ fn processNext(
+ self: *DecoderState,
+ allocator: Allocator,
+ output: *LzAccumBuffer,
+ rangecoder: *RangeDecoder,
+ ) !ProcessingStatus {
+ return self.processNextInner(allocator, output, rangecoder, true);
+ }
+
+ pub fn process(
+ self: *DecoderState,
+ allocator: Allocator,
+ output: *LzAccumBuffer,
+ rangecoder: *RangeDecoder,
+ ) !void {
+ while (true) {
+ if (self.unpacked_size) |unpacked_size| {
+ if (output.len() >= unpacked_size) {
+ break;
+ }
+ } else if (rangecoder.isFinished()) {
+ break;
+ }
+
+ if (try self.processNext(allocator, output, rangecoder) == .finished) {
+ break;
+ }
+ }
+
+ if (self.unpacked_size) |len| {
+ if (len != output.len()) {
+ return error.CorruptInput;
+ }
+ }
+ }
+
+ fn decodeLiteral(
+ self: *DecoderState,
+ output: *LzAccumBuffer,
+ rangecoder: *RangeDecoder,
+ update: bool,
+ ) !u8 {
+ const def_prev_byte = 0;
+ const prev_byte = @as(usize, output.lastOr(def_prev_byte));
+
+ var result: usize = 1;
+ const lit_state = ((output.len() & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
+ (prev_byte >> (8 - self.lzma_props.lc));
+ const probs = try self.literal_probs.get(lit_state);
+
+ if (self.state >= 7) {
+ var match_byte = @as(usize, try output.lastN(self.rep[0] + 1));
+
+ while (result < 0x100) {
+ const match_bit = (match_byte >> 7) & 1;
+ match_byte <<= 1;
+ const bit = @boolToInt(try rangecoder.decodeBit(
+ &probs[((@as(usize, 1) + match_bit) << 8) + result],
+ update,
+ ));
+ result = (result << 1) ^ bit;
+ if (match_bit != bit) {
+ break;
+ }
+ }
+ }
+
+ while (result < 0x100) {
+ result = (result << 1) ^ @boolToInt(try rangecoder.decodeBit(&probs[result], update));
+ }
+
+ return @truncate(u8, result - 0x100);
+ }
+
+ fn decodeDistance(
+ self: *DecoderState,
+ rangecoder: *RangeDecoder,
+ length: usize,
+ update: bool,
+ ) !usize {
+ const len_state = if (length > 3) 3 else length;
+
+ const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(rangecoder, update));
+ if (pos_slot < 4)
+ return pos_slot;
+
+ const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1);
+ var result = (2 ^ (pos_slot & 1)) << num_direct_bits;
+
+ if (pos_slot < 14) {
+ result += try rangecoder.parseReverseBitTree(
+ num_direct_bits,
+ &self.pos_decoders,
+ result - pos_slot,
+ update,
+ );
+ } else {
+ result += @as(usize, try rangecoder.get(num_direct_bits - 4)) << 4;
+ result += try self.align_decoder.parseReverse(rangecoder, update);
+ }
+
+ return result;
+ }
+};
+
+const ProcessingStatus = enum {
+ continue_,
+ finished,
+};
+
+pub const LzAccumBuffer = struct {
+ to_read: ArrayListUnmanaged(u8) = .{},
+ buf: ArrayListUnmanaged(u8) = .{},
+
+ pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
+ self.to_read.deinit(allocator);
+ self.buf.deinit(allocator);
+ }
+
+ pub fn read(self: *LzAccumBuffer, output: []u8) usize {
+ const input = self.to_read.items;
+ const n = std.math.min(input.len, output.len);
+ std.mem.copy(u8, output[0..n], input[0..n]);
+ std.mem.copy(u8, input, input[n..]);
+ self.to_read.shrinkRetainingCapacity(input.len - n);
+ return n;
+ }
+
+ pub fn ensureUnusedCapacity(
+ self: *LzAccumBuffer,
+ allocator: Allocator,
+ additional_count: usize,
+ ) !void {
+ try self.buf.ensureUnusedCapacity(allocator, additional_count);
+ }
+
+ pub fn appendAssumeCapacity(self: *LzAccumBuffer, byte: u8) void {
+ self.buf.appendAssumeCapacity(byte);
+ }
+
+ pub fn reset(self: *LzAccumBuffer, allocator: Allocator) !void {
+ try self.to_read.appendSlice(allocator, self.buf.items);
+ self.buf.clearRetainingCapacity();
+ }
+
+ pub fn len(self: *const LzAccumBuffer) usize {
+ return self.buf.items.len;
+ }
+
+ pub fn lastOr(self: *const LzAccumBuffer, lit: u8) u8 {
+ const buf_len = self.buf.items.len;
+ return if (buf_len == 0)
+ lit
+ else
+ self.buf.items[buf_len - 1];
+ }
+
+ pub fn lastN(self: *const LzAccumBuffer, dist: usize) !u8 {
+ const buf_len = self.buf.items.len;
+ if (dist > buf_len) {
+ return error.CorruptInput;
+ }
+
+ return self.buf.items[buf_len - dist];
+ }
+
+ pub fn appendLiteral(self: *LzAccumBuffer, allocator: Allocator, lit: u8) !void {
+ try self.buf.append(allocator, lit);
+ }
+
+ pub fn appendLz(self: *LzAccumBuffer, allocator: Allocator, length: usize, dist: usize) !void {
+ const buf_len = self.buf.items.len;
+ if (dist > buf_len) {
+ return error.CorruptInput;
+ }
+
+ var offset = buf_len - dist;
+ var i: usize = 0;
+ while (i < length) : (i += 1) {
+ const x = self.buf.items[offset];
+ try self.buf.append(allocator, x);
+ offset += 1;
+ }
+ }
+};
+
+pub const RangeDecoder = struct {
+ stream: std.io.FixedBufferStream([]const u8),
+ range: u32,
+ code: u32,
+
+ pub fn init(buffer: []const u8) !RangeDecoder {
+ var dec = RangeDecoder{
+ .stream = std.io.fixedBufferStream(buffer),
+ .range = 0xFFFF_FFFF,
+ .code = 0,
+ };
+ const reader = dec.stream.reader();
+ _ = try reader.readByte();
+ dec.code = try reader.readIntBig(u32);
+ return dec;
+ }
+
+ pub fn fromParts(
+ buffer: []const u8,
+ range: u32,
+ code: u32,
+ ) RangeDecoder {
+ return .{
+ .stream = std.io.fixedBufferStream(buffer),
+ .range = range,
+ .code = code,
+ };
+ }
+
+ pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
+ self.range = range;
+ self.code = code;
+ }
+
+ pub fn readInto(self: *RangeDecoder, dest: []u8) !usize {
+ return self.stream.read(dest);
+ }
+
+ pub inline fn isFinished(self: *const RangeDecoder) bool {
+ return self.code == 0 and self.isEof();
+ }
+
+ pub inline fn isEof(self: *const RangeDecoder) bool {
+ return self.stream.pos == self.stream.buffer.len;
+ }
+
+ inline fn normalize(self: *RangeDecoder) !void {
+ if (self.range < 0x0100_0000) {
+ self.range <<= 8;
+ self.code = (self.code << 8) ^ @as(u32, try self.stream.reader().readByte());
+ }
+ }
+
+ inline fn getBit(self: *RangeDecoder) !bool {
+ self.range >>= 1;
+
+ const bit = self.code >= self.range;
+ if (bit)
+ self.code -= self.range;
+
+ try self.normalize();
+ return bit;
+ }
+
+ fn get(self: *RangeDecoder, count: usize) !u32 {
+ var result: u32 = 0;
+ var i: usize = 0;
+ while (i < count) : (i += 1)
+ result = (result << 1) ^ @boolToInt(try self.getBit());
+ return result;
+ }
+
+ pub inline fn decodeBit(self: *RangeDecoder, prob: *u16, update: bool) !bool {
+ const bound = (self.range >> 11) * prob.*;
+
+ if (self.code < bound) {
+ if (update)
+ prob.* += (0x800 - prob.*) >> 5;
+ self.range = bound;
+
+ try self.normalize();
+ return false;
+ } else {
+ if (update)
+ prob.* -= prob.* >> 5;
+ self.code -= bound;
+ self.range -= bound;
+
+ try self.normalize();
+ return true;
+ }
+ }
+
+ fn parseBitTree(
+ self: *RangeDecoder,
+ num_bits: u5,
+ probs: []u16,
+ update: bool,
+ ) !u32 {
+ var tmp: u32 = 1;
+ var i: u5 = 0;
+ while (i < num_bits) : (i += 1) {
+ const bit = try self.decodeBit(&probs[tmp], update);
+ tmp = (tmp << 1) ^ @boolToInt(bit);
+ }
+ return tmp - (@as(u32, 1) << num_bits);
+ }
+
+ pub fn parseReverseBitTree(
+ self: *RangeDecoder,
+ num_bits: u5,
+ probs: []u16,
+ offset: usize,
+ update: bool,
+ ) !u32 {
+ var result: u32 = 0;
+ var tmp: usize = 1;
+ var i: u5 = 0;
+ while (i < num_bits) : (i += 1) {
+ const bit = @boolToInt(try self.decodeBit(&probs[offset + tmp], update));
+ tmp = (tmp << 1) ^ bit;
+ result ^= @as(u32, bit) << i;
+ }
+ return result;
+ }
+};
+
+fn Vec2D(comptime T: type) type {
+ return struct {
+ data: []T,
+ cols: usize,
+
+ const Self = @This();
+
+ pub fn init(allocator: Allocator, data: T, rows: usize, cols: usize) !Self {
+ const len = try std.math.mul(usize, rows, cols);
+ var vec2d = Self{
+ .data = try allocator.alloc(T, len),
+ .cols = cols,
+ };
+ vec2d.fill(data);
+ return vec2d;
+ }
+
+ pub fn deinit(self: *Self, allocator: Allocator) void {
+ allocator.free(self.data);
+ }
+
+ pub fn fill(self: *Self, value: T) void {
+ std.mem.set(T, self.data, value);
+ }
+
+ pub fn get(self: *Self, row: usize) ![]T {
+ const start_row = try std.math.mul(usize, row, self.cols);
+ return self.data[start_row .. start_row + self.cols];
+ }
+ };
+}
+
+const BitTree = struct {
+ num_bits: u5,
+ probs: ArrayListUnmanaged(u16),
+
+ pub fn init(allocator: Allocator, num_bits: u5) !BitTree {
+ var probs_len = @as(usize, 1) << num_bits;
+ var probs = try ArrayListUnmanaged(u16).initCapacity(allocator, probs_len);
+ while (probs_len > 0) : (probs_len -= 1)
+ probs.appendAssumeCapacity(0x400);
+ return .{ .num_bits = num_bits, .probs = probs };
+ }
+
+ pub fn deinit(self: *BitTree, allocator: Allocator) void {
+ self.probs.deinit(allocator);
+ }
+
+ pub fn parse(
+ self: *BitTree,
+ rangecoder: *RangeDecoder,
+ update: bool,
+ ) !u32 {
+ return rangecoder.parseBitTree(self.num_bits, self.probs.items, update);
+ }
+
+ pub fn parseReverse(
+ self: *BitTree,
+ rangecoder: *RangeDecoder,
+ update: bool,
+ ) !u32 {
+ return rangecoder.parseReverseBitTree(self.num_bits, self.probs.items, 0, update);
+ }
+
+ pub fn reset(self: *BitTree) void {
+ std.mem.set(u16, self.probs.items, 0x400);
+ }
+};
+
+const LenDecoder = struct {
+ choice: u16,
+ choice2: u16,
+ low_coder: [16]BitTree,
+ mid_coder: [16]BitTree,
+ high_coder: BitTree,
+
+ pub fn init(allocator: Allocator) !LenDecoder {
+ return .{
+ .choice = 0x400,
+ .choice2 = 0x400,
+ .low_coder = .{
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ },
+ .mid_coder = .{
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ try BitTree.init(allocator, 3),
+ },
+ .high_coder = try BitTree.init(allocator, 8),
+ };
+ }
+
+ pub fn deinit(self: *LenDecoder, allocator: Allocator) void {
+ for (self.low_coder) |*t| t.deinit(allocator);
+ for (self.mid_coder) |*t| t.deinit(allocator);
+ self.high_coder.deinit(allocator);
+ }
+
+ pub fn decode(
+ self: *LenDecoder,
+ rangecoder: *RangeDecoder,
+ pos_state: usize,
+ update: bool,
+ ) !usize {
+ if (!try rangecoder.decodeBit(&self.choice, update)) {
+ return @as(usize, try self.low_coder[pos_state].parse(rangecoder, update));
+ } else if (!try rangecoder.decodeBit(&self.choice2, update)) {
+ return @as(usize, try self.mid_coder[pos_state].parse(rangecoder, update)) + 8;
+ } else {
+ return @as(usize, try self.high_coder.parse(rangecoder, update)) + 16;
+ }
+ }
+
+ pub fn reset(self: *LenDecoder) void {
+ self.choice = 0x400;
+ self.choice2 = 0x400;
+ for (self.low_coder) |*t| t.reset();
+ for (self.mid_coder) |*t| t.reset();
+ self.high_coder.reset();
+ }
+};
lib/std/compress/xz/multibyte.zig
@@ -0,0 +1,23 @@
+const Multibyte = packed struct(u8) {
+ value: u7,
+ more: bool,
+};
+
+pub fn readInt(reader: anytype) !u64 {
+ const max_size = 9;
+
+ var chunk = try reader.readStruct(Multibyte);
+ var num: u64 = chunk.value;
+ var i: u6 = 0;
+
+ while (chunk.more) {
+ chunk = try reader.readStruct(Multibyte);
+ i += 1;
+ if (i >= max_size or @bitCast(u8, chunk) == 0x00)
+ return error.CorruptInput;
+
+ num |= @as(u64, chunk.value) << (i * 7);
+ }
+
+ return num;
+}
lib/std/compress/xz/stream.zig
@@ -0,0 +1,136 @@
+const std = @import("std");
+const block = @import("block.zig");
+const check = @import("check.zig");
+const multibyte = @import("multibyte.zig");
+const Allocator = std.mem.Allocator;
+const Crc32 = std.hash.Crc32;
+
+test {
+ _ = @import("stream_test.zig");
+}
+
+const Flags = packed struct(u16) {
+ reserved1: u8,
+ check_kind: check.Kind,
+ reserved2: u4,
+};
+
+pub fn stream(allocator: Allocator, reader: anytype) !Stream(@TypeOf(reader)) {
+ return Stream(@TypeOf(reader)).init(allocator, reader);
+}
+
+pub fn Stream(comptime ReaderType: type) type {
+ return struct {
+ const Self = @This();
+
+ pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error;
+ pub const Reader = std.io.Reader(*Self, Error, read);
+
+ allocator: Allocator,
+ block_decoder: block.Decoder(ReaderType),
+ in_reader: ReaderType,
+
+ fn init(allocator: Allocator, source: ReaderType) !Self {
+ const Header = extern struct {
+ magic: [6]u8,
+ flags: Flags,
+ crc32: u32,
+ };
+
+ const header = try source.readStruct(Header);
+
+ if (!std.mem.eql(u8, &header.magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
+ return error.BadHeader;
+
+ if (header.flags.reserved1 != 0 or header.flags.reserved2 != 0)
+ return error.BadHeader;
+
+ const hash = Crc32.hash(std.mem.asBytes(&header.flags));
+ if (hash != header.crc32)
+ return error.WrongChecksum;
+
+ return Self{
+ .allocator = allocator,
+ .block_decoder = try block.decoder(allocator, source, header.flags.check_kind),
+ .in_reader = source,
+ };
+ }
+
+ pub fn deinit(self: *Self) void {
+ self.block_decoder.deinit();
+ }
+
+ pub fn reader(self: *Self) Reader {
+ return .{ .context = self };
+ }
+
+ pub fn read(self: *Self, buffer: []u8) Error!usize {
+ if (buffer.len == 0)
+ return 0;
+
+ const r = try self.block_decoder.read(buffer);
+ if (r != 0)
+ return r;
+
+ const index_size = blk: {
+ var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
+ hasher.hasher.update(&[1]u8{0x00});
+
+ var counter = std.io.countingReader(hasher.reader());
+ counter.bytes_read += 1;
+
+ const counting_reader = counter.reader();
+
+ const record_count = try multibyte.readInt(counting_reader);
+ if (record_count != self.block_decoder.block_count)
+ return error.CorruptInput;
+
+ var i: usize = 0;
+ while (i < record_count) : (i += 1) {
+ // TODO: validate records
+ _ = try multibyte.readInt(counting_reader);
+ _ = try multibyte.readInt(counting_reader);
+ }
+
+ while (counter.bytes_read % 4 != 0) {
+ if (try counting_reader.readByte() != 0)
+ return error.CorruptInput;
+ }
+
+ const hash_a = hasher.hasher.final();
+ const hash_b = try counting_reader.readIntLittle(u32);
+ if (hash_a != hash_b)
+ return error.WrongChecksum;
+
+ break :blk counter.bytes_read;
+ };
+
+ const Footer = extern struct {
+ crc32: u32,
+ backward_size: u32,
+ flags: Flags,
+ magic: [2]u8,
+ };
+
+ const footer = try self.in_reader.readStruct(Footer);
+ const backward_size = (footer.backward_size + 1) * 4;
+ if (backward_size != index_size)
+ return error.CorruptInput;
+
+ if (footer.flags.reserved1 != 0 or footer.flags.reserved2 != 0)
+ return error.CorruptInput;
+
+ var hasher = Crc32.init();
+ hasher.update(std.mem.asBytes(&footer.backward_size));
+ hasher.update(std.mem.asBytes(&footer.flags));
+ const hash = hasher.final();
+ if (hash != footer.crc32)
+ return error.WrongChecksum;
+
+ if (!std.mem.eql(u8, &footer.magic, &.{ 'Y', 'Z' }))
+ return error.CorruptInput;
+
+ return 0;
+ }
+ };
+}
lib/std/compress/xz/stream_test.zig
@@ -0,0 +1,80 @@
+const std = @import("std");
+const testing = std.testing;
+const stream = @import("stream.zig").stream;
+
+fn decompress(data: []const u8) ![]u8 {
+ var in_stream = std.io.fixedBufferStream(data);
+
+ var xz_stream = try stream(testing.allocator, in_stream.reader());
+ defer xz_stream.deinit();
+
+ return xz_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize));
+}
+
+fn testReader(data: []const u8, comptime expected: []const u8) !void {
+ const buf = try decompress(data);
+ defer testing.allocator.free(buf);
+
+ try testing.expectEqualSlices(u8, expected, buf);
+}
+
+test "compressed data" {
+ try testReader(@embedFile("testdata/good-0-empty.xz"), "");
+
+ inline for ([_][]const u8{
+ "good-1-check-none.xz",
+ "good-1-check-crc32.xz",
+ "good-1-check-crc64.xz",
+ "good-1-check-sha256.xz",
+ "good-2-lzma2.xz",
+ "good-1-block_header-1.xz",
+ "good-1-block_header-2.xz",
+ "good-1-block_header-3.xz",
+ }) |filename| {
+ try testReader(@embedFile("testdata/" ++ filename),
+ \\Hello
+ \\World!
+ \\
+ );
+ }
+
+ inline for ([_][]const u8{
+ "good-1-lzma2-1.xz",
+ "good-1-lzma2-2.xz",
+ "good-1-lzma2-3.xz",
+ "good-1-lzma2-4.xz",
+ }) |filename| {
+ try testReader(@embedFile("testdata/" ++ filename),
+ \\Lorem ipsum dolor sit amet, consectetur adipisicing
+ \\elit, sed do eiusmod tempor incididunt ut
+ \\labore et dolore magna aliqua. Ut enim
+ \\ad minim veniam, quis nostrud exercitation ullamco
+ \\laboris nisi ut aliquip ex ea commodo
+ \\consequat. Duis aute irure dolor in reprehenderit
+ \\in voluptate velit esse cillum dolore eu
+ \\fugiat nulla pariatur. Excepteur sint occaecat cupidatat
+ \\non proident, sunt in culpa qui officia
+ \\deserunt mollit anim id est laborum.
+ \\
+ );
+ }
+
+ try testReader(@embedFile("testdata/good-1-lzma2-5.xz"), "");
+}
+
+test "unsupported" {
+ inline for ([_][]const u8{
+ "good-1-delta-lzma2.tiff.xz",
+ "good-1-x86-lzma2.xz",
+ "good-1-sparc-lzma2.xz",
+ "good-1-arm64-lzma2-1.xz",
+ "good-1-arm64-lzma2-2.xz",
+ "good-1-3delta-lzma2.xz",
+ "good-1-empty-bcj-lzma2.xz",
+ }) |filename| {
+ try testing.expectError(
+ error.Unsupported,
+ decompress(@embedFile("testdata/" ++ filename)),
+ );
+ }
+}
lib/std/compress/xz.zig
@@ -0,0 +1,5 @@
+pub usingnamespace @import("xz/stream.zig");
+
+test {
+ _ = @import("xz/stream.zig");
+}
build.zig
@@ -122,6 +122,8 @@ pub fn build(b: *Builder) !void {
"compress-gettysburg.txt",
"compress-pi.txt",
"rfc1951.txt",
+ // exclude files from lib/std/compress/xz/testdata
+ ".xz",
// exclude files from lib/std/tz/
".tzif",
// others