Commit 58e60697e2
Changed files (5)
lib
std
compress
lib/std/compress/lzma/test.zig
@@ -1,19 +1,19 @@
const std = @import("../../std.zig");
-const lzma = @import("../lzma.zig");
+const lzma = std.compress.lzma;
fn testDecompress(compressed: []const u8) ![]u8 {
- const allocator = std.testing.allocator;
- var stream = std.io.fixedBufferStream(compressed);
- var decompressor = try lzma.decompress(allocator, stream.reader());
+ const gpa = std.testing.allocator;
+ var stream: std.Io.Reader = .fixed(compressed);
+
+ var decompressor = try lzma.Decompress.initOptions(&stream, gpa, &.{}, .{}, std.math.maxInt(u32));
defer decompressor.deinit();
- const reader = decompressor.reader();
- return reader.readAllAlloc(allocator, std.math.maxInt(usize));
+ return decompressor.reader.allocRemaining(gpa, .unlimited);
}
fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void {
- const allocator = std.testing.allocator;
+ const gpa = std.testing.allocator;
const decomp = try testDecompress(compressed);
- defer allocator.free(decomp);
+ defer gpa.free(decomp);
try std.testing.expectEqualSlices(u8, expected, decomp);
}
@@ -89,11 +89,13 @@ test "too small uncompressed size in header" {
}
test "reading one byte" {
+ const gpa = std.testing.allocator;
const compressed = @embedFile("testdata/good-known_size-with_eopm.lzma");
- var stream = std.io.fixedBufferStream(compressed);
- var decompressor = try lzma.decompress(std.testing.allocator, stream.reader());
+ var stream: std.Io.Reader = .fixed(compressed);
+ var decompressor = try lzma.Decompress.initOptions(&stream, gpa, &.{}, .{}, std.math.maxInt(u32));
defer decompressor.deinit();
- var buffer = [1]u8{0};
- _ = try decompressor.read(buffer[0..]);
+ var buffer: [1]u8 = undefined;
+ try decompressor.reader.readSliceAll(&buffer);
+ try std.testing.expectEqual(72, buffer[0]);
}
lib/std/compress/xz/Decompress.zig
@@ -0,0 +1,288 @@
+const Decompress = @This();
+const std = @import("../../std.zig");
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+const Crc32 = std.hash.Crc32;
+const Crc64 = std.hash.crc.Crc64Xz;
+const Sha256 = std.crypto.hash.sha2.Sha256;
+const lzma2 = std.compress.lzma2;
+const Writer = std.Io.Writer;
+const Reader = std.Io.Reader;
+
+/// Underlying compressed data stream to pull bytes from.
+input: *Reader,
+/// Uncompressed bytes output by this stream implementation.
+reader: Reader,
+/// Allocator supplied to `init`; per `init`, used to resize `reader.buffer`.
+gpa: Allocator,
+/// Integrity check type taken from the stream header flags in `init`.
+check: Check,
+/// Starts at 0 in `init`.
+/// NOTE(review): presumably the number of blocks decoded so far, to be
+/// compared against the index record count — confirm once decoding lands.
+block_count: usize,
+/// Starts as `null` in `init`.
+/// NOTE(review): presumably holds the detailed failure behind a
+/// `error.ReadFailed` reported through `reader` — confirm once decoding lands.
+err: ?Error,
+
+/// Error set of the `err` field.
+pub const Error = error{
+ ReadFailed,
+ OutOfMemory,
+ CorruptInput,
+ EndOfStream,
+ WrongChecksum,
+ Unsupported,
+ Overflow,
+};
+
+/// Integrity check identifier, stored as a 4-bit field of `StreamFlags`.
+/// The enum is non-exhaustive (`_`), so identifiers without a named tag can
+/// still be represented.
+pub const Check = enum(u4) {
+ none = 0x00,
+ crc32 = 0x01,
+ crc64 = 0x04,
+ sha256 = 0x0A,
+ _,
+};
+
+/// The two stream-flag bytes of the stream header, read by `init` as a
+/// little-endian `u16`. Packed fields are laid out from the least significant
+/// bit, so `null` is bits 0-7, `check` is bits 8-11 and `reserved` is
+/// bits 12-15.
+pub const StreamFlags = packed struct(u16) {
+ null: u8 = 0,
+ check: Check,
+ reserved: u4 = 0,
+};
+
+/// Header validation failures reported by `init`.
+pub const InitError = error{
+ /// The first six bytes of the input are not the XZ magic.
+ NotXzStream,
+ /// The CRC32 stored after the stream flags does not match the computed one.
+ WrongChecksum,
+};
+
+/// XZ uses a series of LZMA2 blocks which each specify a dictionary size
+/// anywhere from 4K to 4G. Thus, this API dynamically allocates the dictionary
+/// as-needed.
+///
+/// Consumes and validates the stream header from `input`: the 6-byte magic,
+/// the stream flags, and the little-endian CRC32 of the stream flags.
+///
+/// Returns `error.NotXzStream` when the magic bytes do not match,
+/// `error.WrongChecksum` when the stored CRC32 differs from the one computed
+/// over the stream flags, and any error produced while reading `input`.
+pub fn init(
+    input: *Reader,
+    gpa: Allocator,
+    /// Decompress takes ownership of this buffer and resizes it with `gpa`.
+    buffer: []u8,
+) !Decompress {
+    const magic = try input.takeArray(6);
+    if (!std.mem.eql(u8, magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
+        return error.NotXzStream;
+
+    // Hash the flag bytes while they are still buffered, then consume them.
+    const actual_hash = Crc32.hash(try input.peek(@sizeOf(StreamFlags)));
+    // Cannot fail: the `peek` above already buffered exactly these bytes.
+    const stream_flags = input.takeStruct(StreamFlags, .little) catch unreachable;
+    const stored_hash = try input.takeInt(u32, .little);
+    if (actual_hash != stored_hash) return error.WrongChecksum;
+
+    return .{
+        .input = input,
+        .reader = .{
+            .vtable = &.{
+                .stream = stream,
+                .readVec = readVec,
+            },
+            .buffer = buffer,
+            .seek = 0,
+            .end = 0,
+        },
+        .gpa = gpa,
+        .check = stream_flags.check,
+        .block_count = 0,
+        .err = null,
+    };
+}
+
+/// `Reader.VTable.stream` implementation. Not implemented yet: recovers the
+/// owning `Decompress` from the embedded `reader` field and then panics.
+fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
+    const decompress: *Decompress = @alignCast(@fieldParentPtr("reader", r));
+    _ = decompress;
+    _ = limit;
+    _ = w;
+    @panic("TODO");
+}
+
+/// `Reader.VTable.readVec` implementation. Not implemented yet: recovers the
+/// owning `Decompress` from the embedded `reader` field and then panics.
+fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
+    const decompress: *Decompress = @alignCast(@fieldParentPtr("reader", r));
+    _ = decompress;
+    _ = data;
+    @panic("TODO");
+}
+
+// if (buffer.len == 0)
+// return 0;
+//
+// const r = try self.block_decode.read(buffer);
+// if (r != 0)
+// return r;
+//
+// const index_size = blk: {
+// var hasher = hashedReader(self.in_reader, Crc32.init());
+// hasher.hasher.update(&[1]u8{0x00});
+//
+// var counter = std.io.countingReader(hasher.reader());
+// counter.bytes_read += 1;
+//
+// const counting_reader = counter.reader();
+//
+// const record_count = try std.leb.readUleb128(u64, counting_reader);
+// if (record_count != self.block_decode.block_count)
+// return error.CorruptInput;
+//
+// var i: usize = 0;
+// while (i < record_count) : (i += 1) {
+// // TODO: validate records
+// _ = try std.leb.readUleb128(u64, counting_reader);
+// _ = try std.leb.readUleb128(u64, counting_reader);
+// }
+//
+// while (counter.bytes_read % 4 != 0) {
+// if (try counting_reader.readByte() != 0)
+// return error.CorruptInput;
+// }
+//
+// const hash_a = hasher.hasher.final();
+// const hash_b = try counting_reader.readInt(u32, .little);
+// if (hash_a != hash_b)
+// return error.WrongChecksum;
+//
+// break :blk counter.bytes_read;
+// };
+//
+// const hash_a = try self.in_reader.readInt(u32, .little);
+//
+// const hash_b = blk: {
+// var hasher = hashedReader(self.in_reader, Crc32.init());
+// const hashed_reader = hasher.reader();
+//
+// const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4;
+// if (backward_size != index_size)
+// return error.CorruptInput;
+//
+// var check: Check = undefined;
+// try readStreamFlags(hashed_reader, &check);
+//
+// break :blk hasher.hasher.final();
+// };
+//
+// if (hash_a != hash_b)
+// return error.WrongChecksum;
+//
+// const magic = try self.in_reader.readBytesNoEof(2);
+// if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
+// return error.CorruptInput;
+//
+// return 0;
+//}
+
+//fn readBlock(self: *BlockDecode) Error!void {
+// var block_counter = std.io.countingReader(self.inner_reader);
+// const block_reader = block_counter.reader();
+//
+// var packed_size: ?u64 = null;
+// var unpacked_size: ?u64 = null;
+//
+// // Block Header
+// {
+// var header_hasher = hashedReader(block_reader, Crc32.init());
+// const header_reader = header_hasher.reader();
+//
+// const header_size = @as(u64, try header_reader.readByte()) * 4;
+// if (header_size == 0)
+// return error.EndOfStreamWithNoError;
+//
+// const Flags = packed struct(u8) {
+// last_filter_index: u2,
+// reserved: u4,
+// has_packed_size: bool,
+// has_unpacked_size: bool,
+// };
+//
+// const flags = @as(Flags, @bitCast(try header_reader.readByte()));
+// const filter_count = @as(u3, flags.last_filter_index) + 1;
+// if (filter_count > 1)
+// return error.Unsupported;
+//
+// if (flags.has_packed_size)
+// packed_size = try std.leb.readUleb128(u64, header_reader);
+//
+// if (flags.has_unpacked_size)
+// unpacked_size = try std.leb.readUleb128(u64, header_reader);
+//
+// const FilterId = enum(u64) {
+// lzma2 = 0x21,
+// _,
+// };
+//
+// const filter_id = @as(
+// FilterId,
+// @enumFromInt(try std.leb.readUleb128(u64, header_reader)),
+// );
+//
+// if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000)
+// return error.CorruptInput;
+//
+// if (filter_id != .lzma2)
+// return error.Unsupported;
+//
+// const properties_size = try std.leb.readUleb128(u64, header_reader);
+// if (properties_size != 1)
+// return error.CorruptInput;
+//
+// // TODO: use filter properties
+// _ = try header_reader.readByte();
+//
+// while (block_counter.bytes_read != header_size) {
+// if (try header_reader.readByte() != 0)
+// return error.CorruptInput;
+// }
+//
+// const hash_a = header_hasher.hasher.final();
+// const hash_b = try header_reader.readInt(u32, .little);
+// if (hash_a != hash_b)
+// return error.WrongChecksum;
+// }
+//
+// // Compressed Data
+// var packed_counter = std.io.countingReader(block_reader);
+// try lzma2.decompress(
+// self.allocator,
+// packed_counter.reader(),
+// self.to_read.writer(self.allocator),
+// );
+//
+// if (packed_size) |s| {
+// if (s != packed_counter.bytes_read)
+// return error.CorruptInput;
+// }
+//
+// const unpacked_bytes = self.to_read.items;
+// if (unpacked_size) |s| {
+// if (s != unpacked_bytes.len)
+// return error.CorruptInput;
+// }
+//
+// // Block Padding
+// while (block_counter.bytes_read % 4 != 0) {
+// if (try block_reader.readByte() != 0)
+// return error.CorruptInput;
+// }
+//
+// switch (self.check) {
+// .none => {},
+// .crc32 => {
+// const hash_a = Crc32.hash(unpacked_bytes);
+// const hash_b = try self.inner_reader.readInt(u32, .little);
+// if (hash_a != hash_b)
+// return error.WrongChecksum;
+// },
+// .crc64 => {
+// const hash_a = Crc64.hash(unpacked_bytes);
+// const hash_b = try self.inner_reader.readInt(u64, .little);
+// if (hash_a != hash_b)
+// return error.WrongChecksum;
+// },
+// .sha256 => {
+// var hash_a: [Sha256.digest_length]u8 = undefined;
+// Sha256.hash(unpacked_bytes, &hash_a, .{});
+//
+// var hash_b: [Sha256.digest_length]u8 = undefined;
+// try self.inner_reader.readNoEof(&hash_b);
+//
+// if (!std.mem.eql(u8, &hash_a, &hash_b))
+// return error.WrongChecksum;
+// },
+// else => return error.Unsupported,
+// }
+//
+// self.block_count += 1;
+//}
lib/std/compress/lzma.zig
@@ -4,49 +4,34 @@ const mem = std.mem;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const ArrayList = std.ArrayList;
+const Writer = std.Io.Writer;
+const Reader = std.Io.Reader;
pub const RangeDecoder = struct {
range: u32,
code: u32,
- pub fn init(reader: anytype) !RangeDecoder {
- const reserved = try reader.readByte();
- if (reserved != 0) {
- return error.CorruptInput;
- }
- return RangeDecoder{
- .range = 0xFFFF_FFFF,
- .code = try reader.readInt(u32, .big),
- };
- }
-
- pub fn fromParts(
- range: u32,
- code: u32,
- ) RangeDecoder {
+ pub fn init(reader: *Reader) !RangeDecoder {
+ const reserved = try reader.takeByte();
+ if (reserved != 0) return error.InvalidRangeCode;
return .{
- .range = range,
- .code = code,
+ .range = 0xFFFF_FFFF,
+ .code = try reader.takeInt(u32, .big),
};
}
- pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
- self.range = range;
- self.code = code;
- }
-
- pub inline fn isFinished(self: RangeDecoder) bool {
+ pub fn isFinished(self: RangeDecoder) bool {
return self.code == 0;
}
- inline fn normalize(self: *RangeDecoder, reader: anytype) !void {
+ fn normalize(self: *RangeDecoder, reader: *Reader) !void {
if (self.range < 0x0100_0000) {
self.range <<= 8;
- self.code = (self.code << 8) ^ @as(u32, try reader.readByte());
+ self.code = (self.code << 8) ^ @as(u32, try reader.takeByte());
}
}
- inline fn getBit(self: *RangeDecoder, reader: anytype) !bool {
+ fn getBit(self: *RangeDecoder, reader: *Reader) !bool {
self.range >>= 1;
const bit = self.code >= self.range;
@@ -57,7 +42,7 @@ pub const RangeDecoder = struct {
return bit;
}
- pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 {
+ pub fn get(self: *RangeDecoder, reader: *Reader, count: usize) !u32 {
var result: u32 = 0;
var i: usize = 0;
while (i < count) : (i += 1)
@@ -65,7 +50,7 @@ pub const RangeDecoder = struct {
return result;
}
- pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool {
+ pub fn decodeBit(self: *RangeDecoder, reader: *Reader, prob: *u16, update: bool) !bool {
const bound = (self.range >> 11) * prob.*;
if (self.code < bound) {
@@ -88,7 +73,7 @@ pub const RangeDecoder = struct {
fn parseBitTree(
self: *RangeDecoder,
- reader: anytype,
+ reader: *Reader,
num_bits: u5,
probs: []u16,
update: bool,
@@ -104,7 +89,7 @@ pub const RangeDecoder = struct {
pub fn parseReverseBitTree(
self: *RangeDecoder,
- reader: anytype,
+ reader: *Reader,
num_bits: u5,
probs: []u16,
offset: usize,
@@ -123,7 +108,7 @@ pub const RangeDecoder = struct {
};
pub const Decode = struct {
- lzma_props: Properties,
+ properties: Properties,
unpacked_size: ?u64,
literal_probs: Vec2d,
pos_slot_decoder: [4]BitTree(6),
@@ -141,14 +126,14 @@ pub const Decode = struct {
rep_len_decoder: LenDecoder,
pub fn init(
- allocator: Allocator,
- lzma_props: Properties,
+ gpa: Allocator,
+ properties: Properties,
unpacked_size: ?u64,
) !Decode {
return .{
- .lzma_props = lzma_props,
+ .properties = properties,
.unpacked_size = unpacked_size,
- .literal_probs = try Vec2d.init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }),
+ .literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (properties.lc + properties.lp), 0x300 }),
.pos_slot_decoder = @splat(.{}),
.align_decoder = .{},
.pos_decoders = @splat(0x400),
@@ -165,21 +150,21 @@ pub const Decode = struct {
};
}
- pub fn deinit(self: *Decode, allocator: Allocator) void {
- self.literal_probs.deinit(allocator);
+ pub fn deinit(self: *Decode, gpa: Allocator) void {
+ self.literal_probs.deinit(gpa);
self.* = undefined;
}
- pub fn resetState(self: *Decode, allocator: Allocator, new_props: Properties) !void {
+ pub fn resetState(self: *Decode, gpa: Allocator, new_props: Properties) !void {
new_props.validate();
- if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
+ if (self.properties.lc + self.properties.lp == new_props.lc + new_props.lp) {
self.literal_probs.fill(0x400);
} else {
- self.literal_probs.deinit(allocator);
- self.literal_probs = try Vec2d.init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
+ self.literal_probs.deinit(gpa);
+ self.literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
}
- self.lzma_props = new_props;
+ self.properties = new_props;
for (&self.pos_slot_decoder) |*t| t.reset();
self.align_decoder.reset();
self.pos_decoders = @splat(0x400);
@@ -195,26 +180,23 @@ pub const Decode = struct {
self.rep_len_decoder.reset();
}
- fn processNextInner(
+ fn processNext(
self: *Decode,
- allocator: Allocator,
- reader: anytype,
- writer: anytype,
- buffer: anytype,
+ reader: *Reader,
+ allocating: *Writer.Allocating,
+ buffer: *CircularBuffer,
decoder: *RangeDecoder,
update: bool,
) !ProcessingStatus {
- const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1);
+ const gpa = allocating.allocator;
+ const writer = &allocating.writer;
+ const pos_state = buffer.len & ((@as(usize, 1) << self.properties.pb) - 1);
- if (!try decoder.decodeBit(
- reader,
- &self.is_match[(self.state << 4) + pos_state],
- update,
- )) {
+ if (!try decoder.decodeBit(reader, &self.is_match[(self.state << 4) + pos_state], update)) {
const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update);
if (update) {
- try buffer.appendLiteral(allocator, byte, writer);
+ try buffer.appendLiteral(gpa, byte, writer);
self.state = if (self.state < 4)
0
@@ -223,7 +205,7 @@ pub const Decode = struct {
else
self.state - 6;
}
- return .continue_;
+ return .more;
}
var len: usize = undefined;
@@ -237,9 +219,9 @@ pub const Decode = struct {
if (update) {
self.state = if (self.state < 7) 9 else 11;
const dist = self.rep[0] + 1;
- try buffer.appendLz(allocator, 1, dist, writer);
+ try buffer.appendLz(gpa, 1, dist, writer);
}
- return .continue_;
+ return .more;
}
} else {
const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update))
@@ -293,31 +275,19 @@ pub const Decode = struct {
len += 2;
const dist = self.rep[0] + 1;
- try buffer.appendLz(allocator, len, dist, writer);
+ try buffer.appendLz(gpa, len, dist, writer);
}
- return .continue_;
- }
-
- fn processNext(
- self: *Decode,
- allocator: Allocator,
- reader: anytype,
- writer: anytype,
- buffer: anytype,
- decoder: *RangeDecoder,
- ) !ProcessingStatus {
- return self.processNextInner(allocator, reader, writer, buffer, decoder, true);
+ return .more;
}
pub fn process(
self: *Decode,
- allocator: Allocator,
- reader: anytype,
- writer: anytype,
- buffer: anytype,
+ reader: *Reader,
+ allocating: *Writer.Allocating,
+ buffer: *CircularBuffer,
decoder: *RangeDecoder,
- ) !ProcessingStatus {
+ ) !void {
process_next: {
if (self.unpacked_size) |unpacked_size| {
if (buffer.len >= unpacked_size) {
@@ -326,26 +296,24 @@ pub const Decode = struct {
} else if (decoder.isFinished()) {
break :process_next;
}
-
- switch (try self.processNext(allocator, reader, writer, buffer, decoder)) {
- .continue_ => return .continue_,
- .finished => break :process_next,
+ switch (try self.processNext(reader, allocating, buffer, decoder, true)) {
+ .more => return,
+ .finished => {},
}
}
if (self.unpacked_size) |unpacked_size| {
- if (buffer.len != unpacked_size) {
- return error.CorruptInput;
- }
+ if (buffer.len != unpacked_size) return error.DecompressedSizeMismatch;
}
- return .finished;
+ try buffer.finish(&allocating.writer);
+ self.state = math.maxInt(usize);
}
fn decodeLiteral(
self: *Decode,
- reader: anytype,
- buffer: anytype,
+ reader: *Reader,
+ buffer: *CircularBuffer,
decoder: *RangeDecoder,
update: bool,
) !u8 {
@@ -353,9 +321,9 @@ pub const Decode = struct {
const prev_byte = @as(usize, buffer.lastOr(def_prev_byte));
var result: usize = 1;
- const lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
- (prev_byte >> (8 - self.lzma_props.lc));
- const probs = try self.literal_probs.getMut(lit_state);
+ const lit_state = ((buffer.len & ((@as(usize, 1) << self.properties.lp) - 1)) << self.properties.lc) +
+ (prev_byte >> (8 - self.properties.lc));
+ const probs = try self.literal_probs.get(lit_state);
if (self.state >= 7) {
var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1));
@@ -384,7 +352,7 @@ pub const Decode = struct {
fn decodeDistance(
self: *Decode,
- reader: anytype,
+ reader: *Reader,
decoder: *RangeDecoder,
length: usize,
update: bool,
@@ -415,46 +383,40 @@ pub const Decode = struct {
}
/// A circular buffer for LZ sequences
- pub const LzCircularBuffer = struct {
+ pub const CircularBuffer = struct {
/// Circular buffer
buf: ArrayList(u8),
-
/// Length of the buffer
dict_size: usize,
-
/// Buffer memory limit
- memlimit: usize,
-
+ mem_limit: usize,
/// Current position
cursor: usize,
-
/// Total number of bytes sent through the buffer
len: usize,
- const Self = @This();
-
- pub fn init(dict_size: usize, memlimit: usize) Self {
- return Self{
+ pub fn init(dict_size: usize, mem_limit: usize) CircularBuffer {
+ return .{
.buf = .{},
.dict_size = dict_size,
- .memlimit = memlimit,
+ .mem_limit = mem_limit,
.cursor = 0,
.len = 0,
};
}
- pub fn get(self: Self, index: usize) u8 {
+ pub fn get(self: CircularBuffer, index: usize) u8 {
return if (0 <= index and index < self.buf.items.len)
self.buf.items[index]
else
0;
}
- pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void {
- if (index >= self.memlimit) {
+ pub fn set(self: *CircularBuffer, gpa: Allocator, index: usize, value: u8) !void {
+ if (index >= self.mem_limit) {
return error.CorruptInput;
}
- try self.buf.ensureTotalCapacity(allocator, index + 1);
+ try self.buf.ensureTotalCapacity(gpa, index + 1);
while (self.buf.items.len < index) {
self.buf.appendAssumeCapacity(0);
}
@@ -462,7 +424,7 @@ pub const Decode = struct {
}
/// Retrieve the last byte or return a default
- pub fn lastOr(self: Self, lit: u8) u8 {
+ pub fn lastOr(self: CircularBuffer, lit: u8) u8 {
return if (self.len == 0)
lit
else
@@ -470,7 +432,7 @@ pub const Decode = struct {
}
/// Retrieve the n-th last byte
- pub fn lastN(self: Self, dist: usize) !u8 {
+ pub fn lastN(self: CircularBuffer, dist: usize) !u8 {
if (dist > self.dict_size or dist > self.len) {
return error.CorruptInput;
}
@@ -481,12 +443,12 @@ pub const Decode = struct {
/// Append a literal
pub fn appendLiteral(
- self: *Self,
- allocator: Allocator,
+ self: *CircularBuffer,
+ gpa: Allocator,
lit: u8,
- writer: anytype,
+ writer: *Writer,
) !void {
- try self.set(allocator, self.cursor, lit);
+ try self.set(gpa, self.cursor, lit);
self.cursor += 1;
self.len += 1;
@@ -499,11 +461,11 @@ pub const Decode = struct {
/// Fetch an LZ sequence (length, distance) from inside the buffer
pub fn appendLz(
- self: *Self,
- allocator: Allocator,
+ self: *CircularBuffer,
+ gpa: Allocator,
len: usize,
dist: usize,
- writer: anytype,
+ writer: *Writer,
) !void {
if (dist > self.dict_size or dist > self.len) {
return error.CorruptInput;
@@ -513,7 +475,7 @@ pub const Decode = struct {
var i: usize = 0;
while (i < len) : (i += 1) {
const x = self.get(offset);
- try self.appendLiteral(allocator, x, writer);
+ try self.appendLiteral(gpa, x, writer);
offset += 1;
if (offset == self.dict_size) {
offset = 0;
@@ -521,15 +483,15 @@ pub const Decode = struct {
}
}
- pub fn finish(self: *Self, writer: anytype) !void {
+ pub fn finish(self: *CircularBuffer, writer: *Writer) !void {
if (self.cursor > 0) {
try writer.writeAll(self.buf.items[0..self.cursor]);
self.cursor = 0;
}
}
- pub fn deinit(self: *Self, allocator: Allocator) void {
- self.buf.deinit(allocator);
+ pub fn deinit(self: *CircularBuffer, gpa: Allocator) void {
+ self.buf.deinit(gpa);
self.* = undefined;
}
};
@@ -538,11 +500,9 @@ pub const Decode = struct {
return struct {
probs: [1 << num_bits]u16 = @splat(0x400),
- const Self = @This();
-
pub fn parse(
- self: *Self,
- reader: anytype,
+ self: *@This(),
+ reader: *Reader,
decoder: *RangeDecoder,
update: bool,
) !u32 {
@@ -550,15 +510,15 @@ pub const Decode = struct {
}
pub fn parseReverse(
- self: *Self,
- reader: anytype,
+ self: *@This(),
+ reader: *Reader,
decoder: *RangeDecoder,
update: bool,
) !u32 {
return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update);
}
- pub fn reset(self: *Self) void {
+ pub fn reset(self: *@This()) void {
@memset(&self.probs, 0x400);
}
};
@@ -573,7 +533,7 @@ pub const Decode = struct {
pub fn decode(
self: *LenDecoder,
- reader: anytype,
+ reader: *Reader,
decoder: *RangeDecoder,
pos_state: usize,
update: bool,
@@ -600,45 +560,35 @@ pub const Decode = struct {
data: []u16,
cols: usize,
- const Self = @This();
-
- pub fn init(allocator: Allocator, value: u16, size: struct { usize, usize }) !Self {
+ pub fn init(gpa: Allocator, value: u16, size: struct { usize, usize }) !Vec2d {
const len = try math.mul(usize, size[0], size[1]);
- const data = try allocator.alloc(u16, len);
+ const data = try gpa.alloc(u16, len);
@memset(data, value);
- return Self{
+ return .{
.data = data,
.cols = size[1],
};
}
- pub fn deinit(self: *Self, allocator: Allocator) void {
- allocator.free(self.data);
+ pub fn deinit(self: *Vec2d, gpa: Allocator) void {
+ gpa.free(self.data);
self.* = undefined;
}
- pub fn fill(self: *Self, value: u16) void {
+ pub fn fill(self: *Vec2d, value: u16) void {
@memset(self.data, value);
}
- inline fn _get(self: Self, row: usize) ![]u16 {
+ fn get(self: Vec2d, row: usize) ![]u16 {
const start_row = try math.mul(usize, row, self.cols);
const end_row = try math.add(usize, start_row, self.cols);
return self.data[start_row..end_row];
}
-
- pub fn get(self: Self, row: usize) ![]const u16 {
- return self._get(row);
- }
-
- pub fn getMut(self: *Self, row: usize) ![]u16 {
- return self._get(row);
- }
};
pub const Options = struct {
unpacked_size: UnpackedSize = .read_from_header,
- memlimit: ?usize = null,
+ mem_limit: ?usize = null,
allow_incomplete: bool = false,
};
@@ -649,7 +599,7 @@ pub const Decode = struct {
};
const ProcessingStatus = enum {
- continue_,
+ more,
finished,
};
@@ -670,39 +620,34 @@ pub const Decode = struct {
dict_size: u32,
unpacked_size: ?u64,
- pub fn readHeader(reader: anytype, options: Options) !Params {
- var props = try reader.readByte();
- if (props >= 225) {
- return error.CorruptInput;
- }
+ pub fn readHeader(reader: *Reader, options: Options) !Params {
+ var props = try reader.takeByte();
+ if (props >= 225) return error.CorruptInput;
- const lc = @as(u4, @intCast(props % 9));
+ const lc: u4 = @intCast(props % 9);
props /= 9;
- const lp = @as(u3, @intCast(props % 5));
+ const lp: u3 = @intCast(props % 5);
props /= 5;
- const pb = @as(u3, @intCast(props));
+ const pb: u3 = @intCast(props);
- const dict_size_provided = try reader.readInt(u32, .little);
+ const dict_size_provided = try reader.takeInt(u32, .little);
const dict_size = @max(0x1000, dict_size_provided);
const unpacked_size = switch (options.unpacked_size) {
.read_from_header => blk: {
- const unpacked_size_provided = try reader.readInt(u64, .little);
+ const unpacked_size_provided = try reader.takeInt(u64, .little);
const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
- break :blk if (marker_mandatory)
- null
- else
- unpacked_size_provided;
+ break :blk if (marker_mandatory) null else unpacked_size_provided;
},
.read_header_but_use_provided => |x| blk: {
- _ = try reader.readInt(u64, .little);
+ _ = try reader.takeInt(u64, .little);
break :blk x;
},
.use_provided => |x| x,
};
- return Params{
- .properties = Properties{ .lc = lc, .lp = lp, .pb = pb },
+ return .{
+ .properties = .{ .lc = lc, .lp = lp, .pb = pb },
.dict_size = dict_size,
.unpacked_size = unpacked_size,
};
@@ -710,84 +655,121 @@ pub const Decode = struct {
};
};
-pub fn decompress(
- allocator: Allocator,
- reader: anytype,
-) !Decompress(@TypeOf(reader)) {
- return decompressWithOptions(allocator, reader, .{});
-}
-
-pub fn decompressWithOptions(
- allocator: Allocator,
- reader: anytype,
- options: Decode.Options,
-) !Decompress(@TypeOf(reader)) {
- const params = try Decode.Params.readHeader(reader, options);
- return Decompress(@TypeOf(reader)).init(allocator, reader, params, options.memlimit);
-}
-
-pub fn Decompress(comptime ReaderType: type) type {
- return struct {
- const Self = @This();
-
- pub const Error =
- ReaderType.Error ||
- Allocator.Error ||
- error{ CorruptInput, EndOfStream, Overflow };
-
- pub const Reader = std.io.GenericReader(*Self, Error, read);
+pub const Decompress = struct {
+ /// Allocator used for the output buffer, the dictionary and the decoder
+ /// tables; all three are released through it in `deinit`.
+ gpa: Allocator,
+ /// Compressed input that the range decoder pulls bytes from.
+ input: *Reader,
+ /// Decompressed output; decoded bytes are appended to `reader.buffer`.
+ reader: Reader,
+ /// Dictionary of previously decoded bytes, sized from `Params.dict_size`.
+ buffer: Decode.CircularBuffer,
+ range_decoder: RangeDecoder,
+ /// Decoder state; `decode.state == math.maxInt(usize)` marks a finished stream.
+ decode: Decode,
+ /// Set to the underlying failure just before `reader` reports
+ /// `error.ReadFailed`; `null` until then.
+ err: ?Error,
+
+ /// Failures that can be stored in `err`.
+ pub const Error = error{
+ OutOfMemory,
+ ReadFailed,
+ CorruptInput,
+ DecompressedSizeMismatch,
+ EndOfStream,
+ Overflow,
+ };
- allocator: Allocator,
- in_reader: ReaderType,
- to_read: std.ArrayListUnmanaged(u8),
+    /// Creates a decompressor from already-parsed header `params`.
+    ///
+    /// Takes ownership of `buffer` which may be resized with `gpa`.
+    ///
+    /// LZMA was explicitly designed to take advantage of large heap memory
+    /// being available, with a dictionary size anywhere from 4K to 4G. Thus,
+    /// this API dynamically allocates the dictionary as-needed.
+    ///
+    /// Reads the range coder's initial bytes from `input` and allocates the
+    /// decoder tables; errors from either step are returned to the caller.
+    pub fn initParams(
+        input: *Reader,
+        gpa: Allocator,
+        buffer: []u8,
+        params: Decode.Params,
+        mem_limit: usize,
+    ) !Decompress {
+        // Same order as the fields were originally initialised: dictionary,
+        // then range coder (consumes input), then decoder tables (allocates).
+        const dictionary = Decode.CircularBuffer.init(params.dict_size, mem_limit);
+        const coder = try RangeDecoder.init(input);
+        const decoder_state = try Decode.init(gpa, params.properties, params.unpacked_size);
+
+        const output: Reader = .{
+            .buffer = buffer,
+            .vtable = &.{
+                .readVec = readVec,
+                .stream = stream,
+            },
+            .seek = 0,
+            .end = 0,
+        };
+
+        return .{
+            .gpa = gpa,
+            .input = input,
+            .buffer = dictionary,
+            .range_decoder = coder,
+            .decode = decoder_state,
+            .reader = output,
+            .err = null,
+        };
+    }
- buffer: Decode.LzCircularBuffer,
- decoder: RangeDecoder,
- state: Decode,
+    /// Parses the LZMA header from `input` according to `options`, then
+    /// builds the decompressor via `initParams`.
+    ///
+    /// Takes ownership of `buffer` which may be resized with `gpa`.
+    ///
+    /// LZMA was explicitly designed to take advantage of large heap memory
+    /// being available, with a dictionary size anywhere from 4K to 4G. Thus,
+    /// this API dynamically allocates the dictionary as-needed.
+    pub fn initOptions(
+        input: *Reader,
+        gpa: Allocator,
+        buffer: []u8,
+        options: Decode.Options,
+        mem_limit: usize,
+    ) !Decompress {
+        return initParams(
+            input,
+            gpa,
+            buffer,
+            try Decode.Params.readHeader(input, options),
+            mem_limit,
+        );
+    }
- pub fn init(allocator: Allocator, source: ReaderType, params: Decode.Params, memlimit: ?usize) !Self {
- return Self{
- .allocator = allocator,
- .in_reader = source,
- .to_read = .{},
+    /// Reclaim ownership of the output buffer (the one passed to `initParams`
+    /// or `initOptions`, possibly resized since). The reader is left with an
+    /// empty buffer.
+    pub fn takeBuffer(d: *Decompress) []u8 {
+        // The return value is evaluated before the deferred reset runs.
+        defer d.reader.buffer = &.{};
+        return d.reader.buffer;
+    }
- .buffer = Decode.LzCircularBuffer.init(params.dict_size, memlimit orelse math.maxInt(usize)),
- .decoder = try RangeDecoder.init(source),
- .state = try Decode.init(allocator, params.properties, params.unpacked_size),
- };
- }
+    /// Releases the decoder tables, the dictionary and the output buffer,
+    /// then invalidates `d`.
+    pub fn deinit(d: *Decompress) void {
+        d.decode.deinit(d.gpa);
+        d.buffer.deinit(d.gpa);
+        d.gpa.free(d.reader.buffer);
+        d.* = undefined;
+    }
- pub fn reader(self: *Self) Reader {
- return .{ .context = self };
- }
+    /// `Reader.VTable.readVec` implementation. The caller's vectors are not
+    /// used; all work is delegated to `readIndirect`.
+    fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
+        _ = data;
+        const n = try readIndirect(r);
+        return n;
+    }
- pub fn deinit(self: *Self) void {
- self.to_read.deinit(self.allocator);
- self.buffer.deinit(self.allocator);
- self.state.deinit(self.allocator);
- self.* = undefined;
- }
+    /// `Reader.VTable.stream` implementation. Neither `w` nor `limit` is
+    /// used; all work is delegated to `readIndirect`.
+    fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
+        _ = limit;
+        _ = w;
+        const n = try readIndirect(r);
+        return n;
+    }
- pub fn read(self: *Self, output: []u8) Error!usize {
- const writer = self.to_read.writer(self.allocator);
- while (self.to_read.items.len < output.len) {
- switch (try self.state.process(self.allocator, self.in_reader, writer, &self.buffer, &self.decoder)) {
- .continue_ => {},
- .finished => {
- try self.buffer.finish(writer);
- break;
- },
- }
- }
- const input = self.to_read.items;
- const n = @min(input.len, output.len);
- @memcpy(output[0..n], input[0..n]);
- std.mem.copyForwards(u8, input[0 .. input.len - n], input[n..]);
- self.to_read.shrinkRetainingCapacity(input.len - n);
- return n;
- }
- };
-}
+    /// Shared body of `readVec` and `stream`: runs one `Decode.process` step
+    /// whose output is appended to `r.buffer` after the `r.end` bytes that are
+    /// already buffered.
+    ///
+    /// Returns 0 on success, since bytes are delivered through `r.buffer`
+    /// rather than through the caller's destination. Returns
+    /// `error.EndOfStream` once the decoder has marked the stream finished.
+    /// Any decode failure is stored in `d.err` and surfaced as
+    /// `error.ReadFailed`; a failed write is recorded as `error.OutOfMemory`.
+    fn readIndirect(r: *Reader) Reader.Error!usize {
+        const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
+        const gpa = d.gpa;
+        // Temporarily hand the reader's buffer to an allocating writer so the
+        // decoder can grow it as needed.
+        var allocating = Writer.Allocating.initOwnedSlice(gpa, r.buffer);
+        allocating.writer.end = r.end;
+        defer {
+            // The writer may have reallocated the slice it was given, so take
+            // back the current buffer together with the new end offset;
+            // otherwise `r.buffer` could be stale and shorter than `r.end`.
+            r.buffer = allocating.writer.buffer;
+            r.end = allocating.writer.end;
+        }
+        // `process` sets this sentinel after flushing the final bytes.
+        if (d.decode.state == math.maxInt(usize)) return error.EndOfStream;
+        d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| {
+            d.err = switch (err) {
+                // The only writer involved is the allocating one above.
+                error.WriteFailed => error.OutOfMemory,
+                else => |e| e,
+            };
+            return error.ReadFailed;
+        };
+        return 0;
+    }
+};
test {
_ = @import("lzma/test.zig");
lib/std/compress/lzma2.zig
@@ -2,6 +2,8 @@ const std = @import("../std.zig");
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const lzma = std.compress.lzma;
+const Writer = std.Io.Writer;
+const Reader = std.Io.Reader;
/// An accumulating buffer for LZ sequences
pub const LzAccumBuffer = struct {
@@ -14,30 +16,28 @@ pub const LzAccumBuffer = struct {
/// Total number of bytes sent through the buffer
len: usize,
- const Self = @This();
-
- pub fn init(memlimit: usize) Self {
- return Self{
+ pub fn init(memlimit: usize) LzAccumBuffer {
+ return .{
.buf = .{},
.memlimit = memlimit,
.len = 0,
};
}
- pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void {
+ pub fn appendByte(self: *LzAccumBuffer, allocator: Allocator, byte: u8) !void {
try self.buf.append(allocator, byte);
self.len += 1;
}
/// Reset the internal dictionary
- pub fn reset(self: *Self, writer: anytype) !void {
+ pub fn reset(self: *LzAccumBuffer, writer: *Writer) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
self.len = 0;
}
/// Retrieve the last byte or return a default
- pub fn lastOr(self: Self, lit: u8) u8 {
+ pub fn lastOr(self: LzAccumBuffer, lit: u8) u8 {
const buf_len = self.buf.items.len;
return if (buf_len == 0)
lit
@@ -46,7 +46,7 @@ pub const LzAccumBuffer = struct {
}
/// Retrieve the n-th last byte
- pub fn lastN(self: Self, dist: usize) !u8 {
+ pub fn lastN(self: LzAccumBuffer, dist: usize) !u8 {
const buf_len = self.buf.items.len;
if (dist > buf_len) {
return error.CorruptInput;
@@ -57,10 +57,10 @@ pub const LzAccumBuffer = struct {
/// Append a literal
pub fn appendLiteral(
- self: *Self,
+ self: *LzAccumBuffer,
allocator: Allocator,
lit: u8,
- writer: anytype,
+ writer: *Writer,
) !void {
_ = writer;
if (self.len >= self.memlimit) {
@@ -72,11 +72,11 @@ pub const LzAccumBuffer = struct {
/// Fetch an LZ sequence (length, distance) from inside the buffer
pub fn appendLz(
- self: *Self,
+ self: *LzAccumBuffer,
allocator: Allocator,
len: usize,
dist: usize,
- writer: anytype,
+ writer: *Writer,
) !void {
_ = writer;
@@ -95,23 +95,23 @@ pub const LzAccumBuffer = struct {
self.len += len;
}
/// Write every buffered byte to `writer` and empty the buffer, keeping its
/// capacity. `len` is not modified by this function.
/// If the write fails the error is returned and the buffer is left as it was.
- pub fn finish(self: *Self, writer: anytype) !void {
+ pub fn finish(self: *LzAccumBuffer, writer: *Writer) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
}
/// Release the buffer's storage through `allocator` and set `self.*` to
/// `undefined`; the value must not be used again after this call.
- pub fn deinit(self: *Self, allocator: Allocator) void {
+ pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
self.buf.deinit(allocator);
self.* = undefined;
}
};
pub const Decode = struct {
- lzma_state: lzma.Decode,
+ lzma_decode: lzma.Decode,
pub fn init(allocator: Allocator) !Decode {
return Decode{
- .lzma_state = try lzma.Decode.init(
+ .lzma_decode = try lzma.Decode.init(
allocator,
.{
.lc = 0,
@@ -124,15 +124,15 @@ pub const Decode = struct {
}
/// Deinitialize the wrapped LZMA decoder through `allocator` and set
/// `self.*` to `undefined`; the value must not be used again after this call.
pub fn deinit(self: *Decode, allocator: Allocator) void {
- self.lzma_state.deinit(allocator);
+ self.lzma_decode.deinit(allocator);
self.* = undefined;
}
pub fn decompress(
self: *Decode,
allocator: Allocator,
- reader: anytype,
- writer: anytype,
+ reader: *Reader,
+ writer: *Writer,
) !void {
var accum = LzAccumBuffer.init(std.math.maxInt(usize));
defer accum.deinit(allocator);
@@ -154,8 +154,8 @@ pub const Decode = struct {
fn parseLzma(
self: *Decode,
allocator: Allocator,
- reader: anytype,
- writer: anytype,
+ reader: *Reader,
+ writer: *Writer,
accum: *LzAccumBuffer,
status: u8,
) !void {
@@ -210,7 +210,7 @@ pub const Decode = struct {
}
if (reset.state) {
- var new_props = self.lzma_state.lzma_props;
+ var new_props = self.lzma_decode.properties;
if (reset.props) {
var props = try reader.readByte();
@@ -231,16 +231,16 @@ pub const Decode = struct {
new_props = .{ .lc = lc, .lp = lp, .pb = pb };
}
- try self.lzma_state.resetState(allocator, new_props);
+ try self.lzma_decode.resetState(allocator, new_props);
}
- self.lzma_state.unpacked_size = unpacked_size + accum.len;
+ self.lzma_decode.unpacked_size = unpacked_size + accum.len;
var counter = std.io.countingReader(reader);
const counter_reader = counter.reader();
var rangecoder = try lzma.RangeDecoder.init(counter_reader);
- while (try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {}
+ while (try self.lzma_decode.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {}
if (counter.bytes_read != packed_size) {
return error.CorruptInput;
@@ -249,8 +249,8 @@ pub const Decode = struct {
fn parseUncompressed(
allocator: Allocator,
- reader: anytype,
- writer: anytype,
+ reader: *Reader,
+ writer: *Writer,
accum: *LzAccumBuffer,
reset_dict: bool,
) !void {
@@ -267,24 +267,19 @@ pub const Decode = struct {
}
};
-pub fn decompress(
- allocator: Allocator,
- reader: anytype,
- writer: anytype,
-) !void {
- var decoder = try Decode.init(allocator);
- defer decoder.deinit(allocator);
- return decoder.decompress(allocator, reader, writer);
-}
-
// Decodes a small fixed input and checks that the output is exactly
// "Hello\nWorld!\n". The payload bytes are plain ASCII:
// 0x48 0x65 0x6C 0x6C 0x6F 0x0A is "Hello\n" and
// 0x57 0x6F 0x72 0x6C 0x64 0x21 0x0A is "World!\n".
// NOTE(review): the bytes in between (0x01 0x00 0x05, 0x02 0x00 0x06, trailing
// 0x00) look like LZMA2 uncompressed-chunk headers and an end marker; confirm
// against the format specification.
-test {
+test "decompress hello world stream" {
const expected = "Hello\nWorld!\n";
const compressed = &[_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00 };
- const allocator = std.testing.allocator;
- var decomp = std.array_list.Managed(u8).init(allocator);
- defer decomp.deinit();
- var stream = std.io.fixedBufferStream(compressed);
- try decompress(allocator, stream.reader(), decomp.writer());
- try std.testing.expectEqualSlices(u8, expected, decomp.items);
+ const gpa = std.testing.allocator;
+
+ var stream: std.Io.Reader = .fixed(compressed);
+
// NOTE(review): the `Decode.init` visible in this change takes only an
// allocator, and no `reader` field on `Decode` is visible in the hunks shown.
// Confirm that this call and the `decode.reader` access match the final API.
+ var decode = try Decode.init(gpa, &stream);
+ defer decode.deinit(gpa);
+
// The returned slice is freed below with the same allocator.
+ const result = try decode.reader.allocRemaining(gpa, .unlimited);
+ defer gpa.free(result);
+
+ try std.testing.expectEqualStrings(expected, result);
}
lib/std/compress/xz.zig
@@ -1,368 +1,4 @@
-const std = @import("std");
-const Allocator = std.mem.Allocator;
-const ArrayList = std.ArrayList;
-const Crc32 = std.hash.Crc32;
-const Crc64 = std.hash.crc.Crc64Xz;
-const Sha256 = std.crypto.hash.sha2.Sha256;
-const lzma2 = std.compress.lzma2;
-
-pub const Check = enum(u4) {
- none = 0x00,
- crc32 = 0x01,
- crc64 = 0x04,
- sha256 = 0x0A,
- _,
-};
-
-fn readStreamFlags(reader: anytype, check: *Check) !void {
- const reserved1 = try reader.readByte();
- if (reserved1 != 0) return error.CorruptInput;
- const byte = try reader.readByte();
- if ((byte >> 4) != 0) return error.CorruptInput;
- check.* = @enumFromInt(@as(u4, @truncate(byte)));
-}
-
-pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
- return Decompress(@TypeOf(reader)).init(allocator, reader);
-}
-
-pub fn Decompress(comptime ReaderType: type) type {
- return struct {
- const Self = @This();
-
- pub const Error = ReaderType.Error || Decoder(ReaderType).Error;
- pub const Reader = std.io.GenericReader(*Self, Error, read);
-
- allocator: Allocator,
- block_decoder: Decoder(ReaderType),
- in_reader: ReaderType,
-
- fn init(allocator: Allocator, source: ReaderType) !Self {
- const magic = try source.readBytesNoEof(6);
- if (!std.mem.eql(u8, &magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
- return error.BadHeader;
-
- var check: Check = undefined;
- const hash_a = blk: {
- var hasher = hashedReader(source, Crc32.init());
- try readStreamFlags(hasher.reader(), &check);
- break :blk hasher.hasher.final();
- };
-
- const hash_b = try source.readInt(u32, .little);
- if (hash_a != hash_b)
- return error.WrongChecksum;
-
- return Self{
- .allocator = allocator,
- .block_decoder = try decoder(allocator, source, check),
- .in_reader = source,
- };
- }
-
- pub fn deinit(self: *Self) void {
- self.block_decoder.deinit();
- }
-
- pub fn reader(self: *Self) Reader {
- return .{ .context = self };
- }
-
- pub fn read(self: *Self, buffer: []u8) Error!usize {
- if (buffer.len == 0)
- return 0;
-
- const r = try self.block_decoder.read(buffer);
- if (r != 0)
- return r;
-
- const index_size = blk: {
- var hasher = hashedReader(self.in_reader, Crc32.init());
- hasher.hasher.update(&[1]u8{0x00});
-
- var counter = std.io.countingReader(hasher.reader());
- counter.bytes_read += 1;
-
- const counting_reader = counter.reader();
-
- const record_count = try std.leb.readUleb128(u64, counting_reader);
- if (record_count != self.block_decoder.block_count)
- return error.CorruptInput;
-
- var i: usize = 0;
- while (i < record_count) : (i += 1) {
- // TODO: validate records
- _ = try std.leb.readUleb128(u64, counting_reader);
- _ = try std.leb.readUleb128(u64, counting_reader);
- }
-
- while (counter.bytes_read % 4 != 0) {
- if (try counting_reader.readByte() != 0)
- return error.CorruptInput;
- }
-
- const hash_a = hasher.hasher.final();
- const hash_b = try counting_reader.readInt(u32, .little);
- if (hash_a != hash_b)
- return error.WrongChecksum;
-
- break :blk counter.bytes_read;
- };
-
- const hash_a = try self.in_reader.readInt(u32, .little);
-
- const hash_b = blk: {
- var hasher = hashedReader(self.in_reader, Crc32.init());
- const hashed_reader = hasher.reader();
-
- const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4;
- if (backward_size != index_size)
- return error.CorruptInput;
-
- var check: Check = undefined;
- try readStreamFlags(hashed_reader, &check);
-
- break :blk hasher.hasher.final();
- };
-
- if (hash_a != hash_b)
- return error.WrongChecksum;
-
- const magic = try self.in_reader.readBytesNoEof(2);
- if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
- return error.CorruptInput;
-
- return 0;
- }
- };
-}
-
-pub fn HashedReader(ReaderType: type, HasherType: type) type {
- return struct {
- child_reader: ReaderType,
- hasher: HasherType,
-
- pub const Error = ReaderType.Error;
- pub const Reader = std.io.GenericReader(*@This(), Error, read);
-
- pub fn read(self: *@This(), buf: []u8) Error!usize {
- const amt = try self.child_reader.read(buf);
- self.hasher.update(buf[0..amt]);
- return amt;
- }
-
- pub fn reader(self: *@This()) Reader {
- return .{ .context = self };
- }
- };
-}
-
-pub fn hashedReader(
- reader: anytype,
- hasher: anytype,
-) HashedReader(@TypeOf(reader), @TypeOf(hasher)) {
- return .{ .child_reader = reader, .hasher = hasher };
-}
-
-const DecodeError = error{
- CorruptInput,
- EndOfStream,
- EndOfStreamWithNoError,
- WrongChecksum,
- Unsupported,
- Overflow,
-};
-
-pub fn decoder(allocator: Allocator, reader: anytype, check: Check) !Decoder(@TypeOf(reader)) {
- return Decoder(@TypeOf(reader)).init(allocator, reader, check);
-}
-
-pub fn Decoder(comptime ReaderType: type) type {
- return struct {
- const Self = @This();
- pub const Error =
- ReaderType.Error ||
- DecodeError ||
- Allocator.Error;
- pub const Reader = std.io.GenericReader(*Self, Error, read);
-
- allocator: Allocator,
- inner_reader: ReaderType,
- check: Check,
- err: ?Error,
- to_read: ArrayList(u8),
- read_pos: usize,
- block_count: usize,
-
- fn init(allocator: Allocator, in_reader: ReaderType, check: Check) !Self {
- return Self{
- .allocator = allocator,
- .inner_reader = in_reader,
- .check = check,
- .err = null,
- .to_read = .{},
- .read_pos = 0,
- .block_count = 0,
- };
- }
-
- pub fn deinit(self: *Self) void {
- self.to_read.deinit(self.allocator);
- }
-
- pub fn reader(self: *Self) Reader {
- return .{ .context = self };
- }
-
- pub fn read(self: *Self, output: []u8) Error!usize {
- while (true) {
- const unread_len = self.to_read.items.len - self.read_pos;
- if (unread_len > 0) {
- const n = @min(unread_len, output.len);
- @memcpy(output[0..n], self.to_read.items[self.read_pos..][0..n]);
- self.read_pos += n;
- return n;
- }
- if (self.err) |e| {
- if (e == DecodeError.EndOfStreamWithNoError) {
- return 0;
- }
- return e;
- }
- if (self.read_pos > 0) {
- self.to_read.shrinkRetainingCapacity(0);
- self.read_pos = 0;
- }
- self.readBlock() catch |e| {
- self.err = e;
- };
- }
- }
-
- fn readBlock(self: *Self) Error!void {
- var block_counter = std.io.countingReader(self.inner_reader);
- const block_reader = block_counter.reader();
-
- var packed_size: ?u64 = null;
- var unpacked_size: ?u64 = null;
-
- // Block Header
- {
- var header_hasher = hashedReader(block_reader, Crc32.init());
- const header_reader = header_hasher.reader();
-
- const header_size = @as(u64, try header_reader.readByte()) * 4;
- if (header_size == 0)
- return error.EndOfStreamWithNoError;
-
- const Flags = packed struct(u8) {
- last_filter_index: u2,
- reserved: u4,
- has_packed_size: bool,
- has_unpacked_size: bool,
- };
-
- const flags = @as(Flags, @bitCast(try header_reader.readByte()));
- const filter_count = @as(u3, flags.last_filter_index) + 1;
- if (filter_count > 1)
- return error.Unsupported;
-
- if (flags.has_packed_size)
- packed_size = try std.leb.readUleb128(u64, header_reader);
-
- if (flags.has_unpacked_size)
- unpacked_size = try std.leb.readUleb128(u64, header_reader);
-
- const FilterId = enum(u64) {
- lzma2 = 0x21,
- _,
- };
-
- const filter_id = @as(
- FilterId,
- @enumFromInt(try std.leb.readUleb128(u64, header_reader)),
- );
-
- if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000)
- return error.CorruptInput;
-
- if (filter_id != .lzma2)
- return error.Unsupported;
-
- const properties_size = try std.leb.readUleb128(u64, header_reader);
- if (properties_size != 1)
- return error.CorruptInput;
-
- // TODO: use filter properties
- _ = try header_reader.readByte();
-
- while (block_counter.bytes_read != header_size) {
- if (try header_reader.readByte() != 0)
- return error.CorruptInput;
- }
-
- const hash_a = header_hasher.hasher.final();
- const hash_b = try header_reader.readInt(u32, .little);
- if (hash_a != hash_b)
- return error.WrongChecksum;
- }
-
- // Compressed Data
- var packed_counter = std.io.countingReader(block_reader);
- try lzma2.decompress(
- self.allocator,
- packed_counter.reader(),
- self.to_read.writer(self.allocator),
- );
-
- if (packed_size) |s| {
- if (s != packed_counter.bytes_read)
- return error.CorruptInput;
- }
-
- const unpacked_bytes = self.to_read.items;
- if (unpacked_size) |s| {
- if (s != unpacked_bytes.len)
- return error.CorruptInput;
- }
-
- // Block Padding
- while (block_counter.bytes_read % 4 != 0) {
- if (try block_reader.readByte() != 0)
- return error.CorruptInput;
- }
-
- switch (self.check) {
- .none => {},
- .crc32 => {
- const hash_a = Crc32.hash(unpacked_bytes);
- const hash_b = try self.inner_reader.readInt(u32, .little);
- if (hash_a != hash_b)
- return error.WrongChecksum;
- },
- .crc64 => {
- const hash_a = Crc64.hash(unpacked_bytes);
- const hash_b = try self.inner_reader.readInt(u64, .little);
- if (hash_a != hash_b)
- return error.WrongChecksum;
- },
- .sha256 => {
- var hash_a: [Sha256.digest_length]u8 = undefined;
- Sha256.hash(unpacked_bytes, &hash_a, .{});
-
- var hash_b: [Sha256.digest_length]u8 = undefined;
- try self.inner_reader.readNoEof(&hash_b);
-
- if (!std.mem.eql(u8, &hash_a, &hash_b))
- return error.WrongChecksum;
- },
- else => return error.Unsupported,
- }
-
- self.block_count += 1;
- }
- };
-}
+pub const Decompress = @import("xz/Decompress.zig");
test {
_ = @import("xz/test.zig");