Commit d87eb7d4e4
Changed files (2)
lib
std
compress
lib/std/compress/xz/Decompress.zig
@@ -26,6 +26,8 @@ pub const Error = error{
WrongChecksum,
Unsupported,
Overflow,
+ InvalidRangeCode,
+ DecompressedSizeMismatch,
};
pub const Check = enum(u4) {
@@ -55,14 +57,14 @@ pub fn init(
gpa: Allocator,
/// Decompress takes ownership of this buffer and resizes it with `gpa`.
buffer: []u8,
-) Decompress {
- const magic = try input.takeBytes(6);
- if (!std.mem.eql(u8, &magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
+) !Decompress {
+ const magic = try input.takeArray(6);
+ if (!std.mem.eql(u8, magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
return error.NotXzStream;
const actual_hash = Crc32.hash(try input.peek(@sizeOf(StreamFlags)));
const stream_flags = input.takeStruct(StreamFlags, .little) catch unreachable;
- const stored_hash = try input.readInt(u32, .little);
+ const stored_hash = try input.takeInt(u32, .little);
if (actual_hash != stored_hash) return error.WrongChecksum;
return .{
@@ -71,6 +73,7 @@ pub fn init(
.vtable = &.{
.stream = stream,
.readVec = readVec,
+ .discard = discard,
},
.buffer = buffer,
.seek = 0,
@@ -83,206 +86,232 @@ pub fn init(
};
}
+/// Reclaim ownership of the buffer passed to `init`.
///
/// Returns the reader's current buffer and leaves the reader holding an empty
/// slice, so `d` no longer references the returned memory.
/// NOTE(review): `init` documents that the buffer is resized with `gpa`, so the
/// returned slice presumably has to be freed with that same allocator - confirm.
+pub fn takeBuffer(d: *Decompress) []u8 {
+ const buffer = d.reader.buffer;
+ d.reader.buffer = &.{};
+ return buffer;
+}
+
/// Frees the reader's buffer with the allocator stored in `d.gpa`, then sets
/// `d.*` to `undefined`; `d` must not be used afterwards.
+pub fn deinit(d: *Decompress) void {
+ const gpa = d.gpa;
+ gpa.free(d.reader.buffer);
+ d.* = undefined;
+}
+
/// `readVec` entry of the reader vtable installed by `init`.
///
/// The caller-provided `data` vectors are ignored; all work is delegated to
/// `readIndirect`, which appends decompressed bytes to `r.buffer` instead.
+fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
+ _ = data;
+ return readIndirect(r);
+}
+
/// `stream` entry of the reader vtable installed by `init`.
///
/// Neither `w` nor `limit` is used; all work is delegated to `readIndirect`,
/// which appends decompressed bytes to `r.buffer` instead of writing to `w`.
fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
_ = w;
_ = limit;
+ return readIndirect(r);
+}
+
/// `discard` entry of the reader vtable installed by `init`.
///
/// Not implemented yet: the parent `Decompress` is recovered from `r` but
/// unused, `limit` is ignored, and the function unconditionally panics with
/// "TODO".
+fn discard(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
_ = d;
+ _ = limit;
@panic("TODO");
}
-fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
- _ = data;
/// Shared implementation behind the `readVec` and `stream` vtable entries.
///
/// Decodes at most one block from `d.input` via `readBlock`, appending the
/// decompressed bytes to `r.buffer` (grown with `d.gpa` through a
/// `Writer.Allocating`), then consumes the check field selected by `d.check`.
/// On success it returns 0: new data is exposed by advancing `r.end`, not
/// through the return value.
///
/// Errors:
/// * `error.EndOfStream` when `readBlock` reports
///   `error.SuccessfulEndOfStream`, and on every later call (`d.block_count`
///   is set to `maxInt(usize)` as the "finished" marker).
/// * `error.ReadFailed` with the cause stored in `d.err` for allocation
///   failure, any other `readBlock` error, or an unsupported check type.
///
/// NOTE(review): the `try` on the `input.take*` calls below forwards the input
/// reader's own error; a truncated input would then presumably surface as
/// `error.EndOfStream`, indistinguishable from a clean end - confirm intent.
+fn readIndirect(r: *Reader) Reader.Error!usize {
// Recover the owning `Decompress` from its embedded `reader` field.
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
+ const gpa = d.gpa;
+ const input = d.input;
+
// Temporarily hand the reader's buffer to an allocating writer positioned
// at `r.end`, so decoded bytes are appended after the existing contents.
+ var allocating = Writer.Allocating.initOwnedSlice(gpa, r.buffer);
+ allocating.writer.end = r.end;
// Registered before any early return so the (possibly reallocated) buffer
// and the new end position are written back to `r` on every exit path.
+ defer {
+ r.buffer = allocating.writer.buffer;
+ r.end = allocating.writer.end;
+ }
+
// Sentinel stored by the `SuccessfulEndOfStream` branch below.
+ if (d.block_count == std.math.maxInt(usize)) return error.EndOfStream;
+
+ readBlock(input, &allocating) catch |err| switch (err) {
+ error.WriteFailed => {
// The only writer here is the allocating one, so a write failure is
// recorded as an out-of-memory condition.
+ d.err = error.OutOfMemory;
+ return error.ReadFailed;
+ },
+ error.SuccessfulEndOfStream => {
// NOTE: `finish` currently panics with "TODO", so the two statements
// after it are not reached yet.
+ finish(d);
+ d.block_count = std.math.maxInt(usize);
+ return error.EndOfStream;
+ },
+ else => |e| {
+ d.err = e;
+ return error.ReadFailed;
+ },
+ };
// Consume the per-block check value. In every branch the declared value is
// read and then discarded: verification against the decompressed bytes is
// still TODO (see the commented-out lines).
+ switch (d.check) {
+ .none => {},
+ .crc32 => {
+ const declared_checksum = try input.takeInt(u32, .little);
+ // TODO
+ //const hash_a = Crc32.hash(unpacked_bytes);
+ //if (hash_a != hash_b) return error.WrongChecksum;
+ _ = declared_checksum;
+ },
+ .crc64 => {
+ const declared_checksum = try input.takeInt(u64, .little);
+ // TODO
+ //const hash_a = Crc64.hash(unpacked_bytes);
+ //if (hash_a != hash_b) return error.WrongChecksum;
+ _ = declared_checksum;
+ },
+ .sha256 => {
+ const declared_hash = try input.take(Sha256.digest_length);
+ // TODO
+ //var hash_a: [Sha256.digest_length]u8 = undefined;
+ //Sha256.hash(unpacked_bytes, &hash_a, .{});
+ //if (!std.mem.eql(u8, &hash_a, &hash_b))
+ // return error.WrongChecksum;
+ _ = declared_hash;
+ },
+ else => {
+ d.err = error.Unsupported;
+ return error.ReadFailed;
+ },
+ }
+ d.block_count += 1;
+ return 0;
+}
+
/// Parses one block from `input` and decompresses its LZMA2 payload into
/// `allocating`.
///
/// Returns `error.SuccessfulEndOfStream` when the first byte is 0, meaning no
/// block header follows. Otherwise the header is validated before decoding:
/// exactly one filter, filter id LZMA2 (0x21), a 1-byte properties field,
/// zero-valued header padding, and a matching CRC32.
///
/// NOTE: this is work in progress. The `packed_size` check is disabled, the
/// filter properties byte is ignored, and once decoding succeeds (and the
/// optional `unpacked_size` matches) the function unconditionally panics at
/// the block-padding TODO instead of returning.
+fn readBlock(input: *Reader, allocating: *Writer.Allocating) !void {
+ var packed_size: ?u64 = null;
+ var unpacked_size: ?u64 = null;
+
+ {
+ // Read the block header via peeking so that we can hash the whole thing too.
+ const first_byte: usize = try input.peekByte();
+ if (first_byte == 0) return error.SuccessfulEndOfStream;
+
// The size byte counts 4-byte units. The resulting size covers the header
// bytes hashed below; the CRC32 itself is read separately afterwards.
+ const declared_header_size = first_byte * 4;
// Buffer the whole header up front so it can be sliced out of
// `input.buffer` for hashing once it has been parsed.
+ try input.fill(declared_header_size);
+ const header_seek_start = input.seek;
// Skip the size byte that was only peeked above.
+ input.toss(1);
+
+ const Flags = packed struct(u8) {
+ last_filter_index: u2,
+ reserved: u4,
+ has_packed_size: bool,
+ has_unpacked_size: bool,
+ };
+ const flags = try input.takeStruct(Flags, .little);
+
// Widen to u3 before adding 1 so the maximum index (3) cannot overflow.
+ const filter_count = @as(u3, flags.last_filter_index) + 1;
+ if (filter_count > 1) return error.Unsupported;
+
+ if (flags.has_packed_size) packed_size = try input.takeLeb128(u64);
+ if (flags.has_unpacked_size) unpacked_size = try input.takeLeb128(u64);
+
+ const FilterId = enum(u64) {
+ lzma2 = 0x21,
+ _,
+ };
+
+ const filter_id: FilterId = @enumFromInt(try input.takeLeb128(u64));
+ if (filter_id != .lzma2) return error.Unsupported;
+
+ const properties_size = try input.takeLeb128(u64);
+ if (properties_size != 1) return error.CorruptInput;
+ // TODO: use filter properties
+ _ = try input.takeByte();
+
// Everything between the parsed fields and the declared header end must
// be zero padding.
+ const actual_header_size = input.seek - header_seek_start;
+ if (actual_header_size > declared_header_size) return error.CorruptInput;
+ var remaining_bytes = declared_header_size - actual_header_size;
+ while (remaining_bytes != 0) {
+ if (try input.takeByte() != 0) return error.CorruptInput;
+ remaining_bytes -= 1;
+ }
+
// NOTE(review): this assumes none of the reads above moved the buffered
// header within `input.buffer` since `header_seek_start` was captured -
// confirm against the Reader implementation.
+ const header_slice = input.buffer[header_seek_start..][0..declared_header_size];
+ const actual_hash = Crc32.hash(header_slice);
+ const declared_hash = try input.takeInt(u32, .little);
+ if (actual_hash != declared_hash) return error.WrongChecksum;
+ }
+
+ // Compressed Data
+
+ var lzma2_decode = try lzma2.Decode.init(allocating.allocator);
// Measure the output produced by this block as the growth of the writer.
+ const before_size = allocating.writer.end;
+ try lzma2_decode.decompress(input, allocating);
+ const unpacked_bytes = allocating.writer.end - before_size;
+
+ // TODO restore this check
+ //if (packed_size) |s| {
+ // if (s != packed_counter.bytes_read)
+ // return error.CorruptInput;
+ //}
+
+ if (unpacked_size) |s| {
+ if (s != unpacked_bytes) return error.CorruptInput;
+ }
+
+ // Block Padding
// Unconditional panic: block padding handling has not been ported yet, so
// this function cannot currently return normally past this point.
+ if (true) @panic("TODO account for block padding");
+ //while (block_counter.bytes_read % 4 != 0) {
+ // if (try block_reader.takeByte() != 0)
+ // return error.CorruptInput;
+ //}
+
+}
+
/// Called by `readIndirect` when `readBlock` reports
/// `error.SuccessfulEndOfStream`.
///
/// Not implemented yet: `d` is unused and the function unconditionally panics
/// with "TODO". The commented-out lines below are the previous implementation
/// kept as a porting reference; they still use names (`counting_reader`,
/// `counter`, `hasher`, `d.block_decode`, `d.in_reader`) that are not defined
/// in this function.
+fn finish(d: *Decompress) void {
_ = d;
@panic("TODO");
-}
+ //const input = d.input;
+ //const index_size = blk: {
+ // const record_count = try input.takeLeb128(u64);
+ // if (record_count != d.block_decode.block_count)
+ // return error.CorruptInput;
+
+ // var i: usize = 0;
+ // while (i < record_count) : (i += 1) {
+ // // TODO: validate records
+ // _ = try std.leb.readUleb128(u64, counting_reader);
+ // _ = try std.leb.readUleb128(u64, counting_reader);
+ // }
+
+ // while (counter.bytes_read % 4 != 0) {
+ // if (try counting_reader.takeByte() != 0)
+ // return error.CorruptInput;
+ // }
+
+ // const hash_a = hasher.hasher.final();
+ // const hash_b = try counting_reader.takeInt(u32, .little);
+ // if (hash_a != hash_b)
+ // return error.WrongChecksum;
+
+ // break :blk counter.bytes_read;
+ //};
+
+ //const hash_a = try d.in_reader.takeInt(u32, .little);
-// if (buffer.len == 0)
-// return 0;
-//
-// const r = try self.block_decode.read(buffer);
-// if (r != 0)
-// return r;
-//
-// const index_size = blk: {
-// var hasher = hashedReader(self.in_reader, Crc32.init());
-// hasher.hasher.update(&[1]u8{0x00});
-//
-// var counter = std.io.countingReader(hasher.reader());
-// counter.bytes_read += 1;
-//
-// const counting_reader = counter.reader();
-//
-// const record_count = try std.leb.readUleb128(u64, counting_reader);
-// if (record_count != self.block_decode.block_count)
-// return error.CorruptInput;
-//
-// var i: usize = 0;
-// while (i < record_count) : (i += 1) {
-// // TODO: validate records
-// _ = try std.leb.readUleb128(u64, counting_reader);
-// _ = try std.leb.readUleb128(u64, counting_reader);
-// }
-//
-// while (counter.bytes_read % 4 != 0) {
-// if (try counting_reader.readByte() != 0)
-// return error.CorruptInput;
-// }
-//
-// const hash_a = hasher.hasher.final();
-// const hash_b = try counting_reader.readInt(u32, .little);
-// if (hash_a != hash_b)
-// return error.WrongChecksum;
-//
-// break :blk counter.bytes_read;
-// };
-//
-// const hash_a = try self.in_reader.readInt(u32, .little);
-//
-// const hash_b = blk: {
-// var hasher = hashedReader(self.in_reader, Crc32.init());
-// const hashed_reader = hasher.reader();
-//
-// const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4;
-// if (backward_size != index_size)
-// return error.CorruptInput;
-//
-// var check: Check = undefined;
-// try readStreamFlags(hashed_reader, &check);
-//
-// break :blk hasher.hasher.final();
-// };
-//
-// if (hash_a != hash_b)
-// return error.WrongChecksum;
-//
-// const magic = try self.in_reader.readBytesNoEof(2);
-// if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
-// return error.CorruptInput;
-//
-// return 0;
-//}
-
-//fn readBlock(self: *BlockDecode) Error!void {
-// var block_counter = std.io.countingReader(self.inner_reader);
-// const block_reader = block_counter.reader();
-//
-// var packed_size: ?u64 = null;
-// var unpacked_size: ?u64 = null;
-//
-// // Block Header
-// {
-// var header_hasher = hashedReader(block_reader, Crc32.init());
-// const header_reader = header_hasher.reader();
-//
-// const header_size = @as(u64, try header_reader.readByte()) * 4;
-// if (header_size == 0)
-// return error.EndOfStreamWithNoError;
-//
-// const Flags = packed struct(u8) {
-// last_filter_index: u2,
-// reserved: u4,
-// has_packed_size: bool,
-// has_unpacked_size: bool,
-// };
-//
-// const flags = @as(Flags, @bitCast(try header_reader.readByte()));
-// const filter_count = @as(u3, flags.last_filter_index) + 1;
-// if (filter_count > 1)
-// return error.Unsupported;
-//
-// if (flags.has_packed_size)
-// packed_size = try std.leb.readUleb128(u64, header_reader);
-//
-// if (flags.has_unpacked_size)
-// unpacked_size = try std.leb.readUleb128(u64, header_reader);
-//
-// const FilterId = enum(u64) {
-// lzma2 = 0x21,
-// _,
-// };
-//
-// const filter_id = @as(
-// FilterId,
-// @enumFromInt(try std.leb.readUleb128(u64, header_reader)),
-// );
-//
-// if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000)
-// return error.CorruptInput;
-//
-// if (filter_id != .lzma2)
-// return error.Unsupported;
-//
-// const properties_size = try std.leb.readUleb128(u64, header_reader);
-// if (properties_size != 1)
-// return error.CorruptInput;
-//
-// // TODO: use filter properties
-// _ = try header_reader.readByte();
-//
-// while (block_counter.bytes_read != header_size) {
-// if (try header_reader.readByte() != 0)
-// return error.CorruptInput;
-// }
-//
-// const hash_a = header_hasher.hasher.final();
-// const hash_b = try header_reader.readInt(u32, .little);
-// if (hash_a != hash_b)
-// return error.WrongChecksum;
-// }
-//
-// // Compressed Data
-// var packed_counter = std.io.countingReader(block_reader);
-// try lzma2.decompress(
-// self.allocator,
-// packed_counter.reader(),
-// self.to_read.writer(self.allocator),
-// );
-//
-// if (packed_size) |s| {
-// if (s != packed_counter.bytes_read)
-// return error.CorruptInput;
-// }
-//
-// const unpacked_bytes = self.to_read.items;
-// if (unpacked_size) |s| {
-// if (s != unpacked_bytes.len)
-// return error.CorruptInput;
-// }
-//
-// // Block Padding
-// while (block_counter.bytes_read % 4 != 0) {
-// if (try block_reader.readByte() != 0)
-// return error.CorruptInput;
-// }
-//
-// switch (self.check) {
-// .none => {},
-// .crc32 => {
-// const hash_a = Crc32.hash(unpacked_bytes);
-// const hash_b = try self.inner_reader.readInt(u32, .little);
-// if (hash_a != hash_b)
-// return error.WrongChecksum;
-// },
-// .crc64 => {
-// const hash_a = Crc64.hash(unpacked_bytes);
-// const hash_b = try self.inner_reader.readInt(u64, .little);
-// if (hash_a != hash_b)
-// return error.WrongChecksum;
-// },
-// .sha256 => {
-// var hash_a: [Sha256.digest_length]u8 = undefined;
-// Sha256.hash(unpacked_bytes, &hash_a, .{});
-//
-// var hash_b: [Sha256.digest_length]u8 = undefined;
-// try self.inner_reader.readNoEof(&hash_b);
-//
-// if (!std.mem.eql(u8, &hash_a, &hash_b))
-// return error.WrongChecksum;
-// },
-// else => return error.Unsupported,
-// }
-//
-// self.block_count += 1;
-//}
+ //const hash_b = blk: {
+ // var hasher = hashedReader(d.in_reader, Crc32.init());
+ // const hashed_reader = hasher.reader();
+
+ // const backward_size = (@as(u64, try hashed_reader.takeInt(u32, .little)) + 1) * 4;
+ // if (backward_size != index_size)
+ // return error.CorruptInput;
+
+ // var check: Check = undefined;
+ // try readStreamFlags(hashed_reader, &check);
+
+ // break :blk hasher.hasher.final();
+ //};
+
+ //if (hash_a != hash_b)
+ // return error.WrongChecksum;
+
+ //const magic = try d.in_reader.takeBytesNoEof(2);
+ //if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
+ // return error.CorruptInput;
+
+ //return 0;
+}
lib/std/compress/xz/test.zig
@@ -3,19 +3,23 @@ const testing = std.testing;
const xz = std.compress.xz;
/// Test helper: decompresses the xz stream in `data` and returns all of the
/// output as a slice allocated with `testing.allocator`. The caller frees it.
fn decompress(data: []const u8) ![]u8 {
- var in_stream = std.io.fixedBufferStream(data);
+ const gpa = testing.allocator;
- var xz_stream = try xz.decompress(testing.allocator, in_stream.reader());
// Read the compressed bytes straight from the in-memory slice.
+ var in_stream: std.Io.Reader = .fixed(data);
+
// Start with an empty buffer (`&.{}`); it is released by `deinit` below.
+ var xz_stream = try xz.Decompress.init(&in_stream, gpa, &.{});
defer xz_stream.deinit();
- return xz_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize));
+ return xz_stream.reader.allocRemaining(gpa, .unlimited);
}
/// Test helper: decompresses `data` with `decompress` and expects the output
/// to equal `expected` byte for byte. The result is freed with
/// `testing.allocator` on every exit path.
fn testReader(data: []const u8, comptime expected: []const u8) !void {
- const buf = try decompress(data);
- defer testing.allocator.free(buf);
+ const gpa = testing.allocator;
+
+ const result = try decompress(data);
+ defer gpa.free(result);
- try testing.expectEqualSlices(u8, expected, buf);
+ try testing.expectEqualSlices(u8, expected, result);
}
test "compressed data" {