Commit a8ae6c2f42
lib/std/compress/lzma.zig
@@ -105,7 +105,6 @@ pub const RangeDecoder = struct {
pub const Decode = struct {
properties: Properties,
- unpacked_size: ?u64,
literal_probs: Vec2d,
pos_slot_decoder: [4]BitTree(6),
align_decoder: BitTree(4),
@@ -121,15 +120,10 @@ pub const Decode = struct {
len_decoder: LenDecoder,
rep_len_decoder: LenDecoder,
- pub fn init(
- gpa: Allocator,
- properties: Properties,
- unpacked_size: ?u64,
- ) !Decode {
+ pub fn init(gpa: Allocator, properties: Properties) !Decode {
return .{
.properties = properties,
- .unpacked_size = unpacked_size,
- .literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (properties.lc + properties.lp), 0x300 }),
+ .literal_probs = try Vec2d.init(gpa, 0x400, @as(usize, 1) << (properties.lc + properties.lp), 0x300),
.pos_slot_decoder = @splat(.{}),
.align_decoder = .{},
.pos_decoders = @splat(0x400),
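The init body above leans on `@splat` with an inferred result type: the destination field's array type supplies the element count, and every element is filled with the given value. A self-contained sketch of the same pattern (the array length here is arbitrary, purely illustrative):

    const std = @import("std");

    test "@splat fills every element of the destination array" {
        // Same shape as `.pos_decoders = @splat(0x400)` above: the result type
        // ([16]u16 here) supplies the length, @splat supplies the value.
        const probs: [16]u16 = @splat(0x400);
        try std.testing.expectEqual(@as(u16, 0x400), probs[0]);
        try std.testing.expectEqual(@as(u16, 0x400), probs[15]);
    }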
@@ -157,7 +151,7 @@ pub const Decode = struct {
self.literal_probs.fill(0x400);
} else {
self.literal_probs.deinit(gpa);
- self.literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
+ self.literal_probs = try Vec2d.init(gpa, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300);
}
self.properties = new_props;
@@ -176,11 +170,12 @@ pub const Decode = struct {
self.rep_len_decoder.reset();
}
- fn processNext(
+ pub fn process(
self: *Decode,
reader: *Reader,
allocating: *Writer.Allocating,
- buffer: *CircularBuffer,
+ /// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`.
+ buffer: anytype,
decoder: *RangeDecoder,
) !ProcessingStatus {
const gpa = allocating.allocator;
@@ -256,39 +251,11 @@ pub const Decode = struct {
return .more;
}
- pub fn process(
- self: *Decode,
- reader: *Reader,
- allocating: *Writer.Allocating,
- buffer: *CircularBuffer,
- decoder: *RangeDecoder,
- ) !void {
- process_next: {
- if (self.unpacked_size) |unpacked_size| {
- if (buffer.len >= unpacked_size) {
- break :process_next;
- }
- } else if (decoder.isFinished()) {
- break :process_next;
- }
- switch (try self.processNext(reader, allocating, buffer, decoder)) {
- .more => return,
- .finished => {},
- }
- }
-
- if (self.unpacked_size) |unpacked_size| {
- if (buffer.len != unpacked_size) return error.DecompressedSizeMismatch;
- }
-
- try buffer.finish(&allocating.writer);
- self.state = math.maxInt(usize);
- }
-
fn decodeLiteral(
self: *Decode,
reader: *Reader,
- buffer: *CircularBuffer,
+ /// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`.
+ buffer: anytype,
decoder: *RangeDecoder,
) !u8 {
const def_prev_byte = 0;
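The switch from `buffer: *CircularBuffer` to `buffer: anytype` in `process` and `decodeLiteral` is what lets lzma2 drive the same decoder with its `AccumBuffer`: Zig instantiates the function once per concrete buffer type, so any type exposing the expected methods works, with no vtable or wrapper type. A minimal sketch of that duck-typing, using made-up buffer types (`RingBuf`, `AccumBuf`) that only model the `lastOr` method, not the stdlib buffers:

    const std = @import("std");

    const RingBuf = struct {
        last_byte: u8 = 0,
        fn lastOr(b: RingBuf, lit: u8) u8 {
            return if (b.last_byte == 0) lit else b.last_byte;
        }
    };

    const AccumBuf = struct {
        bytes: std.ArrayList(u8),
        fn lastOr(b: AccumBuf, lit: u8) u8 {
            const items = b.bytes.items;
            return if (items.len == 0) lit else items[items.len - 1];
        }
    };

    // One generic routine, compiled once per buffer type that gets passed in.
    fn prevByte(buffer: anytype, default: u8) u8 {
        return buffer.lastOr(default);
    }

    test prevByte {
        const gpa = std.testing.allocator;
        try std.testing.expectEqual(@as(u8, 0xAA), prevByte(RingBuf{}, 0xAA));
        var a: AccumBuf = .{ .bytes = .empty };
        defer a.bytes.deinit(gpa);
        try a.bytes.append(gpa, 0x42);
        try std.testing.expectEqual(@as(u8, 0x42), prevByte(a, 0xAA));
    }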
@@ -377,10 +344,7 @@ pub const Decode = struct {
}
pub fn get(self: CircularBuffer, index: usize) u8 {
- return if (0 <= index and index < self.buf.items.len)
- self.buf.items[index]
- else
- 0;
+ return if (0 <= index and index < self.buf.items.len) self.buf.items[index] else 0;
}
pub fn set(self: *CircularBuffer, gpa: Allocator, index: usize, value: u8) !void {
@@ -524,29 +488,29 @@ pub const Decode = struct {
data: []u16,
cols: usize,
- pub fn init(gpa: Allocator, value: u16, size: struct { usize, usize }) !Vec2d {
- const len = try math.mul(usize, size[0], size[1]);
+ pub fn init(gpa: Allocator, value: u16, w: usize, h: usize) !Vec2d {
+ const len = try math.mul(usize, w, h);
const data = try gpa.alloc(u16, len);
@memset(data, value);
return .{
.data = data,
- .cols = size[1],
+ .cols = h,
};
}
- pub fn deinit(self: *Vec2d, gpa: Allocator) void {
- gpa.free(self.data);
- self.* = undefined;
+ pub fn deinit(v: *Vec2d, gpa: Allocator) void {
+ gpa.free(v.data);
+ v.* = undefined;
}
- pub fn fill(self: *Vec2d, value: u16) void {
- @memset(self.data, value);
+ pub fn fill(v: *Vec2d, value: u16) void {
+ @memset(v.data, value);
}
- fn get(self: Vec2d, row: usize) ![]u16 {
- const start_row = try math.mul(usize, row, self.cols);
- const end_row = try math.add(usize, start_row, self.cols);
- return self.data[start_row..end_row];
+ fn get(v: Vec2d, row: usize) ![]u16 {
+ const start_row = try math.mul(usize, row, v.cols);
+ const end_row = try math.add(usize, start_row, v.cols);
+ return v.data[start_row..end_row];
}
};
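Vec2d itself is unchanged apart from the parameter shapes: the table is still one flat `[]u16` of `w * h` entries with `cols = h`, and row `r` lives at `data[r * cols .. r * cols + cols]`. A standalone sketch of that layout (`Table2d` is a made-up name, and the row arithmetic is simplified relative to the checked `math.mul`/`math.add` calls above):

    const std = @import("std");

    const Table2d = struct {
        data: []u16,
        cols: usize,

        fn init(gpa: std.mem.Allocator, value: u16, w: usize, h: usize) !Table2d {
            const data = try gpa.alloc(u16, try std.math.mul(usize, w, h));
            @memset(data, value);
            return .{ .data = data, .cols = h };
        }

        fn deinit(t: *Table2d, gpa: std.mem.Allocator) void {
            gpa.free(t.data);
            t.* = undefined;
        }

        fn row(t: Table2d, r: usize) []u16 {
            return t.data[r * t.cols ..][0..t.cols];
        }
    };

    test Table2d {
        const gpa = std.testing.allocator;
        var probs = try Table2d.init(gpa, 0x400, 8, 0x300);
        defer probs.deinit(gpa);
        try std.testing.expectEqual(@as(usize, 0x300), probs.row(7).len);
        try std.testing.expectEqual(@as(u16, 0x400), probs.row(0)[0]);
    }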
@@ -627,6 +591,7 @@ pub const Decompress = struct {
range_decoder: RangeDecoder,
decode: Decode,
err: ?Error,
+ unpacked_size: ?u64,
pub const Error = error{
OutOfMemory,
@@ -654,7 +619,7 @@ pub const Decompress = struct {
.input = input,
.buffer = Decode.CircularBuffer.init(params.dict_size, mem_limit),
.range_decoder = try RangeDecoder.init(input),
- .decode = try Decode.init(gpa, params.properties, params.unpacked_size),
+ .decode = try Decode.init(gpa, params.properties),
.reader = .{
.buffer = buffer,
.vtable = &.{
@@ -666,6 +631,7 @@ pub const Decompress = struct {
.end = 0,
},
.err = null,
+ .unpacked_size = params.unpacked_size,
};
}
@@ -728,20 +694,46 @@ pub const Decompress = struct {
r.end = allocating.writer.end;
}
if (d.decode.state == math.maxInt(usize)) return error.EndOfStream;
- d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) {
+
+ process_next: {
+ if (d.unpacked_size) |unpacked_size| {
+ if (d.buffer.len >= unpacked_size) break :process_next;
+ } else if (d.range_decoder.isFinished()) {
+ break :process_next;
+ }
+ switch (d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) {
+ error.WriteFailed => {
+ d.err = error.OutOfMemory;
+ return error.ReadFailed;
+ },
+ error.EndOfStream => {
+ d.err = error.EndOfStream;
+ return error.ReadFailed;
+ },
+ else => |e| {
+ d.err = e;
+ return error.ReadFailed;
+ },
+ }) {
+ .more => return 0,
+ .finished => break :process_next,
+ }
+ }
+
+ if (d.unpacked_size) |unpacked_size| {
+ if (d.buffer.len != unpacked_size) {
+ d.err = error.DecompressedSizeMismatch;
+ return error.ReadFailed;
+ }
+ }
+
+ d.buffer.finish(&allocating.writer) catch |err| switch (err) {
error.WriteFailed => {
d.err = error.OutOfMemory;
return error.ReadFailed;
},
- error.EndOfStream => {
- d.err = error.EndOfStream;
- return error.ReadFailed;
- },
- else => |e| {
- d.err = e;
- return error.ReadFailed;
- },
};
+ d.decode.state = math.maxInt(usize);
return 0;
}
};
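The error handling in `streamRead` above follows the usual `std.Io.Reader` convention: the vtable function may only fail with a generic stream error such as `error.ReadFailed`, so the concrete failure is stashed in `d.err` for the caller to inspect afterwards. A reduced sketch of that convention with made-up stand-ins (`MiniStream`, `decodeChunk`), not the stdlib types:

    const std = @import("std");

    const MiniStream = struct {
        err: ?Error = null,

        const Error = error{ OutOfMemory, DecompressedSizeMismatch };

        fn read(s: *MiniStream, corrupt: bool) error{ReadFailed}!usize {
            decodeChunk(corrupt) catch |e| {
                s.err = e; // remember the specific failure...
                return error.ReadFailed; // ...and report the generic stream error
            };
            return 0;
        }

        fn decodeChunk(corrupt: bool) Error!void {
            if (corrupt) return error.DecompressedSizeMismatch;
        }
    };

    test MiniStream {
        var s: MiniStream = .{};
        try std.testing.expectError(error.ReadFailed, s.read(true));
        try std.testing.expect(s.err.? == error.DecompressedSizeMismatch);
    }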
lib/std/compress/lzma2.zig
@@ -6,17 +6,15 @@ const Writer = std.Io.Writer;
const Reader = std.Io.Reader;
/// An accumulating buffer for LZ sequences
-pub const LzAccumBuffer = struct {
+pub const AccumBuffer = struct {
/// Buffer
buf: ArrayList(u8),
-
/// Buffer memory limit
memlimit: usize,
-
/// Total number of bytes sent through the buffer
len: usize,
- pub fn init(memlimit: usize) LzAccumBuffer {
+ pub fn init(memlimit: usize) AccumBuffer {
return .{
.buf = .{},
.memlimit = memlimit,
@@ -24,20 +22,20 @@ pub const LzAccumBuffer = struct {
};
}
- pub fn appendByte(self: *LzAccumBuffer, allocator: Allocator, byte: u8) !void {
+ pub fn appendByte(self: *AccumBuffer, allocator: Allocator, byte: u8) !void {
try self.buf.append(allocator, byte);
self.len += 1;
}
/// Reset the internal dictionary
- pub fn reset(self: *LzAccumBuffer, writer: *Writer) !void {
+ pub fn reset(self: *AccumBuffer, writer: *Writer) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
self.len = 0;
}
/// Retrieve the last byte or return a default
- pub fn lastOr(self: LzAccumBuffer, lit: u8) u8 {
+ pub fn lastOr(self: AccumBuffer, lit: u8) u8 {
const buf_len = self.buf.items.len;
return if (buf_len == 0)
lit
@@ -46,7 +44,7 @@ pub const LzAccumBuffer = struct {
}
/// Retrieve the n-th last byte
- pub fn lastN(self: LzAccumBuffer, dist: usize) !u8 {
+ pub fn lastN(self: AccumBuffer, dist: usize) !u8 {
const buf_len = self.buf.items.len;
if (dist > buf_len) {
return error.CorruptInput;
@@ -57,7 +55,7 @@ pub const LzAccumBuffer = struct {
/// Append a literal
pub fn appendLiteral(
- self: *LzAccumBuffer,
+ self: *AccumBuffer,
allocator: Allocator,
lit: u8,
writer: *Writer,
@@ -72,7 +70,7 @@ pub const LzAccumBuffer = struct {
/// Fetch an LZ sequence (length, distance) from inside the buffer
pub fn appendLz(
- self: *LzAccumBuffer,
+ self: *AccumBuffer,
allocator: Allocator,
len: usize,
dist: usize,
@@ -95,12 +93,12 @@ pub const LzAccumBuffer = struct {
self.len += len;
}
- pub fn finish(self: *LzAccumBuffer, writer: *Writer) !void {
+ pub fn finish(self: *AccumBuffer, writer: *Writer) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
}
- pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
+ pub fn deinit(self: *AccumBuffer, allocator: Allocator) void {
self.buf.deinit(allocator);
self.* = undefined;
}
@@ -109,59 +107,43 @@ pub const LzAccumBuffer = struct {
pub const Decode = struct {
lzma_decode: lzma.Decode,
- pub fn init(allocator: Allocator) !Decode {
- return Decode{
- .lzma_decode = try lzma.Decode.init(
- allocator,
- .{
- .lc = 0,
- .lp = 0,
- .pb = 0,
- },
- null,
- ),
- };
+ pub fn init(gpa: Allocator) !Decode {
+ return .{ .lzma_decode = try lzma.Decode.init(gpa, .{ .lc = 0, .lp = 0, .pb = 0 }) };
}
- pub fn deinit(self: *Decode, allocator: Allocator) void {
- self.lzma_decode.deinit(allocator);
+ pub fn deinit(self: *Decode, gpa: Allocator) void {
+ self.lzma_decode.deinit(gpa);
self.* = undefined;
}
- pub fn decompress(
- self: *Decode,
- allocator: Allocator,
- reader: *Reader,
- writer: *Writer,
- ) !void {
- var accum = LzAccumBuffer.init(std.math.maxInt(usize));
- defer accum.deinit(allocator);
+ pub fn decompress(d: *Decode, reader: *Reader, allocating: *Writer.Allocating) !void {
+ const gpa = allocating.allocator;
+
+ var accum = AccumBuffer.init(std.math.maxInt(usize));
+ defer accum.deinit(gpa);
while (true) {
- const status = try reader.readByte();
+ const status = try reader.takeByte();
switch (status) {
0 => break,
- 1 => try parseUncompressed(allocator, reader, writer, &accum, true),
- 2 => try parseUncompressed(allocator, reader, writer, &accum, false),
- else => try self.parseLzma(allocator, reader, writer, &accum, status),
+ 1 => try parseUncompressed(reader, allocating, &accum, true),
+ 2 => try parseUncompressed(reader, allocating, &accum, false),
+ else => try d.parseLzma(reader, allocating, &accum, status),
}
}
- try accum.finish(writer);
+ try accum.finish(&allocating.writer);
}
fn parseLzma(
- self: *Decode,
- allocator: Allocator,
+ d: *Decode,
reader: *Reader,
- writer: *Writer,
- accum: *LzAccumBuffer,
+ allocating: *Writer.Allocating,
+ accum: *AccumBuffer,
status: u8,
) !void {
- if (status & 0x80 == 0) {
- return error.CorruptInput;
- }
+ if (status & 0x80 == 0) return error.CorruptInput;
const Reset = struct {
dict: bool,
@@ -169,23 +151,23 @@ pub const Decode = struct {
props: bool,
};
- const reset = switch ((status >> 5) & 0x3) {
- 0 => Reset{
+ const reset: Reset = switch ((status >> 5) & 0x3) {
+ 0 => .{
.dict = false,
.state = false,
.props = false,
},
- 1 => Reset{
+ 1 => .{
.dict = false,
.state = true,
.props = false,
},
- 2 => Reset{
+ 2 => .{
.dict = false,
.state = true,
.props = true,
},
- 3 => Reset{
+ 3 => .{
.dict = true,
.state = true,
.props = true,
@@ -196,24 +178,24 @@ pub const Decode = struct {
const unpacked_size = blk: {
var tmp: u64 = status & 0x1F;
tmp <<= 16;
- tmp |= try reader.readInt(u16, .big);
+ tmp |= try reader.takeInt(u16, .big);
break :blk tmp + 1;
};
const packed_size = blk: {
- const tmp: u17 = try reader.readInt(u16, .big);
+ const tmp: u17 = try reader.takeInt(u16, .big);
break :blk tmp + 1;
};
- if (reset.dict) {
- try accum.reset(writer);
- }
+ if (reset.dict) try accum.reset(&allocating.writer);
+
+ const ld = &d.lzma_decode;
if (reset.state) {
- var new_props = self.lzma_decode.properties;
+ var new_props = ld.properties;
if (reset.props) {
- var props = try reader.readByte();
+ var props = try reader.takeByte();
if (props >= 225) {
return error.CorruptInput;
}
@@ -231,38 +213,44 @@ pub const Decode = struct {
new_props = .{ .lc = lc, .lp = lp, .pb = pb };
}
- try self.lzma_decode.resetState(allocator, new_props);
+ try ld.resetState(allocating.allocator, new_props);
}
- self.lzma_decode.unpacked_size = unpacked_size + accum.len;
+ var range_decoder = try lzma.RangeDecoder.init(reader);
- var counter = std.io.countingReader(reader);
- const counter_reader = counter.reader();
-
- var rangecoder = try lzma.RangeDecoder.init(counter_reader);
- while (try self.lzma_decode.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {}
-
- if (counter.bytes_read != packed_size) {
- return error.CorruptInput;
+ while (true) {
+ if (accum.len >= unpacked_size) break;
+ if (range_decoder.isFinished()) break;
+ switch (try ld.process(reader, allocating, accum, &range_decoder)) {
+ .more => continue,
+ .finished => break,
+ }
}
+ if (accum.len != unpacked_size) return error.DecompressedSizeMismatch;
+
+ // TODO restore this error
+ //if (counter.bytes_read != packed_size) {
+ // return error.CorruptInput;
+ //}
+ _ = packed_size;
}
fn parseUncompressed(
- allocator: Allocator,
reader: *Reader,
- writer: *Writer,
- accum: *LzAccumBuffer,
+ allocating: *Writer.Allocating,
+ accum: *AccumBuffer,
reset_dict: bool,
) !void {
- const unpacked_size = @as(u17, try reader.readInt(u16, .big)) + 1;
+ const unpacked_size = @as(u17, try reader.takeInt(u16, .big)) + 1;
- if (reset_dict) {
- try accum.reset(writer);
- }
+ if (reset_dict) try accum.reset(&allocating.writer);
+
+ const gpa = allocating.allocator;
- var i: @TypeOf(unpacked_size) = 0;
- while (i < unpacked_size) : (i += 1) {
- try accum.appendByte(allocator, try reader.readByte());
+ var i = unpacked_size;
+ while (i != 0) {
+ try accum.appendByte(gpa, try reader.takeByte());
+ i -= 1;
}
}
};
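For readers new to the LZMA2 framing touched by `parseLzma` above: the status byte packs the chunk kind and sizes, with bit 7 marking an LZMA chunk, bits 5..6 selecting the reset level, and bits 0..4 forming the high bits of (unpacked size - 1); two big-endian u16 fields follow for the remaining unpacked-size bits and the packed size, each stored minus one. A self-contained sketch of just that header decoding, with hypothetical names (`LzmaChunkHeader`, `parseLzmaChunkHeader`) that mirror the bit handling above but are not the stdlib code:

    const std = @import("std");

    const LzmaChunkHeader = struct {
        reset_level: u2,
        unpacked_size: u64,
        packed_size: u17,
    };

    fn parseLzmaChunkHeader(reader: *std.Io.Reader, status: u8) !LzmaChunkHeader {
        if (status & 0x80 == 0) return error.CorruptInput; // not an LZMA chunk
        const reset_level: u2 = @intCast((status >> 5) & 0x3);
        var unpacked_size: u64 = status & 0x1F;
        unpacked_size <<= 16;
        unpacked_size |= try reader.takeInt(u16, .big);
        unpacked_size += 1;
        const packed_size: u17 = @as(u17, try reader.takeInt(u16, .big)) + 1;
        return .{
            .reset_level = reset_level,
            .unpacked_size = unpacked_size,
            .packed_size = packed_size,
        };
    }

    test parseLzmaChunkHeader {
        // Control byte 0xE0: LZMA chunk, reset level 3, size high bits 0.
        var r: std.Io.Reader = .fixed(&.{ 0x00, 0x0F, 0x00, 0x1F });
        const h = try parseLzmaChunkHeader(&r, 0xE0);
        try std.testing.expectEqual(@as(u2, 3), h.reset_level);
        try std.testing.expectEqual(@as(u64, 0x10), h.unpacked_size);
        try std.testing.expectEqual(@as(u17, 0x20), h.packed_size);
    }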
@@ -273,13 +261,13 @@ test "decompress hello world stream" {
const gpa = std.testing.allocator;
- var stream: std.Io.Reader = .fixed(compressed);
-
- var decode = try Decode.init(gpa, &stream);
+ var decode = try Decode.init(gpa);
defer decode.deinit(gpa);
- const result = try decode.reader.allocRemaining(gpa, .unlimited);
- defer gpa.free(result);
+ var stream: std.Io.Reader = .fixed(compressed);
+ var result: std.Io.Writer.Allocating = .init(gpa);
+ defer result.deinit();
- try std.testing.expectEqualStrings(expected, result);
+ try decode.decompress(&stream, &result);
+ try std.testing.expectEqualStrings(expected, result.written());
}