Commit 824c157e0c
Changed files (7)
lib/std/compress/flate/BlockWriter.zig
@@ -31,7 +31,26 @@ fixed_literal_codes: [HuffmanEncoder.max_num_frequencies]HuffmanEncoder.Code,
fixed_distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
-pub fn init(bw: *BlockWriter) void {
+pub fn init(output: *Writer) BlockWriter {
+ return .{
+ .output = output,
+ .codegen_freq = undefined,
+ .literal_freq = undefined,
+ .distance_freq = undefined,
+ .codegen = undefined,
+ .literal_encoding = undefined,
+ .distance_encoding = undefined,
+ .codegen_encoding = undefined,
+ .fixed_literal_encoding = undefined,
+ .fixed_distance_encoding = undefined,
+ .huff_distance = undefined,
+ .fixed_literal_codes = undefined,
+ .fixed_distance_codes = undefined,
+ .distance_codes = undefined,
+ };
+}
+
+pub fn initBuffers(bw: *BlockWriter) void {
bw.fixed_literal_encoding = .fixedLiteralEncoder(&bw.fixed_literal_codes);
bw.fixed_distance_encoding = .fixedDistanceEncoder(&bw.fixed_distance_codes);
bw.huff_distance = .huffmanDistanceEncoder(&bw.distance_codes);
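
The `init`/`initBuffers` split exists because the fixed encoders store pointers into the struct's own `*_codes` arrays; those pointers are only valid once the `BlockWriter` sits at its final address, so they cannot be produced by a by-value constructor. A minimal sketch of the same two-phase pattern, using a hypothetical `Thing` type:

    const Thing = struct {
        codes: [4]u8,
        view: []u8, // points into `codes`, so it must be wired up in place

        fn init() Thing {
            // By-value construction: the self-referential field stays undefined.
            return .{ .codes = undefined, .view = undefined };
        }

        fn initBuffers(t: *Thing) void {
            // Safe now: `t` has its final address, so the slice stays valid.
            t.view = &t.codes;
        }
    };
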
lib/std/compress/flate/Compress.zig
@@ -122,22 +122,7 @@ pub const Options = struct {
pub fn init(output: *Writer, buffer: []u8, options: Options) Compress {
return .{
- .block_writer = .{
- .output = output,
- .codegen_freq = undefined,
- .literal_freq = undefined,
- .distance_freq = undefined,
- .codegen = undefined,
- .literal_encoding = undefined,
- .distance_encoding = undefined,
- .codegen_encoding = undefined,
- .fixed_literal_encoding = undefined,
- .fixed_distance_encoding = undefined,
- .huff_distance = undefined,
- .fixed_literal_codes = undefined,
- .fixed_distance_codes = undefined,
- .distance_codes = undefined,
- },
+ .block_writer = .init(output),
.level = .get(options.level),
.hasher = .init(options.container),
.state = .header,
@@ -188,20 +173,21 @@ fn drain(me: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize
}
const buffered = me.buffered();
- const min_lookahead = flate.match.min_length + flate.match.max_length;
+ const min_lookahead = Token.min_length + Token.max_length;
const history_plus_lookahead_len = flate.history_len + min_lookahead;
if (buffered.len < history_plus_lookahead_len) return 0;
const lookahead = buffered[flate.history_len..];
- _ = lookahead;
// TODO tokenize
+ _ = lookahead;
//c.hasher.update(lookahead[0..n]);
@panic("TODO");
}
pub fn end(c: *Compress) !void {
try endUnflushed(c);
- try c.output.flush();
+ const out = c.block_writer.output;
+ try out.flush();
}
pub fn endUnflushed(c: *Compress) !void {
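
For the numbers in `drain` above: `Token.min_length` is 4 and `Token.max_length` is 258 (see Token.zig below), so `min_lookahead` is 262 and tokenization waits until `flate.history_len + 262 = 33030` bytes are buffered. A throwaway check of that arithmetic, assuming only the constants:

    const std = @import("std");

    test "drain threshold arithmetic" {
        const min_lookahead = 4 + 258; // Token.min_length + Token.max_length
        try std.testing.expectEqual(@as(usize, 33030), 32768 + min_lookahead);
    }
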
@@ -227,7 +213,7 @@ pub fn endUnflushed(c: *Compress) !void {
// Checksum value of the uncompressed data (excluding any
// dictionary data) computed according to Adler-32
// algorithm.
- std.mem.writeInt(u32, try out.writableArray(4), zlib.final, .big);
+ std.mem.writeInt(u32, try out.writableArray(4), zlib.adler, .big);
},
.raw => {},
}
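
The rename from `zlib.final` to `zlib.adler` reads the running Adler-32 state directly; per RFC 1950 the checksum of the uncompressed data is appended big-endian. A standalone sanity check against a commonly cited reference value, assuming `std.hash.Adler32`:

    const std = @import("std");

    test "Adler-32 reference value" {
        // Adler-32("Wikipedia") == 0x11E60398 is a well-known reference.
        try std.testing.expectEqual(@as(u32, 0x11E60398), std.hash.Adler32.hash("Wikipedia"));
    }
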
@@ -243,15 +229,16 @@ pub const Simple = struct {
pub const Strategy = enum { huffman, store };
- pub fn init(out: *Writer, buffer: []u8, container: Container) !Simple {
- const self: Simple = .{
+ pub fn init(output: *Writer, buffer: []u8, container: Container, strategy: Strategy) !Simple {
+ const header = container.header();
+ try output.writeAll(header);
+ return .{
.buffer = buffer,
.wp = 0,
- .block_writer = .init(out),
+ .block_writer = .init(output),
.hasher = .init(container),
+ .strategy = strategy,
};
- try container.writeHeader(self.out);
- return self;
}
pub fn flush(self: *Simple) !void {
@@ -263,7 +250,7 @@ pub const Simple = struct {
pub fn finish(self: *Simple) !void {
try self.flushBuffer(true);
try self.block_writer.flush();
- try self.hasher.container().writeFooter(&self.hasher, self.out);
+ try self.hasher.container().writeFooter(&self.hasher, self.block_writer.output);
}
fn flushBuffer(self: *Simple, final: bool) !void {
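
`Simple.init` now writes the container header up front, which is why construction can fail (`!Simple`) and why `finish` only has to append the footer. A hypothetical call site under those assumptions, with `output` an existing `*std.Io.Writer`:

    var buffer: [4096]u8 = undefined;
    var simple = try Compress.Simple.init(output, &buffer, .gzip, .huffman);
    // ... feed data through `simple`, then:
    try simple.finish();
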
@@ -300,7 +287,13 @@ test "generate a Huffman code from an array of frequencies" {
};
var codes: [19]HuffmanEncoder.Code = undefined;
- var enc: HuffmanEncoder = .{ .codes = &codes };
+ var enc: HuffmanEncoder = .{
+ .codes = &codes,
+ .freq_cache = undefined,
+ .bit_count = undefined,
+ .lns = undefined,
+ .lfs = undefined,
+ };
enc.generate(freqs[0..], 7);
try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
@@ -337,247 +330,3 @@ test "generate a Huffman code from an array of frequencies" {
try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
}
-
-test "tokenization" {
- const L = Token.initLiteral;
- const M = Token.initMatch;
-
- const cases = [_]struct {
- data: []const u8,
- tokens: []const Token,
- }{
- .{
- .data = "Blah blah blah blah blah!",
- .tokens = &[_]Token{ L('B'), L('l'), L('a'), L('h'), L(' '), L('b'), M(5, 18), L('!') },
- },
- .{
- .data = "ABCDEABCD ABCDEABCD",
- .tokens = &[_]Token{
- L('A'), L('B'), L('C'), L('D'), L('E'), L('A'), L('B'), L('C'), L('D'), L(' '),
- L('A'), M(10, 8),
- },
- },
- };
-
- for (cases) |c| {
- inline for (Container.list) |container| { // for each wrapping
-
- var cw = std.Io.countingWriter(std.Io.null_writer);
- const cww = cw.writer();
- var df = try Compress(container, @TypeOf(cww), TestTokenWriter).init(cww, .{});
-
- _ = try df.write(c.data);
- try df.flush();
-
- // df.token_writer.show();
- try expect(df.block_writer.pos == c.tokens.len); // number of tokens written
- try testing.expectEqualSlices(Token, df.block_writer.get(), c.tokens); // tokens match
-
- try testing.expectEqual(container.headerSize(), cw.bytes_written);
- try df.finish();
- try testing.expectEqual(container.size(), cw.bytes_written);
- }
- }
-}
-
-// Tests that tokens written are equal to expected token list.
-const TestTokenWriter = struct {
- const Self = @This();
-
- pos: usize = 0,
- actual: [128]Token = undefined,
-
- pub fn init(_: anytype) Self {
- return .{};
- }
- pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
- for (tokens) |t| {
- self.actual[self.pos] = t;
- self.pos += 1;
- }
- }
-
- pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}
-
- pub fn get(self: *Self) []Token {
- return self.actual[0..self.pos];
- }
-
- pub fn show(self: *Self) void {
- std.debug.print("\n", .{});
- for (self.get()) |t| {
- t.show();
- }
- }
-
- pub fn flush(_: *Self) !void {}
-};
-
-test "file tokenization" {
- const levels = [_]Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
- const cases = [_]struct {
- data: []const u8, // uncompressed content
- // expected number of tokens produced in deflate tokenization
- tokens_count: [levels.len]usize = .{0} ** levels.len,
- }{
- .{
- .data = @embedFile("testdata/rfc1951.txt"),
- .tokens_count = .{ 7675, 7672, 7599, 7594, 7598, 7599 },
- },
-
- .{
- .data = @embedFile("testdata/block_writer/huffman-null-max.input"),
- .tokens_count = .{ 257, 257, 257, 257, 257, 257 },
- },
- .{
- .data = @embedFile("testdata/block_writer/huffman-pi.input"),
- .tokens_count = .{ 2570, 2564, 2564, 2564, 2564, 2564 },
- },
- .{
- .data = @embedFile("testdata/block_writer/huffman-text.input"),
- .tokens_count = .{ 235, 234, 234, 234, 234, 234 },
- },
- .{
- .data = @embedFile("testdata/fuzz/roundtrip1.input"),
- .tokens_count = .{ 333, 331, 331, 331, 331, 331 },
- },
- .{
- .data = @embedFile("testdata/fuzz/roundtrip2.input"),
- .tokens_count = .{ 334, 334, 334, 334, 334, 334 },
- },
- };
-
- for (cases) |case| { // for each case
- const data = case.data;
-
- for (levels, 0..) |level, i| { // for each compression level
- var original: std.Io.Reader = .fixed(data);
-
- // buffer for decompressed data
- var al = std.ArrayList(u8).init(testing.allocator);
- defer al.deinit();
- const writer = al.writer();
-
- // create compressor
- const WriterType = @TypeOf(writer);
- const TokenWriter = TokenDecoder(@TypeOf(writer));
- var cmp = try Compress(.raw, WriterType, TokenWriter).init(writer, .{ .level = level });
-
- // Stream uncompressed `original` data to the compressor. It will
- // produce tokens list and pass that list to the TokenDecoder. This
- // TokenDecoder uses CircularBuffer from inflate to convert list of
- // tokens back to the uncompressed stream.
- try cmp.compress(original.reader());
- try cmp.flush();
- const expected_count = case.tokens_count[i];
- const actual = cmp.block_writer.tokens_count;
- if (expected_count == 0) {
- std.debug.print("actual token count {d}\n", .{actual});
- } else {
- try testing.expectEqual(expected_count, actual);
- }
-
- try testing.expectEqual(data.len, al.items.len);
- try testing.expectEqualSlices(u8, data, al.items);
- }
- }
-}
-
-const TokenDecoder = struct {
- output: *Writer,
- tokens_count: usize,
-
- pub fn init(output: *Writer) TokenDecoder {
- return .{
- .output = output,
- .tokens_count = 0,
- };
- }
-
- pub fn write(self: *TokenDecoder, tokens: []const Token, _: bool, _: ?[]const u8) !void {
- self.tokens_count += tokens.len;
- for (tokens) |t| {
- switch (t.kind) {
- .literal => self.hist.write(t.literal()),
- .match => try self.hist.writeMatch(t.length(), t.distance()),
- }
- if (self.hist.free() < 285) try self.flushWin();
- }
- try self.flushWin();
- }
-
- fn flushWin(self: *TokenDecoder) !void {
- while (true) {
- const buf = self.hist.read();
- if (buf.len == 0) break;
- try self.output.writeAll(buf);
- }
- }
-};
-
-test "store simple compressor" {
- if (true) return error.SkipZigTest;
- //const data = "Hello world!";
- //const expected = [_]u8{
- // 0x1, // block type 0, final bit set
- // 0xc, 0x0, // len = 12
- // 0xf3, 0xff, // ~len
- // 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', //
- // //0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21,
- //};
-
- //var fbs: std.Io.Reader = .fixed(data);
- //var al = std.ArrayList(u8).init(testing.allocator);
- //defer al.deinit();
-
- //var cmp = try store.compressor(.raw, al.writer());
- //try cmp.compress(&fbs);
- //try cmp.finish();
- //try testing.expectEqualSlices(u8, &expected, al.items);
-
- //fbs = .fixed(data);
- //try al.resize(0);
-
- //// huffman only compressor will also emit a stored block for this small sample
- //var hc = try huffman.compressor(.raw, al.writer());
- //try hc.compress(&fbs);
- //try hc.finish();
- //try testing.expectEqualSlices(u8, &expected, al.items);
-}
-
-test "sliding window match" {
- const data = "Blah blah blah blah blah!";
- var win: Writer = .{};
- try expect(win.write(data) == data.len);
- try expect(win.wp == data.len);
- try expect(win.rp == 0);
-
- // length between l symbols
- try expect(win.match(1, 6, 0) == 18);
- try expect(win.match(1, 11, 0) == 13);
- try expect(win.match(1, 16, 0) == 8);
- try expect(win.match(1, 21, 0) == 0);
-
- // position 15 = "blah blah!"
- // position 20 = "blah!"
- try expect(win.match(15, 20, 0) == 4);
- try expect(win.match(15, 20, 3) == 4);
- try expect(win.match(15, 20, 4) == 0);
-}
-
-test "sliding window slide" {
- var win: Writer = .{};
- win.wp = Writer.buffer_len - 11;
- win.rp = Writer.buffer_len - 111;
- win.buffer[win.rp] = 0xab;
- try expect(win.lookahead().len == 100);
- try expect(win.tokensBuffer().?.len == win.rp);
-
- const n = win.slide();
- try expect(n == 32757);
- try expect(win.buffer[win.rp] == 0xab);
- try expect(win.rp == Writer.hist_len - 111);
- try expect(win.wp == Writer.hist_len - 11);
- try expect(win.lookahead().len == 100);
- try expect(win.tokensBuffer() == null);
-}
lib/std/compress/flate/Decompress.zig
@@ -4,8 +4,8 @@ const Container = flate.Container;
const Token = @import("Token.zig");
const testing = std.testing;
const Decompress = @This();
-const Writer = std.io.Writer;
-const Reader = std.io.Reader;
+const Writer = std.Io.Writer;
+const Reader = std.Io.Reader;
input: *Reader,
reader: Reader,
@@ -129,7 +129,7 @@ fn decodeSymbol(self: *Decompress, decoder: anytype) !Symbol {
return sym;
}
-pub fn stream(r: *Reader, w: *Writer, limit: std.io.Limit) Reader.StreamError!usize {
+pub fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
return readInner(d, w, limit) catch |err| switch (err) {
error.EndOfStream => return error.EndOfStream,
@@ -143,7 +143,8 @@ pub fn stream(r: *Reader, w: *Writer, limit: std.io.Limit) Reader.StreamError!us
};
}
-fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.StreamError)!usize {
+fn readInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader.StreamError)!usize {
+ var remaining = @intFromEnum(limit);
const in = d.input;
sw: switch (d.state) {
.protocol_header => switch (d.hasher.container()) {
@@ -182,15 +183,9 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S
continue :sw .block_header;
},
.zlib => {
- const Header = extern struct {
- cmf: packed struct(u8) {
- cm: u4,
- cinfo: u4,
- },
- flg: u8,
- };
- const header = try in.takeStruct(Header);
- if (header.cmf.cm != 8 or header.cmf.cinfo > 7) return error.BadZlibHeader;
+ const header = try in.takeArray(2);
+ const cmf: packed struct(u8) { cm: u4, cinfo: u4 } = @bitCast(header[0]);
+ if (cmf.cm != 8 or cmf.cinfo > 7) return error.BadZlibHeader;
continue :sw .block_header;
},
.raw => continue :sw .block_header,
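
The `@bitCast` works because packed struct fields are laid out starting at the least significant bit, matching RFC 1950 where CM occupies the low nibble of the CMF byte. A standalone illustration with the common `0x78` CMF value:

    const std = @import("std");

    test "zlib CMF nibbles" {
        // 0x78: cm = 8 (deflate) in the low nibble, cinfo = 7 (32 KiB window)
        // in the high nibble; packed struct fields start at bit 0.
        const cmf: packed struct(u8) { cm: u4, cinfo: u4 } = @bitCast(@as(u8, 0x78));
        try std.testing.expectEqual(@as(u4, 8), cmf.cm);
        try std.testing.expectEqual(@as(u4, 7), cmf.cinfo);
    }
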
@@ -219,7 +214,7 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S
// lengths for code lengths
var cl_lens = [_]u4{0} ** 19;
for (0..hclen) |i| {
- cl_lens[flate.huffman.codegen_order[i]] = try d.takeBits(u3);
+ cl_lens[flate.HuffmanEncoder.codegen_order[i]] = try d.takeBits(u3);
}
var cl_dec: CodegenDecoder = .{};
try cl_dec.generate(&cl_lens);
@@ -259,52 +254,56 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S
return n;
},
.fixed_block => {
- const start = w.count;
- while (@intFromEnum(limit) > w.count - start) {
+ while (remaining > 0) {
const code = try d.readFixedCode();
switch (code) {
- 0...255 => try w.writeBytePreserve(flate.history_len, @intCast(code)),
+ 0...255 => {
+ try w.writeBytePreserve(flate.history_len, @intCast(code));
+ remaining -= 1;
+ },
256 => {
d.state = if (d.final_block) .protocol_footer else .block_header;
- return w.count - start;
+ return @intFromEnum(limit) - remaining;
},
257...285 => {
// Handles fixed block non literal (length) code.
// Length code is followed by 5 bits of distance code.
const length = try d.decodeLength(@intCast(code - 257));
const distance = try d.decodeDistance(try d.takeBitsReverseBuffered(u5));
- try writeMatch(w, length, distance);
+ remaining = try writeMatch(w, length, distance, remaining);
},
else => return error.InvalidCode,
}
}
d.state = .fixed_block;
- return w.count - start;
+ return @intFromEnum(limit) - remaining;
},
.dynamic_block => {
- // In larger archives most blocks are usually dynamic, so decompression
- // performance depends on this logic.
- const start = w.count;
- while (@intFromEnum(limit) > w.count - start) {
+ // In larger archives most blocks are usually dynamic, so
+ // decompression performance depends on this logic.
+ while (remaining > 0) {
const sym = try d.decodeSymbol(&d.lit_dec);
switch (sym.kind) {
- .literal => try w.writeBytePreserve(flate.history_len, sym.symbol),
+ .literal => {
+ try w.writeBytePreserve(flate.history_len, sym.symbol);
+ remaining -= 1;
+ },
.match => {
// Decode match backreference <length, distance>
const length = try d.decodeLength(sym.symbol);
const dsm = try d.decodeSymbol(&d.dst_dec);
const distance = try d.decodeDistance(dsm.symbol);
- try writeMatch(w, length, distance);
+ remaining = try writeMatch(w, length, distance, remaining);
},
.end_of_block => {
d.state = if (d.final_block) .protocol_footer else .block_header;
- return w.count - start;
+ return @intFromEnum(limit) - remaining;
},
}
}
d.state = .dynamic_block;
- return w.count - start;
+ return @intFromEnum(limit) - remaining;
},
.protocol_footer => {
d.alignBitsToByte();
@@ -314,7 +313,7 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S
if (try in.takeInt(u32, .little) != gzip.count) return error.WrongGzipSize;
},
.zlib => |*zlib| {
- const chksum: u32 = @byteSwap(zlib.final());
+ const chksum: u32 = @byteSwap(zlib.adler);
if (try in.takeInt(u32, .big) != chksum) return error.WrongZlibChecksum;
},
.raw => {},
@@ -328,10 +327,11 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S
/// Write a match (a back-reference into already-written data): copy `length`
/// bytes starting `distance` bytes back from the current write position.
-fn writeMatch(bw: *Writer, length: u16, distance: u16) !void {
- _ = bw;
+fn writeMatch(w: *Writer, length: u16, distance: u16, remaining: usize) !usize {
+ _ = w;
_ = length;
_ = distance;
+ _ = remaining;
@panic("TODO");
}
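
`writeMatch` is still a TODO, but the new signature threads the byte budget through: the caller passes `remaining` and receives whatever is left after the match is expanded, which is what lets `readInner` count down a single `remaining` variable instead of diffing `w.count`. The expansion itself is plain LZ77: copy `length` bytes starting `distance` bytes back in already-written output, byte by byte, because source and destination overlap whenever `distance < length`. A minimal sketch of that copy over a flat buffer (hypothetical helper, not the eventual implementation):

    const std = @import("std");

    // `hist` holds already-decompressed bytes; `pos` is the write position.
    fn expandMatch(hist: []u8, pos: usize, length: u16, distance: u16) void {
        var i: usize = 0;
        while (i < length) : (i += 1) {
            // Overlapping copy: when distance < length this repeats the
            // trailing bytes, e.g. distance 1 emits a run of a single byte.
            hist[pos + i] = hist[pos + i - distance];
        }
    }

    test expandMatch {
        var hist: [8]u8 = .{ 'a', 'b', 0, 0, 0, 0, 0, 0 };
        expandMatch(&hist, 2, 4, 2); // copy "ab" with overlap -> "ababab"
        try std.testing.expectEqualStrings("ababab", hist[0..6]);
    }
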
@@ -622,7 +622,13 @@ test "init/find" {
test "encode/decode literals" {
var codes: [flate.HuffmanEncoder.max_num_frequencies]flate.HuffmanEncoder.Code = undefined;
for (1..286) |j| { // for all different number of codes
- var enc: flate.HuffmanEncoder = .{ .codes = &codes };
+ var enc: flate.HuffmanEncoder = .{
+ .codes = &codes,
+ .freq_cache = undefined,
+ .bit_count = undefined,
+ .lns = undefined,
+ .lfs = undefined,
+ };
// create frequencies
var freq = [_]u16{0} ** 286;
freq[256] = 1; // ensure we have end of block code
@@ -857,7 +863,7 @@ test "fuzzing tests" {
const r = &decompress.reader;
if (c.err) |expected_err| {
try testing.expectError(error.ReadFailed, r.streamRemaining(&aw.writer));
- try testing.expectError(expected_err, decompress.read_err.?);
+ try testing.expectEqual(expected_err, decompress.read_err orelse return error.TestFailed);
} else {
_ = try r.streamRemaining(&aw.writer);
try testing.expectEqualStrings(c.out, aw.getWritten());
@@ -891,3 +897,148 @@ test "reading into empty buffer" {
var buf: [0]u8 = undefined;
try testing.expectEqual(0, try r.readVec(&.{&buf}));
}
+
+test "don't read past deflate stream's end" {
+ try testDecompress(.zlib, &[_]u8{
+ 0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
+ 0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
+ 0x83, 0x95, 0x0b, 0xf5,
+ }, &[_]u8{
+ 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
+ 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
+ 0x00, 0x00, 0xff, 0xff, 0xff,
+ });
+}
+
+test "zlib header" {
+ // Truncated header
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.zlib, &[_]u8{0x78}, ""),
+ );
+ // Wrong CM
+ try testing.expectError(
+ error.BadZlibHeader,
+ testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""),
+ );
+ // Wrong CINFO
+ try testing.expectError(
+ error.BadZlibHeader,
+ testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""),
+ );
+ // Wrong checksum
+ try testing.expectError(
+ error.WrongZlibChecksum,
+ testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
+ );
+ // Truncated checksum
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
+ );
+}
+
+test "gzip header" {
+ // Truncated header
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined),
+ );
+ // Wrong CM
+ try testing.expectError(
+ error.BadGzipHeader,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03,
+ }, undefined),
+ );
+
+ // Wrong checksum
+ try testing.expectError(
+ error.WrongGzipChecksum,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x00,
+ }, undefined),
+ );
+ // Truncated checksum
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
+ }, undefined),
+ );
+ // Wrong initial size
+ try testing.expectError(
+ error.WrongGzipSize,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ }, undefined),
+ );
+ // Truncated initial size field
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00,
+ }, undefined),
+ );
+
+ try testDecompress(.gzip, &[_]u8{
+ // GZIP header
+ 0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
+ // header.FHCRC (should cover entire header)
+ 0x99, 0xd6,
+ // GZIP data
+ 0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ }, "");
+}
+
+fn testDecompress(container: Container, compressed: []const u8, expected_plain: []const u8) !void {
+ var in: std.Io.Reader = .fixed(compressed);
+ var aw: std.Io.Writer.Allocating = .init(testing.allocator);
+ defer aw.deinit();
+
+ var decompress: Decompress = .init(&in, container, &.{});
+ _ = try decompress.reader.streamRemaining(&aw.writer);
+ try testing.expectEqualSlices(u8, expected_plain, aw.getWritten());
+}
+
+test "zlib should not overshoot" {
+ // Compressed zlib data with extra 4 bytes at the end.
+ const data = [_]u8{
+ 0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9,
+ 0x49, 0xcd, 0x55, 0x28, 0x4b, 0xcc, 0x53, 0x08, 0x4e, 0xce, 0x48, 0xcc, 0xcc, 0xd6, 0x51, 0x08,
+ 0xce, 0xcc, 0x4b, 0x4f, 0x2c, 0xc8, 0x2f, 0x4a, 0x55, 0x30, 0xb4, 0xb4, 0x34, 0xd5, 0xb5, 0x34,
+ 0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12,
+ };
+
+ var reader: std.Io.Reader = .fixed(&data);
+
+ var decompress: Decompress = .init(&reader, .zlib, &.{});
+ var out: [128]u8 = undefined;
+
+ {
+ const n = try decompress.reader.readSliceShort(out[0..]);
+
+ // Expected decompressed data
+ try std.testing.expectEqual(46, n);
+ try std.testing.expectEqualStrings("Copyright Willem van Schaik, Singapore 1995-96", out[0..n]);
+
+ // The decompressor must not overshoot the underlying reader;
+ // it leaves the reader at the end of the compressed data chunk.
+ try std.testing.expectEqual(data.len - 4, reader.seek);
+ // TODO what was this testing, exactly?
+ //try std.testing.expectEqual(0, decompress.unreadBytes());
+ }
+
+ // 4 bytes after compressed chunk are available in reader.
+ const n = try reader.readSliceShort(out[0..]);
+ try std.testing.expectEqual(n, 4);
+ try std.testing.expectEqualSlices(u8, data[data.len - 4 .. data.len], out[0..n]);
+}
lib/std/compress/flate/HuffmanEncoder.zig
@@ -135,7 +135,7 @@ fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize)
// of ancestors of the rightmost node at level i.
// leaf_counts[i][j] is the number of literals at the left
// of the level j ancestor.
- var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(0);
+ var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(@splat(0));
{
var level = @as(u32, 1);
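
`@splat` fills one array (or vector) level per call, so zero-filling the 2-D `leaf_counts` needs the nested form: the inner `@splat(0)` produces one row, the outer one repeats it. A standalone check of the same pattern:

    const std = @import("std");

    test "nested @splat zero-fills a 2-D array" {
        const m: [3][4]u32 = @splat(@splat(0));
        try std.testing.expectEqual(@as(u32, 0), m[2][3]);
    }
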
@@ -389,7 +389,8 @@ pub fn huffmanDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder
}
test "generate a Huffman code for the fixed literal table specific to Deflate" {
- const enc = fixedLiteralEncoder();
+ var codes: [max_num_frequencies]Code = undefined;
+ const enc: HuffmanEncoder = .fixedLiteralEncoder(&codes);
for (enc.codes) |c| {
switch (c.len) {
7 => {
lib/std/compress/flate/Lookup.zig
@@ -6,14 +6,19 @@ const std = @import("std");
const testing = std.testing;
const expect = testing.expect;
const flate = @import("../flate.zig");
+const Token = @import("Token.zig");
const Lookup = @This();
const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761
const chain_len = 2 * flate.history_len;
+pub const bits = 15;
+pub const len = 1 << bits;
+pub const shift = 32 - bits;
+
// Maps hash => first position
-head: [flate.lookup.len]u16 = [_]u16{0} ** flate.lookup.len,
+head: [len]u16 = [_]u16{0} ** len,
// Maps position => previous positions for the same hash value
chain: [chain_len]u16 = [_]u16{0} ** (chain_len),
@@ -52,8 +57,8 @@ pub fn slide(self: *Lookup, n: u16) void {
// Add `len` 4 bytes hashes from `data` into lookup.
// Position of the first byte is `pos`.
-pub fn bulkAdd(self: *Lookup, data: []const u8, len: u16, pos: u16) void {
- if (len == 0 or data.len < flate.match.min_length) {
+pub fn bulkAdd(self: *Lookup, data: []const u8, length: u16, pos: u16) void {
+ if (length == 0 or data.len < Token.min_length) {
return;
}
var hb =
@@ -64,7 +69,7 @@ pub fn bulkAdd(self: *Lookup, data: []const u8, len: u16, pos: u16) void {
_ = self.set(hashu(hb), pos);
var i = pos;
- for (4..@min(len + 3, data.len)) |j| {
+ for (4..@min(length + 3, data.len)) |j| {
hb = (hb << 8) | @as(u32, data[j]);
i += 1;
_ = self.set(hashu(hb), i);
@@ -80,7 +85,7 @@ fn hash(b: *const [4]u8) u32 {
}
fn hashu(v: u32) u32 {
- return @intCast((v *% prime4) >> flate.lookup.shift);
+ return @intCast((v *% prime4) >> shift);
}
test add {
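
The constants moved into Lookup.zig implement multiplicative (Fibonacci-style) hashing: multiply the 4-byte window by a large odd constant, then keep the top `bits` bits as the table index. A standalone illustration with the same constants:

    const std = @import("std");

    test "multiplicative hash lands in table range" {
        const prime4: u32 = 0x9E3779B1; // same odd constant as Lookup.zig
        const bits = 15;
        const shift = 32 - bits;
        const h = (@as(u32, 0x01020304) *% prime4) >> shift;
        try std.testing.expect(h < (1 << bits));
    }
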
lib/std/compress/flate/Token.zig
@@ -6,7 +6,6 @@ const std = @import("std");
const assert = std.debug.assert;
const print = std.debug.print;
const expect = std.testing.expect;
-const match = std.compress.flate.match;
const Token = @This();
@@ -21,16 +20,23 @@ dist: u15 = 0,
len_lit: u8 = 0,
kind: Kind = .literal,
+pub const base_length = 3; // smallest match length per the RFC section 3.2.5
+pub const min_length = 4; // min length used in this algorithm
+pub const max_length = 258;
+
+pub const min_distance = 1;
+pub const max_distance = std.compress.flate.history_len;
+
pub fn literal(t: Token) u8 {
return t.len_lit;
}
pub fn distance(t: Token) u16 {
- return @as(u16, t.dist) + match.min_distance;
+ return @as(u16, t.dist) + min_distance;
}
pub fn length(t: Token) u16 {
- return @as(u16, t.len_lit) + match.base_length;
+ return @as(u16, t.len_lit) + base_length;
}
pub fn initLiteral(lit: u8) Token {
@@ -40,12 +46,12 @@ pub fn initLiteral(lit: u8) Token {
// distance range 1 - 32768, stored in dist as 0 - 32767 (u15)
// length range 3 - 258, stored in len_lit as 0 - 255 (u8)
pub fn initMatch(dist: u16, len: u16) Token {
- assert(len >= match.min_length and len <= match.max_length);
- assert(dist >= match.min_distance and dist <= match.max_distance);
+ assert(len >= min_length and len <= max_length);
+ assert(dist >= min_distance and dist <= max_distance);
return .{
.kind = .match,
- .dist = @intCast(dist - match.min_distance),
- .len_lit = @intCast(len - match.base_length),
+ .dist = @intCast(dist - min_distance),
+ .len_lit = @intCast(len - base_length),
};
}
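
The packed ranges line up exactly: distances 1-32768 shift down by `min_distance` to fit `dist: u15`, and lengths 3-258 shift down by `base_length` to fit `len_lit: u8`. A round-trip check at the extremes, assuming `Token` and `std` are in scope as in this file:

    test "token packing round-trips the extremes" {
        const t = Token.initMatch(32768, 258); // stored as dist=32767 (u15), len_lit=255 (u8)
        try std.testing.expectEqual(@as(u16, 32768), t.distance());
        try std.testing.expectEqual(@as(u16, 258), t.length());
    }
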
lib/std/compress/flate.zig
@@ -1,7 +1,23 @@
-const builtin = @import("builtin");
const std = @import("../std.zig");
-const testing = std.testing;
-const Writer = std.Io.Writer;
+
+/// When decompressing, the output buffer is used as the history window, so
+/// less than this may result in failure to decompress streams that were
+/// compressed with a larger window.
+pub const max_window_len = history_len * 2;
+
+pub const history_len = 32768;
+
+/// Deflate is a lossless data compression file format that uses a combination
+/// of LZ77 and Huffman coding.
+pub const Compress = @import("flate/Compress.zig");
+
+/// Inflate is the decoding process that takes a Deflate bitstream for
+/// decompression and correctly produces the original full-size data or file.
+pub const Decompress = @import("flate/Decompress.zig");
+
+/// Compression without Lempel-Ziv match searching. Faster compression, less
+/// memory requirements but bigger compressed sizes.
+pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");
/// Container of the deflate bit stream body. Container adds header before
/// deflate bit stream and footer after. It can be gzip, zlib or raw (no header,
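
A note on the constants moved to the top of the file: `max_window_len` is now expressed as `history_len * 2`, which is the same 64 KiB as the old `1 << 16` but makes the intent explicit, room for a full 32 KiB back-reference window plus as much again of pending output. A hypothetical decompression call site sized that way, with `compressed_bytes` standing in for real input:

    var window: [flate.max_window_len]u8 = undefined;
    var in: std.Io.Reader = .fixed(compressed_bytes);
    var decompress: flate.Decompress = .init(&in, .gzip, &window);
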
@@ -13,7 +29,6 @@ const Writer = std.Io.Writer;
/// Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes
/// crc32 checksum and 4 bytes of uncompressed data length.
///
-///
/// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
/// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
pub const Container = enum {
@@ -84,7 +99,7 @@ pub const Container = enum {
pub fn init(containter: Container) Hasher {
return switch (containter) {
.gzip => .{ .gzip = .{} },
- .zlib => .{ .zlib = .init() },
+ .zlib => .{ .zlib = .{} },
.raw => .raw,
};
}
@@ -107,7 +122,7 @@ pub const Container = enum {
}
}
- pub fn writeFooter(hasher: *Hasher, writer: *Writer) Writer.Error!void {
+ pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
var bits: [4]u8 = undefined;
switch (hasher.*) {
.gzip => |*gzip| {
@@ -135,484 +150,6 @@ pub const Container = enum {
};
};
-/// When decompressing, the output buffer is used as the history window, so
-/// less than this may result in failure to decompress streams that were
-/// compressed with a larger window.
-pub const max_window_len = 1 << 16;
-
-/// Deflate is a lossless data compression file format that uses a combination
-/// of LZ77 and Huffman coding.
-pub const Compress = @import("flate/Compress.zig");
-
-/// Inflate is the decoding process that takes a Deflate bitstream for
-/// decompression and correctly produces the original full-size data or file.
-pub const Decompress = @import("flate/Decompress.zig");
-
-/// Compression without Lempel-Ziv match searching. Faster compression, less
-/// memory requirements but bigger compressed sizes.
-pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");
-
-test "compress/decompress" {
- const print = std.debug.print;
- var cmp_buf: [64 * 1024]u8 = undefined; // compressed data buffer
- var dcm_buf: [64 * 1024]u8 = undefined; // decompressed data buffer
-
- const levels = [_]Compress.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
- const cases = [_]struct {
- data: []const u8, // uncompressed content
- // compressed data sizes per level 4-9
- gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len,
- huffman_only_size: usize = 0,
- store_size: usize = 0,
- }{
- .{
- .data = @embedFile("flate/testdata/rfc1951.txt"),
- .gzip_sizes = [_]usize{ 11513, 11217, 11139, 11126, 11122, 11119 },
- .huffman_only_size = 20287,
- .store_size = 36967,
- },
- .{
- .data = @embedFile("flate/testdata/fuzz/roundtrip1.input"),
- .gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 },
- .huffman_only_size = 393,
- .store_size = 393,
- },
- .{
- .data = @embedFile("flate/testdata/fuzz/roundtrip2.input"),
- .gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 },
- .huffman_only_size = 394,
- .store_size = 394,
- },
- .{
- .data = @embedFile("flate/testdata/fuzz/deflate-stream.expect"),
- .gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 },
- .huffman_only_size = 498,
- .store_size = 747,
- },
- };
-
- for (cases, 0..) |case, case_no| {
- const data = case.data;
-
- for (levels, 0..) |level, i| {
- for (Container.list) |container| {
- var compressed_size: usize = if (case.gzip_sizes[i] > 0)
- case.gzip_sizes[i] - Container.gzip.size() + container.size()
- else
- 0;
-
- // compress original stream to compressed stream
- {
- var compressed: Writer = .fixed(&cmp_buf);
- var compress: Compress = .init(&compressed, &.{}, .{ .container = .raw, .level = level });
- try compress.writer.writeAll(data);
- try compress.end();
-
- if (compressed_size == 0) {
- if (container == .gzip)
- print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos });
- compressed_size = compressed.end;
- }
- try testing.expectEqual(compressed_size, compressed.end);
- }
- // decompress compressed stream to decompressed stream
- {
- var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]);
- var decompressed: Writer = .fixed(&dcm_buf);
- var decompress: Decompress = .init(&compressed, container, &.{});
- _ = try decompress.reader.streamRemaining(&decompressed);
- try testing.expectEqualSlices(u8, data, decompressed.buffered());
- }
-
- // compressor writer interface
- {
- var compressed: Writer = .fixed(&cmp_buf);
- var cmp = try Compress.init(&compressed, &.{}, .{
- .level = level,
- .container = container,
- });
- var cmp_wrt = cmp.writer();
- try cmp_wrt.writeAll(data);
- try cmp.finish();
-
- try testing.expectEqual(compressed_size, compressed.pos);
- }
- // decompressor reader interface
- {
- var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]);
- var decompress: Decompress = .init(&compressed, container, &.{});
- const n = try decompress.reader.readSliceShort(&dcm_buf);
- try testing.expectEqual(data.len, n);
- try testing.expectEqualSlices(u8, data, dcm_buf[0..n]);
- }
- }
- }
- // huffman only compression
- {
- for (Container.list) |container| {
- var compressed_size: usize = if (case.huffman_only_size > 0)
- case.huffman_only_size - Container.gzip.size() + container.size()
- else
- 0;
-
- // compress original stream to compressed stream
- {
- var original: std.Io.Reader = .fixed(data);
- var compressed: Writer = .fixed(&cmp_buf);
- var cmp = try Compress.Huffman.init(container, &compressed);
- try cmp.compress(original.reader());
- try cmp.finish();
- if (compressed_size == 0) {
- if (container == .gzip)
- print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos });
- compressed_size = compressed.pos;
- }
- try testing.expectEqual(compressed_size, compressed.pos);
- }
- // decompress compressed stream to decompressed stream
- {
- var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]);
- var decompress: Decompress = .init(&compressed, container, &.{});
- var decompressed: Writer = .fixed(&dcm_buf);
- _ = try decompress.reader.streamRemaining(&decompressed);
- try testing.expectEqualSlices(u8, data, decompressed.buffered());
- }
- }
- }
-
- // store only
- {
- for (Container.list) |container| {
- var compressed_size: usize = if (case.store_size > 0)
- case.store_size - Container.gzip.size() + container.size()
- else
- 0;
-
- // compress original stream to compressed stream
- {
- var original: std.Io.Reader = .fixed(data);
- var compressed: Writer = .fixed(&cmp_buf);
- var cmp = try Compress.SimpleCompressor(.store, container).init(&compressed);
- try cmp.compress(original.reader());
- try cmp.finish();
- if (compressed_size == 0) {
- if (container == .gzip)
- print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos });
- compressed_size = compressed.pos;
- }
-
- try testing.expectEqual(compressed_size, compressed.pos);
- }
- // decompress compressed stream to decompressed stream
- {
- var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]);
- var decompress: Decompress = .init(&compressed, container, &.{});
- var decompressed: Writer = .fixed(&dcm_buf);
- _ = try decompress.reader.streamRemaining(&decompressed);
- try testing.expectEqualSlices(u8, data, decompressed.buffered());
- }
- }
- }
- }
-}
-
-fn testDecompress(container: Container, compressed: []const u8, expected_plain: []const u8) !void {
- var in: std.Io.Reader = .fixed(compressed);
- var aw: std.Io.Writer.Allocating = .init(testing.allocator);
- defer aw.deinit();
-
- var decompress: Decompress = .init(&in, container, &.{});
- _ = try decompress.reader.streamRemaining(&aw.writer);
- try testing.expectEqualSlices(u8, expected_plain, aw.getWritten());
-}
-
-test "don't read past deflate stream's end" {
- try testDecompress(.zlib, &[_]u8{
- 0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
- 0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
- 0x83, 0x95, 0x0b, 0xf5,
- }, &[_]u8{
- 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
- 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
- 0x00, 0x00, 0xff, 0xff, 0xff,
- });
-}
-
-test "zlib header" {
- // Truncated header
- try testing.expectError(
- error.EndOfStream,
- testDecompress(.zlib, &[_]u8{0x78}, ""),
- );
- // Wrong CM
- try testing.expectError(
- error.BadZlibHeader,
- testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""),
- );
- // Wrong CINFO
- try testing.expectError(
- error.BadZlibHeader,
- testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""),
- );
- // Wrong checksum
- try testing.expectError(
- error.WrongZlibChecksum,
- testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
- );
- // Truncated checksum
- try testing.expectError(
- error.EndOfStream,
- testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
- );
-}
-
-test "gzip header" {
- // Truncated header
- try testing.expectError(
- error.EndOfStream,
- testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined),
- );
- // Wrong CM
- try testing.expectError(
- error.BadGzipHeader,
- testDecompress(.gzip, &[_]u8{
- 0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03,
- }, undefined),
- );
-
- // Wrong checksum
- try testing.expectError(
- error.WrongGzipChecksum,
- testDecompress(.gzip, &[_]u8{
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x00, 0x00, 0x00,
- }, undefined),
- );
- // Truncated checksum
- try testing.expectError(
- error.EndOfStream,
- testDecompress(.gzip, &[_]u8{
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
- }, undefined),
- );
- // Wrong initial size
- try testing.expectError(
- error.WrongGzipSize,
- testDecompress(.gzip, &[_]u8{
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01,
- }, undefined),
- );
- // Truncated initial size field
- try testing.expectError(
- error.EndOfStream,
- testDecompress(.gzip, &[_]u8{
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00,
- }, undefined),
- );
-
- try testDecompress(.gzip, &[_]u8{
- // GZIP header
- 0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
- // header.FHCRC (should cover entire header)
- 0x99, 0xd6,
- // GZIP data
- 0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- }, "");
-}
-
-test "public interface" {
- const plain_data_buf = [_]u8{ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a };
-
- // deflate final stored block, header + plain (stored) data
- const deflate_block = [_]u8{
- 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen
- } ++ plain_data_buf;
-
- const plain_data: []const u8 = &plain_data_buf;
- const gzip_data: []const u8 = &deflate_block;
-
- //// gzip header/footer + deflate block
- //const gzip_data =
- // [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 } ++ // gzip header (10 bytes)
- // deflate_block ++
- // [_]u8{ 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00 }; // gzip footer checksum (4 byte), size (4 bytes)
-
- //// zlib header/footer + deflate block
- //const zlib_data = [_]u8{ 0x78, 0b10_0_11100 } ++ // zlib header (2 bytes)}
- // deflate_block ++
- // [_]u8{ 0x1c, 0xf2, 0x04, 0x47 }; // zlib footer: checksum
-
- // TODO
- //const gzip = @import("gzip.zig");
- //const zlib = @import("zlib.zig");
-
- var buffer1: [64]u8 = undefined;
- var buffer2: [64]u8 = undefined;
-
- // decompress
- {
- var plain: Writer = .fixed(&buffer2);
- var in: std.Io.Reader = .fixed(gzip_data);
- var d: Decompress = .init(&in, .raw, &.{});
- _ = try d.reader.streamRemaining(&plain);
- try testing.expectEqualSlices(u8, plain_data, plain.buffered());
- }
-
- // compress/decompress
- {
- var plain: Writer = .fixed(&buffer2);
- var compressed: Writer = .fixed(&buffer1);
-
- var cmp: Compress = .init(&compressed, &.{}, .{});
- try cmp.writer.writeAll(plain_data);
- try cmp.end();
-
- var r: std.Io.Reader = .fixed(&buffer1);
- var d: Decompress = .init(&r, .raw, &.{});
- _ = try d.reader.streamRemaining(&plain);
- try testing.expectEqualSlices(u8, plain_data, plain.buffered());
- }
-
- // compressor/decompressor
- {
- var plain: Writer = .fixed(&buffer2);
- var compressed: Writer = .fixed(&buffer1);
-
- var cmp: Compress = .init(&compressed, &.{}, .{});
- try cmp.writer.writeAll(plain_data);
- try cmp.end();
-
- var r: std.Io.Reader = .fixed(&buffer1);
- var dcp = Decompress(&r);
- try dcp.decompress(&plain);
- try testing.expectEqualSlices(u8, plain_data, plain.buffered());
- }
-
- // huffman
- {
- // huffman compress/decompress
- {
- var plain: Writer = .fixed(&buffer2);
- var compressed: Writer = .fixed(&buffer1);
-
- var in: std.Io.Reader = .fixed(plain_data);
- try HuffmanEncoder.compress(&in, &compressed);
-
- var r: std.Io.Reader = .fixed(&buffer1);
- var d: Decompress = .init(&r, .raw, &.{});
- _ = try d.reader.streamRemaining(&plain);
- try testing.expectEqualSlices(u8, plain_data, plain.buffered());
- }
-
- // huffman compressor/decompressor
- {
- var plain: Writer = .fixed(&buffer2);
- var compressed: Writer = .fixed(&buffer1);
-
- var in: std.Io.Reader = .fixed(plain_data);
- var cmp = try HuffmanEncoder.Compressor(&compressed);
- try cmp.compress(&in);
- try cmp.finish();
-
- var r: std.Io.Reader = .fixed(&buffer1);
- var d: Decompress = .init(&r, .raw, &.{});
- _ = try d.reader.streamRemaining(&plain);
- try testing.expectEqualSlices(u8, plain_data, plain.buffered());
- }
- }
-
- // TODO
- //{
- // // store compress/decompress
- // {
- // var plain: Writer = .fixed(&buffer2);
- // var compressed: Writer = .fixed(&buffer1);
-
- // var in: std.Io.Reader = .fixed(plain_data);
- // try store.compress(&in, &compressed);
-
- // var r: std.Io.Reader = .fixed(&buffer1);
- // var d: Decompress = .init(&r, .raw, &.{});
- // _ = try d.reader.streamRemaining(&plain);
- // try testing.expectEqualSlices(u8, plain_data, plain.buffered());
- // }
-
- // // store compressor/decompressor
- // {
- // var plain: Writer = .fixed(&buffer2);
- // var compressed: Writer = .fixed(&buffer1);
-
- // var in: std.Io.Reader = .fixed(plain_data);
- // var cmp = try store.compressor(&compressed);
- // try cmp.compress(&in);
- // try cmp.finish();
-
- // var r: std.Io.Reader = .fixed(&buffer1);
- // var d: Decompress = .init(&r, .raw, &.{});
- // _ = try d.reader.streamRemaining(&plain);
- // try testing.expectEqualSlices(u8, plain_data, plain.buffered());
- // }
- //}
-}
-
-pub const match = struct {
- pub const base_length = 3; // smallest match length per the RFC section 3.2.5
- pub const min_length = 4; // min length used in this algorithm
- pub const max_length = 258;
-
- pub const min_distance = 1;
- pub const max_distance = 32768;
-};
-
-pub const history_len = match.max_distance;
-
-pub const lookup = struct {
- pub const bits = 15;
- pub const len = 1 << bits;
- pub const shift = 32 - bits;
-};
-
-test "zlib should not overshoot" {
- // Compressed zlib data with extra 4 bytes at the end.
- const data = [_]u8{
- 0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9,
- 0x49, 0xcd, 0x55, 0x28, 0x4b, 0xcc, 0x53, 0x08, 0x4e, 0xce, 0x48, 0xcc, 0xcc, 0xd6, 0x51, 0x08,
- 0xce, 0xcc, 0x4b, 0x4f, 0x2c, 0xc8, 0x2f, 0x4a, 0x55, 0x30, 0xb4, 0xb4, 0x34, 0xd5, 0xb5, 0x34,
- 0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12,
- };
-
- var reader: std.Io.Reader = .fixed(&data);
-
- var decompress: Decompress = .init(&reader, .zlib, &.{});
- var out: [128]u8 = undefined;
-
- {
- const n = try decompress.reader.readSliceShort(out[0..]);
-
- // Expected decompressed data
- try std.testing.expectEqual(46, n);
- try std.testing.expectEqualStrings("Copyright Willem van Schaik, Singapore 1995-96", out[0..n]);
-
- // The decompressor must not overshoot the underlying reader;
- // it leaves the reader at the end of the compressed data chunk.
- try std.testing.expectEqual(data.len - 4, reader.seek);
- // TODO what was this testing, exactly?
- //try std.testing.expectEqual(0, decompress.unreadBytes());
- }
-
- // 4 bytes after compressed chunk are available in reader.
- const n = try reader.readSliceShort(out[0..]);
- try std.testing.expectEqual(n, 4);
- try std.testing.expectEqualSlices(u8, data[data.len - 4 .. data.len], out[0..n]);
-}
-
test {
_ = HuffmanEncoder;
_ = Compress;