Commit f2508abfa6

Igor Anić <igor.anic@gmail.com>
2024-03-02 23:23:23
flate: use 4 bytes lookahead for zlib
That ensures no bytes are left in the BitReader buffer after we reach the end of the stream.
1 parent 7112816
Changed files (5)
lib/std/compress/flate/container.zig
@@ -154,6 +154,7 @@ pub const Container = enum {
     pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: anytype) !void {
         switch (wrap) {
             .gzip => {
+                try reader.fill(0);
                 if (try reader.read(u32) != hasher.chksum()) return error.WrongGzipChecksum;
                 if (try reader.read(u32) != hasher.bytesRead()) return error.WrongGzipSize;
             },
lib/std/compress/flate/inflate.zig
@@ -3,7 +3,7 @@ const assert = std.debug.assert;
 const testing = std.testing;
 
 const hfd = @import("huffman_decoder.zig");
-const BitReader = @import("bit_reader.zig").BitReader64;
+const BitReader = @import("bit_reader.zig").BitReader;
 const CircularBuffer = @import("CircularBuffer.zig");
 const Container = @import("container.zig").Container;
 const Token = @import("Token.zig");
@@ -17,8 +17,16 @@ pub fn decompress(comptime container: Container, reader: anytype, writer: anytyp
 }
 
 /// Inflate decompressor for the reader type.
-pub fn decompressor(comptime container: Container, reader: anytype) Inflate(container, @TypeOf(reader)) {
-    return Inflate(container, @TypeOf(reader)).init(reader);
+pub fn decompressor(comptime container: Container, reader: anytype) Decompressor(container, @TypeOf(reader)) {
+    return Decompressor(container, @TypeOf(reader)).init(reader);
+}
+
+pub fn Decompressor(comptime container: Container, comptime ReaderType: type) type {
+    // zlib has a 4-byte footer, so a 4-byte lookahead ensures that we will not overshoot.
+    // gzip has an 8-byte footer, so we will not overshoot even with 8 bytes of lookahead.
+    // For raw deflate there is always a possibility of overshoot, so we use an 8-byte lookahead.
+    const lookahead: type = if (container == .zlib) u32 else u64;
+    return Inflate(container, lookahead, ReaderType);
 }
 
 /// Inflate decompresses deflate bit stream. Reads compressed data from reader
@@ -40,9 +48,12 @@ pub fn decompressor(comptime container: Container, reader: anytype) Inflate(cont
 ///   * 64K for history (CircularBuffer)
 ///   * ~10K huffman decoders (Literal and DistanceDecoder)
 ///
-pub fn Inflate(comptime container: Container, comptime ReaderType: type) type {
+pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comptime ReaderType: type) type {
+    assert(LookaheadType == u32 or LookaheadType == u64);
+    const BitReaderType = BitReader(LookaheadType, ReaderType);
+
     return struct {
-        const BitReaderType = BitReader(ReaderType);
+        //const BitReaderType = BitReader(ReaderType);
         const F = BitReaderType.flag;
 
         bits: BitReaderType = .{},
@@ -219,9 +230,14 @@ pub fn Inflate(comptime container: Container, comptime ReaderType: type) type {
                 switch (sym.kind) {
                     .literal => self.hist.write(sym.symbol),
                     .match => { // Decode match backreference <length, distance>
-                        try self.bits.fill(5 + 15 + 13); // so we can use buffered reads
+                        // fill so we can use buffered reads
+                        if (LookaheadType == u32)
+                            try self.bits.fill(5 + 15)
+                        else
+                            try self.bits.fill(5 + 15 + 13);
                         const length = try self.decodeLength(sym.symbol);
                         const dsm = try self.decodeSymbol(&self.dst_dec);
+                        if (LookaheadType == u32) try self.bits.fill(13);
                         const distance = try self.decodeDistance(dsm.symbol);
                         try self.hist.writeMatch(length, distance);
                     },
lib/std/compress/flate.zig
@@ -13,7 +13,7 @@ pub fn decompress(reader: anytype, writer: anytype) !void {
 
 /// Decompressor type
 pub fn Decompressor(comptime ReaderType: type) type {
-    return inflate.Inflate(.raw, ReaderType);
+    return inflate.Decompressor(.raw, ReaderType);
 }
 
 /// Create Decompressor which will read compressed data from reader.
lib/std/compress/gzip.zig
@@ -8,7 +8,7 @@ pub fn decompress(reader: anytype, writer: anytype) !void {
 
 /// Decompressor type
 pub fn Decompressor(comptime ReaderType: type) type {
-    return inflate.Inflate(.gzip, ReaderType);
+    return inflate.Decompressor(.gzip, ReaderType);
 }
 
 /// Create Decompressor which will read compressed data from reader.
lib/std/compress/zlib.zig
@@ -8,7 +8,7 @@ pub fn decompress(reader: anytype, writer: anytype) !void {
 
 /// Decompressor type
 pub fn Decompressor(comptime ReaderType: type) type {
-    return inflate.Inflate(.zlib, ReaderType);
+    return inflate.Decompressor(.zlib, ReaderType);
 }
 
 /// Create Decompressor which will read compressed data from reader.