Commit `89f9c5cb37`

dweiller <4678790+dweiller@users.noreplay.github.com>

2023-02-02 10:49:11

std.compress.zstandard: improve doc comments

master

1 parent 7e27556

Changed files (2)

lib

std

compress

zstandard

decode

block.zig

decompress.zig

@@ -23,7 +23,6 @@ pub const Error = error{
     ReservedBlock,
     MalformedRleBlock,
     MalformedCompressedBlock,
-    EndOfStream,
 };
 
 pub const DecodeState = struct {
@@ -92,11 +91,17 @@ pub const DecodeState = struct {
     /// stream and Huffman tree from `literals` and reads the FSE tables from
     /// `source`.
     ///
-    /// Errors:
-    ///   - returns `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
-    ///     first byte does not have any bits set.
-    ///   - returns `error.TreelessLiteralsFirst` `literals` is a treeless literals section
-    ///     and the decode state does not have a Huffman tree from a previous block.
+    /// Errors returned:
+    ///   - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
+    ///     first byte does not have any bits set
+    ///   - `error.TreelessLiteralsFirst` if `literals` is a treeless literals
+    ///     section and the decode state does not have a Huffman tree from a
+    ///     previous block
+    ///   - `error.RepeatModeFirst` on the first call if one of the sequence FSE
+    ///     tables is set to repeat mode
+    ///   - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy
+    ///     log
+    ///   - `error.MalformedFseTable` if there are errors decoding an FSE table
+    ///   - `error.EndOfStream` if `source` ends before all FSE tables are read
     pub fn prepare(
         self: *DecodeState,
         source: anytype,
@@ -132,8 +137,10 @@ pub const DecodeState = struct {
         }
     }
 
-    /// Read initial FSE states for sequence decoding. Returns `error.EndOfStream`
-    /// if `bit_reader` does not contain enough bits.
+    /// Read initial FSE states for sequence decoding.
+    ///
+    /// Errors returned:
+    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits.
     pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void {
         self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log);
         self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log);
@@ -308,13 +315,19 @@ pub const DecodeState = struct {
     } || DecodeLiteralsError;
 
     /// Decode one sequence from `bit_reader` into `dest`, written starting at
-    /// `write_pos` and update FSE states if `last_sequence` is `false`. Returns
-    /// `error.MalformedSequence` error if the decompressed sequence would be longer
-    /// than `sequence_size_limit` or the sequence's offset is too large; returns
-    /// `error.EndOfStream` if `bit_reader` does not contain enough bits; returns
-    /// `error.UnexpectedEndOfLiteralStream` if the decoder state's literal streams
-    /// do not contain enough literals for the sequence (this may mean the literal
-    /// stream or the sequence is malformed).
+    /// `write_pos` and update FSE states if `last_sequence` is `false`.
+    /// `prepare()` must be called for the block before attempting to decode
+    /// sequences.
+    ///
+    /// Errors returned:
+    ///   - `error.MalformedSequence` if the decompressed sequence would be
+    ///     longer than `sequence_size_limit` or the sequence's offset is too
+    ///     large
+    ///   - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal
+    ///     streams do not contain enough literals for the sequence (this may
+    ///     mean the literal stream or the sequence is malformed).
+    ///   - `error.OffsetCodeTooLarge` if an invalid offset code is found
+    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits
     pub fn decodeSequenceSlice(
         self: *DecodeState,
         dest: []u8,
@@ -336,7 +349,8 @@ pub const DecodeState = struct {
         return sequence_length;
     }
 
-    /// Decode one sequence from `bit_reader` into `dest`; see `decodeSequenceSlice`.
+    /// Decode one sequence from `bit_reader` into `dest`; see
+    /// `decodeSequenceSlice`.
     pub fn decodeSequenceRingBuffer(
         self: *DecodeState,
         dest: *RingBuffer,
@@ -364,7 +378,7 @@ pub const DecodeState = struct {
         try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
     }
 
-    pub fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
+    fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
         try self.literal_stream_reader.init(bytes);
     }
 
@@ -393,12 +407,14 @@ pub const DecodeState = struct {
         PrefixNotFound,
     } || LiteralBitsError;
 
-    /// Decode `len` bytes of literals into `dest`. `literals` should be the
-    /// `LiteralsSection` that was passed to `prepare()`. Returns
-    /// `error.MalformedLiteralsLength` if the number of literal bytes decoded by
-    /// `self` plus `len` is greater than the regenerated size of `literals`.
-    /// Returns `error.UnexpectedEndOfLiteralStream` and `error.PrefixNotFound` if
-    /// there are problems decoding Huffman compressed literals.
+    /// Decode `len` bytes of literals into `dest`.
+    ///
+    /// Errors returned:
+    ///   - `error.MalformedLiteralsLength` if the number of literal bytes
+    ///     decoded by `self` plus `len` is greater than the regenerated size of
+    ///     `literals`
+    ///   - `error.UnexpectedEndOfLiteralStream` and `error.PrefixNotFound` if
+    ///     there are problems decoding Huffman compressed literals
     pub fn decodeLiteralsSlice(
         self: *DecodeState,
         dest: []u8,
@@ -561,7 +577,6 @@ pub const DecodeState = struct {
 ///   - `error.MalformedRleBlock` if the block is an RLE block and `src.len < 1`
 ///   - `error.MalformedCompressedBlock` if there are errors decoding a
 ///     compressed block
-///   - `error.EndOfStream` if the sequence bit stream ends unexpectedly
 pub fn decodeBlock(
     dest: []u8,
     src: []const u8,
@@ -738,7 +753,8 @@ pub fn decodeBlockRingBuffer(
 /// `error.SequenceBufferTooSmall` are returned (the maximum block size is an
 /// upper bound for the size of both buffers). See `decodeBlock`
 /// and `decodeBlockRingBuffer` for functions that can decode a block without
-/// these extra copies.
+/// these extra copies. `error.EndOfStream` is returned if `source` does not
+/// contain enough bytes.
 pub fn decodeBlockReader(
     dest: *RingBuffer,
     source: anytype,
@@ -820,6 +836,10 @@ pub fn decodeBlockHeader(src: *const [3]u8) frame.ZStandard.Block.Header {
     };
 }
 
+/// Decode the header of a block.
+///
+/// Errors returned:
+///   - `error.EndOfStream` if `src.len < 3`
 pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.ZStandard.Block.Header {
     if (src.len < 3) return error.EndOfStream;
     return decodeBlockHeader(src[0..3]);
@@ -828,9 +848,14 @@ pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.ZStandar
 /// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
 /// number of bytes the section uses.
 ///
-/// Errors:
-///   - returns `error.MalformedLiteralsHeader` if the header is invalid
-///   - returns `error.MalformedLiteralsSection` if there are errors decoding
+/// Errors returned:
+///   - `error.MalformedLiteralsHeader` if the header is invalid
+///   - `error.MalformedLiteralsSection` if there are decoding errors
+///   - `error.MalformedAccuracyLog` if compressed literals have an invalid
+///     accuracy log
+///   - `error.MalformedFseTable` if compressed literals have invalid FSE table
+///   - `error.MalformedHuffmanTree` if there are errors decoding a Huffman tree
+///   - `error.EndOfStream` if there are not enough bytes in `src`
 pub fn decodeLiteralsSectionSlice(
     src: []const u8,
     consumed_count: *usize,
@@ -886,11 +911,7 @@ pub fn decodeLiteralsSectionSlice(
 }
 
 /// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
-/// number of bytes the section uses.
-///
-/// Errors:
-///   - returns `error.MalformedLiteralsHeader` if the header is invalid
-///   - returns `error.MalformedLiteralsSection` if there are errors decoding
+/// number of bytes the section uses. See `decodeLiteralsSectionSlice()`.
 pub fn decodeLiteralsSection(
     source: anytype,
     buffer: []u8,
@@ -961,6 +982,9 @@ fn decodeStreams(size_format: u2, stream_data: []const u8) !LiteralsSection.Stre
 }
 
 /// Decode a literals section header.
+///
+/// Errors returned:
+///   - `error.EndOfStream` if there are not enough bytes in `source`
 pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
     const byte0 = try source.readByte();
     const block_type = @intToEnum(LiteralsSection.BlockType, byte0 & 0b11);
@@ -1011,9 +1035,9 @@ pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
 
 /// Decode a sequences section header.
 ///
-/// Errors:
-///   - returns `error.ReservedBitSet` is the reserved bit is set
-///   - returns `error.MalformedSequencesHeader` if the header is invalid
+/// Errors returned:
+///   - `error.ReservedBitSet` if the reserved bit is set
+///   - `error.EndOfStream` if there are not enough bytes in `source`
 pub fn decodeSequencesHeader(
     source: anytype,
 ) !SequencesSection.Header {

@@ -25,11 +25,12 @@ pub fn isSkippableMagic(magic: u32) bool {
 
 /// Returns the kind of frame at the beginning of `src`.
 ///
-/// Errors:
-///   - returns `error.BadMagic` if `source` begins with bytes not equal to the
+/// Errors returned:
+///   - `error.BadMagic` if `source` begins with bytes not equal to the
 ///     Zstandard frame magic number, or outside the range of magic numbers for
 ///     skippable frames.
-pub fn decodeFrameType(source: anytype) !frame.Kind {
+///   - `error.EndOfStream` if `source` contains fewer than 4 bytes
+pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind {
     const magic = try source.readIntLittle(u32);
     return if (magic == frame.ZStandard.magic_number)
         .zstandard
@@ -45,12 +46,23 @@ const ReadWriteCount = struct {
 };
 
 /// Decodes the frame at the start of `src` into `dest`. Returns the number of
-/// bytes read from `src` and written to `dest`.
+/// bytes read from `src` and written to `dest`. This function can only decode
+/// frames that declare the decompressed content size.
 ///
-/// Errors:
-///   - returns `error.UnknownContentSizeUnsupported`
-///   - returns `error.ContentTooLarge`
-///   - returns `error.BadMagic`
+/// Errors returned:
+///   - `error.UnknownContentSizeUnsupported` if the frame does not declare the
+///     uncompressed content size
+///   - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
+///   - `error.BadMagic` if the first 4 bytes of `src` are not a valid magic
+///     number for a Zstandard or Skippable frame
+///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
+///     contains a checksum that does not match the checksum of the decompressed
+///     data
+///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
+///   - `error.UnusedBitSet` if the unused bit of the frame header is set
+///   - `error.EndOfStream` if `src` does not contain a complete frame
+///   - an error in `block.Error` if there are errors decoding a block
 pub fn decodeFrame(
     dest: []u8,
     src: []const u8,
@@ -66,6 +78,7 @@ pub fn decodeFrame(
     };
 }
 
+/// Returns the frame checksum corresponding to the data fed into `hasher`
 pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
     const hash = hasher.final();
     return @intCast(u32, hash & 0xFFFFFFFF);
@@ -74,20 +87,31 @@ pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
 const FrameError = error{
     DictionaryIdFlagUnsupported,
     ChecksumFailure,
+    EndOfStream,
 } || InvalidBit || block.Error;
 
 /// Decode a Zstandard frame from `src` into `dest`, returning the number of
-/// bytes read from `src` and written to `dest`; if the frame does not declare
-/// its decompressed content size `error.UnknownContentSizeUnsupported` is
-/// returned. Returns `error.DictionaryIdFlagUnsupported` if the frame uses a
-/// dictionary, and `error.ChecksumFailure` if `verify_checksum` is `true` and
-/// the frame contains a checksum that does not match the checksum computed from
-/// the decompressed frame.
+/// bytes read from `src` and written to `dest`. The first four bytes of `src`
+/// must be the magic number for a Zstandard frame.
+///
+/// Errors returned:
+///   - `error.UnknownContentSizeUnsupported` if the frame does not declare the
+///     uncompressed content size
+///   - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
+///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
+///     contains a checksum that does not match the checksum of the decompressed
+///     data
+///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
+///   - `error.UnusedBitSet` if the unused bit of the frame header is set
+///   - `error.EndOfStream` if `src` does not contain a complete frame
+///   - an error in `block.Error` if there are errors decoding a block
 pub fn decodeZStandardFrame(
     dest: []u8,
     src: []const u8,
     verify_checksum: bool,
-) (error{ UnknownContentSizeUnsupported, ContentTooLarge, EndOfStream } || FrameError)!ReadWriteCount {
+) (error{ UnknownContentSizeUnsupported, ContentTooLarge } || FrameError)!ReadWriteCount {
     assert(readInt(u32, src[0..4]) == frame.ZStandard.magic_number);
     var consumed_count: usize = 4;
 
@@ -127,7 +151,18 @@ pub const FrameContext = struct {
     has_checksum: bool,
     block_size_max: usize,
 
-    pub fn init(frame_header: frame.ZStandard.Header, window_size_max: usize, verify_checksum: bool) !FrameContext {
+    const Error = error{ DictionaryIdFlagUnsupported, WindowSizeUnknown, WindowTooLarge };
+    /// Validates `frame_header` and returns the associated `FrameContext`.
+    ///
+    /// Errors returned:
+    ///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+    ///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
+    ///   - `error.WindowTooLarge` if the window size is larger than
+    ///     `window_size_max`
+    pub fn init(
+        frame_header: frame.ZStandard.Header,
+        window_size_max: usize,
+        verify_checksum: bool,
+    ) Error!FrameContext {
         if (frame_header.descriptor.dictionary_id_flag != 0) return error.DictionaryIdFlagUnsupported;
 
         const window_size_raw = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown;
@@ -147,19 +182,29 @@ pub const FrameContext = struct {
 };
 
 /// Decode a Zstandard frame from `src` and return the decompressed bytes; see
-/// `decodeZStandardFrame()`. Returns `error.WindowSizeUnknown` if the frame
-/// does not declare its content size or a window descriptor (this indicates a
-/// malformed frame).
+/// `decodeZStandardFrame()`. `allocator` is used to allocate both the returned
+/// slice and internal buffers used during decoding. The first four bytes of
+/// `src` must be the magic number for a Zstandard frame.
 ///
-/// Errors:
-///   - returns `error.WindowTooLarge`
-///   - returns `error.WindowSizeUnknown`
+/// Errors returned:
+///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
+///   - `error.WindowTooLarge` if the window size is larger than
+///     `window_size_max`
+///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
+///     contains a checksum that does not match the checksum of the decompressed
+///     data
+///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
+///   - `error.UnusedBitSet` if the unused bit of the frame header is set
+///   - `error.EndOfStream` if `src` does not contain a complete frame
+///   - `error.OutOfMemory` if `allocator` cannot allocate enough memory
+///   - an error in `block.Error` if there are errors decoding a block
 pub fn decodeZStandardFrameAlloc(
     allocator: std.mem.Allocator,
     src: []const u8,
     verify_checksum: bool,
     window_size_max: usize,
-) (error{ WindowSizeUnknown, WindowTooLarge, OutOfMemory, EndOfStream } || FrameError)![]u8 {
+) (error{OutOfMemory} || FrameContext.Error || FrameError)![]u8 {
     var result = std.ArrayList(u8).init(allocator);
     assert(readInt(u32, src[0..4]) == frame.ZStandard.magic_number);
     var consumed_count: usize = 4;
@@ -222,7 +267,7 @@ fn decodeFrameBlocks(
     src: []const u8,
     consumed_count: *usize,
     hash: ?*std.hash.XxHash64,
-) block.Error!usize {
+) (error{EndOfStream} || block.Error)!usize {
     // These tables take 7680 bytes
     var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
     var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
@@ -252,7 +297,8 @@ fn decodeFrameBlocks(
     return written_count;
 }
 
-/// Decode the header of a skippable frame.
+/// Decode the header of a skippable frame. The first four bytes of `src` must
+/// be a valid magic number for a Skippable frame.
 pub fn decodeSkippableHeader(src: *const [8]u8) frame.Skippable.Header {
     const magic = readInt(u32, src[0..4]);
     assert(isSkippableMagic(magic));
@@ -263,8 +309,8 @@ pub fn decodeSkippableHeader(src: *const [8]u8) frame.Skippable.Header {
     };
 }
 
-/// Returns the window size required to decompress a frame, or `null` if it cannot be
-/// determined, which indicates a malformed frame header.
+/// Returns the window size required to decompress a frame, or `null` if it
+/// cannot be determined (which indicates a malformed frame header).
 pub fn frameWindowSize(header: frame.ZStandard.Header) ?u64 {
     if (header.window_descriptor) |descriptor| {
         const exponent = (descriptor & 0b11111000) >> 3;
@@ -279,10 +325,10 @@ pub fn frameWindowSize(header: frame.ZStandard.Header) ?u64 {
 const InvalidBit = error{ UnusedBitSet, ReservedBitSet };
 /// Decode the header of a Zstandard frame.
 ///
-/// Errors:
-///   - returns `error.UnusedBitSet` if the unused bits of the header are set
-///   - returns `error.ReservedBitSet` if the reserved bits of the header are
-///     set
+/// Errors returned:
+///   - `error.UnusedBitSet` if the unused bits of the header are set
+///   - `error.ReservedBitSet` if the reserved bits of the header are set
+///   - `error.EndOfStream` if `source` does not contain a complete header
 pub fn decodeZStandardHeader(source: anytype) (error{EndOfStream} || InvalidBit)!frame.ZStandard.Header {
     const descriptor = @bitCast(frame.ZStandard.Header.Descriptor, try source.readByte());

Commit 89f9c5cb37

Commit `89f9c5cb37`