Commit 89f9c5cb37
Changed files (2)
lib
std
compress
zstandard
decode
lib/std/compress/zstandard/decode/block.zig
@@ -23,7 +23,6 @@ pub const Error = error{
ReservedBlock,
MalformedRleBlock,
MalformedCompressedBlock,
- EndOfStream,
};
pub const DecodeState = struct {
@@ -92,11 +91,17 @@ pub const DecodeState = struct {
/// stream and Huffman tree from `literals` and reads the FSE tables from
/// `source`.
///
- /// Errors:
- /// - returns `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
- /// first byte does not have any bits set.
- /// - returns `error.TreelessLiteralsFirst` `literals` is a treeless literals section
- /// and the decode state does not have a Huffman tree from a previous block.
+ /// Errors returned:
+ /// - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
+ /// first byte does not have any bits set
+ /// - `error.TreelessLiteralsFirst` if `literals` is a treeless literals
+ /// section and the decode state does not have a Huffman tree from a
+ /// previous block
+ /// - `error.RepeatModeFirst` on the first call if one of the sequence FSE
+ /// tables is set to repeat mode
+ /// - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy log
+ /// - `error.MalformedFseTable` if there are errors decoding an FSE table
+ /// - `error.EndOfStream` if `source` ends before all FSE tables are read
pub fn prepare(
self: *DecodeState,
source: anytype,
@@ -132,8 +137,10 @@ pub const DecodeState = struct {
}
}
- /// Read initial FSE states for sequence decoding. Returns `error.EndOfStream`
- /// if `bit_reader` does not contain enough bits.
+ /// Read initial FSE states for sequence decoding.
+ ///
+ /// Errors returned:
+ /// - `error.EndOfStream` if `bit_reader` does not contain enough bits.
pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void {
self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log);
self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log);
@@ -308,13 +315,19 @@ pub const DecodeState = struct {
} || DecodeLiteralsError;
/// Decode one sequence from `bit_reader` into `dest`, written starting at
- /// `write_pos` and update FSE states if `last_sequence` is `false`. Returns
- /// `error.MalformedSequence` error if the decompressed sequence would be longer
- /// than `sequence_size_limit` or the sequence's offset is too large; returns
- /// `error.EndOfStream` if `bit_reader` does not contain enough bits; returns
- /// `error.UnexpectedEndOfLiteralStream` if the decoder state's literal streams
- /// do not contain enough literals for the sequence (this may mean the literal
- /// stream or the sequence is malformed).
+ /// `write_pos` and update FSE states if `last_sequence` is `false`.
+ /// `prepare()` must be called for the block before attempting to decode
+ /// sequences.
+ ///
+ /// Errors returned:
+ /// - `error.MalformedSequence` if the decompressed sequence would be
+ /// longer than `sequence_size_limit` or the sequence's offset is too
+ /// large
+ /// - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal
+ /// streams do not contain enough literals for the sequence (this may
+ /// mean the literal stream or the sequence is malformed).
+ /// - `error.OffsetCodeTooLarge` if an invalid offset code is found
+ /// - `error.EndOfStream` if `bit_reader` does not contain enough bits
pub fn decodeSequenceSlice(
self: *DecodeState,
dest: []u8,
@@ -336,7 +349,8 @@ pub const DecodeState = struct {
return sequence_length;
}
- /// Decode one sequence from `bit_reader` into `dest`; see `decodeSequenceSlice`.
+ /// Decode one sequence from `bit_reader` into `dest`; see
+ /// `decodeSequenceSlice`.
pub fn decodeSequenceRingBuffer(
self: *DecodeState,
dest: *RingBuffer,
@@ -364,7 +378,7 @@ pub const DecodeState = struct {
try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
}
- pub fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
+ fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
try self.literal_stream_reader.init(bytes);
}
@@ -393,12 +407,14 @@ pub const DecodeState = struct {
PrefixNotFound,
} || LiteralBitsError;
- /// Decode `len` bytes of literals into `dest`. `literals` should be the
- /// `LiteralsSection` that was passed to `prepare()`. Returns
- /// `error.MalformedLiteralsLength` if the number of literal bytes decoded by
- /// `self` plus `len` is greater than the regenerated size of `literals`.
- /// Returns `error.UnexpectedEndOfLiteralStream` and `error.PrefixNotFound` if
- /// there are problems decoding Huffman compressed literals.
+ /// Decode `len` bytes of literals into `dest`.
+ ///
+ /// Errors returned:
+ /// - `error.MalformedLiteralsLength` if the number of literal bytes
+ /// decoded by `self` plus `len` is greater than the regenerated size of
+ /// `literals`
+ /// - `error.UnexpectedEndOfLiteralStream` and `error.PrefixNotFound` if
+ /// there are problems decoding Huffman compressed literals
pub fn decodeLiteralsSlice(
self: *DecodeState,
dest: []u8,
@@ -561,7 +577,6 @@ pub const DecodeState = struct {
/// - `error.MalformedRleBlock` if the block is an RLE block and `src.len < 1`
/// - `error.MalformedCompressedBlock` if there are errors decoding a
/// compressed block
-/// - `error.EndOfStream` if the sequence bit stream ends unexpectedly
pub fn decodeBlock(
dest: []u8,
src: []const u8,
@@ -738,7 +753,8 @@ pub fn decodeBlockRingBuffer(
/// `error.SequenceBufferTooSmall` are returned (the maximum block size is an
/// upper bound for the size of both buffers). See `decodeBlock`
/// and `decodeBlockRingBuffer` for functions that can decode a block without
-/// these extra copies.
+/// these extra copies. `error.EndOfStream` is returned if `source` does not
+/// contain enough bytes.
pub fn decodeBlockReader(
dest: *RingBuffer,
source: anytype,
@@ -820,6 +836,10 @@ pub fn decodeBlockHeader(src: *const [3]u8) frame.ZStandard.Block.Header {
};
}
+/// Decode the header of a block.
+///
+/// Errors returned:
+/// - `error.EndOfStream` if `src.len < 3`
pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.ZStandard.Block.Header {
if (src.len < 3) return error.EndOfStream;
return decodeBlockHeader(src[0..3]);
@@ -828,9 +848,14 @@ pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.ZStandar
/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
/// number of bytes the section uses.
///
-/// Errors:
-/// - returns `error.MalformedLiteralsHeader` if the header is invalid
-/// - returns `error.MalformedLiteralsSection` if there are errors decoding
+/// Errors returned:
+/// - `error.MalformedLiteralsHeader` if the header is invalid
+/// - `error.MalformedLiteralsSection` if there are decoding errors
+/// - `error.MalformedAccuracyLog` if compressed literals have an invalid
+/// accuracy log
+/// - `error.MalformedFseTable` if compressed literals have invalid FSE table
+/// - `error.MalformedHuffmanTree` if there are errors decoding a Huffman tree
+/// - `error.EndOfStream` if there are not enough bytes in `src`
pub fn decodeLiteralsSectionSlice(
src: []const u8,
consumed_count: *usize,
@@ -886,11 +911,7 @@ pub fn decodeLiteralsSectionSlice(
}
/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
-/// number of bytes the section uses.
-///
-/// Errors:
-/// - returns `error.MalformedLiteralsHeader` if the header is invalid
-/// - returns `error.MalformedLiteralsSection` if there are errors decoding
+/// number of bytes the section uses. See `decodeLiteralsSectionSlice()`.
pub fn decodeLiteralsSection(
source: anytype,
buffer: []u8,
@@ -961,6 +982,9 @@ fn decodeStreams(size_format: u2, stream_data: []const u8) !LiteralsSection.Stre
}
/// Decode a literals section header.
+///
+/// Errors returned:
+/// - `error.EndOfStream` if there are not enough bytes in `source`
pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
const byte0 = try source.readByte();
const block_type = @intToEnum(LiteralsSection.BlockType, byte0 & 0b11);
@@ -1011,9 +1035,9 @@ pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
/// Decode a sequences section header.
///
-/// Errors:
-/// - returns `error.ReservedBitSet` is the reserved bit is set
-/// - returns `error.MalformedSequencesHeader` if the header is invalid
+/// Errors returned:
+/// - `error.ReservedBitSet` if the reserved bit is set
+/// - `error.EndOfStream` if there are not enough bytes in `source`
pub fn decodeSequencesHeader(
source: anytype,
) !SequencesSection.Header {
lib/std/compress/zstandard/decompress.zig
@@ -25,11 +25,12 @@ pub fn isSkippableMagic(magic: u32) bool {
/// Returns the kind of frame at the beginning of `source`.
///
-/// Errors:
-/// - returns `error.BadMagic` if `source` begins with bytes not equal to the
+/// Errors returned:
+/// - `error.BadMagic` if `source` begins with bytes not equal to the
/// Zstandard frame magic number, or outside the range of magic numbers for
/// skippable frames.
-pub fn decodeFrameType(source: anytype) !frame.Kind {
+/// - `error.EndOfStream` if `source` contains fewer than 4 bytes
+pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind {
const magic = try source.readIntLittle(u32);
return if (magic == frame.ZStandard.magic_number)
.zstandard
@@ -45,12 +46,23 @@ const ReadWriteCount = struct {
};
/// Decodes the frame at the start of `src` into `dest`. Returns the number of
-/// bytes read from `src` and written to `dest`.
+/// bytes read from `src` and written to `dest`. This function can only decode
+/// frames that declare the decompressed content size.
///
-/// Errors:
-/// - returns `error.UnknownContentSizeUnsupported`
-/// - returns `error.ContentTooLarge`
-/// - returns `error.BadMagic`
+/// Errors returned:
+/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the
+/// uncompressed content size
+/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
+/// - `error.BadMagic` if the first 4 bytes of `src` are not a valid magic
+/// number for a Zstandard or Skippable frame
+/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
+/// contains a checksum that does not match the checksum of the decompressed
+/// data
+/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
+/// - `error.UnusedBitSet` if the unused bit of the frame header is set
+/// - `error.EndOfStream` if `src` does not contain a complete frame
+/// - an error in `block.Error` if there are errors decoding a block
pub fn decodeFrame(
dest: []u8,
src: []const u8,
@@ -66,6 +78,7 @@ pub fn decodeFrame(
};
}
+/// Returns the frame checksum corresponding to the data fed into `hasher`
pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
const hash = hasher.final();
return @intCast(u32, hash & 0xFFFFFFFF);
@@ -74,20 +87,31 @@ pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
const FrameError = error{
DictionaryIdFlagUnsupported,
ChecksumFailure,
+ EndOfStream,
} || InvalidBit || block.Error;
/// Decode a Zstandard frame from `src` into `dest`, returning the number of
-/// bytes read from `src` and written to `dest`; if the frame does not declare
-/// its decompressed content size `error.UnknownContentSizeUnsupported` is
-/// returned. Returns `error.DictionaryIdFlagUnsupported` if the frame uses a
-/// dictionary, and `error.ChecksumFailure` if `verify_checksum` is `true` and
-/// the frame contains a checksum that does not match the checksum computed from
-/// the decompressed frame.
+/// bytes read from `src` and written to `dest`. The first four bytes of `src`
+/// must be the magic number for a Zstandard frame.
+///
+/// Errors returned:
+/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the
+/// uncompressed content size
+/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
+/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
+/// contains a checksum that does not match the checksum of the decompressed
+/// data
+/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
+/// - `error.UnusedBitSet` if the unused bit of the frame header is set
+/// - `error.EndOfStream` if `src` does not contain a complete frame
+/// - an error in `block.Error` if there are errors decoding a block
pub fn decodeZStandardFrame(
dest: []u8,
src: []const u8,
verify_checksum: bool,
-) (error{ UnknownContentSizeUnsupported, ContentTooLarge, EndOfStream } || FrameError)!ReadWriteCount {
+) (error{ UnknownContentSizeUnsupported, ContentTooLarge } || FrameError)!ReadWriteCount {
assert(readInt(u32, src[0..4]) == frame.ZStandard.magic_number);
var consumed_count: usize = 4;
@@ -127,7 +151,18 @@ pub const FrameContext = struct {
has_checksum: bool,
block_size_max: usize,
- pub fn init(frame_header: frame.ZStandard.Header, window_size_max: usize, verify_checksum: bool) !FrameContext {
+ const Error = error{ DictionaryIdFlagUnsupported, WindowSizeUnknown, WindowTooLarge };
+ /// Validates `frame_header` and returns the associated `FrameContext`.
+ ///
+ /// Errors returned:
+ /// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+ /// - `error.WindowSizeUnknown` if the frame does not have a valid window size
+ /// - `error.WindowTooLarge` if the window size is larger than
+ /// `window_size_max`
+ pub fn init(
+ frame_header: frame.ZStandard.Header,
+ window_size_max: usize,
+ verify_checksum: bool,
+ ) Error!FrameContext {
if (frame_header.descriptor.dictionary_id_flag != 0) return error.DictionaryIdFlagUnsupported;
const window_size_raw = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown;
@@ -147,19 +182,29 @@ pub const FrameContext = struct {
};
/// Decode a Zstandard frame from `src` and return the decompressed bytes; see
-/// `decodeZStandardFrame()`. Returns `error.WindowSizeUnknown` if the frame
-/// does not declare its content size or a window descriptor (this indicates a
-/// malformed frame).
+/// `decodeZStandardFrame()`. `allocator` is used to allocate both the returned
+/// slice and internal buffers used during decoding. The first four bytes of
+/// `src` must be the magic number for a Zstandard frame.
///
-/// Errors:
-/// - returns `error.WindowTooLarge`
-/// - returns `error.WindowSizeUnknown`
+/// Errors returned:
+/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
+/// - `error.WindowTooLarge` if the window size is larger than
+/// `window_size_max`
+/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
+/// contains a checksum that does not match the checksum of the decompressed
+/// data
+/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
+/// - `error.UnusedBitSet` if the unused bit of the frame header is set
+/// - `error.EndOfStream` if `src` does not contain a complete frame
+/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory
+/// - an error in `block.Error` if there are errors decoding a block
pub fn decodeZStandardFrameAlloc(
allocator: std.mem.Allocator,
src: []const u8,
verify_checksum: bool,
window_size_max: usize,
-) (error{ WindowSizeUnknown, WindowTooLarge, OutOfMemory, EndOfStream } || FrameError)![]u8 {
+) (error{OutOfMemory} || FrameContext.Error || FrameError)![]u8 {
var result = std.ArrayList(u8).init(allocator);
assert(readInt(u32, src[0..4]) == frame.ZStandard.magic_number);
var consumed_count: usize = 4;
@@ -222,7 +267,7 @@ fn decodeFrameBlocks(
src: []const u8,
consumed_count: *usize,
hash: ?*std.hash.XxHash64,
-) block.Error!usize {
+) (error{EndOfStream} || block.Error)!usize {
// These tables take 7680 bytes
var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
@@ -252,7 +297,8 @@ fn decodeFrameBlocks(
return written_count;
}
-/// Decode the header of a skippable frame.
+/// Decode the header of a skippable frame. The first four bytes of `src` must
+/// be a valid magic number for a Skippable frame.
pub fn decodeSkippableHeader(src: *const [8]u8) frame.Skippable.Header {
const magic = readInt(u32, src[0..4]);
assert(isSkippableMagic(magic));
@@ -263,8 +309,8 @@ pub fn decodeSkippableHeader(src: *const [8]u8) frame.Skippable.Header {
};
}
-/// Returns the window size required to decompress a frame, or `null` if it cannot be
-/// determined, which indicates a malformed frame header.
+/// Returns the window size required to decompress a frame, or `null` if it
+/// cannot be determined (which indicates a malformed frame header).
pub fn frameWindowSize(header: frame.ZStandard.Header) ?u64 {
if (header.window_descriptor) |descriptor| {
const exponent = (descriptor & 0b11111000) >> 3;
@@ -279,10 +325,10 @@ pub fn frameWindowSize(header: frame.ZStandard.Header) ?u64 {
const InvalidBit = error{ UnusedBitSet, ReservedBitSet };
/// Decode the header of a Zstandard frame.
///
-/// Errors:
-/// - returns `error.UnusedBitSet` if the unused bits of the header are set
-/// - returns `error.ReservedBitSet` if the reserved bits of the header are
-/// set
+/// Errors returned:
+/// - `error.UnusedBitSet` if the unused bits of the header are set
+/// - `error.ReservedBitSet` if the reserved bits of the header are set
+/// - `error.EndOfStream` if `source` does not contain a complete header
pub fn decodeZStandardHeader(source: anytype) (error{EndOfStream} || InvalidBit)!frame.ZStandard.Header {
const descriptor = @bitCast(frame.ZStandard.Header.Descriptor, try source.readByte());