Commit e2fd37e75b

tgschultz <tgschultz@gmail.com>
2019-05-04 02:28:51
Added PackedIntArray, PackedIntSlice to std
1 parent f479829
Changed files (1)
std/packed_int_array.zig
@@ -0,0 +1,599 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const debug = std.debug;
+const testing = std.testing;
+
+pub fn PackedIntIo(comptime Int: type) type
+{
+    //The general technique employed here is to cast bytes in the array to a container
+    // integer (having bits % 8 == 0) large enough to contain the number of bits we want,
+    // then we can retrieve or store the new value with a relative minimum of masking
+    // and shifting. In this worst case, this means that we'll need an integer that's
+    // actually 1 byte larger than the minimum required to store the bits, because it
+    // is possible that the bits start at the end of the first byte, continue through 
+    // zero or more, then end in the beginning of the last. But, if we try to access
+    // a value in the very last byte of memory with that integer size, that extra byte
+    // will be out of bounds. Depending on the circumstances of the memory, that might
+    // mean the OS fatally kills the program. Thus, we use a larger container (MaxIo)
+    // most of the time, but a smaller container (MinIo) when touching the last byte
+    // of the memory.
+    
+    const int_bits = comptime std.meta.bitCount(Int);
+    
+    //in the best case, this is the number of bytes we need to touch
+    // to read or write a value, as bits
+    const min_io_bits = ((int_bits + 7) / 8) * 8;
+    
+    //in the worst case, this is the number of bytes we need to touch
+    // to read or write a value, as bits
+    const max_io_bits = switch(int_bits)
+    {
+        0 => 0,
+        1 => 8,
+        2...9 => 16,
+        10...65535 => ((int_bits / 8) + 2) * 8,
+        else => unreachable,
+    };
+    
+    //we bitcast the desired Int type to an unsigned version of itself
+    // to avoid issues with shifting signed ints.
+    const UnInt = @IntType(false, int_bits);
+    
+    //The maximum container int type
+    const MinIo = @IntType(false, min_io_bits);
+    
+    //The minimum container int type
+    const MaxIo = @IntType(false, max_io_bits);
+    
+    return struct
+    {
+        pub fn get(bytes: []const u8, index: usize, bit_offset: u7) Int
+        {
+            if(int_bits == 0) return 0;
+            
+            const bit_index = (index * int_bits) + bit_offset;
+            const max_end_byte = (bit_index + max_io_bits) / 8;
+            
+            //Using the larger container size will potentially read out of bounds
+            if(max_end_byte > bytes.len) return getBits(bytes, MinIo, bit_index);
+            return getBits(bytes, MaxIo, bit_index);
+        }
+        
+        fn getBits(bytes: []const u8, comptime Container: type, bit_index: usize) Int
+        {
+            const container_bits = comptime std.meta.bitCount(Container);
+            const Shift = std.math.Log2Int(Container);
+            
+            const start_byte = bit_index / 8;
+            const head_keep_bits = bit_index - (start_byte * 8);
+            const tail_keep_bits = container_bits - (int_bits + head_keep_bits);
+            
+            //read bytes as container
+            const value_ptr = @ptrCast(*const align(1) Container, &bytes[start_byte]);
+            var value = value_ptr.*;
+
+            switch(builtin.endian)
+            {
+                .Big =>
+                {
+                    value <<= @intCast(Shift, head_keep_bits);
+                    value >>= @intCast(Shift, head_keep_bits);
+                    value >>= @intCast(Shift, tail_keep_bits);
+                },
+                .Little =>
+                {
+                    value <<= @intCast(Shift, tail_keep_bits);
+                    value >>= @intCast(Shift, tail_keep_bits);
+                    value >>= @intCast(Shift, head_keep_bits);
+                },
+            }
+            
+            return @bitCast(Int, @truncate(UnInt, value));
+        }
+        
+        pub fn set(bytes: []u8, index: usize, bit_offset: u3, int: Int) void
+        {
+            if(int_bits == 0) return;
+
+            const bit_index = (index * int_bits) + bit_offset;
+            const max_end_byte = (bit_index + max_io_bits) / 8;
+            
+            //Using the larger container size will potentially write out of bounds
+            if(max_end_byte > bytes.len) return setBits(bytes, MinIo, bit_index, int);
+            setBits(bytes, MaxIo, bit_index, int);
+        }
+        
+        fn setBits(bytes: []u8, comptime Container: type, bit_index: usize, int: Int) void
+        {
+            const container_bits = comptime std.meta.bitCount(Container);
+            const Shift = std.math.Log2Int(Container);
+            
+            const start_byte = bit_index / 8;
+            const head_keep_bits = bit_index - (start_byte * 8);
+            const tail_keep_bits = container_bits - (int_bits + head_keep_bits);
+            const keep_shift = switch(builtin.endian)
+            {
+                .Big => @intCast(Shift, tail_keep_bits),
+                .Little => @intCast(Shift, head_keep_bits),
+            };
+            
+            //position the bits where they need to be in the container
+            const value = @intCast(Container, @bitCast(UnInt, int)) << keep_shift;
+            
+            //read existing bytes
+            const target_ptr = @ptrCast(*align(1) Container, &bytes[start_byte]);
+            var target = target_ptr.*;
+            
+            //zero the bits we want to replace in the existing bytes
+            const inv_mask = @intCast(Container, std.math.maxInt(UnInt)) << keep_shift;
+            const mask = ~inv_mask;
+            target &= mask;
+            
+            //merge the new value
+            target |= value;
+            
+            //save it back
+            target_ptr.* = target;
+        }
+        
+        fn slice(bytes: []u8, bit_offset: u3, start: usize, end: usize) PackedIntSlice(Int)
+        {
+            debug.assert(end >= start);
+
+            const length = end - start;
+            const bit_index = (start * int_bits) + bit_offset;
+            const start_byte = bit_index / 8;
+            const end_byte = (bit_index + (length * int_bits) + 7) / 8;
+            const new_bytes = bytes[start_byte..end_byte];
+            
+            if(length == 0) return PackedIntSlice(Int).init(new_bytes[0..0], 0);
+            
+            var new_slice = PackedIntSlice(Int).init(new_bytes, length);
+            new_slice.bit_offset = @intCast(u3, (bit_index - (start_byte * 8)));
+            return new_slice;
+        }
+        
+        fn sliceCast(bytes: []u8, comptime NewInt: type, bit_offset: u3, old_len: usize) 
+            PackedIntSlice(NewInt)
+        {
+            const new_int_bits = comptime std.meta.bitCount(NewInt);
+            const New = PackedIntSlice(NewInt);
+            
+            const total_bits = (old_len * int_bits);
+            const new_int_count = total_bits / new_int_bits;
+            
+            debug.assert(total_bits == new_int_count * new_int_bits);
+            
+            var new = New.init(bytes, new_int_count);
+            new.bit_offset = bit_offset;
+            return new;
+        }
+    };
+}
+
+///Creates a bit-packed array of int_count integers of type Int. Bits
+/// are packed using native endianess and without storing any meta
+/// data. PackedArray(i3, 8) will occupy exactly 3 bytes of memory.
+pub fn PackedIntArray(comptime Int: type, comptime int_count: usize) type
+{
+    const int_bits = comptime std.meta.bitCount(Int);
+    const total_bits = int_bits * int_count;
+    const total_bytes = (total_bits + 7) / 8;
+    
+    const Io = PackedIntIo(Int);
+    
+    return struct
+    {
+        const Self = @This();
+        
+        bytes: [total_bytes]u8,
+        
+        ///Returns the number of elements in the packed array
+        pub fn len(self: Self) usize
+        {
+            return int_count;
+        }
+        
+        ///Initialize a packed array using an unpacked array
+        /// or, more likely, an array literal.
+        pub fn init(ints: [int_count]Int) Self
+        {
+            var self = Self(undefined);
+            for(ints) |int, i| self.set(i, int);
+            return self;
+        }
+        
+        ///Return the Int stored at index
+        pub fn get(self: Self, index: usize) Int
+        {
+            debug.assert(index < int_count);
+            return Io.get(self.bytes, index, 0);
+        }
+        
+        ///Copy int into the array at index
+        pub fn set(self: *Self, index: usize, int: Int) void
+        {
+            debug.assert(index < int_count);
+            return Io.set(&self.bytes, index, 0, int);
+        }
+        
+        ///Create a PackedIntSlice of the array from given start to given end
+        pub fn slice(self: *Self, start: usize, end: usize) PackedIntSlice(Int)
+        {
+            debug.assert(start < int_count);
+            debug.assert(end <= int_count);
+            return Io.slice(&self.bytes, 0, start, end);
+        }
+        
+        ///Create a PackedIntSlice of the array using NewInt as the bit width integer.
+        /// NewInt's bit width must fit evenly within the array's Int's total bits.
+        pub fn sliceCast(self: *Self, comptime NewInt: type) PackedIntSlice(NewInt)
+        {
+            return Io.sliceCast(&self.bytes, NewInt, 0, int_count);
+        }
+    };
+}
+
+//@TODO: Add Slice Casting
+pub fn PackedIntSlice(comptime Int: type) type
+{
+    const int_bits = comptime std.meta.bitCount(Int);
+    const Io = PackedIntIo(Int);
+    
+    return struct
+    {
+        const Self = @This();
+        
+        bytes: []u8,
+        int_count: usize,
+        bit_offset: u3,
+        
+        ///Returns the number of elements in the packed slice
+        pub fn len(self: Self) usize
+        {
+            return self.int_count;
+        }
+        
+        ///Calculates the number of bytes required to store a desired count
+        /// of Ints
+        pub fn bytesRequired(int_count: usize) usize
+        {
+            const total_bits = int_bits * int_count;
+            const total_bytes = (total_bits + 7) / 8;
+            return total_bytes;
+        }
+        
+        ///Initialize a packed slice using the memory at bytes, with int_count
+        /// elements. bytes must be large enough to accomodate the requested
+        /// count.
+        pub fn init(bytes: []u8, int_count: usize) Self
+        {
+            debug.assert(bytes.len >= bytesRequired(int_count));
+            
+            return Self
+            {
+                .bytes = bytes,
+                .int_count = int_count,
+                .bit_offset = 0,
+            };
+        }
+        
+        ///Return the Int stored at index
+        pub fn get(self: Self, index: usize) Int
+        {
+            debug.assert(index < self.int_count);
+            return Io.get(self.bytes, index, self.bit_offset);
+        }
+        
+        ///Copy int into the array at index
+        pub fn set(self: *Self, index: usize, int: Int) void
+        {
+            debug.assert(index < self.int_count);
+            return Io.set(self.bytes, index, self.bit_offset, int);
+        }
+        
+        ///Create a PackedIntSlice of this slice from given start to given end
+        pub fn slice(self: Self, start: usize, end: usize) PackedIntSlice(Int)
+        {
+            debug.assert(start < self.int_count);
+            debug.assert(end <= self.int_count);
+            return Io.slice(self.bytes, self.bit_offset, start, end);
+        }
+        
+        ///Create a PackedIntSlice of this slice using NewInt as the bit width integer.
+        /// NewInt's bit width must fit evenly within this slice's Int's total bits.
+        pub fn sliceCast(self: Self, comptime NewInt: type) PackedIntSlice(NewInt)
+        {
+            return Io.sliceCast(self.bytes, NewInt, self.bit_offset, self.int_count);
+        }
+    };
+}
+
+test "PackedIntArray"
+{
+    @setEvalBranchQuota(10000);
+    const max_bits = 256;
+    const int_count = 19;
+    
+    comptime var bits = 0;
+    inline while(bits <= 256):(bits += 1)
+    {
+        //alternate unsigned and signed
+        const even = bits % 2 == 0;
+        const I = @IntType(even, bits);
+
+        const PackedArray = PackedIntArray(I, int_count);
+        const expected_bytes = ((bits * int_count) + 7) / 8;
+        testing.expect(@sizeOf(PackedArray) == expected_bytes);
+        
+        var data = PackedArray(undefined);
+        
+        //write values, counting up
+        var i = usize(0);
+        var count = I(0);
+        while(i < data.len()):(i += 1)
+        {
+            data.set(i, count);
+            if(bits > 0) count +%= 1;
+        }
+        
+        //read and verify values
+        i = 0;
+        count = 0;
+        while(i < data.len()):(i += 1)
+        {
+            const val = data.get(i);
+            testing.expect(val == count);
+            if(bits > 0) count +%= 1;
+        }
+    }
+}
+
+test "PackedIntArray init"
+{
+    const PackedArray = PackedIntArray(u3, 8);
+    var packed_array = PackedArray.init([]u3{0,1,2,3,4,5,6,7});
+    var i = usize(0);
+    while(i < packed_array.len()):(i += 1) testing.expect(packed_array.get(i) == i);
+}
+
+test "PackedIntSlice"
+{
+    @setEvalBranchQuota(10000);
+    const max_bits = 256;
+    const int_count = 19;
+    const total_bits = max_bits * int_count;
+    const total_bytes = (total_bits + 7) / 8;
+    
+    var buffer: [total_bytes]u8 = undefined;
+    
+    comptime var bits = 0;
+    inline while(bits <= 256):(bits += 1)
+    {
+        //alternate unsigned and signed
+        const even = bits % 2 == 0;
+        const I = @IntType(even, bits);
+        const P = PackedIntSlice(I);
+        
+        var data = P.init(&buffer, int_count);
+        
+        //write values, counting up
+        var i = usize(0);
+        var count = I(0);
+        while(i < data.len()):(i += 1)
+        {
+            data.set(i, count);
+            if(bits > 0) count +%= 1;
+        }
+        
+        //read and verify values
+        i = 0;
+        count = 0;
+        while(i < data.len()):(i += 1)
+        {
+            const val = data.get(i);
+            testing.expect(val == count);
+            if(bits > 0) count +%= 1;
+        }
+    }
+}
+
+test "PackedIntSlice of PackedInt(Array/Slice)"
+{
+    const max_bits = 16;
+    const int_count = 19;
+    
+    comptime var bits = 0;
+    inline while(bits <= max_bits):(bits += 1)
+    {
+        const Int = @IntType(false, bits);
+        
+        const PackedArray = PackedIntArray(Int, int_count);
+        var packed_array = PackedArray(undefined);
+        
+        const limit = (1 << bits);
+        
+        var i = usize(0);
+        while(i < packed_array.len()):(i += 1)
+        {
+            packed_array.set(i, @intCast(Int, i % limit));
+        }
+        
+        //slice of array
+        var packed_slice = packed_array.slice(2, 5);
+        testing.expect(packed_slice.len() == 3);
+        const ps_bit_count = (bits * packed_slice.len()) + packed_slice.bit_offset;
+        const ps_expected_bytes = (ps_bit_count + 7) / 8;
+        testing.expect(packed_slice.bytes.len == ps_expected_bytes);
+        testing.expect(packed_slice.get(0) == 2 % limit);
+        testing.expect(packed_slice.get(1) == 3 % limit);
+        testing.expect(packed_slice.get(2) == 4 % limit);
+        packed_slice.set(1, 7 % limit);
+        testing.expect(packed_slice.get(1) == 7 % limit);
+        
+        //write through slice
+        testing.expect(packed_array.get(3) == 7 % limit);
+        
+        //slice of a slice
+        const packed_slice_two = packed_slice.slice(0, 3);
+        testing.expect(packed_slice_two.len() == 3);
+        const ps2_bit_count = (bits * packed_slice_two.len()) + packed_slice_two.bit_offset;
+        const ps2_expected_bytes = (ps2_bit_count + 7) / 8;
+        testing.expect(packed_slice_two.bytes.len == ps2_expected_bytes);
+        testing.expect(packed_slice_two.get(1) == 7 % limit);
+        testing.expect(packed_slice_two.get(2) == 4 % limit);
+        
+        //size one case
+        const packed_slice_three = packed_slice_two.slice(1, 2);
+        testing.expect(packed_slice_three.len() == 1);
+        const ps3_bit_count = (bits * packed_slice_three.len()) + packed_slice_three.bit_offset;
+        const ps3_expected_bytes = (ps3_bit_count + 7) / 8;
+        testing.expect(packed_slice_three.bytes.len == ps3_expected_bytes);
+        testing.expect(packed_slice_three.get(0) == 7 % limit);
+        
+        //empty slice case
+        const packed_slice_empty = packed_slice.slice(0, 0);
+        testing.expect(packed_slice_empty.len() == 0);
+        testing.expect(packed_slice_empty.bytes.len == 0);
+        
+        //slicing at byte boundaries
+        const packed_slice_edge = packed_array.slice(8, 16);
+        testing.expect(packed_slice_edge.len() == 8);
+        const pse_bit_count = (bits * packed_slice_edge.len()) + packed_slice_edge.bit_offset;
+        const pse_expected_bytes = (pse_bit_count + 7) / 8;
+        testing.expect(packed_slice_edge.bytes.len == pse_expected_bytes);
+        testing.expect(packed_slice_edge.bit_offset == 0);
+    }
+    
+}
+
+test "PackedIntSlice accumulating bit offsets"
+{
+    //bit_offset is u3, so standard debugging asserts should catch
+    // anything
+    {
+        const PackedArray = PackedIntArray(u3, 16);
+        var packed_array = PackedArray(undefined);
+        
+        var packed_slice = packed_array.slice(0, packed_array.len());
+        var i = usize(0);
+        while(i < packed_array.len() - 1):(i += 1)
+        {
+            
+            packed_slice = packed_slice.slice(1, packed_slice.len());
+        }
+    }
+    {
+        const PackedArray = PackedIntArray(u11, 88);
+        var packed_array = PackedArray(undefined);
+        
+        var packed_slice = packed_array.slice(0, packed_array.len());
+        var i = usize(0);
+        while(i < packed_array.len() - 1):(i += 1)
+        {
+            packed_slice = packed_slice.slice(1, packed_slice.len());
+        }
+    }
+    
+}
+
+//@NOTE: As I do not have a big endian system to test this on,
+// big endian values were not tested
+test "PackedInt(Array/Slice) sliceCast"
+{
+    const PackedArray = PackedIntArray(u1, 16);
+    var packed_array = PackedArray.init([]u1{0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1});
+    const packed_slice_cast_2 = packed_array.sliceCast(u2);
+    const packed_slice_cast_4 = packed_slice_cast_2.sliceCast(u4);
+    var packed_slice_cast_9 = packed_array.slice(0, (packed_array.len() / 9) * 9).sliceCast(u9);
+    const packed_slice_cast_3 = packed_slice_cast_9.sliceCast(u3);
+    
+    var i = usize(0);
+    while(i < packed_slice_cast_2.len()):(i += 1)
+    {
+        const val = switch(builtin.endian)
+        {
+            .Big => 0b01,
+            .Little => 0b10,
+        };
+        testing.expect(packed_slice_cast_2.get(i) == val);
+    }
+    i = 0;
+    while(i < packed_slice_cast_4.len()):(i += 1)
+    {
+        const val = switch(builtin.endian)
+        {
+            .Big => 0b0101,
+            .Little => 0b1010,
+        };
+        testing.expect(packed_slice_cast_4.get(i) == val);
+    }
+    i = 0;
+    while(i < packed_slice_cast_9.len()):(i += 1)
+    {
+        const val = 0b010101010;
+        testing.expect(packed_slice_cast_9.get(i) == val);
+        packed_slice_cast_9.set(i, 0b111000111);
+    }
+    i = 0;
+    while(i < packed_slice_cast_3.len()):(i += 1)
+    {
+        const val = switch(builtin.endian)
+        {
+            .Big => if(i % 2 == 0) u3(0b111) else u3(0b000),
+            .Little => if(i % 2 == 0) u3(0b111) else u3(0b000),
+        };
+        testing.expect(packed_slice_cast_3.get(i) == val);
+    }
+}
+
+//@NOTE: Need to manually update this list as more posix os's get
+// added to DirectAllocator. Windows can be added too when DirectAllocator
+// switches to VirtualAlloc.
+
+//These tests prove we aren't accidentally accessing memory past
+// the end of the array/slice by placing it at the end of a page
+// and reading the last element. The assumption is that the page
+// after this one is not mapped and will cause a segfault if we
+// don't account for the bounds.
+test "PackedIntArray at end of available memory"
+{
+    switch(builtin.os)
+    {
+        .linux, .macosx, .ios, .freebsd, .netbsd => {},
+        else => return,
+    }
+    const PackedArray = PackedIntArray(u3, 8);
+    
+    const Padded = struct
+    {
+        _: [std.os.page_size - @sizeOf(PackedArray)]u8,
+        p: PackedArray,
+    };
+    
+    var da = std.heap.DirectAllocator.init();
+    const allocator = &da.allocator;
+    
+    var pad = try allocator.create(Padded);
+    defer allocator.destroy(pad);
+    pad.p.set(7, std.math.maxInt(u3));
+}
+
+test "PackedIntSlice at end of available memory"
+{
+    switch(builtin.os)
+    {
+        .linux, .macosx, .ios, .freebsd, .netbsd => {},
+        else => return,
+    }
+    const PackedSlice = PackedIntSlice(u11);
+    
+    var da = std.heap.DirectAllocator.init();
+    const allocator = &da.allocator;
+    
+    var page = try allocator.alloc(u8, std.os.page_size);
+    defer allocator.free(page);
+    
+    var p = PackedSlice.init(page[std.os.page_size - 2..], 1);
+    p.set(0, std.math.maxInt(u11));
+}