Commit 3295fee911

Cody Tapscott <topolarity@tapscott.me>
2022-10-18 20:37:43
stage2: Use mem.readPackedInt etc. for packed bitcasts
Packed memory has a well-defined layout that doesn't require conversion from an integer to read from. Let's use it :-)

This change means that for bitcasting to/from a packed value that is N layers deep, we no longer have to create N temporary big-ints and perform N copies.

Other miscellaneous improvements:
- Adds support for casting to packed enums and vectors
- Fixes bitcasting to/from vectors outside of a packed struct
- Adds a fast path for bitcasting <= u/i64
- Fixes a bug when bitcasting f80 that would clear the following fields

This also changes the bitcast memory layout of exotic integers on big-endian systems to match what is empirically observed on our targets. Technically, this layout is not guaranteed by LLVM, so we should probably ban bitcasts that reveal these padding bits, but for now this is an improvement.
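For context, the std.mem helpers this commit switches to operate directly on a byte buffer at an arbitrary bit offset, which is what lets packed fields be read and written without a big-int round trip. A minimal sketch of that usage, assuming the signatures visible in the diff below (readPackedInt/writePackedInt for comptime-known widths, readVarPackedInt for runtime-known widths):

    const std = @import("std");

    test "packed-int helper sketch" {
        var bytes = [_]u8{0} ** 4;
        // Write a 12-bit value at bit offset 5, then read it back.
        std.mem.writePackedInt(u12, &bytes, 5, 0xabc, .Little);
        try std.testing.expectEqual(@as(u12, 0xabc), std.mem.readPackedInt(u12, &bytes, 5, .Little));
        // Same field read with a runtime-known bit width (12 bits at offset 5).
        try std.testing.expectEqual(@as(u64, 0xabc), std.mem.readVarPackedInt(u64, &bytes, 5, 12, .Little, .unsigned));
    }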
1 parent c639c22
Changed files (6)
lib/std/math/big/int.zig
@@ -1762,16 +1762,32 @@ pub const Mutable = struct {
     }
 
     /// Read the value of `x` from `buffer`
-    /// Asserts that `buffer`, `abi_size`, and `bit_count` are large enough to store the value.
+    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`.
     ///
     /// The contents of `buffer` are interpreted as if they were the contents of
-    /// @ptrCast(*[abi_size]const u8, &x). Byte ordering is determined by `endian`
+    /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`
     /// and any required padding bits are expected on the MSB end.
     pub fn readTwosComplement(
         x: *Mutable,
         buffer: []const u8,
         bit_count: usize,
-        abi_size: usize,
+        endian: Endian,
+        signedness: Signedness,
+    ) void {
+        return readPackedTwosComplement(x, buffer, 0, bit_count, endian, signedness);
+    }
+
+    /// Read the value of `x` from a packed memory `buffer`.
+    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
+    /// at offset `bit_offset`.
+    ///
+    /// This is equivalent to loading the value of an integer with `bit_count` bits as
+    /// if it were a field in packed memory at the provided bit offset.
+    pub fn readPackedTwosComplement(
+        x: *Mutable,
+        bytes: []const u8,
+        bit_offset: usize,
+        bit_count: usize,
         endian: Endian,
         signedness: Signedness,
     ) void {
@@ -1782,75 +1798,54 @@ pub const Mutable = struct {
             return;
         }
 
-        // byte_count is our total read size: it cannot exceed abi_size,
-        // but may be less as long as it includes the required bits
-        const limb_count = calcTwosCompLimbCount(bit_count);
-        const byte_count = std.math.min(abi_size, @sizeOf(Limb) * limb_count);
-        assert(8 * byte_count >= bit_count);
-
         // Check whether the input is negative
         var positive = true;
         if (signedness == .signed) {
+            const total_bits = bit_offset + bit_count;
             var last_byte = switch (endian) {
-                .Little => ((bit_count + 7) / 8) - 1,
-                .Big => abi_size - ((bit_count + 7) / 8),
+                .Little => ((total_bits + 7) / 8) - 1,
+                .Big => bytes.len - ((total_bits + 7) / 8),
             };
 
-            const sign_bit = @as(u8, 1) << @intCast(u3, (bit_count - 1) % 8);
-            positive = ((buffer[last_byte] & sign_bit) == 0);
+            const sign_bit = @as(u8, 1) << @intCast(u3, (total_bits - 1) % 8);
+            positive = ((bytes[last_byte] & sign_bit) == 0);
         }
 
         // Copy all complete limbs
-        var carry: u1 = if (positive) 0 else 1;
+        var carry: u1 = 1;
         var limb_index: usize = 0;
+        var bit_index: usize = 0;
         while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
-            var buf_index = switch (endian) {
-                .Little => @sizeOf(Limb) * limb_index,
-                .Big => abi_size - (limb_index + 1) * @sizeOf(Limb),
-            };
-
-            const limb_buf = @ptrCast(*const [@sizeOf(Limb)]u8, buffer[buf_index..]);
-            var limb = mem.readInt(Limb, limb_buf, endian);
+            // Read one Limb of bits
+            var limb = mem.readPackedInt(Limb, bytes, bit_index + bit_offset, endian);
+            bit_index += @bitSizeOf(Limb);
 
             // 2's complement (bitwise not, then add carry bit)
             if (!positive) carry = @boolToInt(@addWithOverflow(Limb, ~limb, carry, &limb));
             x.limbs[limb_index] = limb;
         }
 
-        // Copy the remaining N bytes (N <= @sizeOf(Limb))
-        var bytes_read = limb_index * @sizeOf(Limb);
-        if (bytes_read != byte_count) {
-            var limb: Limb = 0;
-
-            while (bytes_read != byte_count) {
-                const read_size = std.math.floorPowerOfTwo(usize, byte_count - bytes_read);
-                var int_buffer = switch (endian) {
-                    .Little => buffer[bytes_read..],
-                    .Big => buffer[(abi_size - bytes_read - read_size)..],
-                };
-                limb |= @intCast(Limb, switch (read_size) {
-                    1 => mem.readInt(u8, int_buffer[0..1], endian),
-                    2 => mem.readInt(u16, int_buffer[0..2], endian),
-                    4 => mem.readInt(u32, int_buffer[0..4], endian),
-                    8 => mem.readInt(u64, int_buffer[0..8], endian),
-                    16 => mem.readInt(u128, int_buffer[0..16], endian),
-                    else => unreachable,
-                }) << @intCast(Log2Limb, 8 * (bytes_read % @sizeOf(Limb)));
-                bytes_read += read_size;
-            }
+        // Copy the remaining bits
+        if (bit_count != bit_index) {
+            // Read all remaining bits
+            var limb = switch (signedness) {
+                .unsigned => mem.readVarPackedInt(Limb, bytes, bit_index + bit_offset, bit_count - bit_index, endian, .unsigned),
+                .signed => b: {
+                    const SLimb = std.meta.Int(.signed, @bitSizeOf(Limb));
+                    const limb = mem.readVarPackedInt(SLimb, bytes, bit_index + bit_offset, bit_count - bit_index, endian, .signed);
+                    break :b @bitCast(Limb, limb);
+                },
+            };
 
             // 2's complement (bitwise not, then add carry bit)
-            if (!positive) _ = @addWithOverflow(Limb, ~limb, carry, &limb);
-
-            // Mask off any unused bits
-            const valid_bits = @intCast(Log2Limb, bit_count % @bitSizeOf(Limb));
-            const mask = (@as(Limb, 1) << valid_bits) -% 1; // 0b0..01..1 with (valid_bits_in_limb) trailing ones
-            limb &= mask;
+            if (!positive) assert(!@addWithOverflow(Limb, ~limb, carry, &limb));
+            x.limbs[limb_index] = limb;
 
-            x.limbs[limb_count - 1] = limb;
+            limb_index += 1;
         }
+
         x.positive = positive;
-        x.len = limb_count;
+        x.len = limb_index;
         x.normalize(x.len);
     }
 
@@ -2212,66 +2207,48 @@ pub const Const = struct {
     }
 
     /// Write the value of `x` into `buffer`
-    /// Asserts that `buffer`, `abi_size`, and `bit_count` are large enough to store the value.
+    /// Asserts that `buffer` is large enough to store the value.
     ///
     /// `buffer` is filled so that its contents match what would be observed via
-    /// @ptrCast(*[abi_size]const u8, &x). Byte ordering is determined by `endian`,
+    /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`,
     /// and any required padding bits are added on the MSB end.
-    pub fn writeTwosComplement(x: Const, buffer: []u8, bit_count: usize, abi_size: usize, endian: Endian) void {
+    pub fn writeTwosComplement(x: Const, buffer: []u8, endian: Endian) void {
+        return writePackedTwosComplement(x, buffer, 0, 8 * buffer.len, endian);
+    }
 
-        // byte_count is our total write size
-        const byte_count = abi_size;
-        assert(8 * byte_count >= bit_count);
-        assert(buffer.len >= byte_count);
+    /// Write the value of `x` to a packed memory `buffer`.
+    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
+    /// at offset `bit_offset`.
+    ///
+    /// This is equivalent to storing the value of an integer with `bit_count` bits as
+    /// if it were a field in packed memory at the provided bit offset.
+    pub fn writePackedTwosComplement(x: Const, bytes: []u8, bit_offset: usize, bit_count: usize, endian: Endian) void {
         assert(x.fitsInTwosComp(if (x.positive) .unsigned else .signed, bit_count));
 
         // Copy all complete limbs
-        var carry: u1 = if (x.positive) 0 else 1;
+        var carry: u1 = 1;
         var limb_index: usize = 0;
-        while (limb_index < byte_count / @sizeOf(Limb)) : (limb_index += 1) {
-            var buf_index = switch (endian) {
-                .Little => @sizeOf(Limb) * limb_index,
-                .Big => abi_size - (limb_index + 1) * @sizeOf(Limb),
-            };
-
+        var bit_index: usize = 0;
+        while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
             var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0;
+
             // 2's complement (bitwise not, then add carry bit)
             if (!x.positive) carry = @boolToInt(@addWithOverflow(Limb, ~limb, carry, &limb));
 
-            var limb_buf = @ptrCast(*[@sizeOf(Limb)]u8, buffer[buf_index..]);
-            mem.writeInt(Limb, limb_buf, limb, endian);
+            // Write one Limb of bits
+            mem.writePackedInt(Limb, bytes, bit_index + bit_offset, limb, endian);
+            bit_index += @bitSizeOf(Limb);
         }
 
-        // Copy the remaining N bytes (N < @sizeOf(Limb))
-        var bytes_written = limb_index * @sizeOf(Limb);
-        if (bytes_written != byte_count) {
+        // Copy the remaining bits
+        if (bit_count != bit_index) {
             var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0;
+
             // 2's complement (bitwise not, then add carry bit)
             if (!x.positive) _ = @addWithOverflow(Limb, ~limb, carry, &limb);
 
-            while (bytes_written != byte_count) {
-                const write_size = std.math.floorPowerOfTwo(usize, byte_count - bytes_written);
-                var int_buffer = switch (endian) {
-                    .Little => buffer[bytes_written..],
-                    .Big => buffer[(abi_size - bytes_written - write_size)..],
-                };
-
-                if (write_size == 1) {
-                    mem.writeInt(u8, int_buffer[0..1], @truncate(u8, limb), endian);
-                } else if (@sizeOf(Limb) >= 2 and write_size == 2) {
-                    mem.writeInt(u16, int_buffer[0..2], @truncate(u16, limb), endian);
-                } else if (@sizeOf(Limb) >= 4 and write_size == 4) {
-                    mem.writeInt(u32, int_buffer[0..4], @truncate(u32, limb), endian);
-                } else if (@sizeOf(Limb) >= 8 and write_size == 8) {
-                    mem.writeInt(u64, int_buffer[0..8], @truncate(u64, limb), endian);
-                } else if (@sizeOf(Limb) >= 16 and write_size == 16) {
-                    mem.writeInt(u128, int_buffer[0..16], @truncate(u128, limb), endian);
-                } else if (@sizeOf(Limb) >= 32) {
-                    @compileError("@sizeOf(Limb) exceeded supported range");
-                } else unreachable;
-                limb >>= @intCast(Log2Limb, 8 * write_size);
-                bytes_written += write_size;
-            }
+            // Write all remaining bits
+            mem.writeVarPackedInt(bytes, bit_index + bit_offset, bit_count - bit_index, limb, endian);
         }
     }
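A usage sketch for the new packed entry points added above; `a` and `m` are assumed to be an initialized big int and a Mutable with enough limb capacity, in the style of the tests in the next file, and the stored value must fit in the stated bit count:

    // Plain writeTwosComplement/readTwosComplement now forward to the packed
    // variants with bit_offset = 0; the packed variants take an explicit offset.
    var bytes = [_]u8{0} ** 8;
    // Store `a` as a 7-bit two's complement field at bit offset 3.
    a.toConst().writePackedTwosComplement(&bytes, 3, 7, .Little);
    // Load it back into `m` as an unsigned 7-bit value.
    m.readPackedTwosComplement(&bytes, 3, 7, .Little, .unsigned);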
 
lib/std/math/big/int_test.zig
@@ -2603,13 +2603,13 @@ test "big int conversion read/write twos complement" {
 
     for (endians) |endian| {
         // Writing to buffer and back should not change anything
-        a.toConst().writeTwosComplement(buffer1, 493, abi_size, endian);
-        m.readTwosComplement(buffer1, 493, abi_size, endian, .unsigned);
+        a.toConst().writeTwosComplement(buffer1[0..abi_size], endian);
+        m.readTwosComplement(buffer1[0..abi_size], 493, endian, .unsigned);
         try testing.expect(m.toConst().order(a.toConst()) == .eq);
 
         // Equivalent to @bitCast(i493, @as(u493, intMax(u493)))
-        a.toConst().writeTwosComplement(buffer1, 493, abi_size, endian);
-        m.readTwosComplement(buffer1, 493, abi_size, endian, .signed);
+        a.toConst().writeTwosComplement(buffer1[0..abi_size], endian);
+        m.readTwosComplement(buffer1[0..abi_size], 493, endian, .signed);
         try testing.expect(m.toConst().orderAgainstScalar(-1) == .eq);
     }
 }
@@ -2628,26 +2628,26 @@ test "big int conversion read twos complement with padding" {
     // (3) should sign-extend any bits from bit_count to 8 * abi_size
 
     var bit_count: usize = 12 * 8 + 1;
-    a.toConst().writeTwosComplement(buffer1, bit_count, 13, .Little);
+    a.toConst().writeTwosComplement(buffer1[0..13], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0xaa, 0xaa, 0xaa }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 13, .Big);
+    a.toConst().writeTwosComplement(buffer1[0..13], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xaa, 0xaa, 0xaa }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 16, .Little);
+    a.toConst().writeTwosComplement(buffer1[0..16], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0, 0x0, 0x0 }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 16, .Big);
+    a.toConst().writeTwosComplement(buffer1[0..16], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0x0, 0x0, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd }));
 
     @memset(buffer1.ptr, 0xaa, buffer1.len);
     try a.set(-0x01_02030405_06070809_0a0b0c0d);
     bit_count = 12 * 8 + 2;
 
-    a.toConst().writeTwosComplement(buffer1, bit_count, 13, .Little);
+    a.toConst().writeTwosComplement(buffer1[0..13], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xaa, 0xaa, 0xaa }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 13, .Big);
+    a.toConst().writeTwosComplement(buffer1[0..13], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf3, 0xaa, 0xaa, 0xaa }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 16, .Little);
+    a.toConst().writeTwosComplement(buffer1[0..16], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 16, .Big);
+    a.toConst().writeTwosComplement(buffer1[0..16], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xff, 0xff, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf3 }));
 }
 
@@ -2660,17 +2660,15 @@ test "big int write twos complement +/- zero" {
     defer testing.allocator.free(buffer1);
     @memset(buffer1.ptr, 0xaa, buffer1.len);
 
-    var bit_count: usize = 0;
-
     // Test zero
 
-    m.toConst().writeTwosComplement(buffer1, bit_count, 13, .Little);
+    m.toConst().writeTwosComplement(buffer1[0..13], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 13) ++ ([_]u8{0xaa} ** 3))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 13, .Big);
+    m.toConst().writeTwosComplement(buffer1[0..13], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 13) ++ ([_]u8{0xaa} ** 3))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 16, .Little);
+    m.toConst().writeTwosComplement(buffer1[0..16], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 16))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 16, .Big);
+    m.toConst().writeTwosComplement(buffer1[0..16], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 16))));
 
     @memset(buffer1.ptr, 0xaa, buffer1.len);
@@ -2678,13 +2676,13 @@ test "big int write twos complement +/- zero" {
 
     // Test negative zero
 
-    m.toConst().writeTwosComplement(buffer1, bit_count, 13, .Little);
+    m.toConst().writeTwosComplement(buffer1[0..13], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 13) ++ ([_]u8{0xaa} ** 3))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 13, .Big);
+    m.toConst().writeTwosComplement(buffer1[0..13], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 13) ++ ([_]u8{0xaa} ** 3))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 16, .Little);
+    m.toConst().writeTwosComplement(buffer1[0..16], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 16))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 16, .Big);
+    m.toConst().writeTwosComplement(buffer1[0..16], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 16))));
 }
 
@@ -2705,62 +2703,82 @@ test "big int conversion write twos complement with padding" {
     // Test 0x01_02030405_06070809_0a0b0c0d
 
     buffer = &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0xb };
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xb, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd };
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0xab, 0xaa, 0xaa, 0xaa };
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xaa, 0xaa, 0xaa, 0xab, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd };
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x01_02030405_06070809_0a0b0c0d) == .eq);
 
+    bit_count = @sizeOf(Limb) * 8;
+
+    // Test 0x0a0a0a0a_02030405_06070809_0a0b0c0d
+
+    buffer = &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0xaa };
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
+    try testing.expect(m.toConst().orderAgainstScalar(@truncate(Limb, 0xaa_02030405_06070809_0a0b0c0d)) == .eq);
+
+    buffer = &[_]u8{ 0xaa, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd };
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
+    try testing.expect(m.toConst().orderAgainstScalar(@truncate(Limb, 0xaa_02030405_06070809_0a0b0c0d)) == .eq);
+
+    buffer = &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0xaa, 0xaa, 0xaa, 0xaa };
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
+    try testing.expect(m.toConst().orderAgainstScalar(@truncate(Limb, 0xaaaaaaaa_02030405_06070809_0a0b0c0d)) == .eq);
+
+    buffer = &[_]u8{ 0xaa, 0xaa, 0xaa, 0xaa, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd };
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
+    try testing.expect(m.toConst().orderAgainstScalar(@truncate(Limb, 0xaaaaaaaa_02030405_06070809_0a0b0c0d)) == .eq);
+
     bit_count = 12 * 8 + 2;
 
     // Test -0x01_02030405_06070809_0a0b0c0d
 
     buffer = &[_]u8{ 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0x02 };
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .signed);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .signed);
     try testing.expect(m.toConst().orderAgainstScalar(-0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0x02, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf3 };
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .signed);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .signed);
     try testing.expect(m.toConst().orderAgainstScalar(-0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0x02, 0xaa, 0xaa, 0xaa };
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .signed);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .signed);
     try testing.expect(m.toConst().orderAgainstScalar(-0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xaa, 0xaa, 0xaa, 0x02, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf3 };
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .signed);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .signed);
     try testing.expect(m.toConst().orderAgainstScalar(-0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     // Test 0
 
     buffer = &([_]u8{0} ** 16);
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
 
     bit_count = 0;
     buffer = &([_]u8{0xaa} ** 16);
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
 }
 
@@ -2779,15 +2797,15 @@ test "big int conversion write twos complement zero" {
     var buffer: []const u8 = undefined;
 
     buffer = &([_]u8{0} ** 13);
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
 
     buffer = &([_]u8{0} ** 16);
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
 }
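The call-site pattern in these tests summarizes the migration: instead of passing `bit_count` plus a separate `abi_size`, callers now slice the buffer to the intended size (and `readTwosComplement` keeps only `bit_count`):

    // before
    a.toConst().writeTwosComplement(buffer1, 493, abi_size, endian);
    m.readTwosComplement(buffer1, 493, abi_size, endian, .unsigned);
    // after
    a.toConst().writeTwosComplement(buffer1[0..abi_size], endian);
    m.readTwosComplement(buffer1[0..abi_size], 493, endian, .unsigned);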
 
src/codegen.zig
@@ -470,7 +470,7 @@ pub fn generateSymbol(
                 const abi_size = math.cast(usize, typed_value.ty.abiSize(target)) orelse return error.Overflow;
                 const start = code.items.len;
                 try code.resize(start + abi_size);
-                bigint.writeTwosComplement(code.items[start..][0..abi_size], info.bits, abi_size, endian);
+                bigint.writeTwosComplement(code.items[start..][0..abi_size], endian);
                 return Result{ .appended = {} };
             }
             switch (info.signedness) {
src/Sema.zig
@@ -26445,48 +26445,6 @@ fn bitCastVal(
     const target = sema.mod.getTarget();
     if (old_ty.eql(new_ty, sema.mod)) return val;
 
-    // Some conversions have a bitwise definition that ignores in-memory layout,
-    // such as converting between f80 and u80.
-
-    if (old_ty.eql(Type.f80, sema.mod) and new_ty.isAbiInt()) {
-        const float = val.toFloat(f80);
-        switch (new_ty.intInfo(target).signedness) {
-            .signed => {
-                const int = @bitCast(i80, float);
-                const limbs = try sema.arena.alloc(std.math.big.Limb, 2);
-                const big_int = std.math.big.int.Mutable.init(limbs, int);
-                return Value.fromBigInt(sema.arena, big_int.toConst());
-            },
-            .unsigned => {
-                const int = @bitCast(u80, float);
-                const limbs = try sema.arena.alloc(std.math.big.Limb, 2);
-                const big_int = std.math.big.int.Mutable.init(limbs, int);
-                return Value.fromBigInt(sema.arena, big_int.toConst());
-            },
-        }
-    }
-
-    if (new_ty.eql(Type.f80, sema.mod) and old_ty.isAbiInt()) {
-        var bigint_space: Value.BigIntSpace = undefined;
-        var bigint = try val.toBigIntAdvanced(&bigint_space, target, sema.kit(block, src));
-        switch (old_ty.intInfo(target).signedness) {
-            .signed => {
-                // This conversion cannot fail because we already checked bit size before
-                // calling bitCastVal.
-                const int = bigint.to(i80) catch unreachable;
-                const float = @bitCast(f80, int);
-                return Value.Tag.float_80.create(sema.arena, float);
-            },
-            .unsigned => {
-                // This conversion cannot fail because we already checked bit size before
-                // calling bitCastVal.
-                const int = bigint.to(u80) catch unreachable;
-                const float = @bitCast(f80, int);
-                return Value.Tag.float_80.create(sema.arena, float);
-            },
-        }
-    }
-
     // For types with well-defined memory layouts, we serialize them to a byte buffer,
     // then deserialize to the new type.
     const abi_size = try sema.usizeCast(block, src, old_ty.abiSize(target));
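With the f80 special cases deleted, an f80 <-> integer bitcast takes the same serialize/deserialize path as every other fixed-layout type. A minimal behavior-level illustration of the property that path must preserve (a sketch, not one of this commit's tests):

    test "f80 bitcast round-trips through memory" {
        const x: f80 = 1.5;
        const bits = @bitCast(u80, x);
        try @import("std").testing.expectEqual(x, @bitCast(f80, bits));
    }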
src/value.zig
@@ -1206,8 +1206,13 @@ pub const Value = extern union {
         };
     }
 
+    /// Write a Value's contents to `buffer`.
+    ///
+    /// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past
+    /// the end of the value in memory.
     pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) void {
         const target = mod.getTarget();
+        const endian = target.cpu.arch.endian();
         if (val.isUndef()) {
             const size = @intCast(usize, ty.abiSize(target));
             std.mem.set(u8, buffer[0..size], 0xaa);
@@ -1218,31 +1223,41 @@ pub const Value = extern union {
             .Bool => {
                 buffer[0] = @boolToInt(val.toBool());
             },
-            .Int => {
-                var bigint_buffer: BigIntSpace = undefined;
-                const bigint = val.toBigInt(&bigint_buffer, target);
-                const bits = ty.intInfo(target).bits;
-                const abi_size = @intCast(usize, ty.abiSize(target));
-                bigint.writeTwosComplement(buffer, bits, abi_size, target.cpu.arch.endian());
-            },
-            .Enum => {
+            .Int, .Enum => {
+                const int_info = ty.intInfo(target);
+                const bits = int_info.bits;
+                const byte_count = (bits + 7) / 8;
+
                 var enum_buffer: Payload.U64 = undefined;
                 const int_val = val.enumToInt(ty, &enum_buffer);
-                var bigint_buffer: BigIntSpace = undefined;
-                const bigint = int_val.toBigInt(&bigint_buffer, target);
-                const bits = ty.intInfo(target).bits;
-                const abi_size = @intCast(usize, ty.abiSize(target));
-                bigint.writeTwosComplement(buffer, bits, abi_size, target.cpu.arch.endian());
+
+                if (byte_count <= @sizeOf(u64)) {
+                    const int: u64 = switch (int_val.tag()) {
+                        .zero => 0,
+                        .one => 1,
+                        .int_u64 => int_val.castTag(.int_u64).?.data,
+                        .int_i64 => @bitCast(u64, int_val.castTag(.int_i64).?.data),
+                        else => unreachable,
+                    };
+                    for (buffer[0..byte_count]) |_, i| switch (endian) {
+                        .Little => buffer[i] = @truncate(u8, (int >> @intCast(u6, (8 * i)))),
+                        .Big => buffer[byte_count - i - 1] = @truncate(u8, (int >> @intCast(u6, (8 * i)))),
+                    };
+                } else {
+                    var bigint_buffer: BigIntSpace = undefined;
+                    const bigint = int_val.toBigInt(&bigint_buffer, target);
+                    bigint.writeTwosComplement(buffer[0..byte_count], endian);
+                }
             },
             .Float => switch (ty.floatBits(target)) {
-                16 => return floatWriteToMemory(f16, val.toFloat(f16), target, buffer),
-                32 => return floatWriteToMemory(f32, val.toFloat(f32), target, buffer),
-                64 => return floatWriteToMemory(f64, val.toFloat(f64), target, buffer),
-                80 => return floatWriteToMemory(f80, val.toFloat(f80), target, buffer),
-                128 => return floatWriteToMemory(f128, val.toFloat(f128), target, buffer),
+                16 => std.mem.writeInt(u16, buffer[0..2], @bitCast(u16, val.toFloat(f16)), endian),
+                32 => std.mem.writeInt(u32, buffer[0..4], @bitCast(u32, val.toFloat(f32)), endian),
+                64 => std.mem.writeInt(u64, buffer[0..8], @bitCast(u64, val.toFloat(f64)), endian),
+                80 => std.mem.writeInt(u80, buffer[0..10], @bitCast(u80, val.toFloat(f80)), endian),
+                128 => std.mem.writeInt(u128, buffer[0..16], @bitCast(u128, val.toFloat(f128)), endian),
                 else => unreachable,
             },
-            .Array, .Vector => {
+            .Array => {
                 const len = ty.arrayLen();
                 const elem_ty = ty.childType();
                 const elem_size = @intCast(usize, elem_ty.abiSize(target));
@@ -1251,10 +1266,16 @@ pub const Value = extern union {
                 var buf_off: usize = 0;
                 while (elem_i < len) : (elem_i += 1) {
                     const elem_val = val.elemValueBuffer(mod, elem_i, &elem_value_buf);
-                    writeToMemory(elem_val, elem_ty, mod, buffer[buf_off..]);
+                    elem_val.writeToMemory(elem_ty, mod, buffer[buf_off..]);
                     buf_off += elem_size;
                 }
             },
+            .Vector => {
+                // We use byte_count instead of abi_size here, so that any padding bytes
+                // follow the data bytes, on both big- and little-endian systems.
+                const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
+                writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0);
+            },
             .Struct => switch (ty.containerLayout()) {
                 .Auto => unreachable, // Sema is supposed to have emitted a compile error already
                 .Extern => {
@@ -1266,122 +1287,113 @@ pub const Value = extern union {
                     }
                 },
                 .Packed => {
-                    // TODO allocate enough heap space instead of using this buffer
-                    // on the stack.
-                    var buf: [16]std.math.big.Limb = undefined;
-                    const host_int = packedStructToInt(val, ty, target, &buf);
-                    const abi_size = @intCast(usize, ty.abiSize(target));
-                    const bit_size = @intCast(usize, ty.bitSize(target));
-                    host_int.writeTwosComplement(buffer, bit_size, abi_size, target.cpu.arch.endian());
+                    const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
+                    writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0);
                 },
             },
             .ErrorSet => {
                 // TODO revisit this when we have the concept of the error tag type
                 const Int = u16;
                 const int = mod.global_error_set.get(val.castTag(.@"error").?.data.name).?;
-                std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], @intCast(Int, int), target.cpu.arch.endian());
+                std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], @intCast(Int, int), endian);
             },
             else => @panic("TODO implement writeToMemory for more types"),
         }
     }
 
-    fn packedStructToInt(val: Value, ty: Type, target: Target, buf: []std.math.big.Limb) BigIntConst {
-        var bigint = BigIntMutable.init(buf, 0);
-        const fields = ty.structFields().values();
-        const field_vals = val.castTag(.aggregate).?.data;
-        var bits: u16 = 0;
-        // TODO allocate enough heap space instead of using this buffer
-        // on the stack.
-        var field_buf: [16]std.math.big.Limb = undefined;
-        var field_space: BigIntSpace = undefined;
-        var field_buf2: [16]std.math.big.Limb = undefined;
-        for (fields) |field, i| {
-            const field_val = field_vals[i];
-            const field_bigint_const = switch (field.ty.zigTypeTag()) {
-                .Void => continue,
-                .Float => floatToBigInt(field_val, field.ty, target, &field_buf),
-                .Int, .Bool => intOrBoolToBigInt(field_val, field.ty, target, &field_buf, &field_space),
-                .Struct => switch (field.ty.containerLayout()) {
-                    .Auto, .Extern => unreachable, // Sema should have error'd before this.
-                    .Packed => packedStructToInt(field_val, field.ty, target, &field_buf),
-                },
-                .Vector => vectorToBigInt(field_val, field.ty, target, &field_buf),
-                .Enum => enumToBigInt(field_val, field.ty, target, &field_space),
-                .Union => unreachable, // TODO: packed structs support packed unions
-                else => unreachable,
-            };
-            var field_bigint = BigIntMutable.init(&field_buf2, 0);
-            field_bigint.shiftLeft(field_bigint_const, bits);
-            bits += @intCast(u16, field.ty.bitSize(target));
-            bigint.bitOr(bigint.toConst(), field_bigint.toConst());
-        }
-        return bigint.toConst();
-    }
-
-    fn intOrBoolToBigInt(val: Value, ty: Type, target: Target, buf: []std.math.big.Limb, space: *BigIntSpace) BigIntConst {
-        const big_int_const = val.toBigInt(space, target);
-        if (big_int_const.positive) return big_int_const;
-
-        var big_int = BigIntMutable.init(buf, 0);
-        big_int.bitNotWrap(big_int_const.negate(), .unsigned, @intCast(u32, ty.bitSize(target)));
-        big_int.addScalar(big_int.toConst(), 1);
-        return big_int.toConst();
-    }
-
-    fn vectorToBigInt(val: Value, ty: Type, target: Target, buf: []std.math.big.Limb) BigIntConst {
+    /// Write a Value's contents to `buffer`.
+    ///
+    /// Both the start and the end of the provided buffer must be tight, since
+    /// big-endian packed memory layouts start at the end of the buffer.
+    pub fn writeToPackedMemory(val: Value, ty: Type, mod: *Module, buffer: []u8, bit_offset: usize) void {
+        const target = mod.getTarget();
         const endian = target.cpu.arch.endian();
-        var vec_bitint = BigIntMutable.init(buf, 0);
-        const vec_len = @intCast(usize, ty.arrayLen());
-        const elem_ty = ty.childType();
-        const elem_size = @intCast(usize, elem_ty.bitSize(target));
-
-        var elem_buf: [16]std.math.big.Limb = undefined;
-        var elem_space: BigIntSpace = undefined;
-        var elem_buf2: [16]std.math.big.Limb = undefined;
-
-        var elem_i: usize = 0;
-        while (elem_i < vec_len) : (elem_i += 1) {
-            const elem_i_target = if (endian == .Big) vec_len - elem_i - 1 else elem_i;
-            const elem_val = val.indexVectorlike(elem_i_target);
-            const elem_bigint_const = switch (elem_ty.zigTypeTag()) {
-                .Int, .Bool => intOrBoolToBigInt(elem_val, elem_ty, target, &elem_buf, &elem_space),
-                .Float => floatToBigInt(elem_val, elem_ty, target, &elem_buf),
-                .Pointer => unreachable, // TODO
-                else => unreachable, // Sema should not let this happen
-            };
-            var elem_bitint = BigIntMutable.init(&elem_buf2, 0);
-            elem_bitint.shiftLeft(elem_bigint_const, elem_size * elem_i);
-            vec_bitint.bitOr(vec_bitint.toConst(), elem_bitint.toConst());
+        if (val.isUndef()) {
+            const bit_size = @intCast(usize, ty.bitSize(target));
+            std.mem.writeVarPackedInt(buffer, bit_offset, bit_size, @as(u1, 0), endian);
+            return;
         }
-        return vec_bitint.toConst();
-    }
+        switch (ty.zigTypeTag()) {
+            .Void => {},
+            .Bool => {
+                const byte_index = switch (endian) {
+                    .Little => bit_offset / 8,
+                    .Big => buffer.len - bit_offset / 8 - 1,
+                };
+                if (val.toBool()) {
+                    buffer[byte_index] |= (@as(u8, 1) << @intCast(u3, bit_offset % 8));
+                } else {
+                    buffer[byte_index] &= ~(@as(u8, 1) << @intCast(u3, bit_offset % 8));
+                }
+            },
+            .Int, .Enum => {
+                const bits = ty.intInfo(target).bits;
+                const abi_size = @intCast(usize, ty.abiSize(target));
 
-    fn enumToBigInt(val: Value, ty: Type, target: Target, space: *BigIntSpace) BigIntConst {
-        var enum_buf: Payload.U64 = undefined;
-        const int_val = val.enumToInt(ty, &enum_buf);
-        return int_val.toBigInt(space, target);
-    }
+                var enum_buffer: Payload.U64 = undefined;
+                const int_val = val.enumToInt(ty, &enum_buffer);
 
-    fn floatToBigInt(val: Value, ty: Type, target: Target, buf: []std.math.big.Limb) BigIntConst {
-        return switch (ty.floatBits(target)) {
-            16 => bitcastFloatToBigInt(f16, val.toFloat(f16), buf),
-            32 => bitcastFloatToBigInt(f32, val.toFloat(f32), buf),
-            64 => bitcastFloatToBigInt(f64, val.toFloat(f64), buf),
-            80 => bitcastFloatToBigInt(f80, val.toFloat(f80), buf),
-            128 => bitcastFloatToBigInt(f128, val.toFloat(f128), buf),
-            else => unreachable,
-        };
-    }
+                if (abi_size <= @sizeOf(u64)) {
+                    const int: u64 = switch (int_val.tag()) {
+                        .zero => 0,
+                        .one => 1,
+                        .int_u64 => int_val.castTag(.int_u64).?.data,
+                        .int_i64 => @bitCast(u64, int_val.castTag(.int_i64).?.data),
+                        else => unreachable,
+                    };
+                    std.mem.writeVarPackedInt(buffer, bit_offset, bits, int, endian);
+                } else {
+                    var bigint_buffer: BigIntSpace = undefined;
+                    const bigint = int_val.toBigInt(&bigint_buffer, target);
+                    bigint.writePackedTwosComplement(buffer, bit_offset, bits, endian);
+                }
+            },
+            .Float => switch (ty.floatBits(target)) {
+                16 => std.mem.writePackedInt(u16, buffer, bit_offset, @bitCast(u16, val.toFloat(f16)), endian),
+                32 => std.mem.writePackedInt(u32, buffer, bit_offset, @bitCast(u32, val.toFloat(f32)), endian),
+                64 => std.mem.writePackedInt(u64, buffer, bit_offset, @bitCast(u64, val.toFloat(f64)), endian),
+                80 => std.mem.writePackedInt(u80, buffer, bit_offset, @bitCast(u80, val.toFloat(f80)), endian),
+                128 => std.mem.writePackedInt(u128, buffer, bit_offset, @bitCast(u128, val.toFloat(f128)), endian),
+                else => unreachable,
+            },
+            .Vector => {
+                const len = ty.arrayLen();
+                const elem_ty = ty.childType();
+                const elem_bit_size = @intCast(u16, elem_ty.bitSize(target));
 
-    fn bitcastFloatToBigInt(comptime F: type, f: F, buf: []std.math.big.Limb) BigIntConst {
-        const Int = @Type(.{ .Int = .{
-            .signedness = .unsigned,
-            .bits = @typeInfo(F).Float.bits,
-        } });
-        const int = @bitCast(Int, f);
-        return BigIntMutable.init(buf, int).toConst();
+                var bits: u16 = 0;
+                var elem_i: usize = 0;
+                var elem_value_buf: ElemValueBuffer = undefined;
+                while (elem_i < len) : (elem_i += 1) {
+                    // On big-endian systems, LLVM reverses the element order of vectors by default
+                    const tgt_elem_i = if (endian == .Big) len - elem_i - 1 else elem_i;
+                    const elem_val = val.elemValueBuffer(mod, tgt_elem_i, &elem_value_buf);
+                    elem_val.writeToPackedMemory(elem_ty, mod, buffer, bit_offset + bits);
+                    bits += elem_bit_size;
+                }
+            },
+            .Struct => switch (ty.containerLayout()) {
+                .Auto => unreachable, // Sema is supposed to have emitted a compile error already
+                .Extern => unreachable, // Handled in non-packed writeToMemory
+                .Packed => {
+                    var bits: u16 = 0;
+                    const fields = ty.structFields().values();
+                    const field_vals = val.castTag(.aggregate).?.data;
+                    for (fields) |field, i| {
+                        const field_bits = @intCast(u16, field.ty.bitSize(target));
+                        field_vals[i].writeToPackedMemory(field.ty, mod, buffer, bit_offset + bits);
+                        bits += field_bits;
+                    }
+                },
+            },
+            else => @panic("TODO implement writeToPackedMemory for more types"),
+        }
     }
 
+    /// Load a Value from the contents of `buffer`.
+    ///
+    /// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past
+    /// the end of the value in memory.
     pub fn readFromMemory(
         ty: Type,
         mod: *Module,
@@ -1389,6 +1401,7 @@ pub const Value = extern union {
         arena: Allocator,
     ) Allocator.Error!Value {
         const target = mod.getTarget();
+        const endian = target.cpu.arch.endian();
         switch (ty.zigTypeTag()) {
             .Void => return Value.@"void",
             .Bool => {
@@ -1398,27 +1411,40 @@ pub const Value = extern union {
                     return Value.@"true";
                 }
             },
-            .Int => {
-                if (buffer.len == 0) return Value.zero;
+            .Int, .Enum => {
                 const int_info = ty.intInfo(target);
-                const endian = target.cpu.arch.endian();
-                const Limb = std.math.big.Limb;
-                const limb_count = (buffer.len + @sizeOf(Limb) - 1) / @sizeOf(Limb);
-                const limbs_buffer = try arena.alloc(Limb, limb_count);
-                const abi_size = @intCast(usize, ty.abiSize(target));
-                var bigint = BigIntMutable.init(limbs_buffer, 0);
-                bigint.readTwosComplement(buffer, int_info.bits, abi_size, endian, int_info.signedness);
-                return fromBigInt(arena, bigint.toConst());
+                const bits = int_info.bits;
+                const byte_count = (bits + 7) / 8;
+                if (bits == 0 or buffer.len == 0) return Value.zero;
+
+                if (bits <= 64) switch (int_info.signedness) { // Fast path for integers <= u64
+                    .signed => {
+                        const val = std.mem.readVarInt(i64, buffer[0..byte_count], endian);
+                        return Value.Tag.int_i64.create(arena, (val << @intCast(u6, 64 - bits)) >> @intCast(u6, 64 - bits));
+                    },
+                    .unsigned => {
+                        const val = std.mem.readVarInt(u64, buffer[0..byte_count], endian);
+                        return Value.Tag.int_u64.create(arena, (val << @intCast(u6, 64 - bits)) >> @intCast(u6, 64 - bits));
+                    },
+                } else { // Slow path, we have to construct a big-int
+                    const Limb = std.math.big.Limb;
+                    const limb_count = (byte_count + @sizeOf(Limb) - 1) / @sizeOf(Limb);
+                    const limbs_buffer = try arena.alloc(Limb, limb_count);
+
+                    var bigint = BigIntMutable.init(limbs_buffer, 0);
+                    bigint.readTwosComplement(buffer[0..byte_count], bits, endian, int_info.signedness);
+                    return fromBigInt(arena, bigint.toConst());
+                }
             },
             .Float => switch (ty.floatBits(target)) {
-                16 => return Value.Tag.float_16.create(arena, floatReadFromMemory(f16, target, buffer)),
-                32 => return Value.Tag.float_32.create(arena, floatReadFromMemory(f32, target, buffer)),
-                64 => return Value.Tag.float_64.create(arena, floatReadFromMemory(f64, target, buffer)),
-                80 => return Value.Tag.float_80.create(arena, floatReadFromMemory(f80, target, buffer)),
-                128 => return Value.Tag.float_128.create(arena, floatReadFromMemory(f128, target, buffer)),
+                16 => return Value.Tag.float_16.create(arena, @bitCast(f16, std.mem.readInt(u16, buffer[0..2], endian))),
+                32 => return Value.Tag.float_32.create(arena, @bitCast(f32, std.mem.readInt(u32, buffer[0..4], endian))),
+                64 => return Value.Tag.float_64.create(arena, @bitCast(f64, std.mem.readInt(u64, buffer[0..8], endian))),
+                80 => return Value.Tag.float_80.create(arena, @bitCast(f80, std.mem.readInt(u80, buffer[0..10], endian))),
+                128 => return Value.Tag.float_128.create(arena, @bitCast(f128, std.mem.readInt(u128, buffer[0..16], endian))),
                 else => unreachable,
             },
-            .Array, .Vector => {
+            .Array => {
                 const elem_ty = ty.childType();
                 const elem_size = elem_ty.abiSize(target);
                 const elems = try arena.alloc(Value, @intCast(usize, ty.arrayLen()));
@@ -1429,6 +1455,12 @@ pub const Value = extern union {
                 }
                 return Tag.aggregate.create(arena, elems);
             },
+            .Vector => {
+                // We use byte_count instead of abi_size here, so that any padding bytes
+                // follow the data bytes, on both big- and little-endian systems.
+                const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
+                return readFromPackedMemory(ty, mod, buffer[0..byte_count], 0, arena);
+            },
             .Struct => switch (ty.containerLayout()) {
                 .Auto => unreachable, // Sema is supposed to have emitted a compile error already
                 .Extern => {
@@ -1436,26 +1468,20 @@ pub const Value = extern union {
                     const field_vals = try arena.alloc(Value, fields.len);
                     for (fields) |field, i| {
                         const off = @intCast(usize, ty.structFieldOffset(i, target));
-                        field_vals[i] = try readFromMemory(field.ty, mod, buffer[off..], arena);
+                        const sz = @intCast(usize, ty.structFieldType(i).abiSize(target));
+                        field_vals[i] = try readFromMemory(field.ty, mod, buffer[off..(off + sz)], arena);
                     }
                     return Tag.aggregate.create(arena, field_vals);
                 },
                 .Packed => {
-                    const endian = target.cpu.arch.endian();
-                    const Limb = std.math.big.Limb;
-                    const abi_size = @intCast(usize, ty.abiSize(target));
-                    const bit_size = @intCast(usize, ty.bitSize(target));
-                    const limb_count = (buffer.len + @sizeOf(Limb) - 1) / @sizeOf(Limb);
-                    const limbs_buffer = try arena.alloc(Limb, limb_count);
-                    var bigint = BigIntMutable.init(limbs_buffer, 0);
-                    bigint.readTwosComplement(buffer, bit_size, abi_size, endian, .unsigned);
-                    return intToPackedStruct(ty, target, bigint.toConst(), arena);
+                    const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
+                    return readFromPackedMemory(ty, mod, buffer[0..byte_count], 0, arena);
                 },
             },
             .ErrorSet => {
                 // TODO revisit this when we have the concept of the error tag type
                 const Int = u16;
-                const int = std.mem.readInt(Int, buffer[0..@sizeOf(Int)], target.cpu.arch.endian());
+                const int = std.mem.readInt(Int, buffer[0..@sizeOf(Int)], endian);
 
                 const payload = try arena.create(Value.Payload.Error);
                 payload.* = .{
@@ -1468,115 +1494,90 @@ pub const Value = extern union {
         }
     }
 
-    fn intToPackedStruct(
+    /// Load a Value from the contents of `buffer`.
+    ///
+    /// Both the start and the end of the provided buffer must be tight, since
+    /// big-endian packed memory layouts start at the end of the buffer.
+    pub fn readFromPackedMemory(
         ty: Type,
-        target: Target,
-        bigint: BigIntConst,
+        mod: *Module,
+        buffer: []const u8,
+        bit_offset: usize,
         arena: Allocator,
     ) Allocator.Error!Value {
-        const limbs_buffer = try arena.alloc(std.math.big.Limb, bigint.limbs.len);
-        var bigint_mut = bigint.toMutable(limbs_buffer);
-        const fields = ty.structFields().values();
-        const field_vals = try arena.alloc(Value, fields.len);
-        var bits: u16 = 0;
-        for (fields) |field, i| {
-            const field_bits = @intCast(u16, field.ty.bitSize(target));
-            bigint_mut.shiftRight(bigint, bits);
-            bigint_mut.truncate(bigint_mut.toConst(), .unsigned, field_bits);
-            bits += field_bits;
-            const field_bigint = bigint_mut.toConst();
-
-            field_vals[i] = switch (field.ty.zigTypeTag()) {
-                .Float => switch (field.ty.floatBits(target)) {
-                    16 => try bitCastBigIntToFloat(f16, .float_16, field_bigint, arena),
-                    32 => try bitCastBigIntToFloat(f32, .float_32, field_bigint, arena),
-                    64 => try bitCastBigIntToFloat(f64, .float_64, field_bigint, arena),
-                    80 => try bitCastBigIntToFloat(f80, .float_80, field_bigint, arena),
-                    128 => try bitCastBigIntToFloat(f128, .float_128, field_bigint, arena),
-                    else => unreachable,
-                },
-                .Bool => makeBool(!field_bigint.eqZero()),
-                .Int => try Tag.int_big_positive.create(
-                    arena,
-                    try arena.dupe(std.math.big.Limb, field_bigint.limbs),
-                ),
-                .Struct => try intToPackedStruct(field.ty, target, field_bigint, arena),
-                else => unreachable,
-            };
-        }
-        return Tag.aggregate.create(arena, field_vals);
-    }
-
-    fn bitCastBigIntToFloat(
-        comptime F: type,
-        comptime float_tag: Tag,
-        bigint: BigIntConst,
-        arena: Allocator,
-    ) !Value {
-        const Int = @Type(.{ .Int = .{
-            .signedness = .unsigned,
-            .bits = @typeInfo(F).Float.bits,
-        } });
-        const int = bigint.to(Int) catch |err| switch (err) {
-            error.NegativeIntoUnsigned => unreachable,
-            error.TargetTooSmall => unreachable,
-        };
-        const f = @bitCast(F, int);
-        return float_tag.create(arena, f);
-    }
-
-    fn floatWriteToMemory(comptime F: type, f: F, target: Target, buffer: []u8) void {
+        const target = mod.getTarget();
         const endian = target.cpu.arch.endian();
-        if (F == f80) {
-            const repr = std.math.break_f80(f);
-            std.mem.writeInt(u64, buffer[0..8], repr.fraction, endian);
-            std.mem.writeInt(u16, buffer[8..10], repr.exp, endian);
-            std.mem.set(u8, buffer[10..], 0);
-            return;
-        }
-        const Int = @Type(.{ .Int = .{
-            .signedness = .unsigned,
-            .bits = @typeInfo(F).Float.bits,
-        } });
-        const int = @bitCast(Int, f);
-        std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], int, endian);
-    }
+        switch (ty.zigTypeTag()) {
+            .Void => return Value.@"void",
+            .Bool => {
+                const byte = switch (endian) {
+                    .Big => buffer[buffer.len - bit_offset / 8 - 1],
+                    .Little => buffer[bit_offset / 8],
+                };
+                if (((byte >> @intCast(u3, bit_offset % 8)) & 1) == 0) {
+                    return Value.@"false";
+                } else {
+                    return Value.@"true";
+                }
+            },
+            .Int, .Enum => {
+                if (buffer.len == 0) return Value.zero;
+                const int_info = ty.intInfo(target);
+                const abi_size = @intCast(usize, ty.abiSize(target));
 
-    fn floatReadFromMemory(comptime F: type, target: Target, buffer: []const u8) F {
-        const endian = target.cpu.arch.endian();
-        if (F == f80) {
-            return std.math.make_f80(.{
-                .fraction = readInt(u64, buffer[0..8], endian),
-                .exp = readInt(u16, buffer[8..10], endian),
-            });
-        }
-        const Int = @Type(.{ .Int = .{
-            .signedness = .unsigned,
-            .bits = @typeInfo(F).Float.bits,
-        } });
-        const int = readInt(Int, buffer[0..@sizeOf(Int)], endian);
-        return @bitCast(F, int);
-    }
-
-    fn readInt(comptime Int: type, buffer: *const [@sizeOf(Int)]u8, endian: std.builtin.Endian) Int {
-        var result: Int = 0;
-        switch (endian) {
-            .Big => {
-                for (buffer) |byte| {
-                    result <<= 8;
-                    result |= byte;
+                const bits = int_info.bits;
+                if (bits <= 64) switch (int_info.signedness) { // Fast path for integers <= u64
+                    .signed => return Value.Tag.int_i64.create(arena, std.mem.readVarPackedInt(i64, buffer, bit_offset, bits, endian, .signed)),
+                    .unsigned => return Value.Tag.int_u64.create(arena, std.mem.readVarPackedInt(u64, buffer, bit_offset, bits, endian, .unsigned)),
+                } else { // Slow path, we have to construct a big-int
+                    const Limb = std.math.big.Limb;
+                    const limb_count = (abi_size + @sizeOf(Limb) - 1) / @sizeOf(Limb);
+                    const limbs_buffer = try arena.alloc(Limb, limb_count);
+
+                    var bigint = BigIntMutable.init(limbs_buffer, 0);
+                    bigint.readPackedTwosComplement(buffer, bit_offset, bits, endian, int_info.signedness);
+                    return fromBigInt(arena, bigint.toConst());
                 }
             },
-            .Little => {
-                var i: usize = buffer.len;
-                while (i != 0) {
-                    i -= 1;
-                    result <<= 8;
-                    result |= buffer[i];
+            .Float => switch (ty.floatBits(target)) {
+                16 => return Value.Tag.float_16.create(arena, @bitCast(f16, std.mem.readPackedInt(u16, buffer, bit_offset, endian))),
+                32 => return Value.Tag.float_32.create(arena, @bitCast(f32, std.mem.readPackedInt(u32, buffer, bit_offset, endian))),
+                64 => return Value.Tag.float_64.create(arena, @bitCast(f64, std.mem.readPackedInt(u64, buffer, bit_offset, endian))),
+                80 => return Value.Tag.float_80.create(arena, @bitCast(f80, std.mem.readPackedInt(u80, buffer, bit_offset, endian))),
+                128 => return Value.Tag.float_128.create(arena, @bitCast(f128, std.mem.readPackedInt(u128, buffer, bit_offset, endian))),
+                else => unreachable,
+            },
+            .Vector => {
+                const elem_ty = ty.childType();
+                const elems = try arena.alloc(Value, @intCast(usize, ty.arrayLen()));
+
+                var bits: u16 = 0;
+                const elem_bit_size = @intCast(u16, elem_ty.bitSize(target));
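+                // Packed vector elements are bit-packed back to back, so advance by the
+                // element's bit size rather than its ABI size.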
+                for (elems) |_, i| {
+                    // On big-endian systems, LLVM reverses the element order of vectors by default
+                    const tgt_elem_i = if (endian == .Big) elems.len - i - 1 else i;
+                    elems[tgt_elem_i] = try readFromPackedMemory(elem_ty, mod, buffer, bit_offset + bits, arena);
+                    bits += elem_bit_size;
                 }
+                return Tag.aggregate.create(arena, elems);
             },
+            .Struct => switch (ty.containerLayout()) {
+                .Auto => unreachable, // Sema is supposed to have emitted a compile error already
+                .Extern => unreachable, // Handled by non-packed readFromMemory
+                .Packed => {
+                    var bits: u16 = 0;
+                    const fields = ty.structFields().values();
+                    const field_vals = try arena.alloc(Value, fields.len);
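+                    // Packed struct fields occupy consecutive bit ranges in declaration
+                    // order, starting at the struct's bit offset in the buffer.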
+                    for (fields) |field, i| {
+                        const field_bits = @intCast(u16, field.ty.bitSize(target));
+                        field_vals[i] = try readFromPackedMemory(field.ty, mod, buffer, bit_offset + bits, arena);
+                        bits += field_bits;
+                    }
+                    return Tag.aggregate.create(arena, field_vals);
+                },
+            },
+            else => @panic("TODO implement readFromPackedMemory for more types"),
         }
-        return result;
     }
 
     /// Asserts that the value is a float or an integer.
test/behavior/bitcast.zig
@@ -63,6 +63,10 @@ fn testBitCast(comptime N: usize) !void {
     try expect(conv_iN(N, 0) == 0);
 
     try expect(conv_iN(N, -0) == 0);
+
+    if (N > 24) {
+        try expect(conv_uN(N, 0xf23456) == 0xf23456);
+    }
 }
 
 fn conv_iN(comptime N: usize, x: std.meta.Int(.signed, N)) std.meta.Int(.unsigned, N) {
@@ -73,6 +77,55 @@ fn conv_uN(comptime N: usize, x: std.meta.Int(.unsigned, N)) std.meta.Int(.signe
     return @bitCast(std.meta.Int(.signed, N), x);
 }
 
+test "bitcast uX to bytes" {
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
+
+    const bit_values = [_]usize{ 1, 48, 27, 512, 493, 293, 125, 204, 112 };
+    inline for (bit_values) |bits| {
+        try testBitCast(bits);
+        comptime try testBitCast(bits);
+        try testBitCastuXToBytes(bits);
+    }
+}
+
+fn testBitCastuXToBytes(comptime N: usize) !void {
+
+    // The locations of padding bits in these layouts are technically not defined
+    // by LLVM, but we currently allow exotic integers to be cast (at comptime)
+    // to types that expose their padding bits anyway.
+    //
+    // This test at least makes sure those bits are matched by the runtime behavior
+    // on the platforms we target. If the above behavior is restricted after all,
+    // this test should be deleted.
+
+    const T = std.meta.Int(.unsigned, N);
+    for ([_]T{ 0, ~@as(T, 0) }) |init_value| {
+        var x: T = init_value;
+        const bytes = std.mem.asBytes(&x);
+
+        // Every fully-occupied byte of `x` must match this fill pattern
+        // (0x00 when x == 0, 0xff when x == ~0); padding bits are ignored.
+        const fill: u8 = if (init_value == 0) 0x00 else 0xff;
+
+        const byte_count = (N + 7) / 8;
+        switch (builtin.cpu.arch.endian()) {
+            .Little => {
+                var byte_i: usize = 0;
+                while (byte_i < (byte_count - 1)) : (byte_i += 1) {
+                    try expect(bytes[byte_i] == fill);
+                }
+                // Shift the padding bits out of the partially-occupied byte before comparing.
+                try expect(((bytes[byte_i] ^ fill) << -%@truncate(u3, N)) == 0);
+            },
+            .Big => {
+                var byte_i: usize = byte_count - 1;
+                while (byte_i > 0) : (byte_i -= 1) {
+                    try expect(bytes[byte_i] == fill);
+                }
+                try expect(((bytes[byte_i] ^ fill) << -%@truncate(u3, N)) == 0);
+            },
+        }
+    }
+}
+
 test "nested bitcast" {
     const S = struct {
         fn moo(x: isize) !void {
@@ -283,7 +336,8 @@ test "@bitCast packed struct of floats" {
     comptime try S.doTheTest();
 }
 
-test "comptime @bitCast packed struct to int" {
+test "comptime @bitCast packed struct to int and back" {
+    if (builtin.zig_backend == .stage1) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
@@ -304,6 +358,37 @@ test "comptime @bitCast packed struct to int" {
         vectorf: @Vector(2, f16) = .{ 3.14, 2.71 },
     };
     const Int = @typeInfo(S).Struct.backing_integer.?;
+
+    // S -> Int
     var s: S = .{};
     try expectEqual(@bitCast(Int, s), comptime @bitCast(Int, S{}));
+
+    // Int -> S
+    var i: Int = 0;
+    const rt_cast = @bitCast(S, i);
+    const ct_cast = comptime @bitCast(S, @as(Int, 0));
+    inline for (@typeInfo(S).Struct.fields) |field| {
+        if (@typeInfo(field.field_type) == .Vector)
+            continue; //TODO: https://github.com/ziglang/zig/issues/13201
+
+        try expectEqual(@field(rt_cast, field.name), @field(ct_cast, field.name));
+    }
+}
+
+test "comptime bitcast with fields following a float" {
+    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO: https://github.com/ziglang/zig/issues/13214
+
+    const FloatT = extern struct { f: f80, x: u128 };
+    var x: FloatT = .{ .f = 0.5, .x = 123 };
+    try expect(@bitCast(u256, x) == comptime @bitCast(u256, @as(FloatT, .{ .f = 0.5, .x = 123 })));
+}
+
+test "bitcast vector to integer and back" {
+    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO: https://github.com/ziglang/zig/issues/13220
+    if (builtin.zig_backend == .stage1) return error.SkipZigTest; // stage1 gets the comptime cast wrong
+
+    const arr: [16]bool = [_]bool{ true, false } ++ [_]bool{true} ** 14;
+    var x = @splat(16, true);
+    x[1] = false;
+    try expect(@bitCast(u16, x) == comptime @bitCast(u16, @as(@Vector(16, bool), arr)));
 }