Commit ef417f19e1

Cody Tapscott <topolarity@tapscott.me>
2022-02-13 23:04:46
stage2: Implement `@bitReverse` and `@byteSwap`
This change implements the above built-ins for Sema and the LLVM backend. Other backends have had placeholders added for lowering.
1 parent dee96e2
lib/std/math/big/int.zig
@@ -745,6 +745,132 @@ pub const Mutable = struct {
         rma.truncate(rma.toConst(), signedness, bit_count);
     }
 
+    /// r = @bitReverse(a) with 2s-complement semantics.
+    /// r and a may be aliases.
+    ///
+    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
+    /// r is `calcTwosCompLimbCount(bit_count)`.
+    pub fn bitReverse(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
+        if (bit_count == 0) return;
+
+        r.copy(a);
+
+        const limbs_required = calcTwosCompLimbCount(bit_count);
+
+        if (!a.positive) {
+            r.positive = true; // Negate.
+            r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
+            r.addScalar(r.toConst(), 1); // Add one.
+        } else if (limbs_required > a.limbs.len) {
+            // Zero-extend to our output length
+            for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
+                limb.* = 0;
+            }
+            r.len = limbs_required;
+        }
+
+        // 0b0..01..1000 with @log2(@sizeOf(Limb)) consecutive ones
+        const endian_mask: usize = (@sizeOf(Limb) - 1) << 3;
+
+        var bytes = std.mem.sliceAsBytes(r.limbs);
+        var bits = std.packed_int_array.PackedIntSliceEndian(u1, .Little).init(bytes, limbs_required * @bitSizeOf(Limb));
+
+        var k: usize = 0;
+        while (k < ((bit_count + 1) / 2)) : (k += 1) {
+            var i = k;
+            var rev_i = bit_count - i - 1;
+
+            // On big-endian targets, XOR-ing a bit index with this mask swaps which
+            // byte within the limb the bit lives in, without changing which limb we are indexing
+            if (native_endian == .Big) {
+                i ^= endian_mask;
+                rev_i ^= endian_mask;
+            }
+
+            const bit_i = bits.get(i);
+            const bit_rev_i = bits.get(rev_i);
+            bits.set(i, bit_rev_i);
+            bits.set(rev_i, bit_i);
+        }
+
+        // Calculate signed-magnitude representation for output
+        if (signedness == .signed) {
+            const last_bit = switch (native_endian) {
+                .Little => bits.get(bit_count - 1),
+                .Big => bits.get((bit_count - 1) ^ endian_mask),
+            };
+            if (last_bit == 1) {
+                r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
+                r.addScalar(r.toConst(), 1); // Add one.
+                r.positive = false; // Negate.
+            }
+        }
+        r.normalize(r.len);
+    }
+
+    /// r = @byteSwap(a) with 2s-complement semantics.
+    /// r and a may be aliases.
+    ///
+    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
+    /// r is `calcTwosCompLimbCount(8*byte_count)`.
+    pub fn byteSwap(r: *Mutable, a: Const, signedness: Signedness, byte_count: usize) void {
+        if (byte_count == 0) return;
+
+        r.copy(a);
+        const limbs_required = calcTwosCompLimbCount(8 * byte_count);
+
+        if (!a.positive) {
+            r.positive = true; // Negate.
+            r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
+            r.addScalar(r.toConst(), 1); // Add one.
+        } else if (limbs_required > a.limbs.len) {
+            // Zero-extend to our output length
+            for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
+                limb.* = 0;
+            }
+            r.len = limbs_required;
+        }
+
+        // 0b0..01..1 with @log2(@sizeOf(Limb)) trailing ones
+        const endian_mask: usize = @sizeOf(Limb) - 1;
+
+        var bytes = std.mem.sliceAsBytes(r.limbs);
+        assert(bytes.len >= byte_count);
+
+        var k: usize = 0;
+        while (k < (byte_count + 1) / 2) : (k += 1) {
+            var i = k;
+            var rev_i = byte_count - k - 1;
+
+            // On big-endian targets, XOR-ing a byte index with this mask remaps a low (LE)
+            // byte to the corresponding high (BE) byte within the same limb, without changing which limb we are indexing
+            if (native_endian == .Big) {
+                i ^= endian_mask;
+                rev_i ^= endian_mask;
+            }
+
+            const byte_i = bytes[i];
+            const byte_rev_i = bytes[rev_i];
+            bytes[rev_i] = byte_i;
+            bytes[i] = byte_rev_i;
+        }
+
+        // Calculate signed-magnitude representation for output
+        if (signedness == .signed) {
+            const last_byte = switch (native_endian) {
+                .Little => bytes[byte_count - 1],
+                .Big => bytes[(byte_count - 1) ^ endian_mask],
+            };
+
+            if (last_byte & (1 << 7) != 0) { // Check sign bit of last byte
+                r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
+                r.addScalar(r.toConst(), 1); // Add one.
+                r.positive = false; // Negate.
+            }
+        }
+        r.normalize(r.len);
+    }
+
     /// r = @popCount(a) with 2s-complement semantics.
     /// r and a may be aliases.
     ///
lib/std/math/big/int_test.zig
@@ -7,6 +7,7 @@ const Limb = std.math.big.Limb;
 const SignedLimb = std.math.big.SignedLimb;
 const DoubleLimb = std.math.big.DoubleLimb;
 const SignedDoubleLimb = std.math.big.SignedDoubleLimb;
+const calcTwosCompLimbCount = std.math.big.int.calcTwosCompLimbCount;
 const maxInt = std.math.maxInt;
 const minInt = std.math.minInt;
 
@@ -2689,3 +2690,107 @@ test "big int conversion write twos complement zero" {
     m.readTwosComplement(buffer, bit_count, 16, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
 }
+
+fn bitReverseTest(comptime T: type, comptime input: comptime_int, comptime expected_output: comptime_int) !void {
+    const bit_count = @typeInfo(T).Int.bits;
+    const signedness = @typeInfo(T).Int.signedness;
+
+    var a = try Managed.initSet(testing.allocator, input);
+    defer a.deinit();
+
+    try a.ensureCapacity(calcTwosCompLimbCount(bit_count));
+    var m = a.toMutable();
+    m.bitReverse(a.toConst(), signedness, bit_count);
+    try testing.expect(m.toConst().orderAgainstScalar(expected_output) == .eq);
+}
+
+test "big int bit reverse" {
+    var a = try Managed.initSet(testing.allocator, 0x01_ffffffff_ffffffff_ffffffff);
+    defer a.deinit();
+
+    try bitReverseTest(u0, 0, 0);
+    try bitReverseTest(u5, 0x12, 0x09);
+    try bitReverseTest(u8, 0x12, 0x48);
+    try bitReverseTest(u16, 0x1234, 0x2c48);
+    try bitReverseTest(u24, 0x123456, 0x6a2c48);
+    try bitReverseTest(u32, 0x12345678, 0x1e6a2c48);
+    try bitReverseTest(u40, 0x123456789a, 0x591e6a2c48);
+    try bitReverseTest(u48, 0x123456789abc, 0x3d591e6a2c48);
+    try bitReverseTest(u56, 0x123456789abcde, 0x7b3d591e6a2c48);
+    try bitReverseTest(u64, 0x123456789abcdef1, 0x8f7b3d591e6a2c48);
+    try bitReverseTest(u95, 0x123456789abcdef111213141, 0x4146424447bd9eac8f351624);
+    try bitReverseTest(u96, 0x123456789abcdef111213141, 0x828c84888f7b3d591e6a2c48);
+    try bitReverseTest(u128, 0x123456789abcdef11121314151617181, 0x818e868a828c84888f7b3d591e6a2c48);
+
+    try bitReverseTest(i8, @bitCast(i8, @as(u8, 0x92)), @bitCast(i8, @as(u8, 0x49)));
+    try bitReverseTest(i16, @bitCast(i16, @as(u16, 0x1234)), @bitCast(i16, @as(u16, 0x2c48)));
+    try bitReverseTest(i24, @bitCast(i24, @as(u24, 0x123456)), @bitCast(i24, @as(u24, 0x6a2c48)));
+    try bitReverseTest(i24, @bitCast(i24, @as(u24, 0x12345f)), @bitCast(i24, @as(u24, 0xfa2c48)));
+    try bitReverseTest(i24, @bitCast(i24, @as(u24, 0xf23456)), @bitCast(i24, @as(u24, 0x6a2c4f)));
+    try bitReverseTest(i32, @bitCast(i32, @as(u32, 0x12345678)), @bitCast(i32, @as(u32, 0x1e6a2c48)));
+    try bitReverseTest(i32, @bitCast(i32, @as(u32, 0xf2345678)), @bitCast(i32, @as(u32, 0x1e6a2c4f)));
+    try bitReverseTest(i32, @bitCast(i32, @as(u32, 0x1234567f)), @bitCast(i32, @as(u32, 0xfe6a2c48)));
+    try bitReverseTest(i40, @bitCast(i40, @as(u40, 0x123456789a)), @bitCast(i40, @as(u40, 0x591e6a2c48)));
+    try bitReverseTest(i48, @bitCast(i48, @as(u48, 0x123456789abc)), @bitCast(i48, @as(u48, 0x3d591e6a2c48)));
+    try bitReverseTest(i56, @bitCast(i56, @as(u56, 0x123456789abcde)), @bitCast(i56, @as(u56, 0x7b3d591e6a2c48)));
+    try bitReverseTest(i64, @bitCast(i64, @as(u64, 0x123456789abcdef1)), @bitCast(i64, @as(u64, 0x8f7b3d591e6a2c48)));
+    try bitReverseTest(i96, @bitCast(i96, @as(u96, 0x123456789abcdef111213141)), @bitCast(i96, @as(u96, 0x828c84888f7b3d591e6a2c48)));
+    try bitReverseTest(i128, @bitCast(i128, @as(u128, 0x123456789abcdef11121314151617181)), @bitCast(i128, @as(u128, 0x818e868a828c84888f7b3d591e6a2c48)));
+}
+
+fn byteSwapTest(comptime T: type, comptime input: comptime_int, comptime expected_output: comptime_int) !void {
+    const byte_count = @typeInfo(T).Int.bits / 8;
+    const signedness = @typeInfo(T).Int.signedness;
+
+    var a = try Managed.initSet(testing.allocator, input);
+    defer a.deinit();
+
+    try a.ensureCapacity(calcTwosCompLimbCount(8 * byte_count));
+    var m = a.toMutable();
+    m.byteSwap(a.toConst(), signedness, byte_count);
+    try testing.expect(m.toConst().orderAgainstScalar(expected_output) == .eq);
+}
+
+test "big int byte swap" {
+    var a = try Managed.initSet(testing.allocator, 0x01_ffffffff_ffffffff_ffffffff);
+    defer a.deinit();
+
+    @setEvalBranchQuota(10_000);
+
+    try byteSwapTest(u0, 0, 0);
+    try byteSwapTest(u8, 0x12, 0x12);
+    try byteSwapTest(u16, 0x1234, 0x3412);
+    try byteSwapTest(u24, 0x123456, 0x563412);
+    try byteSwapTest(u32, 0x12345678, 0x78563412);
+    try byteSwapTest(u40, 0x123456789a, 0x9a78563412);
+    try byteSwapTest(u48, 0x123456789abc, 0xbc9a78563412);
+    try byteSwapTest(u56, 0x123456789abcde, 0xdebc9a78563412);
+    try byteSwapTest(u64, 0x123456789abcdef1, 0xf1debc9a78563412);
+    try byteSwapTest(u88, 0x123456789abcdef1112131, 0x312111f1debc9a78563412);
+    try byteSwapTest(u96, 0x123456789abcdef111213141, 0x41312111f1debc9a78563412);
+    try byteSwapTest(u128, 0x123456789abcdef11121314151617181, 0x8171615141312111f1debc9a78563412);
+
+    try byteSwapTest(i8, -50, -50);
+    try byteSwapTest(i16, @bitCast(i16, @as(u16, 0x1234)), @bitCast(i16, @as(u16, 0x3412)));
+    try byteSwapTest(i24, @bitCast(i24, @as(u24, 0x123456)), @bitCast(i24, @as(u24, 0x563412)));
+    try byteSwapTest(i32, @bitCast(i32, @as(u32, 0x12345678)), @bitCast(i32, @as(u32, 0x78563412)));
+    try byteSwapTest(i40, @bitCast(i40, @as(u40, 0x123456789a)), @bitCast(i40, @as(u40, 0x9a78563412)));
+    try byteSwapTest(i48, @bitCast(i48, @as(u48, 0x123456789abc)), @bitCast(i48, @as(u48, 0xbc9a78563412)));
+    try byteSwapTest(i56, @bitCast(i56, @as(u56, 0x123456789abcde)), @bitCast(i56, @as(u56, 0xdebc9a78563412)));
+    try byteSwapTest(i64, @bitCast(i64, @as(u64, 0x123456789abcdef1)), @bitCast(i64, @as(u64, 0xf1debc9a78563412)));
+    try byteSwapTest(i88, @bitCast(i88, @as(u88, 0x123456789abcdef1112131)), @bitCast(i88, @as(u88, 0x312111f1debc9a78563412)));
+    try byteSwapTest(i96, @bitCast(i96, @as(u96, 0x123456789abcdef111213141)), @bitCast(i96, @as(u96, 0x41312111f1debc9a78563412)));
+    try byteSwapTest(i128, @bitCast(i128, @as(u128, 0x123456789abcdef11121314151617181)), @bitCast(i128, @as(u128, 0x8171615141312111f1debc9a78563412)));
+
+    try byteSwapTest(u512, 0x80, 1 << 511);
+    try byteSwapTest(i512, 0x80, minInt(i512));
+    try byteSwapTest(i512, 0x40, 1 << 510);
+    try byteSwapTest(i512, -0x100, (1 << 504) - 1);
+    try byteSwapTest(i400, -0x100, (1 << 392) - 1);
+    try byteSwapTest(i400, -0x2, -(1 << 392) - 1);
+    try byteSwapTest(i24, @bitCast(i24, @as(u24, 0xf23456)), 0x5634f2);
+    try byteSwapTest(i24, 0x1234f6, @bitCast(i24, @as(u24, 0xf63412)));
+    try byteSwapTest(i32, @bitCast(i32, @as(u32, 0xf2345678)), 0x785634f2);
+    try byteSwapTest(i32, 0x123456f8, @bitCast(i32, @as(u32, 0xf8563412)));
+    try byteSwapTest(i48, 0x123456789abc, @bitCast(i48, @as(u48, 0xbc9a78563412)));
+}
src/arch/aarch64/CodeGen.zig
@@ -621,6 +621,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .clz             => try self.airClz(inst),
             .ctz             => try self.airCtz(inst),
             .popcount        => try self.airPopcount(inst),
+            .byte_swap       => try self.airByteSwap(inst),
+            .bit_reverse     => try self.airBitReverse(inst),
             .tag_name        => try self.airTagName(inst),
             .error_name      => try self.airErrorName(inst),
             .splat           => try self.airSplat(inst),
@@ -1682,6 +1684,18 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
+fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airByteSwap for {}", .{self.target.cpu.arch});
+    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
+fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airBitReverse for {}", .{self.target.cpu.arch});
+    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
 fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void {
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     const result: MCValue = if (self.liveness.isUnused(inst))
src/arch/arm/CodeGen.zig
@@ -605,6 +605,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .clz             => try self.airClz(inst),
             .ctz             => try self.airCtz(inst),
             .popcount        => try self.airPopcount(inst),
+            .byte_swap       => try self.airByteSwap(inst),
+            .bit_reverse     => try self.airBitReverse(inst),
             .tag_name        => try self.airTagName(inst),
             .error_name      => try self.airErrorName(inst),
             .splat           => try self.airSplat(inst),
@@ -1392,6 +1394,20 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void {
     // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
+fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+    _ = ty_op;
+    return self.fail("TODO implement airByteSwap for {}", .{self.target.cpu.arch});
+    // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
+fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+    _ = ty_op;
+    return self.fail("TODO implement airBitReverse for {}", .{self.target.cpu.arch});
+    // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
 fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void {
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     const result: MCValue = if (self.liveness.isUnused(inst))
src/arch/riscv64/CodeGen.zig
@@ -592,6 +592,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .clz             => try self.airClz(inst),
             .ctz             => try self.airCtz(inst),
             .popcount        => try self.airPopcount(inst),
+            .byte_swap       => try self.airByteSwap(inst),
+            .bit_reverse     => try self.airBitReverse(inst),
             .tag_name        => try self.airTagName(inst),
             .error_name      => try self.airErrorName(inst),
             .splat           => try self.airSplat(inst),
@@ -1181,6 +1183,18 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
+fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airByteSwap for {}", .{self.target.cpu.arch});
+    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
+fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airBitReverse for {}", .{self.target.cpu.arch});
+    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
 fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void {
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     const result: MCValue = if (self.liveness.isUnused(inst))
src/arch/wasm/CodeGen.zig
@@ -1687,6 +1687,8 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
         .clz,
         .ctz,
         .popcount,
+        .byte_swap,
+        .bit_reverse,
         .is_err_ptr,
         .is_non_err_ptr,
         .fptrunc,
src/arch/x86_64/CodeGen.zig
@@ -686,6 +686,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .clz             => try self.airClz(inst),
             .ctz             => try self.airCtz(inst),
             .popcount        => try self.airPopcount(inst),
+            .byte_swap       => try self.airByteSwap(inst),
+            .bit_reverse     => try self.airBitReverse(inst),
             .tag_name        => try self.airTagName(inst),
             .error_name      => try self.airErrorName(inst),
             .splat           => try self.airSplat(inst),
@@ -1716,6 +1718,24 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
+fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+    const result: MCValue = if (self.liveness.isUnused(inst))
+        .dead
+    else
+        return self.fail("TODO implement airByteSwap for {}", .{self.target.cpu.arch});
+    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
+fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+    const result: MCValue = if (self.liveness.isUnused(inst))
+        .dead
+    else
+        return self.fail("TODO implement airBitReverse for {}", .{self.target.cpu.arch});
+    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
 fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void {
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     const result: MCValue = if (self.liveness.isUnused(inst))
src/codegen/c.zig
@@ -1709,6 +1709,8 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
             .clz              => try airBuiltinCall(f, inst, "clz"),
             .ctz              => try airBuiltinCall(f, inst, "ctz"),
             .popcount         => try airBuiltinCall(f, inst, "popcount"),
+            .byte_swap        => try airBuiltinCall(f, inst, "byte_swap"),
+            .bit_reverse      => try airBuiltinCall(f, inst, "bit_reverse"),
             .tag_name         => try airTagName(f, inst),
             .error_name       => try airErrorName(f, inst),
             .splat            => try airSplat(f, inst),
src/codegen/llvm.zig
@@ -2205,7 +2205,9 @@ pub const FuncGen = struct {
                 .get_union_tag  => try self.airGetUnionTag(inst),
                 .clz            => try self.airClzCtz(inst, "llvm.ctlz"),
                 .ctz            => try self.airClzCtz(inst, "llvm.cttz"),
-                .popcount       => try self.airPopCount(inst),
+                .popcount       => try self.airBitOp(inst, "llvm.ctpop"),
+                .byte_swap      => try self.airByteSwap(inst, "llvm.bswap"),
+                .bit_reverse    => try self.airBitOp(inst, "llvm.bitreverse"),
                 .tag_name       => try self.airTagName(inst),
                 .error_name     => try self.airErrorName(inst),
                 .splat          => try self.airSplat(inst),
@@ -4348,7 +4350,7 @@ pub const FuncGen = struct {
         }
     }
 
-    fn airPopCount(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airBitOp(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
 
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
@@ -4357,7 +4359,7 @@ pub const FuncGen = struct {
 
         const params = [_]*const llvm.Value{operand};
         const operand_llvm_ty = try self.dg.llvmType(operand_ty);
-        const fn_val = self.getIntrinsic("llvm.ctpop", &.{operand_llvm_ty});
+        const fn_val = self.getIntrinsic(llvm_fn_name, &.{operand_llvm_ty});
 
         const wrong_size_result = self.builder.buildCall(fn_val, &params, params.len, .C, .Auto, "");
         const result_ty = self.air.typeOfIndex(inst);
@@ -4375,6 +4377,44 @@ pub const FuncGen = struct {
         }
     }
 
+    fn airByteSwap(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value {
+        if (self.liveness.isUnused(inst)) return null;
+
+        const target = self.dg.module.getTarget();
+        const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+        const operand_ty = self.air.typeOf(ty_op.operand);
+        var bits = operand_ty.intInfo(target).bits;
+        assert(bits % 8 == 0);
+
+        var operand = try self.resolveInst(ty_op.operand);
+        var operand_llvm_ty = try self.dg.llvmType(operand_ty);
+
+        if (bits % 16 == 8) {
+            // For an odd number of bytes, zero-extend and shift left by one byte;
+            // the truncation at the end then yields the correct bswap
+            operand_llvm_ty = self.context.intType(bits + 8);
+            const extended = self.builder.buildZExt(operand, operand_llvm_ty, "");
+            operand = self.builder.buildShl(extended, operand_llvm_ty.constInt(8, .False), "");
+            bits = bits + 8;
+        }
+
+        const params = [_]*const llvm.Value{operand};
+        const fn_val = self.getIntrinsic(llvm_fn_name, &.{operand_llvm_ty});
+
+        const wrong_size_result = self.builder.buildCall(fn_val, &params, params.len, .C, .Auto, "");
+
+        const result_ty = self.air.typeOfIndex(inst);
+        const result_llvm_ty = try self.dg.llvmType(result_ty);
+        const result_bits = result_ty.intInfo(target).bits;
+        if (bits > result_bits) {
+            return self.builder.buildTrunc(wrong_size_result, result_llvm_ty, "");
+        } else if (bits < result_bits) {
+            return self.builder.buildZExt(wrong_size_result, result_llvm_ty, "");
+        } else {
+            return wrong_size_result;
+        }
+    }
+
     fn airTagName(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
 
src/Air.zig
@@ -236,6 +236,12 @@ pub const Inst = struct {
         /// Result type will always be an unsigned integer big enough to fit the answer.
         /// Uses the `ty_op` field.
         popcount,
+        /// Reverse the bytes in an integer according to its representation in twos complement.
+        /// Uses the `ty_op` field.
+        byte_swap,
+        /// Reverse the bits in an integer according to its representation in twos complement.
+        /// Uses the `ty_op` field.
+        bit_reverse,
 
         /// Square root of a floating point number.
         /// Uses the `un_op` field.
@@ -874,6 +880,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
         .clz,
         .ctz,
         .popcount,
+        .byte_swap,
+        .bit_reverse,
         => return air.getRefType(datas[inst].ty_op.ty),
 
         .loop,
src/Liveness.zig
@@ -318,6 +318,8 @@ fn analyzeInst(
         .clz,
         .ctz,
         .popcount,
+        .byte_swap,
+        .bit_reverse,
         .splat,
         => {
             const o = inst_datas[inst].ty_op;
src/print_air.zig
@@ -216,6 +216,8 @@ const Writer = struct {
             .clz,
             .ctz,
             .popcount,
+            .byte_swap,
+            .bit_reverse,
             => try w.writeTyOp(s, inst),
 
             .block,
src/Sema.zig
@@ -11701,14 +11701,60 @@ fn zirBitCount(
 
 fn zirByteSwap(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
     const inst_data = sema.code.instructions.items(.data)[inst].un_node;
-    const src = inst_data.src();
-    return sema.fail(block, src, "TODO: Sema.zirByteSwap", .{});
+    const ty_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node };
+    const operand_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node };
+    const operand = sema.resolveInst(inst_data.operand);
+    const operand_ty = sema.typeOf(operand);
+    // TODO implement support for vectors
+    if (operand_ty.zigTypeTag() != .Int) {
+        return sema.fail(block, ty_src, "expected integer type, found '{}'", .{
+            operand_ty,
+        });
+    }
+    const target = sema.mod.getTarget();
+    const bits = operand_ty.intInfo(target).bits;
+    if (bits == 0) return Air.Inst.Ref.zero;
+    if (operand_ty.intInfo(target).bits % 8 != 0) {
+        return sema.fail(block, ty_src, "@byteSwap requires the number of bits to be evenly divisible by 8, but {} has {} bits", .{
+            operand_ty,
+            operand_ty.intInfo(target).bits,
+        });
+    }
+
+    const runtime_src = if (try sema.resolveMaybeUndefVal(block, operand_src, operand)) |val| {
+        if (val.isUndef()) return sema.addConstUndef(operand_ty);
+        const result_val = try val.byteSwap(operand_ty, target, sema.arena);
+        return sema.addConstant(operand_ty, result_val);
+    } else operand_src;
+
+    try sema.requireRuntimeBlock(block, runtime_src);
+    return block.addTyOp(.byte_swap, operand_ty, operand);
 }
 
 fn zirBitReverse(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
     const inst_data = sema.code.instructions.items(.data)[inst].un_node;
-    const src = inst_data.src();
-    return sema.fail(block, src, "TODO: Sema.zirBitReverse", .{});
+    const ty_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node };
+    const operand_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node };
+    const operand = sema.resolveInst(inst_data.operand);
+    const operand_ty = sema.typeOf(operand);
+    // TODO implement support for vectors
+    if (operand_ty.zigTypeTag() != .Int) {
+        return sema.fail(block, ty_src, "expected integer type, found '{}'", .{
+            operand_ty,
+        });
+    }
+    const target = sema.mod.getTarget();
+    const bits = operand_ty.intInfo(target).bits;
+    if (bits == 0) return Air.Inst.Ref.zero;
+
+    const runtime_src = if (try sema.resolveMaybeUndefVal(block, operand_src, operand)) |val| {
+        if (val.isUndef()) return sema.addConstUndef(operand_ty);
+        const result_val = try val.bitReverse(operand_ty, target, sema.arena);
+        return sema.addConstant(operand_ty, result_val);
+    } else operand_src;
+
+    try sema.requireRuntimeBlock(block, runtime_src);
+    return block.addTyOp(.bit_reverse, operand_ty, operand);
 }
 
 fn zirBitOffsetOf(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
src/value.zig
@@ -1334,6 +1334,45 @@ pub const Value = extern union {
         }
     }
 
+    pub fn bitReverse(val: Value, ty: Type, target: Target, arena: Allocator) !Value {
+        assert(!val.isUndef());
+
+        const info = ty.intInfo(target);
+
+        var buffer: Value.BigIntSpace = undefined;
+        const operand_bigint = val.toBigInt(&buffer);
+
+        const limbs = try arena.alloc(
+            std.math.big.Limb,
+            std.math.big.int.calcTwosCompLimbCount(info.bits),
+        );
+        var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+        result_bigint.bitReverse(operand_bigint, info.signedness, info.bits);
+
+        return fromBigInt(arena, result_bigint.toConst());
+    }
+
+    pub fn byteSwap(val: Value, ty: Type, target: Target, arena: Allocator) !Value {
+        assert(!val.isUndef());
+
+        const info = ty.intInfo(target);
+
+        // Bit count must be evenly divisible by 8
+        assert(info.bits % 8 == 0);
+
+        var buffer: Value.BigIntSpace = undefined;
+        const operand_bigint = val.toBigInt(&buffer);
+
+        const limbs = try arena.alloc(
+            std.math.big.Limb,
+            std.math.big.int.calcTwosCompLimbCount(info.bits),
+        );
+        var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+        result_bigint.byteSwap(operand_bigint, info.signedness, info.bits / 8);
+
+        return fromBigInt(arena, result_bigint.toConst());
+    }
+
     /// Asserts the value is an integer and not undefined.
     /// Returns the number of bits the value requires to represent stored in twos complement form.
     pub fn intBitCountTwosComp(self: Value, target: Target) usize {
test/behavior/bitreverse.zig
@@ -1,25 +1,32 @@
 const std = @import("std");
+const builtin = @import("builtin");
 const expect = std.testing.expect;
 const minInt = std.math.minInt;
 
 test "@bitReverse" {
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
+
     comptime try testBitReverse();
     try testBitReverse();
 }
 
 fn testBitReverse() !void {
     // using comptime_ints, unsigned
-    try expect(@bitReverse(u0, 0) == 0);
-    try expect(@bitReverse(u5, 0x12) == 0x9);
-    try expect(@bitReverse(u8, 0x12) == 0x48);
-    try expect(@bitReverse(u16, 0x1234) == 0x2c48);
-    try expect(@bitReverse(u24, 0x123456) == 0x6a2c48);
-    try expect(@bitReverse(u32, 0x12345678) == 0x1e6a2c48);
-    try expect(@bitReverse(u40, 0x123456789a) == 0x591e6a2c48);
-    try expect(@bitReverse(u48, 0x123456789abc) == 0x3d591e6a2c48);
-    try expect(@bitReverse(u56, 0x123456789abcde) == 0x7b3d591e6a2c48);
-    try expect(@bitReverse(u64, 0x123456789abcdef1) == 0x8f7b3d591e6a2c48);
-    try expect(@bitReverse(u128, 0x123456789abcdef11121314151617181) == 0x818e868a828c84888f7b3d591e6a2c48);
+    try expect(@bitReverse(u0, @as(u0, 0)) == 0);
+    try expect(@bitReverse(u5, @as(u5, 0x12)) == 0x9);
+    try expect(@bitReverse(u8, @as(u8, 0x12)) == 0x48);
+    try expect(@bitReverse(u16, @as(u16, 0x1234)) == 0x2c48);
+    try expect(@bitReverse(u24, @as(u24, 0x123456)) == 0x6a2c48);
+    try expect(@bitReverse(u32, @as(u32, 0x12345678)) == 0x1e6a2c48);
+    try expect(@bitReverse(u40, @as(u40, 0x123456789a)) == 0x591e6a2c48);
+    try expect(@bitReverse(u48, @as(u48, 0x123456789abc)) == 0x3d591e6a2c48);
+    try expect(@bitReverse(u56, @as(u56, 0x123456789abcde)) == 0x7b3d591e6a2c48);
+    try expect(@bitReverse(u64, @as(u64, 0x123456789abcdef1)) == 0x8f7b3d591e6a2c48);
+    try expect(@bitReverse(u95, @as(u95, 0x123456789abcdef111213141)) == 0x4146424447bd9eac8f351624);
+    try expect(@bitReverse(u96, @as(u96, 0x123456789abcdef111213141)) == 0x828c84888f7b3d591e6a2c48);
+    try expect(@bitReverse(u128, @as(u128, 0x123456789abcdef11121314151617181)) == 0x818e868a828c84888f7b3d591e6a2c48);
 
     // using runtime uints, unsigned
     var num0: u0 = 0;
@@ -50,11 +57,16 @@ fn testBitReverse() !void {
     try expect(@bitReverse(i8, @bitCast(i8, @as(u8, 0x92))) == @bitCast(i8, @as(u8, 0x49)));
     try expect(@bitReverse(i16, @bitCast(i16, @as(u16, 0x1234))) == @bitCast(i16, @as(u16, 0x2c48)));
     try expect(@bitReverse(i24, @bitCast(i24, @as(u24, 0x123456))) == @bitCast(i24, @as(u24, 0x6a2c48)));
+    try expect(@bitReverse(i24, @bitCast(i24, @as(u24, 0x12345f))) == @bitCast(i24, @as(u24, 0xfa2c48)));
+    try expect(@bitReverse(i24, @bitCast(i24, @as(u24, 0xf23456))) == @bitCast(i24, @as(u24, 0x6a2c4f)));
     try expect(@bitReverse(i32, @bitCast(i32, @as(u32, 0x12345678))) == @bitCast(i32, @as(u32, 0x1e6a2c48)));
+    try expect(@bitReverse(i32, @bitCast(i32, @as(u32, 0xf2345678))) == @bitCast(i32, @as(u32, 0x1e6a2c4f)));
+    try expect(@bitReverse(i32, @bitCast(i32, @as(u32, 0x1234567f))) == @bitCast(i32, @as(u32, 0xfe6a2c48)));
     try expect(@bitReverse(i40, @bitCast(i40, @as(u40, 0x123456789a))) == @bitCast(i40, @as(u40, 0x591e6a2c48)));
     try expect(@bitReverse(i48, @bitCast(i48, @as(u48, 0x123456789abc))) == @bitCast(i48, @as(u48, 0x3d591e6a2c48)));
     try expect(@bitReverse(i56, @bitCast(i56, @as(u56, 0x123456789abcde))) == @bitCast(i56, @as(u56, 0x7b3d591e6a2c48)));
     try expect(@bitReverse(i64, @bitCast(i64, @as(u64, 0x123456789abcdef1))) == @bitCast(i64, @as(u64, 0x8f7b3d591e6a2c48)));
+    try expect(@bitReverse(i96, @bitCast(i96, @as(u96, 0x123456789abcdef111213141))) == @bitCast(i96, @as(u96, 0x828c84888f7b3d591e6a2c48)));
     try expect(@bitReverse(i128, @bitCast(i128, @as(u128, 0x123456789abcdef11121314151617181))) == @bitCast(i128, @as(u128, 0x818e868a828c84888f7b3d591e6a2c48)));
 
     // using signed, negative. Compare to runtime ints returned from llvm.
test/behavior/byteswap.zig
@@ -1,18 +1,29 @@
 const std = @import("std");
+const builtin = @import("builtin");
 const expect = std.testing.expect;
 
 test "@byteSwap integers" {
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
+
     const ByteSwapIntTest = struct {
         fn run() !void {
             try t(u0, 0, 0);
             try t(u8, 0x12, 0x12);
             try t(u16, 0x1234, 0x3412);
             try t(u24, 0x123456, 0x563412);
+            try t(i24, @bitCast(i24, @as(u24, 0xf23456)), 0x5634f2);
+            try t(i24, 0x1234f6, @bitCast(i24, @as(u24, 0xf63412)));
             try t(u32, 0x12345678, 0x78563412);
+            try t(i32, @bitCast(i32, @as(u32, 0xf2345678)), 0x785634f2);
+            try t(i32, 0x123456f8, @bitCast(i32, @as(u32, 0xf8563412)));
             try t(u40, 0x123456789a, 0x9a78563412);
             try t(i48, 0x123456789abc, @bitCast(i48, @as(u48, 0xbc9a78563412)));
             try t(u56, 0x123456789abcde, 0xdebc9a78563412);
             try t(u64, 0x123456789abcdef1, 0xf1debc9a78563412);
+            try t(u88, 0x123456789abcdef1112131, 0x312111f1debc9a78563412);
+            try t(u96, 0x123456789abcdef111213141, 0x41312111f1debc9a78563412);
             try t(u128, 0x123456789abcdef11121314151617181, 0x8171615141312111f1debc9a78563412);
 
             try t(u0, @as(u0, 0), 0);
@@ -24,6 +35,8 @@ test "@byteSwap integers" {
             try t(i48, @bitCast(i48, @as(u48, 0x123456789abc)), @bitCast(i48, @as(u48, 0xbc9a78563412)));
             try t(i56, @bitCast(i56, @as(u56, 0x123456789abcde)), @bitCast(i56, @as(u56, 0xdebc9a78563412)));
             try t(i64, @bitCast(i64, @as(u64, 0x123456789abcdef1)), @bitCast(i64, @as(u64, 0xf1debc9a78563412)));
+            try t(i88, @bitCast(i88, @as(u88, 0x123456789abcdef1112131)), @bitCast(i88, @as(u88, 0x312111f1debc9a78563412)));
+            try t(i96, @bitCast(i96, @as(u96, 0x123456789abcdef111213141)), @bitCast(i96, @as(u96, 0x41312111f1debc9a78563412)));
             try t(
                 i128,
                 @bitCast(i128, @as(u128, 0x123456789abcdef11121314151617181)),
@@ -31,7 +44,7 @@ test "@byteSwap integers" {
             );
         }
         fn t(comptime I: type, input: I, expected_output: I) !void {
-            try std.testing.expectEqual(expected_output, @byteSwap(I, input));
+            try std.testing.expect(expected_output == @byteSwap(I, input));
         }
     };
     comptime try ByteSwapIntTest.run();
@@ -39,6 +52,11 @@ test "@byteSwap integers" {
 }
 
 test "@byteSwap vectors" {
+    if (builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
+
     const ByteSwapVectorTest = struct {
         fn run() !void {
             try t(u8, 2, [_]u8{ 0x12, 0x13 }, [_]u8{ 0x12, 0x13 });
test/behavior.zig
@@ -105,6 +105,8 @@ test {
             if (builtin.zig_backend != .stage2_c) {
                 // Tests that pass for stage1 and the llvm backend.
                 _ = @import("behavior/atomics.zig");
+                _ = @import("behavior/bitreverse.zig");
+                _ = @import("behavior/byteswap.zig");
                 _ = @import("behavior/bugs/9584.zig");
                 _ = @import("behavior/eval.zig");
                 _ = @import("behavior/floatop.zig");
@@ -124,7 +126,6 @@ test {
                         _ = @import("behavior/async_fn.zig");
                     }
                     _ = @import("behavior/await_struct.zig");
-                    _ = @import("behavior/bitreverse.zig");
                     _ = @import("behavior/bugs/421.zig");
                     _ = @import("behavior/bugs/529.zig");
                     _ = @import("behavior/bugs/718.zig");
@@ -151,7 +152,6 @@ test {
                     _ = @import("behavior/bugs/7027.zig");
                     _ = @import("behavior/bugs/7047.zig");
                     _ = @import("behavior/bugs/10147.zig");
-                    _ = @import("behavior/byteswap.zig");
                     _ = @import("behavior/const_slice_child.zig");
                     _ = @import("behavior/export_self_referential_type_info.zig");
                     _ = @import("behavior/field_parent_ptr.zig");