Commit ac07ddadeb

Andrew Kelley <andrew@ziglang.org>
2023-05-05 22:26:58
InternPool: enhance integer values
The Key struct now has a Storage tagged union which can store a u64, i64, or big int. This is needed so that indexToKey can be implemented for integers stored compactly in the data structure.
1 parent 75cf06c
Changed files (3)
src/InternPool.zig
@@ -11,6 +11,7 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 const BigIntConst = std.math.big.int.Const;
+const BigIntMutable = std.math.big.int.Mutable;
 
 const InternPool = @This();
 const DeclIndex = enum(u32) { _ };
@@ -50,10 +51,7 @@ pub const Key = union(enum) {
         /// Index into the string table bytes.
         lib_name: u32,
     },
-    int: struct {
-        ty: Index,
-        big_int: BigIntConst,
-    },
+    int: Key.Int,
     enum_tag: struct {
         ty: Index,
         tag: BigIntConst,
@@ -110,6 +108,32 @@ pub const Key = union(enum) {
         child: Index,
     };
 
+    pub const Int = struct {
+        ty: Index,
+        storage: Storage,
+
+        pub const Storage = union(enum) {
+            u64: u64,
+            i64: i64,
+            big_int: BigIntConst,
+
+            /// Big enough to fit any non-BigInt value
+            pub const BigIntSpace = struct {
+                /// The +1 is headroom so that operations such as incrementing once
+                /// or decrementing once are possible without using an allocator.
+                limbs: [(@sizeOf(u64) / @sizeOf(std.math.big.Limb)) + 1]std.math.big.Limb,
+            };
+
+            pub fn toBigInt(storage: Storage, space: *BigIntSpace) BigIntConst {
+                return switch (storage) {
+                    .big_int => |x| x,
+                    .u64 => |x| BigIntMutable.init(&space.limbs, x).toConst(),
+                    .i64 => |x| BigIntMutable.init(&space.limbs, x).toConst(),
+                };
+            }
+        };
+    };
+
     pub fn hash32(key: Key) u32 {
         return @truncate(u32, key.hash64());
     }
@@ -137,9 +161,13 @@ pub const Key = union(enum) {
             => |info| std.hash.autoHash(hasher, info),
 
             .int => |int| {
+                // Canonicalize all integers by converting them to BigIntConst.
+                var buffer: Key.Int.Storage.BigIntSpace = undefined;
+                const big_int = int.storage.toBigInt(&buffer);
+
                 std.hash.autoHash(hasher, int.ty);
-                std.hash.autoHash(hasher, int.big_int.positive);
-                for (int.big_int.limbs) |limb| std.hash.autoHash(hasher, limb);
+                std.hash.autoHash(hasher, big_int.positive);
+                for (big_int.limbs) |limb| std.hash.autoHash(hasher, limb);
             },
 
             .enum_tag => |enum_tag| {
@@ -573,42 +601,27 @@ pub const static_keys = [_]Key{
 
     .{ .int = .{
         .ty = .comptime_int_type,
-        .big_int = .{
-            .limbs = &.{0},
-            .positive = true,
-        },
+        .storage = .{ .u64 = 0 },
     } },
 
     .{ .int = .{
         .ty = .usize_type,
-        .big_int = .{
-            .limbs = &.{0},
-            .positive = true,
-        },
+        .storage = .{ .u64 = 0 },
     } },
 
     .{ .int = .{
         .ty = .u8_type,
-        .big_int = .{
-            .limbs = &.{0},
-            .positive = true,
-        },
+        .storage = .{ .u64 = 0 },
     } },
 
     .{ .int = .{
         .ty = .comptime_int_type,
-        .big_int = .{
-            .limbs = &.{1},
-            .positive = true,
-        },
+        .storage = .{ .u64 = 1 },
     } },
 
     .{ .int = .{
         .ty = .usize_type,
-        .big_int = .{
-            .limbs = &.{1},
-            .positive = true,
-        },
+        .storage = .{ .u64 = 1 },
     } },
 
     .{ .enum_tag = .{
@@ -680,19 +693,19 @@ pub const Tag = enum(u8) {
     simple_internal,
     /// Type: u32
     /// data is integer value
-    int_small_u32,
+    int_u32,
     /// Type: i32
     /// data is integer value bitcasted to u32.
-    int_small_i32,
+    int_i32,
     /// A usize that fits in 32 bits.
     /// data is integer value.
-    int_small_usize,
+    int_usize,
     /// A comptime_int that fits in a u32.
     /// data is integer value.
-    int_small_comptime_unsigned,
+    int_comptime_int_u32,
     /// A comptime_int that fits in an i32.
     /// data is integer value bitcasted to u32.
-    int_small_comptime_signed,
+    int_comptime_int_i32,
     /// A positive integer value.
     /// data is a limbs index to Int.
     int_positive,
@@ -932,11 +945,26 @@ pub fn indexToKey(ip: InternPool, index: Index) Key {
         .type_error_union => @panic("TODO"),
         .type_enum_simple => @panic("TODO"),
         .simple_internal => @panic("TODO"),
-        .int_small_u32 => @panic("TODO"),
-        .int_small_i32 => @panic("TODO"),
-        .int_small_usize => @panic("TODO"),
-        .int_small_comptime_unsigned => @panic("TODO"),
-        .int_small_comptime_signed => @panic("TODO"),
+        .int_u32 => return .{ .int = .{
+            .ty = .u32_type,
+            .storage = .{ .u64 = data },
+        } },
+        .int_i32 => return .{ .int = .{
+            .ty = .i32_type,
+            .storage = .{ .i64 = @bitCast(i32, data) },
+        } },
+        .int_usize => return .{ .int = .{
+            .ty = .usize_type,
+            .storage = .{ .u64 = data },
+        } },
+        .int_comptime_int_u32 => return .{ .int = .{
+            .ty = .comptime_int_type,
+            .storage = .{ .u64 = data },
+        } },
+        .int_comptime_int_i32 => return .{ .int = .{
+            .ty = .comptime_int_type,
+            .storage = .{ .i64 = @bitCast(i32, data) },
+        } },
         .int_positive => @panic("TODO"),
         .int_negative => @panic("TODO"),
         .enum_tag_positive => @panic("TODO"),
@@ -1041,54 +1069,114 @@ pub fn get(ip: *InternPool, gpa: Allocator, key: Key) Allocator.Error!Index {
 
         .int => |int| b: {
             switch (int.ty) {
-                .u32_type => {
-                    if (int.big_int.fits(u32)) {
-                        ip.items.appendAssumeCapacity(.{
-                            .tag = .int_small_u32,
-                            .data = int.big_int.to(u32) catch unreachable,
-                        });
-                        break :b;
-                    }
+                .u32_type => switch (int.storage) {
+                    .big_int => |big_int| {
+                        if (big_int.to(u32)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_u32,
+                                .data = casted,
+                            });
+                            break :b;
+                        } else |_| {}
+                    },
+                    inline .u64, .i64 => |x| {
+                        if (std.math.cast(u32, x)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_u32,
+                                .data = casted,
+                            });
+                            break :b;
+                        }
+                    },
                 },
-                .i32_type => {
-                    if (int.big_int.fits(i32)) {
-                        ip.items.appendAssumeCapacity(.{
-                            .tag = .int_small_i32,
-                            .data = @bitCast(u32, int.big_int.to(i32) catch unreachable),
-                        });
-                        break :b;
-                    }
+                .i32_type => switch (int.storage) {
+                    .big_int => |big_int| {
+                        if (big_int.to(i32)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_i32,
+                                .data = @bitCast(u32, casted),
+                            });
+                            break :b;
+                        } else |_| {}
+                    },
+                    inline .u64, .i64 => |x| {
+                        if (std.math.cast(i32, x)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_u32,
+                                .data = @bitCast(u32, casted),
+                            });
+                            break :b;
+                        }
+                    },
                 },
-                .usize_type => {
-                    if (int.big_int.fits(u32)) {
-                        ip.items.appendAssumeCapacity(.{
-                            .tag = .int_small_usize,
-                            .data = int.big_int.to(u32) catch unreachable,
-                        });
-                        break :b;
-                    }
+                .usize_type => switch (int.storage) {
+                    .big_int => |big_int| {
+                        if (big_int.to(u32)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_usize,
+                                .data = casted,
+                            });
+                            break :b;
+                        } else |_| {}
+                    },
+                    inline .u64, .i64 => |x| {
+                        if (std.math.cast(u32, x)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_usize,
+                                .data = casted,
+                            });
+                            break :b;
+                        }
+                    },
                 },
-                .comptime_int_type => {
-                    if (int.big_int.fits(u32)) {
-                        ip.items.appendAssumeCapacity(.{
-                            .tag = .int_small_comptime_unsigned,
-                            .data = int.big_int.to(u32) catch unreachable,
-                        });
-                        break :b;
-                    }
-                    if (int.big_int.fits(i32)) {
-                        ip.items.appendAssumeCapacity(.{
-                            .tag = .int_small_comptime_signed,
-                            .data = @bitCast(u32, int.big_int.to(i32) catch unreachable),
-                        });
-                        break :b;
-                    }
+                .comptime_int_type => switch (int.storage) {
+                    .big_int => |big_int| {
+                        if (big_int.to(u32)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_comptime_int_u32,
+                                .data = casted,
+                            });
+                            break :b;
+                        } else |_| {}
+                        if (big_int.to(i32)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_comptime_int_i32,
+                                .data = @bitCast(u32, casted),
+                            });
+                            break :b;
+                        } else |_| {}
+                    },
+                    inline .u64, .i64 => |x| {
+                        if (std.math.cast(u32, x)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_comptime_int_u32,
+                                .data = casted,
+                            });
+                            break :b;
+                        }
+                        if (std.math.cast(i32, x)) |casted| {
+                            ip.items.appendAssumeCapacity(.{
+                                .tag = .int_comptime_int_i32,
+                                .data = @bitCast(u32, casted),
+                            });
+                            break :b;
+                        }
+                    },
                 },
                 else => {},
             }
-
-            const tag: Tag = if (int.big_int.positive) .int_positive else .int_negative;
-            try addInt(ip, gpa, int.ty, tag, int.big_int.limbs);
+            switch (int.storage) {
+                .big_int => |big_int| {
+                    const tag: Tag = if (big_int.positive) .int_positive else .int_negative;
+                    try addInt(ip, gpa, int.ty, tag, big_int.limbs);
+                },
+                inline .i64, .u64 => |x| {
+                    var buf: [2]usize = undefined;
+                    const big_int = BigIntMutable.init(&buf, x).toConst();
+                    const tag: Tag = if (big_int.positive) .int_positive else .int_negative;
+                    try addInt(ip, gpa, int.ty, tag, big_int.limbs);
+                },
+            }
         },
 
         .enum_tag => |enum_tag| {
src/Sema.zig
@@ -1981,7 +1981,7 @@ fn resolveMaybeUndefValAllowVariablesMaybeRuntime(
         if (air_tags[i] == .constant) {
             const ty_pl = sema.air_instructions.items(.data)[i].ty_pl;
             const val = sema.air_values.items[ty_pl.payload];
-            if (val.tag() == .variable) return val;
+            if (val.tagIsVariable()) return val;
         }
         return opv;
     }
@@ -5033,7 +5033,7 @@ fn storeToInferredAllocComptime(
     // There will be only one store_to_inferred_ptr because we are running at comptime.
     // The alloc will turn into a Decl.
     if (try sema.resolveMaybeUndefValAllowVariables(operand)) |operand_val| store: {
-        if (operand_val.tag() == .variable) break :store;
+        if (operand_val.tagIsVariable()) break :store;
         var anon_decl = try block.startAnonDecl();
         defer anon_decl.deinit();
         iac.data.decl_index = try anon_decl.finish(
@@ -28125,7 +28125,7 @@ fn beginComptimePtrLoad(
                 const is_mutable = ptr_val.tag() == .decl_ref_mut;
                 const decl = sema.mod.declPtr(decl_index);
                 const decl_tv = try decl.typedValue();
-                if (decl_tv.val.tag() == .variable) return error.RuntimeLoad;
+                if (decl_tv.val.tagIsVariable()) return error.RuntimeLoad;
 
                 const layout_defined = decl.ty.hasWellDefinedLayout(mod);
                 break :blk ComptimePtrLoadKit{
src/value.zig
@@ -796,17 +796,17 @@ pub const Value = struct {
         mod: *const Module,
         opt_sema: ?*Sema,
     ) Module.CompileError!BigIntConst {
-        switch (val.ip_index) {
-            .bool_false => return BigIntMutable.init(&space.limbs, 0).toConst(),
-            .bool_true => return BigIntMutable.init(&space.limbs, 1).toConst(),
+        return switch (val.ip_index) {
+            .bool_false => BigIntMutable.init(&space.limbs, 0).toConst(),
+            .bool_true => BigIntMutable.init(&space.limbs, 1).toConst(),
             .undef => unreachable,
-            .null_value => return BigIntMutable.init(&space.limbs, 0).toConst(),
+            .null_value => BigIntMutable.init(&space.limbs, 0).toConst(),
             .none => switch (val.tag()) {
                 .zero,
                 .the_only_possible_value, // i0, u0
-                => return BigIntMutable.init(&space.limbs, 0).toConst(),
+                => BigIntMutable.init(&space.limbs, 0).toConst(),
 
-                .one => return BigIntMutable.init(&space.limbs, 1).toConst(),
+                .one => BigIntMutable.init(&space.limbs, 1).toConst(),
 
                 .enum_field_index => {
                     const index = val.castTag(.enum_field_index).?.data;
@@ -816,10 +816,10 @@ pub const Value = struct {
                     const sub_val = val.castTag(.runtime_value).?.data;
                     return sub_val.toBigIntAdvanced(space, mod, opt_sema);
                 },
-                .int_u64 => return BigIntMutable.init(&space.limbs, val.castTag(.int_u64).?.data).toConst(),
-                .int_i64 => return BigIntMutable.init(&space.limbs, val.castTag(.int_i64).?.data).toConst(),
-                .int_big_positive => return val.castTag(.int_big_positive).?.asBigInt(),
-                .int_big_negative => return val.castTag(.int_big_negative).?.asBigInt(),
+                .int_u64 => BigIntMutable.init(&space.limbs, val.castTag(.int_u64).?.data).toConst(),
+                .int_i64 => BigIntMutable.init(&space.limbs, val.castTag(.int_i64).?.data).toConst(),
+                .int_big_positive => val.castTag(.int_big_positive).?.asBigInt(),
+                .int_big_negative => val.castTag(.int_big_negative).?.asBigInt(),
 
                 .lazy_align => {
                     const ty = val.castTag(.lazy_align).?.data;
@@ -849,10 +849,10 @@ pub const Value = struct {
                 else => unreachable,
             },
             else => switch (mod.intern_pool.indexToKey(val.ip_index)) {
-                .int => |int| return int.big_int,
+                .int => |int| int.storage.toBigInt(space),
                 else => unreachable,
             },
-        }
+        };
     }
 
     /// If the value fits in a u64, return it, otherwise null.
@@ -900,7 +900,11 @@ pub const Value = struct {
                 else => return null,
             },
             else => return switch (mod.intern_pool.indexToKey(val.ip_index)) {
-                .int => |int| int.big_int.to(u64) catch null,
+                .int => |int| switch (int.storage) {
+                    .big_int => |big_int| big_int.to(u64) catch null,
+                    .u64 => |x| x,
+                    .i64 => |x| std.math.cast(u64, x),
+                },
                 else => null,
             },
         }
@@ -940,18 +944,22 @@ pub const Value = struct {
 
                 else => unreachable,
             },
-            else => switch (mod.intern_pool.indexToKey(val.ip_index)) {
-                .int => |int| return int.big_int.to(i64) catch unreachable,
+            else => return switch (mod.intern_pool.indexToKey(val.ip_index)) {
+                .int => |int| switch (int.storage) {
+                    .big_int => |big_int| big_int.to(i64) catch unreachable,
+                    .i64 => |x| x,
+                    .u64 => |x| @intCast(i64, x),
+                },
                 else => unreachable,
             },
         }
     }
 
     pub fn toBool(val: Value, mod: *const Module) bool {
-        switch (val.ip_index) {
-            .bool_true => return true,
-            .bool_false => return false,
-            .none => return switch (val.tag()) {
+        return switch (val.ip_index) {
+            .bool_true => true,
+            .bool_false => false,
+            .none => switch (val.tag()) {
                 .one => true,
                 .zero => false,
 
@@ -968,10 +976,13 @@ pub const Value = struct {
                 else => unreachable,
             },
             else => switch (mod.intern_pool.indexToKey(val.ip_index)) {
-                .int => |int| return !int.big_int.eqZero(),
+                .int => |int| switch (int.storage) {
+                    .big_int => |big_int| !big_int.eqZero(),
+                    inline .u64, .i64 => |x| x != 0,
+                },
                 else => unreachable,
             },
-        }
+        };
     }
 
     fn isDeclRef(val: Value) bool {
@@ -1483,12 +1494,12 @@ pub const Value = struct {
 
     pub fn clz(val: Value, ty: Type, mod: *const Module) u64 {
         const ty_bits = ty.intInfo(mod).bits;
-        switch (val.ip_index) {
-            .bool_false => return ty_bits,
-            .bool_true => return ty_bits - 1,
+        return switch (val.ip_index) {
+            .bool_false => ty_bits,
+            .bool_true => ty_bits - 1,
             .none => switch (val.tag()) {
-                .zero => return ty_bits,
-                .one => return ty_bits - 1,
+                .zero => ty_bits,
+                .one => ty_bits - 1,
 
                 .int_u64 => {
                     const big = @clz(val.castTag(.int_u64).?.data);
@@ -1519,20 +1530,24 @@ pub const Value = struct {
                 else => unreachable,
             },
             else => switch (mod.intern_pool.indexToKey(val.ip_index)) {
-                .int => |int| return int.big_int.clz(ty_bits),
+                .int => |int| switch (int.storage) {
+                    .big_int => |big_int| big_int.clz(ty_bits),
+                    .u64 => |x| @clz(x) + ty_bits - 64,
+                    .i64 => @panic("TODO implement i64 Value clz"),
+                },
                 else => unreachable,
             },
-        }
+        };
     }
 
     pub fn ctz(val: Value, ty: Type, mod: *const Module) u64 {
         const ty_bits = ty.intInfo(mod).bits;
-        switch (val.ip_index) {
-            .bool_false => return ty_bits,
-            .bool_true => return 0,
+        return switch (val.ip_index) {
+            .bool_false => ty_bits,
+            .bool_true => 0,
             .none => switch (val.tag()) {
-                .zero => return ty_bits,
-                .one => return 0,
+                .zero => ty_bits,
+                .one => 0,
 
                 .int_u64 => {
                     const big = @ctz(val.castTag(.int_u64).?.data);
@@ -1563,10 +1578,17 @@ pub const Value = struct {
                 else => unreachable,
             },
             else => switch (mod.intern_pool.indexToKey(val.ip_index)) {
-                .int => |int| return int.big_int.ctz(),
+                .int => |int| switch (int.storage) {
+                    .big_int => |big_int| big_int.ctz(),
+                    .u64 => |x| {
+                        const big = @ctz(x);
+                        return if (big == 64) ty_bits else big;
+                    },
+                    .i64 => @panic("TODO implement i64 Value ctz"),
+                },
                 else => unreachable,
             },
-        }
+        };
     }
 
     pub fn popCount(val: Value, ty: Type, mod: *const Module) u64 {
@@ -1591,7 +1613,9 @@ pub const Value = struct {
             else => switch (mod.intern_pool.indexToKey(val.ip_index)) {
                 .int => |int| {
                     const info = ty.intInfo(mod);
-                    return int.big_int.popCount(info.bits);
+                    var buffer: Value.BigIntSpace = undefined;
+                    const big_int = int.storage.toBigInt(&buffer);
+                    return @intCast(u64, big_int.popCount(info.bits));
                 },
                 else => unreachable,
             },
@@ -1641,23 +1665,23 @@ pub const Value = struct {
     /// Returns the number of bits the value requires to represent stored in twos complement form.
     pub fn intBitCountTwosComp(self: Value, mod: *const Module) usize {
         const target = mod.getTarget();
-        switch (self.ip_index) {
-            .bool_false => return 0,
-            .bool_true => return 1,
+        return switch (self.ip_index) {
+            .bool_false => 0,
+            .bool_true => 1,
             .none => switch (self.tag()) {
                 .zero,
                 .the_only_possible_value,
-                => return 0,
+                => 0,
 
-                .one => return 1,
+                .one => 1,
 
                 .int_u64 => {
                     const x = self.castTag(.int_u64).?.data;
                     if (x == 0) return 0;
                     return @intCast(usize, std.math.log2(x) + 1);
                 },
-                .int_big_positive => return self.castTag(.int_big_positive).?.asBigInt().bitCountTwosComp(),
-                .int_big_negative => return self.castTag(.int_big_negative).?.asBigInt().bitCountTwosComp(),
+                .int_big_positive => self.castTag(.int_big_positive).?.asBigInt().bitCountTwosComp(),
+                .int_big_negative => self.castTag(.int_big_negative).?.asBigInt().bitCountTwosComp(),
 
                 .decl_ref_mut,
                 .comptime_field_ptr,
@@ -1667,7 +1691,7 @@ pub const Value = struct {
                 .variable,
                 .eu_payload_ptr,
                 .opt_payload_ptr,
-                => return target.ptrBitWidth(),
+                => target.ptrBitWidth(),
 
                 else => {
                     var buffer: BigIntSpace = undefined;
@@ -1675,10 +1699,18 @@ pub const Value = struct {
                 },
             },
             else => switch (mod.intern_pool.indexToKey(self.ip_index)) {
-                .int => |int| return int.big_int.bitCountTwosComp(),
+                .int => |int| switch (int.storage) {
+                    .big_int => |big_int| big_int.bitCountTwosComp(),
+                    .u64 => |x| if (x == 0) 0 else @intCast(usize, std.math.log2(x) + 1),
+                    .i64 => {
+                        var buffer: Value.BigIntSpace = undefined;
+                        const big_int = int.storage.toBigInt(&buffer);
+                        return big_int.bitCountTwosComp();
+                    },
+                },
                 else => unreachable,
             },
-        }
+        };
     }
 
     /// Converts an integer or a float to a float. May result in a loss of information.
@@ -1798,8 +1830,11 @@ pub const Value = struct {
 
                 else => unreachable,
             },
-            else => switch (mod.intern_pool.indexToKey(lhs.ip_index)) {
-                .int => |int| return int.big_int.orderAgainstScalar(0),
+            else => return switch (mod.intern_pool.indexToKey(lhs.ip_index)) {
+                .int => |int| switch (int.storage) {
+                    .big_int => |big_int| big_int.orderAgainstScalar(0),
+                    inline .u64, .i64 => |x| std.math.order(x, 0),
+                },
                 else => unreachable,
             },
         }
@@ -2777,6 +2812,10 @@ pub const Value = struct {
         }
     }
 
+    pub fn tagIsVariable(val: Value) bool {
+        return val.ip_index == .none and val.tag() == .variable;
+    }
+
     /// Returns true if a Value is backed by a variable
     pub fn isVariable(val: Value, mod: *Module) bool {
         return switch (val.ip_index) {
@@ -5399,12 +5438,7 @@ pub const Value = struct {
         };
     };
 
-    /// Big enough to fit any non-BigInt value
-    pub const BigIntSpace = struct {
-        /// The +1 is headroom so that operations such as incrementing once or decrementing once
-        /// are possible without using an allocator.
-        limbs: [(@sizeOf(u64) / @sizeOf(std.math.big.Limb)) + 1]std.math.big.Limb,
-    };
+    pub const BigIntSpace = InternPool.Key.Int.Storage.BigIntSpace;
 
     pub const zero = initTag(.zero);
     pub const one = initTag(.one);