Commit 383cffbfae

Jacob Young <jacobly0@users.noreply.github.com>
2024-07-05 19:34:14
InternPool: temporarily disable multi-threaded behavior
This reduces the cost of the new data structure until the multi-threaded behavior is actually used.
Parent: 92ddb95
Changed files (3)
lib/std/Thread/Pool.zig
@@ -8,8 +8,13 @@ cond: std.Thread.Condition = .{},
 run_queue: RunQueue = .{},
 is_running: bool = true,
 allocator: std.mem.Allocator,
-threads: []std.Thread,
-ids: std.AutoArrayHashMapUnmanaged(std.Thread.Id, void),
+threads: if (builtin.single_threaded) [0]std.Thread else []std.Thread,
+ids: if (builtin.single_threaded) struct {
+    inline fn deinit(_: @This(), _: std.mem.Allocator) void {}
+    fn getIndex(_: @This(), _: std.Thread.Id) usize {
+        return 0;
+    }
+} else std.AutoArrayHashMapUnmanaged(std.Thread.Id, void),
 
 const RunQueue = std.SinglyLinkedList(Runnable);
 const Runnable = struct {
@@ -29,7 +34,7 @@ pub fn init(pool: *Pool, options: Options) !void {
 
     pool.* = .{
         .allocator = allocator,
-        .threads = &[_]std.Thread{},
+        .threads = if (builtin.single_threaded) .{} else &.{},
         .ids = .{},
     };
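
For illustration only, not part of the commit: the diff above swaps the real containers for zero-size stand-ins whenever `builtin.single_threaded` is set, so call sites keep compiling unchanged while the fields cost nothing. A minimal sketch of the same pattern, using the hypothetical name `Ids` in place of the pool's `ids` field:

const builtin = @import("builtin");
const std = @import("std");

// Compile-time choice between a zero-size stand-in and the real map.
const Ids = if (builtin.single_threaded) struct {
    inline fn deinit(_: @This(), _: std.mem.Allocator) void {}
    fn getIndex(_: @This(), _: std.Thread.Id) usize {
        return 0;
    }
} else std.AutoArrayHashMapUnmanaged(std.Thread.Id, void);

test "both variants expose the same calls" {
    const ids: Ids = .{};
    _ = ids.getIndex(std.Thread.getCurrentId());
    // the single-threaded stand-in occupies no memory at all
    try std.testing.expect(!builtin.single_threaded or @sizeOf(Ids) == 0);
}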
 
src/Zcu/PerThread.zig
@@ -3,7 +3,7 @@ zcu: *Zcu,
 /// Dense, per-thread unique index.
 tid: Id,
 
-pub const Id = if (builtin.single_threaded) enum { main } else enum(usize) { main, _ };
+pub const Id = if (InternPool.single_threaded) enum { main } else enum(usize) { main, _ };
 
 pub fn astGenFile(
     pt: Zcu.PerThread,
src/InternPool.zig
@@ -4,11 +4,10 @@
 
 locals: []Local = &.{},
 shards: []Shard = &.{},
-tid_width: std.math.Log2Int(u32) = 0,
-tid_shift_31: std.math.Log2Int(u32) = 31,
-tid_shift_32: std.math.Log2Int(u32) = 31,
+tid_width: if (single_threaded) u0 else std.math.Log2Int(u32) = 0,
+tid_shift_31: if (single_threaded) u0 else std.math.Log2Int(u32) = if (single_threaded) 0 else 31,
+tid_shift_32: if (single_threaded) u0 else std.math.Log2Int(u32) = if (single_threaded) 0 else 31,
 
-//items: std.MultiArrayList(Item) = .{},
 extra: std.ArrayListUnmanaged(u32) = .{},
 /// On 32-bit systems, this array is ignored and extra is used for everything.
 /// On 64-bit systems, this array is used for big integers and associated metadata.
@@ -92,6 +91,14 @@ free_dep_entries: std.ArrayListUnmanaged(DepEntry.Index) = .{},
 /// Value is the `Decl` of the struct that represents this `File`.
 files: std.AutoArrayHashMapUnmanaged(Cache.BinDigest, OptionalDeclIndex) = .{},
 
+/// Whether a multi-threaded intern pool is useful.
+/// Currently `false` until the intern pool is actually accessed
+/// from multiple threads to reduce the cost of this data structure.
+const want_multi_threaded = false;
+
+/// Whether a single-threaded intern pool impl is in use.
+pub const single_threaded = builtin.single_threaded or !want_multi_threaded;
+
 pub const FileIndex = enum(u32) {
     _,
 };
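
Aside, not from the commit: a `u0` field stores no bits and its only possible value is zero, which is known at compile time, so in the single-threaded build the tid shifts above disappear from both the struct layout and the index math. A small sketch of that property, with `single_threaded` hard-coded locally as a stand-in for the flag defined above and `Ip` as a hypothetical struct:

const std = @import("std");

const single_threaded = true; // local stand-in for the real flag

const Ip = struct {
    tid_shift_32: if (single_threaded) u0 else std.math.Log2Int(u32) =
        if (single_threaded) 0 else 31,
};

test "u0 shift folds away" {
    try std.testing.expect(@bitSizeOf(u0) == 0); // the field costs nothing
    const ip: Ip = .{};
    const index: u32 = 42;
    // shifting by a u0 is shifting by zero, so unwrapping an index
    // degenerates to using the 32-bit value directly
    try std.testing.expect((index >> ip.tid_shift_32) == index);
}
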
@@ -497,19 +504,23 @@ const Local = struct {
                     var new_list: ListSelf = .{ .bytes = @ptrCast(buf[bytes_offset..].ptr) };
                     new_list.header().* = .{ .capacity = capacity };
                     const len = mutable.lenPtr().*;
-                    const old_slice = mutable.list.view().slice();
-                    const new_slice = new_list.view().slice();
-                    inline for (fields) |field| {
-                        @memcpy(new_slice.items(field)[0..len], old_slice.items(field)[0..len]);
+                    // this cold, quickly predictable, condition enables
+                    // the `MultiArrayList` optimization in `view`
+                    if (len > 0) {
+                        const old_slice = mutable.list.view().slice();
+                        const new_slice = new_list.view().slice();
+                        inline for (fields) |field| @memcpy(new_slice.items(field)[0..len], old_slice.items(field)[0..len]);
                     }
                     mutable.list.release(new_list);
                 }
 
                 fn view(mutable: Mutable) View {
+                    const capacity = mutable.capacityPtr().*;
+                    assert(capacity > 0); // optimizes `MultiArrayList.Slice.items`
                     return .{
                         .bytes = mutable.list.bytes,
                         .len = mutable.lenPtr().*,
-                        .capacity = mutable.capacityPtr().*,
+                        .capacity = capacity,
                     };
                 }
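
Context for the new asserts, not part of the diff: `std.MultiArrayList(T).Slice.items` special-cases the empty slice, and `std.debug.assert` lowers to `unreachable` when its condition is false, so asserting `capacity > 0` (or guarding on `len > 0`) before building the slice lets optimized builds treat the empty case as unreachable and drop that branch. A rough sketch of the same idea with a hypothetical `firstTag` helper:

const std = @import("std");
const assert = std.debug.assert;

const Item = struct { tag: u8, data: u32 };

// The caller guarantees the list is non-empty; the assert forwards that
// guarantee to the optimizer, making the empty-slice special case inside
// `MultiArrayList.Slice.items` dead code in release builds.
fn firstTag(s: std.MultiArrayList(Item).Slice) u8 {
    assert(s.capacity > 0);
    return s.items(.tag)[0];
}

test "non-empty precondition" {
    var list: std.MultiArrayList(Item) = .{};
    defer list.deinit(std.testing.allocator);
    try list.append(std.testing.allocator, .{ .tag = 7, .data = 0 });
    try std.testing.expectEqual(@as(u8, 7), firstTag(list.slice()));
}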
 
@@ -550,6 +561,7 @@ const Local = struct {
 
             fn view(list: ListSelf) View {
                 const capacity = list.header().capacity;
+                assert(capacity > 0); // optimizes `MultiArrayList.Slice.items`
                 return .{
                     .bytes = list.bytes,
                     .len = capacity,
@@ -665,13 +677,8 @@ const Shard = struct {
     }
 };
 
-fn getShard(ip: *InternPool, tid: Zcu.PerThread.Id) *Shard {
-    return &ip.shards[@intFromEnum(tid)];
-}
-
 fn getTidMask(ip: *const InternPool) u32 {
-    assert(std.math.isPowerOfTwo(ip.shards.len));
-    return @intCast(ip.shards.len - 1);
+    return (@as(u32, 1) << ip.tid_width) - 1;
 }
 
 fn getIndexMask(ip: *const InternPool, comptime BackingInt: type) u32 {
@@ -809,7 +816,7 @@ pub const String = enum(u32) {
         };
     }
 
-    fn toOverlongSlice(string: String, ip: *const InternPool) []const u8 {
+    noinline fn toOverlongSlice(string: String, ip: *const InternPool) []const u8 {
         const unwrapped = string.unwrap(ip);
         return ip.getLocalShared(unwrapped.tid).strings.acquire().view().items(.@"0")[unwrapped.index..];
     }
@@ -3230,19 +3237,35 @@ pub const Index = enum(u32) {
         }
     };
 
-    pub fn getItem(index: Index, ip: *const InternPool) Item {
-        const unwrapped = index.unwrap(ip);
-        return ip.getLocalShared(unwrapped.tid).items.acquire().view().get(unwrapped.index);
+    pub inline fn getItem(index: Index, ip: *const InternPool) Item {
+        const item_ptr = index.itemPtr(ip);
+        const tag = @atomicLoad(Tag, item_ptr.tag_ptr, .acquire);
+        return .{ .tag = tag, .data = item_ptr.data_ptr.* };
     }
 
-    pub fn getTag(index: Index, ip: *const InternPool) Tag {
-        const unwrapped = index.unwrap(ip);
-        return ip.getLocalShared(unwrapped.tid).items.acquire().view().items(.tag)[unwrapped.index];
+    pub inline fn getTag(index: Index, ip: *const InternPool) Tag {
+        const item_ptr = index.itemPtr(ip);
+        return @atomicLoad(Tag, item_ptr.tag_ptr, .acquire);
     }
 
-    pub fn getData(index: Index, ip: *const InternPool) u32 {
-        const unwrapped = index.unwrap(ip);
-        return ip.getLocalShared(unwrapped.tid).items.acquire().view().items(.data)[unwrapped.index];
+    pub inline fn getData(index: Index, ip: *const InternPool) u32 {
+        return index.getItem(ip).data;
+    }
+
+    const ItemPtr = struct {
+        tag_ptr: *Tag,
+        data_ptr: *u32,
+    };
+    fn itemPtr(index: Index, ip: *const InternPool) ItemPtr {
+        const unwrapped: Unwrapped = if (single_threaded) .{
+            .tid = .main,
+            .index = @intFromEnum(index),
+        } else index.unwrap(ip);
+        const slice = ip.getLocalShared(unwrapped.tid).items.acquire().view().slice();
+        return .{
+            .tag_ptr = &slice.items(.tag)[unwrapped.index],
+            .data_ptr = &slice.items(.data)[unwrapped.index],
+        };
     }
 
     const Unwrapped = struct {
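
For illustration, not from the commit: `getItem` now reads the item through raw element pointers, loading `tag` with acquire ordering (which presumably pairs with a release store on the thread that published the item) and `data` with a plain load. A stand-alone sketch of that publish/consume shape, using the hypothetical names `Slot`, `publish`, and `get`:

const std = @import("std");

const Slot = struct {
    tag: u8 = 0, // 0 means "not published yet"
    data: u32 = undefined,

    // writer: fill in the payload first, then publish the tag with .release
    fn publish(slot: *Slot, tag: u8, data: u32) void {
        slot.data = data;
        @atomicStore(u8, &slot.tag, tag, .release);
    }

    // reader: an .acquire load of the tag guarantees the matching data is visible
    fn get(slot: *Slot) ?struct { tag: u8, data: u32 } {
        const tag = @atomicLoad(u8, &slot.tag, .acquire);
        if (tag == 0) return null;
        return .{ .tag = tag, .data = slot.data };
    }
};

test "publish then consume" {
    var slot: Slot = .{};
    try std.testing.expect(slot.get() == null);
    slot.publish(3, 42);
    try std.testing.expectEqual(@as(u32, 42), slot.get().?.data);
}
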
@@ -4905,11 +4928,12 @@ pub const MemoizedCall = struct {
     result: Index,
 };
 
-pub fn init(ip: *InternPool, gpa: Allocator, total_threads: usize) !void {
+pub fn init(ip: *InternPool, gpa: Allocator, available_threads: usize) !void {
     errdefer ip.deinit(gpa);
     assert(ip.locals.len == 0 and ip.shards.len == 0);
 
-    ip.locals = try gpa.alloc(Local, total_threads);
+    const used_threads = if (single_threaded) 1 else available_threads;
+    ip.locals = try gpa.alloc(Local, used_threads);
     @memset(ip.locals, .{
         .shared = .{
             .items = Local.List(Item).empty,
@@ -4922,9 +4946,9 @@ pub fn init(ip: *InternPool, gpa: Allocator, total_threads: usize) !void {
         },
     });
 
-    ip.tid_width = @intCast(std.math.log2_int_ceil(usize, total_threads));
-    ip.tid_shift_31 = 31 - ip.tid_width;
-    ip.tid_shift_32 = ip.tid_shift_31 +| 1;
+    ip.tid_width = @intCast(std.math.log2_int_ceil(usize, used_threads));
+    ip.tid_shift_31 = if (single_threaded) 0 else 31 - ip.tid_width;
+    ip.tid_shift_32 = if (single_threaded) 0 else ip.tid_shift_31 +| 1;
     ip.shards = try gpa.alloc(Shard, @as(usize, 1) << ip.tid_width);
     @memset(ip.shards, .{
         .shared = .{
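
A worked example of the sharding arithmetic above, with an assumed count of 8 worker threads (not from the commit): `tid_width` becomes 3, so the thread id lives in the top 3 bits of a 32-bit index, the remaining 29 bits hold the per-thread index, 8 shards are allocated, and `getTidMask` yields 7; in the single-threaded build `used_threads` is 1, every shift is 0, and exactly one local and one shard exist.

const std = @import("std");

test "tid shift arithmetic for 8 threads" {
    const used_threads: usize = 8;
    const tid_width: std.math.Log2Int(u32) = @intCast(std.math.log2_int_ceil(usize, used_threads));
    const tid_shift_32 = (31 - tid_width) +| 1;
    try std.testing.expect(tid_width == 3);
    try std.testing.expect(tid_shift_32 == 29);
    try std.testing.expect((@as(usize, 1) << tid_width) == 8); // shard count
    try std.testing.expect(((@as(u32, 1) << tid_width) - 1) == 7); // getTidMask

    // an index keeps the thread id above tid_shift_32 and the
    // per-thread item index in the low bits
    const tid: u32 = 5; // any id below 1 << tid_width
    const local_index: u32 = 1234;
    const index = (tid << tid_shift_32) | local_index;
    try std.testing.expect((index >> tid_shift_32) == tid);
    try std.testing.expect((index & ((@as(u32, 1) << tid_shift_32) - 1)) == local_index);
}
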
@@ -7063,7 +7087,7 @@ pub fn getExternFunc(
         .tag = .extern_func,
         .data = extra_index,
     });
-    errdefer ip.items.lenPtr().* -= 1;
+    errdefer items.lenPtr().* -= 1;
     return gop.put();
 }
 
@@ -10146,12 +10170,9 @@ pub fn iesFuncIndex(ip: *const InternPool, ies_index: Index) Index {
 /// error set function. The returned pointer is invalidated when anything is
 /// added to `ip`.
 pub fn iesResolved(ip: *const InternPool, ies_index: Index) *Index {
-    assert(ies_index != .none);
-    const tags = ip.items.items(.tag);
-    const datas = ip.items.items(.data);
-    assert(tags[@intFromEnum(ies_index)] == .type_inferred_error_set);
-    const func_index = datas[@intFromEnum(ies_index)];
-    return funcIesResolved(ip, func_index);
+    const ies_item = ies_index.getItem(ip);
+    assert(ies_item.tag == .type_inferred_error_set);
+    return funcIesResolved(ip, ies_item.data);
 }
 
 /// Returns a mutable pointer to the resolved error set type of an inferred