Commit b3b6ccba50
Changed files (3)
lib/std/array_list.zig
@@ -210,6 +210,14 @@ pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type {
self.capacity = new_len;
}
+ /// Reduce length to `new_len`.
+ /// Invalidates pointers to the elements `items[new_len..]`.
+ /// Keeps capacity the same.
+ pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void {
+ assert(new_len <= self.items.len);
+ self.items.len = new_len;
+ }
+
pub fn ensureCapacity(self: *Self, new_capacity: usize) !void {
var better_capacity = self.capacity;
if (better_capacity >= new_capacity) return;
@@ -432,6 +440,14 @@ pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) typ
self.capacity = new_len;
}
+ /// Reduce length to `new_len`.
+ /// Invalidates pointers to the elements `items[new_len..]`.
+ /// Keeps capacity the same.
+ pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void {
+ assert(new_len <= self.items.len);
+ self.items.len = new_len;
+ }
+
pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void {
var better_capacity = self.capacity;
if (better_capacity >= new_capacity) return;
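
The new `shrinkRetainingCapacity` reduces the length but, unlike `shrink`, leaves the allocation untouched, so the list can be refilled without reallocating. A minimal usage sketch (the test name and variables are illustrative, not part of the commit):

    test "shrinkRetainingCapacity keeps the allocation (sketch)" {
        var list = std.ArrayList(u8).init(std.testing.allocator);
        defer list.deinit();
        try list.appendSlice("hello");
        const old_capacity = list.capacity;
        list.shrinkRetainingCapacity(0);                    // length drops to 0
        std.testing.expect(list.items.len == 0);
        std.testing.expect(list.capacity == old_capacity);  // backing memory is kept
    }
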
lib/std/debug.zig
@@ -1278,7 +1278,7 @@ pub const DebugInfo = struct {
else => return error.MissingDebugInfo,
}
- if (self.address_map.getValue(ctx.base_address)) |obj_di| {
+ if (self.address_map.get(ctx.base_address)) |obj_di| {
return obj_di;
}
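
The debug.zig hunk is a caller update for the lookup rename in hash_map.zig below: the old `getValue` becomes `get` (returning an optional copy of the value), and the old `get` becomes `getEntry` (returning an optional pointer to the stored entry). A small sketch of the two new accessors, using an illustrative map that is not part of this commit:

    test "get vs getEntry after the rename (sketch)" {
        var map = std.AutoHashMap(u32, []const u8).init(std.testing.allocator);
        defer map.deinit();
        try map.put(1, "one");
        if (map.get(1)) |value| {       // ?V: a copy of the value
            std.debug.assert(std.mem.eql(u8, value, "one"));
        }
        if (map.getEntry(1)) |entry| {  // ?*Entry: mutable access in place
            entry.value = "uno";
        }
    }
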
lib/std/hash_map.zig
@@ -9,17 +9,15 @@ const autoHash = std.hash.autoHash;
const Wyhash = std.hash.Wyhash;
const Allocator = mem.Allocator;
const builtin = @import("builtin");
-
-const want_modification_safety = std.debug.runtime_safety;
-const debug_u32 = if (want_modification_safety) u32 else void;
+const hash_map = @This();
pub fn AutoHashMap(comptime K: type, comptime V: type) type {
- return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K));
+ return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K));
}
/// Builtin hashmap for strings as keys.
pub fn StringHashMap(comptime V: type) type {
- return HashMap([]const u8, V, hashString, eqlString);
+ return HashMap([]const u8, V, hashString, eqlString, true);
}
pub fn eqlString(a: []const u8, b: []const u8) bool {
@@ -30,422 +28,846 @@ pub fn hashString(s: []const u8) u32 {
return @truncate(u32, std.hash.Wyhash.hash(0, s));
}
-pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u32, comptime eql: fn (a: K, b: K) bool) type {
+/// Insertion order is preserved.
+/// Deletions perform a "swap removal" on the entries list.
+/// Modifying the hash map while iterating is allowed; however, one must understand
+/// the (well-defined) behavior when mixing insertions and deletions with iteration.
+/// For a hash map that can be initialized directly and does not store an Allocator
+/// field, see `HashMapUnmanaged`.
+/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
+/// functions. It does not store each item's hash in the table. Setting `store_hash`
+/// to `true` incurs slightly more memory cost by storing each key's hash in the table,
+/// but `eql` then only needs to be called on hash collisions.
+pub fn HashMap(
+ comptime K: type,
+ comptime V: type,
+ comptime hash: fn (key: K) u32,
+ comptime eql: fn (a: K, b: K) bool,
+ comptime store_hash: bool,
+) type {
return struct {
- entries: []Entry,
- size: usize,
- max_distance_from_start_index: usize,
+ unmanaged: Unmanaged,
allocator: *Allocator,
- /// This is used to detect bugs where a hashtable is edited while an iterator is running.
- modification_count: debug_u32,
-
- const Self = @This();
-
- /// A *KV is a mutable pointer into this HashMap's internal storage.
- /// Modifying the key is undefined behavior.
- /// Modifying the value is harmless.
- /// *KV pointers become invalid whenever this HashMap is modified,
- /// and then any access to the *KV is undefined behavior.
- pub const KV = struct {
- key: K,
- value: V,
- };
-
- const Entry = struct {
- used: bool,
- distance_from_start_index: usize,
- kv: KV,
- };
-
- pub const GetOrPutResult = struct {
- kv: *KV,
- found_existing: bool,
- };
+ pub const Unmanaged = HashMapUnmanaged(K, V, hash, eql, store_hash);
+ pub const Entry = Unmanaged.Entry;
+ pub const Hash = Unmanaged.Hash;
+ pub const GetOrPutResult = Unmanaged.GetOrPutResult;
+ /// Deprecated. Iterate using `items`.
pub const Iterator = struct {
hm: *const Self,
- // how many items have we returned
- count: usize,
- // iterator through the entry array
+ /// Iterator through the entry array.
index: usize,
- // used to detect concurrent modification
- initial_modification_count: debug_u32,
- pub fn next(it: *Iterator) ?*KV {
- if (want_modification_safety) {
- assert(it.initial_modification_count == it.hm.modification_count); // concurrent modification
- }
- if (it.count >= it.hm.size) return null;
- while (it.index < it.hm.entries.len) : (it.index += 1) {
- const entry = &it.hm.entries[it.index];
- if (entry.used) {
- it.index += 1;
- it.count += 1;
- return &entry.kv;
- }
- }
- unreachable; // no next item
+ pub fn next(it: *Iterator) ?*Entry {
+ if (it.index >= it.hm.unmanaged.entries.items.len) return null;
+ const result = &it.hm.unmanaged.entries.items[it.index];
+ it.index += 1;
+ return result;
}
- // Reset the iterator to the initial index
+ /// Reset the iterator to the initial index
pub fn reset(it: *Iterator) void {
- it.count = 0;
it.index = 0;
- // Resetting the modification count too
- it.initial_modification_count = it.hm.modification_count;
}
};
+ const Self = @This();
+ const Index = Unmanaged.Index;
+
pub fn init(allocator: *Allocator) Self {
- return Self{
- .entries = &[_]Entry{},
+ return .{
+ .unmanaged = .{},
.allocator = allocator,
- .size = 0,
- .max_distance_from_start_index = 0,
- .modification_count = if (want_modification_safety) 0 else {},
};
}
- pub fn deinit(hm: Self) void {
- hm.allocator.free(hm.entries);
+ pub fn deinit(self: *Self) void {
+ self.unmanaged.deinit(self.allocator);
+ self.* = undefined;
}
- pub fn clear(hm: *Self) void {
- for (hm.entries) |*entry| {
- entry.used = false;
- }
- hm.size = 0;
- hm.max_distance_from_start_index = 0;
- hm.incrementModificationCount();
+ pub fn clearRetainingCapacity(self: *Self) void {
+ return self.unmanaged.clearRetainingCapacity();
}
+ pub fn clearAndFree(self: *Self) void {
+ return self.unmanaged.clearAndFree(self.allocator);
+ }
+
+ /// Deprecated. Use `items().len`.
pub fn count(self: Self) usize {
- return self.size;
+ return self.items().len;
+ }
+
+ /// Deprecated. Iterate using `items`.
+ pub fn iterator(self: *const Self) Iterator {
+ return Iterator{
+ .hm = self,
+ .index = 0,
+ };
}
/// If key exists this function cannot fail.
/// If there is an existing item with `key`, then the result
- /// kv pointer points to it, and found_existing is true.
+ /// `Entry` pointer points to it, and found_existing is true.
/// Otherwise, puts a new item with undefined value, and
- /// the kv pointer points to it. Caller should then initialize
- /// the data.
+ /// the `Entry` pointer points to it. Caller should then initialize
+ /// the value (but not the key).
pub fn getOrPut(self: *Self, key: K) !GetOrPutResult {
- // TODO this implementation can be improved - we should only
- // have to hash once and find the entry once.
- if (self.get(key)) |kv| {
- return GetOrPutResult{
- .kv = kv,
- .found_existing = true,
- };
- }
- self.incrementModificationCount();
- try self.autoCapacity();
- const put_result = self.internalPut(key);
- assert(put_result.old_kv == null);
- return GetOrPutResult{
- .kv = &put_result.new_entry.kv,
- .found_existing = false,
+ return self.unmanaged.getOrPut(self.allocator, key);
+ }
+
+ /// If there is an existing item with `key`, then the result
+ /// `Entry` pointer points to it, and found_existing is true.
+ /// Otherwise, puts a new item with undefined value, and
+ /// the `Entry` pointer points to it. Caller should then initialize
+ /// the value (but not the key).
+ /// If a new entry needs to be stored, this function asserts there
+ /// is enough capacity to store it.
+ pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult {
+ return self.unmanaged.getOrPutAssumeCapacity(key);
+ }
+
+ pub fn getOrPutValue(self: *Self, key: K, value: V) !*Entry {
+ return self.unmanaged.getOrPutValue(self.allocator, key, value);
+ }
+
+ /// Increases capacity, guaranteeing that insertions up until
+ /// `new_capacity` will not cause an allocation, and therefore cannot fail.
+ pub fn ensureCapacity(self: *Self, new_capacity: usize) !void {
+ return self.unmanaged.ensureCapacity(self.allocator, new_capacity);
+ }
+
+ /// Returns the number of total elements which may be present before it is
+ /// no longer guaranteed that no allocations will be performed.
+ pub fn capacity(self: *Self) usize {
+ return self.unmanaged.capacity();
+ }
+
+ /// Clobbers any existing data. To detect if a put would clobber
+ /// existing data, see `getOrPut`.
+ pub fn put(self: *Self, key: K, value: V) !void {
+ return self.unmanaged.put(self.allocator, key, value);
+ }
+
+ /// Inserts a key-value pair into the hash map, asserting that no previous
+ /// entry with the same key is already present.
+ pub fn putNoClobber(self: *Self, key: K, value: V) !void {
+ return self.unmanaged.putNoClobber(self.allocator, key, value);
+ }
+
+ /// Asserts there is enough capacity to store the new key-value pair.
+ /// Clobbers any existing data. To detect if a put would clobber
+ /// existing data, see `getOrPutAssumeCapacity`.
+ pub fn putAssumeCapacity(self: *Self, key: K, value: V) void {
+ return self.unmanaged.putAssumeCapacity(key, value);
+ }
+
+ /// Asserts there is enough capacity to store the new key-value pair.
+ /// Asserts that it does not clobber any existing data.
+ /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`.
+ pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
+ return self.unmanaged.putAssumeCapacityNoClobber(key, value);
+ }
+
+ /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
+ pub fn fetchPut(self: *Self, key: K, value: V) !?Entry {
+ return self.unmanaged.fetchPut(self.allocator, key, value);
+ }
+
+ /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
+ /// If insertion happens, asserts there is enough capacity without allocating.
+ pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry {
+ return self.unmanaged.fetchPutAssumeCapacity(key, value);
+ }
+
+ pub fn getEntry(self: Self, key: K) ?*Entry {
+ return self.unmanaged.getEntry(key);
+ }
+
+ pub fn get(self: Self, key: K) ?V {
+ return self.unmanaged.get(key);
+ }
+
+ pub fn contains(self: Self, key: K) bool {
+ return self.unmanaged.contains(key);
+ }
+
+ /// If there is an `Entry` with a matching key, it is deleted from
+ /// the hash map, and then returned from this function.
+ pub fn remove(self: *Self, key: K) ?Entry {
+ return self.unmanaged.remove(key);
+ }
+
+ /// Asserts there is an `Entry` with matching key, deletes it from the hash map,
+ /// and discards it.
+ pub fn removeAssertDiscard(self: *Self, key: K) void {
+ return self.unmanaged.removeAssertDiscard(key);
+ }
+
+ pub fn items(self: Self) []Entry {
+ return self.unmanaged.items();
+ }
+
+ pub fn clone(self: Self) !Self {
+ var other = try self.unmanaged.clone(self.allocator);
+ return other.promote(self.allocator);
+ }
+ };
+}
+
+/// General purpose hash table.
+/// Insertion order is preserved.
+/// Deletions perform a "swap removal" on the entries list.
+/// Modifying the hash map while iterating is allowed; however, one must understand
+/// the (well-defined) behavior when mixing insertions and deletions with iteration.
+/// This type does not store an Allocator field - the Allocator must be passed in
+/// with each function call that requires it. See `HashMap` for a type that stores
+/// an Allocator field for convenience.
+/// Can be initialized directly using the default field values.
+/// This type is designed to have low overhead for small numbers of entries. When
+/// `store_hash` is `false` and the number of entries in the map is less than 9,
+/// the overhead cost of using `HashMapUnmanaged` rather than `std.ArrayList` is
+/// only a single pointer-sized integer.
+/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
+/// functions. It does not store each item's hash in the table. Setting `store_hash`
+/// to `true` incurs slightly more memory cost by storing each key's hash in the table
+/// but only has to call `eql` when hashes collide.
+pub fn HashMapUnmanaged(
+ comptime K: type,
+ comptime V: type,
+ comptime hash: fn (key: K) u32,
+ comptime eql: fn (a: K, b: K) bool,
+ comptime store_hash: bool,
+) type {
+ return struct {
+ /// It is permitted to access this field directly.
+ entries: std.ArrayListUnmanaged(Entry) = .{},
+
+ /// While the entries length does not exceed `linear_scan_max`, this remains `null`.
+ /// Once entries length grows big enough, this field is allocated. There is
+ /// an IndexHeader followed by an array of Index(I) structs, where I is defined
+ /// by how many total indexes there are.
+ index_header: ?*IndexHeader = null,
+
+ /// Modifying the key is illegal behavior.
+ /// Modifying the value is allowed.
+ /// Entry pointers become invalid whenever this HashMap is modified,
+ /// unless `ensureCapacity` was previously used.
+ pub const Entry = struct {
+ /// This field is `void` if `store_hash` is `false`.
+ hash: Hash,
+ key: K,
+ value: V,
+ };
+
+ pub const Hash = if (store_hash) u32 else void;
+
+ pub const GetOrPutResult = struct {
+ entry: *Entry,
+ found_existing: bool,
+ };
+
+ pub const Managed = HashMap(K, V, hash, eql, store_hash);
+
+ const Self = @This();
+
+ const linear_scan_max = 8;
+
+ pub fn promote(self: Self, allocator: *Allocator) Managed {
+ return .{
+ .unmanaged = self,
+ .allocator = allocator,
};
}
- pub fn getOrPutValue(self: *Self, key: K, value: V) !*KV {
- const res = try self.getOrPut(key);
- if (!res.found_existing)
- res.kv.value = value;
+ pub fn deinit(self: *Self, allocator: *Allocator) void {
+ self.entries.deinit(allocator);
+ if (self.index_header) |header| {
+ header.free(allocator);
+ }
+ self.* = undefined;
+ }
- return res.kv;
+ pub fn clearRetainingCapacity(self: *Self) void {
+ self.entries.items.len = 0;
+ if (self.index_header) |header| {
+ header.max_distance_from_start_index = 0;
+ const indexes = header.indexes(u8);
+ @memset(indexes.ptr, 0xff, indexes.len);
+ }
}
- fn optimizedCapacity(expected_count: usize) usize {
- // ensure that the hash map will be at most 60% full if
- // expected_count items are put into it
- var optimized_capacity = expected_count * 5 / 3;
- // an overflow here would mean the amount of memory required would not
- // be representable in the address space
- return math.ceilPowerOfTwo(usize, optimized_capacity) catch unreachable;
+ pub fn clearAndFree(self: *Self, allocator: *Allocator) void {
+ self.entries.shrink(allocator, 0);
+ if (self.index_header) |header| {
+ header.free(allocator);
+ self.index_header = null;
+ }
}
- /// Increases capacity so that the hash map will be at most
- /// 60% full when expected_count items are put into it
- pub fn ensureCapacity(self: *Self, expected_count: usize) !void {
- if (expected_count == 0) return;
- const optimized_capacity = optimizedCapacity(expected_count);
- return self.ensureCapacityExact(optimized_capacity);
+ /// If key exists this function cannot fail.
+ /// If there is an existing item with `key`, then the result
+ /// `Entry` pointer points to it, and found_existing is true.
+ /// Otherwise, puts a new item with undefined value, and
+ /// the `Entry` pointer points to it. Caller should then initialize
+ /// the value (but not the key).
+ pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult {
+ self.ensureCapacity(allocator, self.entries.items.len + 1) catch |err| {
+ // "If key exists this function cannot fail."
+ return GetOrPutResult{
+ .entry = self.getEntry(key) orelse return err,
+ .found_existing = true,
+ };
+ };
+ return self.getOrPutAssumeCapacity(key);
}
- /// Sets the capacity to the new capacity if the new
- /// capacity is greater than the current capacity.
- /// New capacity must be a power of two.
- fn ensureCapacityExact(self: *Self, new_capacity: usize) !void {
- // capacity must always be a power of two to allow for modulo
- // optimization in the constrainIndex fn
- assert(math.isPowerOfTwo(new_capacity));
+ /// If there is an existing item with `key`, then the result
+ /// `Entry` pointer points to it, and found_existing is true.
+ /// Otherwise, puts a new item with undefined value, and
+ /// the `Entry` pointer points to it. Caller should then initialize
+ /// the value (but not the key).
+ /// If a new entry needs to be stored, this function asserts there
+ /// is enough capacity to store it.
+ pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult {
+ const header = self.index_header orelse {
+ // Linear scan.
+ const h = if (store_hash) hash(key) else {};
+ for (self.entries.items) |*item| {
+ if (item.hash == h and eql(key, item.key)) {
+ return GetOrPutResult{
+ .entry = item,
+ .found_existing = true,
+ };
+ }
+ }
+ const new_entry = self.entries.addOneAssumeCapacity();
+ new_entry.* = .{
+ .hash = if (store_hash) h else {},
+ .key = key,
+ .value = undefined,
+ };
+ return GetOrPutResult{
+ .entry = new_entry,
+ .found_existing = false,
+ };
+ };
- if (new_capacity <= self.entries.len) {
- return;
+ switch (header.capacityIndexType()) {
+ .u8 => return self.getOrPutInternal(key, header, u8),
+ .u16 => return self.getOrPutInternal(key, header, u16),
+ .u32 => return self.getOrPutInternal(key, header, u32),
+ .usize => return self.getOrPutInternal(key, header, usize),
}
+ }
- const old_entries = self.entries;
- try self.initCapacity(new_capacity);
- self.incrementModificationCount();
- if (old_entries.len > 0) {
- // dump all of the old elements into the new table
- for (old_entries) |*old_entry| {
- if (old_entry.used) {
- self.internalPut(old_entry.kv.key).new_entry.kv.value = old_entry.kv.value;
+ pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry {
+ const res = try self.getOrPut(allocator, key);
+ if (!res.found_existing)
+ res.entry.value = value;
+
+ return res.entry;
+ }
+
+ /// Increases capacity, guaranteeing that insertions up until
+ /// `new_capacity` will not cause an allocation, and therefore cannot fail.
+ pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void {
+ try self.entries.ensureCapacity(allocator, new_capacity);
+ if (new_capacity <= linear_scan_max) return;
+
+ // Resize if indexes would be more than 75% full.
+ const needed_len = new_capacity * 4 / 3;
+ if (self.index_header) |header| {
+ if (needed_len > header.indexes_len) {
+ var new_indexes_len = header.indexes_len;
+ while (true) {
+ new_indexes_len += new_indexes_len / 2 + 8;
+ if (new_indexes_len >= needed_len) break;
}
+ const new_header = try IndexHeader.alloc(allocator, new_indexes_len);
+ self.insertAllEntriesIntoNewHeader(new_header);
+ header.free(allocator);
+ self.index_header = new_header;
}
- self.allocator.free(old_entries);
+ } else {
+ const header = try IndexHeader.alloc(allocator, needed_len);
+ self.insertAllEntriesIntoNewHeader(header);
+ self.index_header = header;
}
}
- /// Returns the kv pair that was already there.
- pub fn put(self: *Self, key: K, value: V) !?KV {
- try self.autoCapacity();
- return putAssumeCapacity(self, key, value);
+ /// Returns the number of total elements which may be present before it is
+ /// no longer guaranteed that no allocations will be performed.
+ pub fn capacity(self: Self) usize {
+ const entry_cap = self.entries.capacity;
+ const header = self.index_header orelse return math.min(linear_scan_max, entry_cap);
+ const indexes_cap = (header.indexes_len + 1) * 3 / 4;
+ return math.min(entry_cap, indexes_cap);
}
- /// Calls put() and asserts that no kv pair is clobbered.
- pub fn putNoClobber(self: *Self, key: K, value: V) !void {
- assert((try self.put(key, value)) == null);
+ /// Clobbers any existing data. To detect if a put would clobber
+ /// existing data, see `getOrPut`.
+ pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void {
+ const result = try self.getOrPut(allocator, key);
+ result.entry.value = value;
}
- pub fn putAssumeCapacity(self: *Self, key: K, value: V) ?KV {
- assert(self.count() < self.entries.len);
- self.incrementModificationCount();
+ /// Inserts a key-value pair into the hash map, asserting that no previous
+ /// entry with the same key is already present.
+ pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void {
+ const result = try self.getOrPut(allocator, key);
+ assert(!result.found_existing);
+ result.entry.value = value;
+ }
- const put_result = self.internalPut(key);
- put_result.new_entry.kv.value = value;
- return put_result.old_kv;
+ /// Asserts there is enough capacity to store the new key-value pair.
+ /// Clobbers any existing data. To detect if a put would clobber
+ /// existing data, see `getOrPutAssumeCapacity`.
+ pub fn putAssumeCapacity(self: *Self, key: K, value: V) void {
+ const result = self.getOrPutAssumeCapacity(key);
+ result.entry.value = value;
}
+ /// Asserts there is enough capacity to store the new key-value pair.
+ /// Asserts that it does not clobber any existing data.
+ /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`.
pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
- assert(self.putAssumeCapacity(key, value) == null);
+ const result = self.getOrPutAssumeCapacity(key);
+ assert(!result.found_existing);
+ result.entry.value = value;
}
- pub fn get(hm: *const Self, key: K) ?*KV {
- if (hm.entries.len == 0) {
+ /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
+ pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?Entry {
+ const gop = try self.getOrPut(allocator, key);
+ var result: ?Entry = null;
+ if (gop.found_existing) {
+ result = gop.entry.*;
+ }
+ gop.entry.value = value;
+ return result;
+ }
+
+ /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
+ /// If insertion happens, asserts there is enough capacity without allocating.
+ pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry {
+ const gop = self.getOrPutAssumeCapacity(key);
+ var result: ?Entry = null;
+ if (gop.found_existing) {
+ result = gop.entry.*;
+ }
+ gop.entry.value = value;
+ return result;
+ }
+
+ pub fn getEntry(self: Self, key: K) ?*Entry {
+ const header = self.index_header orelse {
+ // Linear scan.
+ const h = if (store_hash) hash(key) else {};
+ for (self.entries.items) |*item| {
+ if (item.hash == h and eql(key, item.key)) {
+ return item;
+ }
+ }
return null;
+ };
+
+ switch (header.capacityIndexType()) {
+ .u8 => return self.getInternal(key, header, u8),
+ .u16 => return self.getInternal(key, header, u16),
+ .u32 => return self.getInternal(key, header, u32),
+ .usize => return self.getInternal(key, header, usize),
}
- return hm.internalGet(key);
}
- pub fn getValue(hm: *const Self, key: K) ?V {
- return if (hm.get(key)) |kv| kv.value else null;
+ pub fn get(self: Self, key: K) ?V {
+ return if (self.getEntry(key)) |entry| entry.value else null;
}
- pub fn contains(hm: *const Self, key: K) bool {
- return hm.get(key) != null;
+ pub fn contains(self: Self, key: K) bool {
+ return self.getEntry(key) != null;
}
- /// Returns any kv pair that was removed.
- pub fn remove(hm: *Self, key: K) ?KV {
- if (hm.entries.len == 0) return null;
- hm.incrementModificationCount();
- const start_index = hm.keyToIndex(key);
- {
- var roll_over: usize = 0;
- while (roll_over <= hm.max_distance_from_start_index) : (roll_over += 1) {
- const index = hm.constrainIndex(start_index + roll_over);
- var entry = &hm.entries[index];
-
- if (!entry.used) return null;
-
- if (!eql(entry.kv.key, key)) continue;
-
- const removed_kv = entry.kv;
- while (roll_over < hm.entries.len) : (roll_over += 1) {
- const next_index = hm.constrainIndex(start_index + roll_over + 1);
- const next_entry = &hm.entries[next_index];
- if (!next_entry.used or next_entry.distance_from_start_index == 0) {
- entry.used = false;
- hm.size -= 1;
- return removed_kv;
- }
- entry.* = next_entry.*;
- entry.distance_from_start_index -= 1;
- entry = next_entry;
+ /// If there is an `Entry` with a matching key, it is deleted from
+ /// the hash map, and then returned from this function.
+ pub fn remove(self: *Self, key: K) ?Entry {
+ const header = self.index_header orelse {
+ // Linear scan.
+ const h = if (store_hash) hash(key) else {};
+ for (self.entries.items) |item, i| {
+ if (item.hash == h and eql(key, item.key)) {
+ return self.entries.swapRemove(i);
}
- unreachable; // shifting everything in the table
}
+ return null;
+ };
+ switch (header.capacityIndexType()) {
+ .u8 => return self.removeInternal(key, header, u8),
+ .u16 => return self.removeInternal(key, header, u16),
+ .u32 => return self.removeInternal(key, header, u32),
+ .usize => return self.removeInternal(key, header, usize),
}
- return null;
}
- /// Calls remove(), asserts that a kv pair is removed, and discards it.
- pub fn removeAssertDiscard(hm: *Self, key: K) void {
- assert(hm.remove(key) != null);
+ /// Asserts there is an `Entry` with matching key, deletes it from the hash map,
+ /// and discards it.
+ pub fn removeAssertDiscard(self: *Self, key: K) void {
+ assert(self.remove(key) != null);
}
- pub fn iterator(hm: *const Self) Iterator {
- return Iterator{
- .hm = hm,
- .count = 0,
- .index = 0,
- .initial_modification_count = hm.modification_count,
- };
+ pub fn items(self: Self) []Entry {
+ return self.entries.items;
}
- pub fn clone(self: Self) !Self {
- var other = Self.init(self.allocator);
- try other.initCapacity(self.entries.len);
- var it = self.iterator();
- while (it.next()) |entry| {
- try other.putNoClobber(entry.key, entry.value);
+ pub fn clone(self: Self, allocator: *Allocator) !Self {
+ // TODO this can be made more efficient by directly allocating
+ // the memory slices and memcpying the elements.
+ var other: Self = .{};
+ try other.ensureCapacity(allocator, self.entries.items.len);
+ for (self.entries.items) |entry| {
+ other.putAssumeCapacityNoClobber(entry.key, entry.value);
}
return other;
}
- fn autoCapacity(self: *Self) !void {
- if (self.entries.len == 0) {
- return self.ensureCapacityExact(16);
- }
- // if we get too full (60%), double the capacity
- if (self.size * 5 >= self.entries.len * 3) {
- return self.ensureCapacityExact(self.entries.len * 2);
- }
- }
+ fn removeInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) ?Entry {
+ const indexes = header.indexes(I);
+ const h = hash(key);
+ const start_index = header.hashToIndex(h);
+ var roll_over: usize = 0;
+ while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) {
+ const index_index = (start_index + roll_over) % header.indexes_len;
+ var index = &indexes[index_index];
+ if (index.isEmpty())
+ return null;
+
+ const entry = &self.entries.items[index.entry_index];
+
+ const hash_match = if (store_hash) h == entry.hash else true;
+ if (!hash_match or !eql(key, entry.key))
+ continue;
- fn initCapacity(hm: *Self, capacity: usize) !void {
- hm.entries = try hm.allocator.alloc(Entry, capacity);
- hm.size = 0;
- hm.max_distance_from_start_index = 0;
- for (hm.entries) |*entry| {
- entry.used = false;
+ const removed_entry = self.entries.swapRemove(index.entry_index);
+ if (self.entries.items.len > 0 and self.entries.items.len != index.entry_index) {
+ // Because of the swap remove, now we need to update the index that was
+ // pointing to the last entry and is now pointing to this removed item slot.
+ self.updateEntryIndex(header, self.entries.items.len, index.entry_index, I, indexes);
+ }
+
+ // Now we have to shift over the following indexes.
+ roll_over += 1;
+ while (roll_over < header.indexes_len) : (roll_over += 1) {
+ const next_index_index = (start_index + roll_over) % header.indexes_len;
+ const next_index = &indexes[next_index_index];
+ if (next_index.isEmpty() or next_index.distance_from_start_index == 0) {
+ index.setEmpty();
+ return removed_entry;
+ }
+ index.* = next_index.*;
+ index.distance_from_start_index -= 1;
+ index = next_index;
+ }
+ unreachable;
}
+ return null;
}
- fn incrementModificationCount(hm: *Self) void {
- if (want_modification_safety) {
- hm.modification_count +%= 1;
+ fn updateEntryIndex(
+ self: *Self,
+ header: *IndexHeader,
+ old_entry_index: usize,
+ new_entry_index: usize,
+ comptime I: type,
+ indexes: []Index(I),
+ ) void {
+ const h = if (store_hash) self.entries.items[new_entry_index].hash else hash(self.entries.items[new_entry_index].key);
+ const start_index = header.hashToIndex(h);
+ var roll_over: usize = 0;
+ while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) {
+ const index_index = (start_index + roll_over) % header.indexes_len;
+ const index = &indexes[index_index];
+ if (index.entry_index == old_entry_index) {
+ index.entry_index = @intCast(I, new_entry_index);
+ return;
+ }
}
+ unreachable;
}
- const InternalPutResult = struct {
- new_entry: *Entry,
- old_kv: ?KV,
- };
-
- /// Returns a pointer to the new entry.
- /// Asserts that there is enough space for the new item.
- fn internalPut(self: *Self, orig_key: K) InternalPutResult {
- var key = orig_key;
- var value: V = undefined;
- const start_index = self.keyToIndex(key);
+ /// Must ensureCapacity before calling this.
+ fn getOrPutInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) GetOrPutResult {
+ const indexes = header.indexes(I);
+ const h = hash(key);
+ const start_index = header.hashToIndex(h);
var roll_over: usize = 0;
var distance_from_start_index: usize = 0;
- var got_result_entry = false;
- var result = InternalPutResult{
- .new_entry = undefined,
- .old_kv = null,
- };
- while (roll_over < self.entries.len) : ({
+ while (roll_over <= header.indexes_len) : ({
roll_over += 1;
distance_from_start_index += 1;
}) {
- const index = self.constrainIndex(start_index + roll_over);
- const entry = &self.entries[index];
-
- if (entry.used and !eql(entry.kv.key, key)) {
- if (entry.distance_from_start_index < distance_from_start_index) {
- // robin hood to the rescue
- const tmp = entry.*;
- self.max_distance_from_start_index = math.max(self.max_distance_from_start_index, distance_from_start_index);
- if (!got_result_entry) {
- got_result_entry = true;
- result.new_entry = entry;
+ const index_index = (start_index + roll_over) % header.indexes_len;
+ const index = indexes[index_index];
+ if (index.isEmpty()) {
+ indexes[index_index] = .{
+ .distance_from_start_index = @intCast(I, distance_from_start_index),
+ .entry_index = @intCast(I, self.entries.items.len),
+ };
+ header.maybeBumpMax(distance_from_start_index);
+ const new_entry = self.entries.addOneAssumeCapacity();
+ new_entry.* = .{
+ .hash = if (store_hash) h else {},
+ .key = key,
+ .value = undefined,
+ };
+ return .{
+ .found_existing = false,
+ .entry = new_entry,
+ };
+ }
+
+ // This pointer survives the following append because we call
+ // entries.ensureCapacity before getOrPutInternal.
+ const entry = &self.entries.items[index.entry_index];
+ const hash_match = if (store_hash) h == entry.hash else true;
+ if (hash_match and eql(key, entry.key)) {
+ return .{
+ .found_existing = true,
+ .entry = entry,
+ };
+ }
+ if (index.distance_from_start_index < distance_from_start_index) {
+ // In this case, we did not find the item. We will put a new entry.
+ // However, we will use this index for the new entry, and move
+ // the previous index down the line, to keep the max_distance_from_start_index
+ // as small as possible.
+ indexes[index_index] = .{
+ .distance_from_start_index = @intCast(I, distance_from_start_index),
+ .entry_index = @intCast(I, self.entries.items.len),
+ };
+ header.maybeBumpMax(distance_from_start_index);
+ const new_entry = self.entries.addOneAssumeCapacity();
+ new_entry.* = .{
+ .hash = if (store_hash) h else {},
+ .key = key,
+ .value = undefined,
+ };
+
+ distance_from_start_index = index.distance_from_start_index;
+ var prev_entry_index = index.entry_index;
+
+ // Find somewhere to put the index we replaced by shifting
+ // following indexes backwards.
+ roll_over += 1;
+ distance_from_start_index += 1;
+ while (roll_over < header.indexes_len) : ({
+ roll_over += 1;
+ distance_from_start_index += 1;
+ }) {
+ const next_index_index = (start_index + roll_over) % header.indexes_len;
+ const next_index = indexes[next_index_index];
+ if (next_index.isEmpty()) {
+ header.maybeBumpMax(distance_from_start_index);
+ indexes[next_index_index] = .{
+ .entry_index = prev_entry_index,
+ .distance_from_start_index = @intCast(I, distance_from_start_index),
+ };
+ return .{
+ .found_existing = false,
+ .entry = new_entry,
+ };
+ }
+ if (next_index.distance_from_start_index < distance_from_start_index) {
+ header.maybeBumpMax(distance_from_start_index);
+ indexes[next_index_index] = .{
+ .entry_index = prev_entry_index,
+ .distance_from_start_index = @intCast(I, distance_from_start_index),
+ };
+ distance_from_start_index = next_index.distance_from_start_index;
+ prev_entry_index = next_index.entry_index;
}
- entry.* = Entry{
- .used = true,
- .distance_from_start_index = distance_from_start_index,
- .kv = KV{
- .key = key,
- .value = value,
- },
- };
- key = tmp.kv.key;
- value = tmp.kv.value;
- distance_from_start_index = tmp.distance_from_start_index;
}
- continue;
+ unreachable;
}
+ }
+ unreachable;
+ }
- if (entry.used) {
- result.old_kv = entry.kv;
- } else {
- // adding an entry. otherwise overwriting old value with
- // same key
- self.size += 1;
- }
+ fn getInternal(self: Self, key: K, header: *IndexHeader, comptime I: type) ?*Entry {
+ const indexes = header.indexes(I);
+ const h = hash(key);
+ const start_index = header.hashToIndex(h);
+ var roll_over: usize = 0;
+ while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) {
+ const index_index = (start_index + roll_over) % header.indexes_len;
+ const index = indexes[index_index];
+ if (index.isEmpty())
+ return null;
+
+ const entry = &self.entries.items[index.entry_index];
+ const hash_match = if (store_hash) h == entry.hash else true;
+ if (hash_match and eql(key, entry.key))
+ return entry;
+ }
+ return null;
+ }
- self.max_distance_from_start_index = math.max(distance_from_start_index, self.max_distance_from_start_index);
- if (!got_result_entry) {
- result.new_entry = entry;
- }
- entry.* = Entry{
- .used = true,
- .distance_from_start_index = distance_from_start_index,
- .kv = KV{
- .key = key,
- .value = value,
- },
- };
- return result;
+ fn insertAllEntriesIntoNewHeader(self: *Self, header: *IndexHeader) void {
+ switch (header.capacityIndexType()) {
+ .u8 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u8),
+ .u16 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u16),
+ .u32 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u32),
+ .usize => return self.insertAllEntriesIntoNewHeaderGeneric(header, usize),
}
- unreachable; // put into a full map
}
- fn internalGet(hm: Self, key: K) ?*KV {
- const start_index = hm.keyToIndex(key);
- {
+ fn insertAllEntriesIntoNewHeaderGeneric(self: *Self, header: *IndexHeader, comptime I: type) void {
+ const indexes = header.indexes(I);
+ entry_loop: for (self.entries.items) |entry, i| {
+ const h = if (store_hash) entry.hash else hash(entry.key);
+ const start_index = header.hashToIndex(h);
+ var entry_index = i;
var roll_over: usize = 0;
- while (roll_over <= hm.max_distance_from_start_index) : (roll_over += 1) {
- const index = hm.constrainIndex(start_index + roll_over);
- const entry = &hm.entries[index];
-
- if (!entry.used) return null;
- if (eql(entry.kv.key, key)) return &entry.kv;
+ var distance_from_start_index: usize = 0;
+ while (roll_over < header.indexes_len) : ({
+ roll_over += 1;
+ distance_from_start_index += 1;
+ }) {
+ const index_index = (start_index + roll_over) % header.indexes_len;
+ const next_index = indexes[index_index];
+ if (next_index.isEmpty()) {
+ header.maybeBumpMax(distance_from_start_index);
+ indexes[index_index] = .{
+ .distance_from_start_index = @intCast(I, distance_from_start_index),
+ .entry_index = @intCast(I, entry_index),
+ };
+ continue :entry_loop;
+ }
+ if (next_index.distance_from_start_index < distance_from_start_index) {
+ header.maybeBumpMax(distance_from_start_index);
+ indexes[index_index] = .{
+ .distance_from_start_index = @intCast(I, distance_from_start_index),
+ .entry_index = @intCast(I, entry_index),
+ };
+ distance_from_start_index = next_index.distance_from_start_index;
+ entry_index = next_index.entry_index;
+ }
}
+ unreachable;
}
- return null;
}
+ };
+}
+
+const CapacityIndexType = enum { u8, u16, u32, usize };
- fn keyToIndex(hm: Self, key: K) usize {
- return hm.constrainIndex(@as(usize, hash(key)));
+fn capacityIndexType(indexes_len: usize) CapacityIndexType {
+ if (indexes_len < math.maxInt(u8))
+ return .u8;
+ if (indexes_len < math.maxInt(u16))
+ return .u16;
+ if (indexes_len < math.maxInt(u32))
+ return .u32;
+ return .usize;
+}
+
+fn capacityIndexSize(indexes_len: usize) usize {
+ switch (capacityIndexType(indexes_len)) {
+ .u8 => return @sizeOf(Index(u8)),
+ .u16 => return @sizeOf(Index(u16)),
+ .u32 => return @sizeOf(Index(u32)),
+ .usize => return @sizeOf(Index(usize)),
+ }
+}
+
+fn Index(comptime I: type) type {
+ return extern struct {
+ entry_index: I,
+ distance_from_start_index: I,
+
+ const Self = @This();
+
+ fn isEmpty(idx: Self) bool {
+ return idx.entry_index == math.maxInt(I);
}
- fn constrainIndex(hm: Self, i: usize) usize {
- // this is an optimization for modulo of power of two integers;
- // it requires hm.entries.len to always be a power of two
- return i & (hm.entries.len - 1);
+ fn setEmpty(idx: *Self) void {
+ idx.entry_index = math.maxInt(I);
}
};
}
+/// This struct is trailed by an array of `Index(I)`, where `I`
+/// and the array length are determined by `indexes_len`.
+const IndexHeader = struct {
+ max_distance_from_start_index: usize,
+ indexes_len: usize,
+
+ fn hashToIndex(header: IndexHeader, h: u32) usize {
+ return @as(usize, h) % header.indexes_len;
+ }
+
+ fn indexes(header: *IndexHeader, comptime I: type) []Index(I) {
+ const start = @ptrCast([*]Index(I), @ptrCast([*]u8, header) + @sizeOf(IndexHeader));
+ return start[0..header.indexes_len];
+ }
+
+ fn capacityIndexType(header: IndexHeader) CapacityIndexType {
+ return hash_map.capacityIndexType(header.indexes_len);
+ }
+
+ fn maybeBumpMax(header: *IndexHeader, distance_from_start_index: usize) void {
+ if (distance_from_start_index > header.max_distance_from_start_index) {
+ header.max_distance_from_start_index = distance_from_start_index;
+ }
+ }
+
+ fn alloc(allocator: *Allocator, len: usize) !*IndexHeader {
+ const index_size = hash_map.capacityIndexSize(len);
+ const nbytes = @sizeOf(IndexHeader) + index_size * len;
+ const bytes = try allocator.allocAdvanced(u8, @alignOf(IndexHeader), nbytes, .exact);
+ @memset(bytes.ptr + @sizeOf(IndexHeader), 0xff, bytes.len - @sizeOf(IndexHeader));
+ const result = @ptrCast(*IndexHeader, bytes.ptr);
+ result.* = .{
+ .max_distance_from_start_index = 0,
+ .indexes_len = len,
+ };
+ return result;
+ }
+
+ fn free(header: *IndexHeader, allocator: *Allocator) void {
+ const index_size = hash_map.capacityIndexSize(header.indexes_len);
+ const ptr = @ptrCast([*]u8, header);
+ const slice = ptr[0 .. @sizeOf(IndexHeader) + header.indexes_len * index_size];
+ allocator.free(slice);
+ }
+};
+
test "basic hash map usage" {
var map = AutoHashMap(i32, i32).init(std.testing.allocator);
defer map.deinit();
- testing.expect((try map.put(1, 11)) == null);
- testing.expect((try map.put(2, 22)) == null);
- testing.expect((try map.put(3, 33)) == null);
- testing.expect((try map.put(4, 44)) == null);
+ testing.expect((try map.fetchPut(1, 11)) == null);
+ testing.expect((try map.fetchPut(2, 22)) == null);
+ testing.expect((try map.fetchPut(3, 33)) == null);
+ testing.expect((try map.fetchPut(4, 44)) == null);
try map.putNoClobber(5, 55);
- testing.expect((try map.put(5, 66)).?.value == 55);
- testing.expect((try map.put(5, 55)).?.value == 66);
+ testing.expect((try map.fetchPut(5, 66)).?.value == 55);
+ testing.expect((try map.fetchPut(5, 55)).?.value == 66);
const gop1 = try map.getOrPut(5);
testing.expect(gop1.found_existing == true);
- testing.expect(gop1.kv.value == 55);
- gop1.kv.value = 77;
- testing.expect(map.get(5).?.value == 77);
+ testing.expect(gop1.entry.value == 55);
+ gop1.entry.value = 77;
+ testing.expect(map.getEntry(5).?.value == 77);
const gop2 = try map.getOrPut(99);
testing.expect(gop2.found_existing == false);
- gop2.kv.value = 42;
- testing.expect(map.get(99).?.value == 42);
+ gop2.entry.value = 42;
+ testing.expect(map.getEntry(99).?.value == 42);
const gop3 = try map.getOrPutValue(5, 5);
testing.expect(gop3.value == 77);
@@ -454,15 +876,15 @@ test "basic hash map usage" {
testing.expect(gop4.value == 41);
testing.expect(map.contains(2));
- testing.expect(map.get(2).?.value == 22);
- testing.expect(map.getValue(2).? == 22);
+ testing.expect(map.getEntry(2).?.value == 22);
+ testing.expect(map.get(2).? == 22);
const rmv1 = map.remove(2);
testing.expect(rmv1.?.key == 2);
testing.expect(rmv1.?.value == 22);
testing.expect(map.remove(2) == null);
+ testing.expect(map.getEntry(2) == null);
testing.expect(map.get(2) == null);
- testing.expect(map.getValue(2) == null);
map.removeAssertDiscard(3);
}
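
The test above exercises `AutoHashMap`, which now picks the fifth `store_hash` parameter automatically via `autoEqlIsCheap`. For keys whose `eql` is expensive, such as strings, passing `true` stores each key's hash so `eql` only runs on genuine hash collisions; this is what `StringHashMap` does. A sketch of an explicit instantiation reusing the file's public `hashString`/`eqlString` (the `PathMap` name and test are illustrative, not part of the commit):

    test "string keys with store_hash = true (sketch)" {
        const PathMap = std.HashMap([]const u8, usize, std.hash_map.hashString, std.hash_map.eqlString, true);
        var map = PathMap.init(std.testing.allocator);
        defer map.deinit();
        try map.put("/usr/bin/zig", 1);
        std.testing.expect(map.get("/usr/bin/zig").? == 1);
    }
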
@@ -498,8 +920,8 @@ test "iterator hash map" {
it.reset();
var count: usize = 0;
- while (it.next()) |kv| : (count += 1) {
- buffer[@intCast(usize, kv.key)] = kv.value;
+ while (it.next()) |entry| : (count += 1) {
+ buffer[@intCast(usize, entry.key)] = entry.value;
}
testing.expect(count == 3);
testing.expect(it.next() == null);
@@ -510,8 +932,8 @@ test "iterator hash map" {
it.reset();
count = 0;
- while (it.next()) |kv| {
- buffer[@intCast(usize, kv.key)] = kv.value;
+ while (it.next()) |entry| {
+ buffer[@intCast(usize, entry.key)] = entry.value;
count += 1;
if (count >= 2) break;
}
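
The `Iterator` used in the two hunks above is kept working but marked deprecated; since entries now live in a contiguous `ArrayListUnmanaged`, iteration can be a plain loop over `items()`. A sketch of the equivalent loop, reusing the `map` and `buffer` variables of this test (not part of the commit):

    for (map.items()) |entry| {
        buffer[@intCast(usize, entry.key)] = entry.value;
    }
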
@@ -531,14 +953,14 @@ test "ensure capacity" {
defer map.deinit();
try map.ensureCapacity(20);
- const initialCapacity = map.entries.len;
- testing.expect(initialCapacity >= 20);
+ const initial_capacity = map.capacity();
+ testing.expect(initial_capacity >= 20);
var i: i32 = 0;
while (i < 20) : (i += 1) {
- testing.expect(map.putAssumeCapacity(i, i + 10) == null);
+ testing.expect(map.fetchPutAssumeCapacity(i, i + 10) == null);
}
// shouldn't resize from putAssumeCapacity
- testing.expect(initialCapacity == map.entries.len);
+ testing.expect(initial_capacity == map.capacity());
}
pub fn getHashPtrAddrFn(comptime K: type) (fn (K) u32) {
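
The "ensure capacity" test above uses the managed `HashMap`; the same reserve-then-put pattern works on `HashMapUnmanaged`, where every allocating call takes the allocator explicitly and the struct is initialized from its default field values. A sketch under those assumptions (the `I32Map` alias and test are illustrative, not part of the commit):

    test "HashMapUnmanaged with an explicit allocator (sketch)" {
        const I32Map = std.hash_map.HashMapUnmanaged(
            i32,
            i32,
            std.hash_map.getAutoHashFn(i32),
            std.hash_map.getAutoEqlFn(i32),
            false, // i32 eql is cheap, so skip storing per-entry hashes
        );
        var map: I32Map = .{};
        defer map.deinit(std.testing.allocator);
        try map.ensureCapacity(std.testing.allocator, 20);
        var i: i32 = 0;
        while (i < 20) : (i += 1) {
            map.putAssumeCapacity(i, i + 10); // cannot fail: capacity reserved above
        }
        std.testing.expect(map.get(5).? == 15);
    }
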
@@ -575,6 +997,24 @@ pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) {
}.eql;
}
+pub fn autoEqlIsCheap(comptime K: type) bool {
+ return switch (@typeInfo(K)) {
+ .Bool,
+ .Int,
+ .Float,
+ .Pointer,
+ .ComptimeFloat,
+ .ComptimeInt,
+ .Enum,
+ .Fn,
+ .ErrorSet,
+ .AnyFrame,
+ .EnumLiteral,
+ => true,
+ else => false,
+ };
+}
+
pub fn getAutoHashStratFn(comptime K: type, comptime strategy: std.hash.Strategy) (fn (K) u32) {
return struct {
fn hash(key: K) u32 {