Commit 8b82c40104
Changed files (2)
src/hash_map.hpp
@@ -19,45 +19,64 @@ public:
init_capacity(capacity);
}
void deinit(void) {
- heap::c_allocator.deallocate(_entries, _capacity);
+ _entries.deinit();
+ heap::c_allocator.deallocate(_index_bytes,
+ _indexes_len * capacity_index_size(_indexes_len));
}
struct Entry {
K key;
V value;
- bool used;
- int distance_from_start_index;
};
void clear() {
- for (int i = 0; i < _capacity; i += 1) {
- _entries[i].used = false;
- }
- _size = 0;
+ _entries.clear();
+ memset(_index_bytes, 0, _indexes_len * capacity_index_size(_indexes_len));
_max_distance_from_start_index = 0;
_modification_count += 1;
}
- int size() const {
- return _size;
+ size_t size() const {
+ return _entries.length;
}
void put(const K &key, const V &value) {
_modification_count += 1;
- internal_put(key, value);
-
- // if we get too full (60%), double the capacity
- if (_size * 5 >= _capacity * 3) {
- Entry *old_entries = _entries;
- int old_capacity = _capacity;
- init_capacity(_capacity * 2);
- // dump all of the old elements into the new table
- for (int i = 0; i < old_capacity; i += 1) {
- Entry *old_entry = &old_entries[i];
- if (old_entry->used)
- internal_put(old_entry->key, old_entry->value);
+
+ // if we would get too full (60%), double the indexes size
+ if ((_entries.length + 1) * 5 >= _indexes_len * 3) {
+ heap::c_allocator.deallocate(_index_bytes,
+ _indexes_len * capacity_index_size(_indexes_len));
+ _indexes_len *= 2;
+ size_t sz = capacity_index_size(_indexes_len);
+ // This zero initializes the bytes, setting them all empty.
+ _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len * sz);
+ _max_distance_from_start_index = 0;
+ for (size_t i = 0; i < _entries.length; i += 1) {
+ Entry *entry = &_entries.items[i];
+ switch (sz) {
+ case 1:
+ put_index(key_to_index(entry->key), i, (uint8_t*)_index_bytes);
+ continue;
+ case 2:
+ put_index(key_to_index(entry->key), i, (uint16_t*)_index_bytes);
+ continue;
+ case 4:
+ put_index(key_to_index(entry->key), i, (uint32_t*)_index_bytes);
+ continue;
+ default:
+ put_index(key_to_index(entry->key), i, (size_t*)_index_bytes);
+ continue;
+ }
}
- heap::c_allocator.deallocate(old_entries, old_capacity);
+ }
+
+
+ switch (capacity_index_size(_indexes_len)) {
+ case 1: return internal_put(key, value, (uint8_t*)_index_bytes);
+ case 2: return internal_put(key, value, (uint16_t*)_index_bytes);
+ case 4: return internal_put(key, value, (uint32_t*)_index_bytes);
+ default: return internal_put(key, value, (size_t*)_index_bytes);
}
}
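
The growth check in the new put() keeps the probe table at most 60% full and re-derives the slot width from capacity_index_size() whenever _indexes_len doubles. Below is a minimal standalone sketch of just that arithmetic, assuming the same 3/5 constant and UINT*_MAX thresholds as the hunk above; the helper names index_size_for and needs_grow are illustrative and not part of the commit.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Width in bytes of one probe slot, mirroring capacity_index_size().
    static size_t index_size_for(size_t indexes_len) {
        if (indexes_len < UINT8_MAX) return 1;
        if (indexes_len < UINT16_MAX) return 2;
        if (indexes_len < UINT32_MAX) return 4;
        return sizeof(size_t);
    }

    // True once one more entry would reach 60% occupancy:
    // (entry_count + 1) / indexes_len >= 3/5, written without division.
    static bool needs_grow(size_t entry_count, size_t indexes_len) {
        return (entry_count + 1) * 5 >= indexes_len * 3;
    }

    int main() {
        // init_capacity(32) sizes the probe table as 32 * 5 / 3 = 53 slots,
        // so the requested capacity of 32 entries lands right at 60%.
        size_t indexes_len = 32 * 5 / 3;
        assert(index_size_for(indexes_len) == 1); // 53 < 255: one byte per slot
        assert(!needs_grow(30, indexes_len));     // 31st entry still fits
        assert(needs_grow(31, indexes_len));      // 32nd entry triggers doubling
    }

Doubling only _indexes_len means growth rebuilds just the small probe table via put_index(); the old loop that copied every Entry into a freshly allocated table is gone.
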
@@ -81,40 +100,21 @@ public:
return internal_get(key);
}
- void maybe_remove(const K &key) {
- if (maybe_get(key)) {
- remove(key);
- }
+ bool remove(const K &key) {
+ bool deleted_something = maybe_remove(key);
+ if (!deleted_something)
+ zig_panic("key not found");
+ return deleted_something;
}
- void remove(const K &key) {
+ bool maybe_remove(const K &key) {
_modification_count += 1;
- int start_index = key_to_index(key);
- for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
- int index = (start_index + roll_over) % _capacity;
- Entry *entry = &_entries[index];
-
- if (!entry->used)
- zig_panic("key not found");
-
- if (!EqualFn(entry->key, key))
- continue;
-
- for (; roll_over < _capacity; roll_over += 1) {
- int next_index = (start_index + roll_over + 1) % _capacity;
- Entry *next_entry = &_entries[next_index];
- if (!next_entry->used || next_entry->distance_from_start_index == 0) {
- entry->used = false;
- _size -= 1;
- return;
- }
- *entry = *next_entry;
- entry->distance_from_start_index -= 1;
- entry = next_entry;
- }
- zig_panic("shifting everything in the table");
+ switch (capacity_index_size(_indexes_len)) {
+ case 1: return internal_remove(key, (uint8_t*)_index_bytes);
+ case 2: return internal_remove(key, (uint16_t*)_index_bytes);
+ case 4: return internal_remove(key, (uint32_t*)_index_bytes);
+ default: return internal_remove(key, (size_t*)_index_bytes);
}
- zig_panic("key not found");
}
class Iterator {
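
Both hunks above share the same dispatch idiom: the probe table lives behind a plain uint8_t pointer (_index_bytes), and each public operation switches on capacity_index_size() once to reinterpret it at the right width before entering a template helper. A small standalone sketch of just that re-typing step, with hypothetical names (read_slot, read_slot_dispatch) that do not appear in the commit:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Read probe slot i, treating the byte buffer as an array of I.
    template <typename I>
    static size_t read_slot(const uint8_t *bytes, size_t i) {
        return ((const I *)bytes)[i];
    }

    // Pick the width once at the entry point, as put()/maybe_remove() do above.
    static size_t read_slot_dispatch(const uint8_t *bytes, size_t index_size, size_t i) {
        switch (index_size) {
            case 1: return read_slot<uint8_t>(bytes, i);
            case 2: return read_slot<uint16_t>(bytes, i);
            case 4: return read_slot<uint32_t>(bytes, i);
            default: return read_slot<size_t>(bytes, i);
        }
    }

    int main() {
        uint16_t slots[4] = {0, 3, 0, 1};   // pretend two-byte probe slots
        const uint8_t *bytes = (const uint8_t *)slots;
        assert(read_slot_dispatch(bytes, sizeof(uint16_t), 1) == 3);
        assert(read_slot_dispatch(bytes, sizeof(uint16_t), 3) == 1);
    }

Branching on the width once per call keeps the probe loops themselves (internal_put, internal_get2, internal_remove, put_index) free of per-slot width checks.
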
@@ -122,24 +122,16 @@ public:
Entry *next() {
if (_inital_modification_count != _table->_modification_count)
zig_panic("concurrent modification");
- if (_count >= _table->size())
- return NULL;
- for (; _index < _table->_capacity; _index += 1) {
- Entry *entry = &_table->_entries[_index];
- if (entry->used) {
- _index += 1;
- _count += 1;
- return entry;
- }
- }
- zig_panic("no next item");
+ if (_index >= _table->_entries.length)
+ return nullptr;
+ Entry *entry = &_table->_entries.items[_index];
+ _index += 1;
+ return entry;
}
private:
const HashMap * _table;
- // how many items have we returned
- int _count = 0;
// iterator through the entry array
- int _index = 0;
+ size_t _index = 0;
// used to detect concurrent modification
uint32_t _inital_modification_count;
Iterator(const HashMap * table) :
@@ -154,89 +146,166 @@ public:
}
private:
-
- Entry *_entries;
- int _capacity;
- int _size;
- int _max_distance_from_start_index;
- // this is used to detect bugs where a hashtable is edited while an iterator is running.
+ // Maintains insertion order.
+ ZigList<Entry> _entries;
+ // If _indexes_len is less than 2**8, this is an array of uint8_t.
+ // If _indexes_len is less than 2**16, it is an array of uint16_t.
+ // If _indexes_len is less than 2**32, it is an array of uint32_t.
+ // Otherwise it is size_t.
+ // It's off by 1. 0 means empty slot, 1 means index 0, etc.
+ uint8_t *_index_bytes;
+ // This is the number of indexes. When indexes are bytes, it equals number of bytes.
+ // When indexes are uint16_t, _indexes_len is half the number of bytes.
+ size_t _indexes_len;
+
+ size_t _max_distance_from_start_index;
+ // This is used to detect bugs where a hashtable is edited while an iterator is running.
uint32_t _modification_count;
- void init_capacity(int capacity) {
- _capacity = capacity;
- _entries = heap::c_allocator.allocate<Entry>(_capacity);
- _size = 0;
+ void init_capacity(size_t capacity) {
+ _entries = {};
+ _entries.ensure_capacity(capacity);
+ // So that at capacity it will only be 60% full.
+ _indexes_len = capacity * 5 / 3;
+ size_t sz = capacity_index_size(_indexes_len);
+ // This zero initializes _index_bytes which sets them all to empty.
+ _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len * sz);
+
_max_distance_from_start_index = 0;
- for (int i = 0; i < _capacity; i += 1) {
- _entries[i].used = false;
- }
+ _modification_count = 0;
+ }
+
+ static size_t capacity_index_size(size_t len) {
+ if (len < UINT8_MAX)
+ return 1;
+ if (len < UINT16_MAX)
+ return 2;
+ if (len < UINT32_MAX)
+ return 4;
+ return sizeof(size_t);
}
- void internal_put(K key, V value) {
- int start_index = key_to_index(key);
- for (int roll_over = 0, distance_from_start_index = 0;
- roll_over < _capacity; roll_over += 1, distance_from_start_index += 1)
+ template <typename I>
+ void internal_put(const K &key, const V &value, I *indexes) {
+ size_t start_index = key_to_index(key);
+ for (size_t roll_over = 0, distance_from_start_index = 0;
+ roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1)
{
- int index = (start_index + roll_over) % _capacity;
- Entry *entry = &_entries[index];
-
- if (entry->used && !EqualFn(entry->key, key)) {
- if (entry->distance_from_start_index < distance_from_start_index) {
- // robin hood to the rescue
- Entry tmp = *entry;
- if (distance_from_start_index > _max_distance_from_start_index)
- _max_distance_from_start_index = distance_from_start_index;
- *entry = {
- key,
- value,
- true,
- distance_from_start_index,
- };
- key = tmp.key;
- value = tmp.value;
- distance_from_start_index = tmp.distance_from_start_index;
- }
- continue;
+ size_t index_index = (start_index + roll_over) % _indexes_len;
+ I index_data = indexes[index_index];
+ if (index_data == 0) {
+ _entries.append({key, value});
+ indexes[index_index] = _entries.length;
+ if (distance_from_start_index > _max_distance_from_start_index)
+ _max_distance_from_start_index = distance_from_start_index;
+ return;
}
-
- if (!entry->used) {
- // adding an entry. otherwise overwriting old value with
- // same key
- _size += 1;
+ Entry *entry = &_entries.items[index_data - 1];
+ if (EqualFn(entry->key, key)) {
+ *entry = {key, value};
+ if (distance_from_start_index > _max_distance_from_start_index)
+ _max_distance_from_start_index = distance_from_start_index;
+ return;
}
-
- if (distance_from_start_index > _max_distance_from_start_index)
- _max_distance_from_start_index = distance_from_start_index;
- *entry = {
- key,
- value,
- true,
- distance_from_start_index,
- };
- return;
}
- zig_panic("put into a full HashMap");
+ zig_unreachable();
}
+ template <typename I>
+ void put_index(size_t start_index, size_t entry_index, I *indexes) {
+ for (size_t roll_over = 0, distance_from_start_index = 0;
+ roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1)
+ {
+ size_t index_index = (start_index + roll_over) % _indexes_len;
+ if (indexes[index_index] == 0) {
+ indexes[index_index] = entry_index + 1;
+ if (distance_from_start_index > _max_distance_from_start_index)
+ _max_distance_from_start_index = distance_from_start_index;
+ return;
+ }
+ }
+ zig_unreachable();
+ }
Entry *internal_get(const K &key) const {
- int start_index = key_to_index(key);
- for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
- int index = (start_index + roll_over) % _capacity;
- Entry *entry = &_entries[index];
+ switch (capacity_index_size(_indexes_len)) {
+ case 1: return internal_get2(key, (uint8_t*)_index_bytes);
+ case 2: return internal_get2(key, (uint16_t*)_index_bytes);
+ case 4: return internal_get2(key, (uint32_t*)_index_bytes);
+ default: return internal_get2(key, (size_t*)_index_bytes);
+ }
+ }
- if (!entry->used)
- return NULL;
+ template <typename I>
+ Entry *internal_get2(const K &key, I *indexes) const {
+ size_t start_index = key_to_index(key);
+ for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+ size_t index_index = (start_index + roll_over) % _indexes_len;
+ size_t index_data = indexes[index_index];
+ if (index_data == 0)
+ return nullptr;
+ Entry *entry = &_entries.items[index_data - 1];
if (EqualFn(entry->key, key))
return entry;
}
- return NULL;
+ return nullptr;
}
- int key_to_index(const K &key) const {
- return (int)(HashFunction(key) % ((uint32_t)_capacity));
+ size_t key_to_index(const K &key) const {
+ return ((size_t)HashFunction(key)) % _indexes_len;
+ }
+
+ template <typename I>
+ bool internal_remove(const K &key, I *indexes) {
+ size_t start_index = key_to_index(key);
+ for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+ size_t index_index = (start_index + roll_over) % _indexes_len;
+ size_t index_data = indexes[index_index];
+ if (index_data == 0)
+ return false;
+
+ size_t index = index_data - 1;
+ Entry *entry = &_entries.items[index];
+ if (!EqualFn(entry->key, key))
+ continue;
+
+ indexes[index_index] = 0;
+ _entries.swap_remove(index);
+ if (_entries.length > 0 && _entries.length != index) {
+ // Because of the swap remove, now we need to update the index that was
+ // pointing to the last entry and is now pointing to this removed item slot.
+ update_entry_index(_entries.length, index, indexes);
+ }
+
+ // Now we have to shift over the following indexes.
+ roll_over += 1;
+ for (; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+ size_t next_index = (start_index + roll_over) % _indexes_len;
+ if (indexes[next_index] == 0)
+ break;
+ size_t next_start_index = key_to_index(_entries.items[indexes[next_index]].key);
+ if (next_start_index != start_index)
+ break;
+ indexes[next_index - 1] = indexes[next_index];
+ }
+
+ return true;
+ }
+ return false;
}
-};
+ template <typename I>
+ void update_entry_index(size_t old_entry_index, size_t new_entry_index, I *indexes) {
+ size_t start_index = key_to_index(_entries.items[new_entry_index].key);
+ for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+ size_t index_index = (start_index + roll_over) % _indexes_len;
+ if (indexes[index_index] == old_entry_index + 1) {
+ indexes[index_index] = new_entry_index + 1;
+ return;
+ }
+ }
+ zig_unreachable();
+ }
+};
#endif
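
Two details of the new private machinery in hash_map.hpp are easy to miss. First, probe slots store the entry index plus one, so zero can keep meaning "empty" in a zero-initialized allocation. Second, internal_remove() deletes from the dense entry list with swap_remove(), which moves the last entry into the hole, and update_entry_index() then repoints the single probe slot that still referred to the moved entry's old position. A minimal standalone sketch of that bookkeeping, with illustrative names and a toy "hash" of key modulo slot count; none of this code is part of the commit:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Entry { int key; int value; };

    int main() {
        // Three entries in insertion order; slot value = entry index + 1, 0 = empty.
        std::vector<Entry> entries = {{7, 70}, {8, 80}, {9, 90}};
        std::vector<uint8_t> slots(8, 0);
        slots[7 % 8] = 1;  // key 7 -> entries[0]
        slots[8 % 8] = 2;  // key 8 -> entries[1]
        slots[9 % 8] = 3;  // key 9 -> entries[2]

        // Remove key 8 (entry index 1): clear its slot, then swap-remove.
        size_t removed = 1;
        slots[8 % 8] = 0;
        entries[removed] = entries.back();
        entries.pop_back();

        // The slot that pointed at the last entry (old index 2) now dangles;
        // repoint it at the entry's new index, keeping the +1 bias. The real
        // update_entry_index() locates this slot by re-probing from the moved
        // key's hash; this sketch simply scans all slots.
        for (uint8_t &slot : slots)
            if (slot == 2 + 1)
                slot = (uint8_t)(removed + 1);

        assert(entries[slots[9 % 8] - 1].key == 9); // key 9 still reachable
        assert(entries[slots[7 % 8] - 1].key == 7); // untouched entry unaffected
    }

One consequence of swap_remove() is that _entries stays dense but only follows strict insertion order until the first removal; after a remove, iteration can visit the moved entry out of its original position.
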
test/stage1/behavior/type_info.zig
@@ -251,14 +251,13 @@ fn testStruct() void {
}
const TestStruct = packed struct {
- const Self = @This();
-
fieldA: usize,
fieldB: void,
fieldC: *Self,
fieldD: u32 = 4,
pub fn foo(self: *const Self) void {}
+ const Self = @This();
};
test "type info: function type info" {