Commit 91f41bdc70

Andrew Kelley <andrew@ziglang.org>
2025-01-31 03:21:54
std.heap.PageAllocator: restore high alignment functionality
This allocator now supports alignments greater than the page size, using the same implementation it had before. This is a partial revert of ceb0a632cfd6a4eada6bd27bf6a3754e95dcac86. It looks like VirtualAlloc2 offers better solutions to this problem, including features such as MEM_RESERVE_PLACEHOLDER and MEM_LARGE_PAGES; that possibility can be investigated as a follow-up task.
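
For illustration (not part of the commit), a minimal sketch of the restored behavior, assuming this Zig version's Allocator.alignedAlloc signature; the test name, the 1 MiB alignment, and the 100-byte size are arbitrary:

const std = @import("std");

test "PageAllocator honors alignment greater than the page size" {
    const gpa = std.heap.page_allocator;
    // 1 MiB alignment: larger than the page size on all supported targets.
    const alignment = 1 << 20;
    const buf = try gpa.alignedAlloc(u8, alignment, 100);
    defer gpa.free(buf);
    try std.testing.expect(std.mem.isAligned(@intFromPtr(buf.ptr), alignment));
}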
1 parent 5c63884
Changed files (1)
lib/std/heap/PageAllocator.zig
@@ -15,51 +15,100 @@ pub const vtable: Allocator.VTable = .{
     .free = free,
 };
 
-fn alloc(_: *anyopaque, n: usize, log2_align: u8, ra: usize) ?[*]u8 {
+fn alloc(context: *anyopaque, n: usize, log2_align: u8, ra: usize) ?[*]u8 {
+    const requested_alignment: mem.Alignment = @enumFromInt(log2_align);
+    _ = context;
     _ = ra;
-    _ = log2_align;
     assert(n > 0);
 
+    const page_size = std.heap.pageSize();
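+    // Guard against overflow when rounding n up to a page boundary below.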
+    if (n >= maxInt(usize) - page_size) return null;
+    const alignment_bytes = requested_alignment.toByteUnits();
+
     if (native_os == .windows) {
+        // According to official documentation, VirtualAlloc aligns to a page
+        // boundary; empirically, however, it reserves pages on a 64K boundary.
+        // Since it is very likely the requested alignment will be honored,
+        // this logic first tries a call with exactly the size requested,
+        // before falling back to the loop below.
+        // https://devblogs.microsoft.com/oldnewthing/?p=42223
         const addr = windows.VirtualAlloc(
             null,
-
             // VirtualAlloc will round the length to a multiple of the page size.
-            // VirtualAlloc docs: If the lpAddress parameter is NULL, this value is rounded up to the next page boundary
+            // "If the lpAddress parameter is NULL, this value is rounded up to
+            // the next page boundary".
             n,
-
             windows.MEM_COMMIT | windows.MEM_RESERVE,
             windows.PAGE_READWRITE,
         ) catch return null;
-        return @ptrCast(addr);
-    }
 
-    const page_size = std.heap.pageSize();
-    if (n >= maxInt(usize) - page_size) return null;
+        if (mem.isAligned(@intFromPtr(addr), alignment_bytes))
+            return @ptrCast(addr);
+
+        // Fallback: reserve a range of memory large enough to find a
+        // sufficiently aligned address, then free the entire range and
+        // immediately allocate the desired subset. Another thread may have won
+        // the race to map the target range, in which case a retry is needed.
+        windows.VirtualFree(addr, 0, windows.MEM_RELEASE);
+
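+        // Reserving alignment_bytes - page_size extra bytes guarantees that
+        // the page-aligned reservation contains an address satisfying the
+        // requested alignment, with at least n bytes available after it.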
+        const overalloc_len = n + alignment_bytes - page_size;
+        const aligned_len = mem.alignForward(usize, n, page_size);
+
+        while (true) {
+            const reserved_addr = windows.VirtualAlloc(
+                null,
+                overalloc_len,
+                windows.MEM_RESERVE,
+                windows.PAGE_NOACCESS,
+            ) catch return null;
+            const aligned_addr = mem.alignForward(usize, @intFromPtr(reserved_addr), alignment_bytes);
+            windows.VirtualFree(reserved_addr, 0, windows.MEM_RELEASE);
+            const ptr = windows.VirtualAlloc(
+                @ptrFromInt(aligned_addr),
+                aligned_len,
+                windows.MEM_COMMIT | windows.MEM_RESERVE,
+                windows.PAGE_READWRITE,
+            ) catch continue;
+            return @ptrCast(ptr);
+        }
+    }
 
     const aligned_len = mem.alignForward(usize, n, page_size);
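+    // mmap returns page-aligned memory, so at most alignment_bytes - page_size
+    // leading bytes must be dropped to reach an aligned address. Over-allocate
+    // only if the slack from rounding n up to aligned_len cannot cover that
+    // worst case.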
+    const max_drop_len = alignment_bytes - @min(alignment_bytes, page_size);
+    const overalloc_len = if (max_drop_len <= aligned_len - n)
+        aligned_len
+    else
+        mem.alignForward(usize, aligned_len + max_drop_len, page_size);
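+    // Hint the kernel to place this mapping just past the previous one, so
+    // consecutive allocations tend to be contiguous. The cmpxchg at the end
+    // of this function updates the hint on a best-effort basis.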
     const hint = @atomicLoad(@TypeOf(std.heap.next_mmap_addr_hint), &std.heap.next_mmap_addr_hint, .unordered);
     const slice = posix.mmap(
         hint,
-        aligned_len,
+        overalloc_len,
         posix.PROT.READ | posix.PROT.WRITE,
         .{ .TYPE = .PRIVATE, .ANONYMOUS = true },
         -1,
         0,
     ) catch return null;
-    assert(mem.isAligned(@intFromPtr(slice.ptr), page_size_min));
-    const new_hint: [*]align(std.heap.page_size_min) u8 = @alignCast(slice.ptr + aligned_len);
+    const result_ptr = mem.alignPointer(slice.ptr, alignment_bytes) orelse return null;
+    // Unmap the extra bytes that were only requested in order to guarantee
+    // that the mapped range contained a properly aligned address somewhere.
+    // The extra bytes could be at the beginning, at the end, or both.
+    const drop_len = result_ptr - slice.ptr;
+    if (drop_len != 0) posix.munmap(slice[0..drop_len]);
+    const remaining_len = overalloc_len - drop_len;
+    if (remaining_len > aligned_len) posix.munmap(@alignCast(result_ptr[aligned_len..remaining_len]));
+    const new_hint: [*]align(page_size_min) u8 = @alignCast(result_ptr + aligned_len);
     _ = @cmpxchgStrong(@TypeOf(std.heap.next_mmap_addr_hint), &std.heap.next_mmap_addr_hint, hint, new_hint, .monotonic, .monotonic);
-    return slice.ptr;
+    return result_ptr;
 }
 
 fn resize(
-    _: *anyopaque,
+    context: *anyopaque,
     buf_unaligned: []u8,
     log2_buf_align: u8,
     new_size: usize,
     return_address: usize,
 ) bool {
+    _ = context;
     _ = log2_buf_align;
     _ = return_address;
     const page_size = std.heap.pageSize();
@@ -71,8 +120,8 @@ fn resize(
             const old_addr_end = base_addr + buf_unaligned.len;
             const new_addr_end = mem.alignForward(usize, base_addr + new_size, page_size);
             if (old_addr_end > new_addr_end) {
-                // For shrinking that is not releasing, we will only
-                // decommit the pages not needed anymore.
+                // For shrinking that is not releasing, we will only decommit
+                // the pages not needed anymore.
                 windows.VirtualFree(
                     @as(*anyopaque, @ptrFromInt(new_addr_end)),
                     old_addr_end - new_addr_end,
@@ -104,7 +153,8 @@ fn resize(
     return false;
 }
 
-fn free(_: *anyopaque, slice: []u8, log2_buf_align: u8, return_address: usize) void {
+fn free(context: *anyopaque, slice: []u8, log2_buf_align: u8, return_address: usize) void {
+    _ = context;
     _ = log2_buf_align;
     _ = return_address;
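
A possible shape for the VirtualAlloc2 follow-up mentioned in the commit message, replacing the reserve/free/retry loop with a single call carrying an alignment constraint. This is a rough sketch under stated assumptions, not part of this commit: std.os.windows exposes none of these bindings, so they are hand-declared here; allocAligned is a hypothetical helper; the struct layout assumes a 64-bit target; and on current Windows the function is exported from kernelbase.dll.

const std = @import("std");
const windows = std.os.windows;

// Hand-declared bindings (assumption: not present in std.os.windows).
const MEM_ADDRESS_REQUIREMENTS = extern struct {
    LowestStartingAddress: ?windows.PVOID,
    HighestEndingAddress: ?windows.PVOID,
    Alignment: windows.SIZE_T,
};

const MEM_EXTENDED_PARAMETER = extern struct {
    // Low 8 bits of the first word hold the MEM_EXTENDED_PARAMETER_TYPE;
    // the remaining bits are reserved. Assumes a 64-bit target, where the
    // pointer member overlays the 8-byte union of the C declaration.
    type_and_reserved: u64,
    pointer: ?*anyopaque,
};

const MemExtendedParameterAddressRequirements: u64 = 1;

extern "kernelbase" fn VirtualAlloc2(
    process: ?windows.HANDLE,
    base_address: ?windows.PVOID,
    size: windows.SIZE_T,
    allocation_type: windows.ULONG,
    page_protection: windows.ULONG,
    extended_parameters: ?*MEM_EXTENDED_PARAMETER,
    parameter_count: windows.ULONG,
) callconv(windows.WINAPI) ?windows.PVOID;

/// Hypothetical helper: allocate n bytes at an address that is a multiple of
/// alignment, which must be a power of two and a multiple of the allocation
/// granularity.
fn allocAligned(n: usize, alignment: usize) ?[*]u8 {
    var requirements: MEM_ADDRESS_REQUIREMENTS = .{
        .LowestStartingAddress = null, // no lower bound on the address
        .HighestEndingAddress = null, // no upper bound on the address
        .Alignment = alignment,
    };
    var param: MEM_EXTENDED_PARAMETER = .{
        .type_and_reserved = MemExtendedParameterAddressRequirements,
        .pointer = &requirements,
    };
    // A null process handle means the calling process. The kernel searches
    // for a suitably aligned region itself: no reserve/free/retry dance and
    // no race with other threads mapping the same range.
    const ptr = VirtualAlloc2(
        null,
        null,
        n,
        windows.MEM_RESERVE | windows.MEM_COMMIT,
        windows.PAGE_READWRITE,
        &param,
        1,
    ) orelse return null;
    return @ptrCast(ptr);
}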