Commit 5d241a1478

Wooster <wooster0@proton.me>
2023-10-29 19:21:32
std.debug: detect general protection faults on x86_64-linux
```zig const std = @import("std"); pub fn main() !void { var addr: *u8 = @ptrFromInt(0xaaaaaaaaaaaaaaaa); addr.* = 1; } ``` On x86_64-linux: Before: ``` $ zig run x.zig Segmentation fault at address 0x0 /home/wooster/Desktop/zig/x.zig:5:5: 0x21d887 in main (x) addr.* = 1; ^ /home/wooster/Desktop/zig-linux-x86_64/lib/std/start.zig:583:37: 0x21d847 in posixCallMainAndExit (x) const result = root.main() catch |err| { ^ /home/wooster/Desktop/zig-linux-x86_64/lib/std/start.zig:251:5: 0x21d371 in _start (x) asm volatile (switch (native_arch) { ^ ???:?:?: 0x0 in ??? (???) Aborted (core dumped) ``` After: ``` $ zig run x.zig --zig-lib-dir lib General protection exception (no address available) /home/wooster/Desktop/zig/x.zig:5:5: 0x21d907 in main (x) addr.* = 1; ^ /home/wooster/Desktop/zig/lib/std/start.zig:583:37: 0x21d8c7 in posixCallMainAndExit (x) const result = root.main() catch |err| { ^ /home/wooster/Desktop/zig/lib/std/start.zig:251:5: 0x21d3f1 in _start (x) asm volatile (switch (native_arch) { ^ ???:?:?: 0x0 in ??? (???) Aborted (core dumped) ``` As @IntegratedQuantum pointed out in <https://github.com/ziglang/zig/issues/17745#issuecomment-1783815386>, it seems that if the `code` field of the `siginfo_t` instance has a certain value (128), you are able to distinguish between a general protection exception and a segmentation fault. This does not seem to be documented in `man sigaction`: ``` The following values can be placed in si_code for a SIGSEGV signal: SEGV_MAPERR Address not mapped to object. SEGV_ACCERR Invalid permissions for mapped object. SEGV_BNDERR (since Linux 3.19) Failed address bound checks. SEGV_PKUERR (since Linux 4.6) Access was denied by memory protection keys. See pkeys(7). The protection key which applied to this access is available via si_pkey. ``` (those constants are 1, 2, 3, and 4; none of them is 128) I can't find a lot of documentation about this but it seems to work consistently for me on x86_64-linux. 
Here is a gist which provides additional evidence that this is a reliable way of checking for a general protection fault: https://gist.github.com/ytoshima/5682393 (read the comment on the first line) See also: https://stackoverflow.com/questions/64309366/why-is-the-segfault-address-null-when-accessing-memory-that-has-any-of-the-16-mo This only seems to affect x86_64; on 32-bit x86 it does not seem to be a problem. Helps with #17745 but doesn't close it because the issue still exists on Windows and other POSIX OSs. I also limited this to x86_64-linux for now because that's the only platform where I tested it. It might work on more POSIX OSs.
1 parent 8bf4b3c
Changed files (1)
lib
lib/std/debug.zig
@@ -2401,14 +2401,14 @@ fn handleSegfaultPosix(sig: i32, info: *const os.siginfo_t, ctx_ptr: ?*const any
                 panic_mutex.lock();
                 defer panic_mutex.unlock();
 
-                dumpSegfaultInfoPosix(sig, addr, ctx_ptr);
+                dumpSegfaultInfoPosix(sig, info.code, addr, ctx_ptr);
             }
 
             waitForOtherThreadToFinishPanicking();
         },
         else => {
             // panic mutex already locked
-            dumpSegfaultInfoPosix(sig, addr, ctx_ptr);
+            dumpSegfaultInfoPosix(sig, info.code, addr, ctx_ptr);
         },
     };
 
@@ -2418,10 +2418,20 @@ fn handleSegfaultPosix(sig: i32, info: *const os.siginfo_t, ctx_ptr: ?*const any
     os.abort();
 }
 
-fn dumpSegfaultInfoPosix(sig: i32, addr: usize, ctx_ptr: ?*const anyopaque) void {
+fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*const anyopaque) void {
     const stderr = io.getStdErr().writer();
     _ = switch (sig) {
-        os.SIG.SEGV => stderr.print("Segmentation fault at address 0x{x}\n", .{addr}),
+        os.SIG.SEGV => if (native_arch == .x86_64 and native_os == .linux and code == 128) // SI_KERNEL
+            // x86_64 doesn't have a full 64-bit virtual address space.
+            // Addresses outside of that address space are non-canonical
+            // and the CPU won't provide the faulting address to us.
+            // This happens when accessing memory addresses such as 0xaaaaaaaaaaaaaaaa
+            // but can also happen when no addressable memory is involved;
+            // for example when reading/writing model-specific registers
+            // by executing `rdmsr` or `wrmsr` in user-space (unprivileged mode).
+            stderr.print("General protection exception (no address available)\n", .{})
+        else
+            stderr.print("Segmentation fault at address 0x{x}\n", .{addr}),
         os.SIG.ILL => stderr.print("Illegal instruction at address 0x{x}\n", .{addr}),
         os.SIG.BUS => stderr.print("Bus error at address 0x{x}\n", .{addr}),
         os.SIG.FPE => stderr.print("Arithmetic exception at address 0x{x}\n", .{addr}),