Commit 7e74276661

Alex Rønne Petersen <alex@alexrp.com>
2024-06-22 18:33:22
generate_linux_syscalls: Rework generation strategy for newer kernel ports.
If we're going to abuse the preprocessor, we may as well go all the way and have it generate a convenient format for us. This achieves two things: 1. We no longer need hacks for the arch-specific syscalls. 2. We now generate the correct syscall names for 32-bit platforms. The latter is because we now resolve __SC_3264, etc.
1 parent f494a47
Changed files (1)
tools/generate_linux_syscalls.zig
@@ -10,7 +10,12 @@ const zig = std.zig;
 const fs = std.fs;
 
 const stdlib_renames = std.StaticStringMap([]const u8).initComptime(.{
+    // Remove underscore prefix.
+    .{ "_llseek", "llseek" },
+    .{ "_newselect", "newselect" },
+    .{ "_sysctl", "sysctl" },
     // Most 64-bit archs.
+    .{ "newfstat", "fstat64" },
     .{ "newfstatat", "fstatat64" },
     // POWER.
     .{ "sync_file_range2", "sync_file_range" },
@@ -19,6 +24,24 @@ const stdlib_renames = std.StaticStringMap([]const u8).initComptime(.{
     .{ "arm_fadvise64_64", "fadvise64_64" },
 });
 
+// Renames for the newer architectures whose syscalls come from the C preprocessor; checked before `stdlib_renames`.
+const stdlib_renames_new = std.StaticStringMap([]const u8).initComptime(.{
+    .{ "newuname", "uname" },
+    .{ "umount", "umount2" },
+});
+
+// Multiple syscalls can be mapped to sys_ni_syscall, so for those we pick the `sys_`-prefixed name from the
+// syscall number text instead. Thankfully it's only 2 well-known syscalls in newer kernel ports at the moment.
+fn getOverridenNameNew(value: []const u8) ?[]const u8 {
+    if (mem.eql(u8, value, "18")) {
+        return "sys_lookup_dcookie";
+    } else if (mem.eql(u8, value, "42")) {
+        return "sys_nfsservctl";
+    } else {
+        return null;
+    }
+}
+
 pub fn main() !void {
     var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
     defer arena.deinit();
@@ -60,8 +83,9 @@ pub fn main() !void {
             // abi is always i386
             _ = fields.next() orelse return error.Incomplete;
             const name = fields.next() orelse return error.Incomplete;
+            const fixed_name = if (stdlib_renames.get(name)) |fixed| fixed else name;
 
-            try writer.print("    {p} = {s},\n", .{ zig.fmtId(name), number });
+            try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), number });
         }
 
         try writer.writeAll("};\n\n");
@@ -80,8 +104,8 @@ pub fn main() !void {
             // The x32 abi syscalls are always at the end.
             if (mem.eql(u8, abi, "x32")) break;
             const name = fields.next() orelse return error.Incomplete;
-
             const fixed_name = if (stdlib_renames.get(name)) |fixed| fixed else name;
+
             try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), number });
         }
 
@@ -105,8 +129,8 @@ pub fn main() !void {
             const abi = fields.next() orelse return error.Incomplete;
             if (mem.eql(u8, abi, "oabi")) continue;
             const name = fields.next() orelse return error.Incomplete;
-
             const fixed_name = if (stdlib_renames.get(name)) |fixed| fixed else name;
+
             try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), number });
         }
 
@@ -136,8 +160,9 @@ pub fn main() !void {
             const abi = fields.next() orelse return error.Incomplete;
             if (mem.eql(u8, abi, "32")) continue;
             const name = fields.next() orelse return error.Incomplete;
+            const fixed_name = if (stdlib_renames.get(name)) |fixed| fixed else name;
 
-            try writer.print("    {p} = {s},\n", .{ zig.fmtId(name), number });
+            try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), number });
         }
 
         try writer.writeAll("};\n\n");
@@ -161,8 +186,9 @@ pub fn main() !void {
             _ = fields.next() orelse return error.Incomplete;
             const name = fields.next() orelse return error.Incomplete;
             if (mem.startsWith(u8, name, "unused")) continue;
+            const fixed_name = if (stdlib_renames.get(name)) |fixed| fixed else name;
 
-            try writer.print("    {p} = Linux + {s},\n", .{ zig.fmtId(name), number });
+            try writer.print("    {p} = Linux + {s},\n", .{ zig.fmtId(fixed_name), number });
         }
 
         try writer.writeAll("};\n\n");
@@ -232,11 +258,6 @@ pub fn main() !void {
     // Newer architectures (starting with aarch64 c. 2012) now use the same C
     // header file for their syscall numbers. Arch-specific headers are used to
     // define pre-proc. vars that add additional (usually obsolete) syscalls.
-    //
-    // TODO:
-    // - It would be better to use libclang/translate-c directly to extract the definitions.
-    // - The `-dD` option only does minimal pre-processing and doesn't resolve addition,
-    //   so arch specific syscalls are dealt with manually.
     {
         try writer.writeAll("pub const Arm64 = enum(usize) {\n");
 
@@ -254,6 +275,8 @@ pub fn main() !void {
             // Using -I=[dir] includes the zig linux headers, which we don't want.
             "-Iinclude",
             "-Iinclude/uapi",
+            // Output the syscall in a format we can easily recognize.
+            "-D __SYSCALL(nr, nm)=zigsyscall nm nr",
             "arch/arm64/include/uapi/asm/unistd.h",
         };
 
@@ -277,32 +300,24 @@ pub fn main() !void {
         };
 
         var lines = mem.tokenizeScalar(u8, defines, '\n');
-        loop: while (lines.next()) |line| {
-            var fields = mem.tokenizeAny(u8, line, " \t");
-            const cmd = fields.next() orelse return error.Incomplete;
-            if (!mem.eql(u8, cmd, "#define")) continue;
-            const define = fields.next() orelse return error.Incomplete;
-            const number = fields.next() orelse continue;
+        while (lines.next()) |line| {
+            var fields = mem.tokenizeAny(u8, line, " ");
+            const prefix = fields.next() orelse return error.Incomplete;
 
-            if (!std.ascii.isDigit(number[0])) continue;
-            if (!mem.startsWith(u8, define, "__NR")) continue;
-            const name = mem.trimLeft(u8, mem.trimLeft(u8, define, "__NR3264_"), "__NR_");
-            if (mem.eql(u8, name, "arch_specific_syscall")) continue;
-            if (mem.eql(u8, name, "syscalls")) break :loop;
+            if (!mem.eql(u8, prefix, "zigsyscall")) continue;
 
-            const fixed_name = if (stdlib_renames.get(name)) |fixed| fixed else name;
-            try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), number });
+            const sys_name = fields.next() orelse return error.Incomplete;
+            const value = fields.rest();
+            const name = (getOverridenNameNew(value) orelse sys_name)["sys_".len..];
+            const fixed_name = if (stdlib_renames_new.get(name)) |f| f else if (stdlib_renames.get(name)) |f| f else name;
+
+            try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), value });
         }
 
         try writer.writeAll("};\n\n");
     }
     {
-        try writer.writeAll(
-            \\pub const RiscV32 = enum(usize) {
-            \\    pub const arch_specific_syscall = 244;
-            \\
-            \\
-        );
+        try writer.writeAll("pub const RiscV32 = enum(usize) {\n");
 
         const child_args = [_][]const u8{
             zig_exe,
@@ -316,6 +331,7 @@ pub fn main() !void {
             "-Iinclude",
             "-Iinclude/uapi",
             "-Iarch/riscv/include/uapi",
+            "-D __SYSCALL(nr, nm)=zigsyscall nm nr",
             "arch/riscv/include/uapi/asm/unistd.h",
         };
 
@@ -339,38 +355,24 @@ pub fn main() !void {
         };
 
         var lines = mem.tokenizeScalar(u8, defines, '\n');
-        loop: while (lines.next()) |line| {
-            var fields = mem.tokenizeAny(u8, line, " \t");
-            const cmd = fields.next() orelse return error.Incomplete;
-            if (!mem.eql(u8, cmd, "#define")) continue;
-            const define = fields.next() orelse return error.Incomplete;
-            const number = fields.next() orelse continue;
+        while (lines.next()) |line| {
+            var fields = mem.tokenizeAny(u8, line, " ");
+            const prefix = fields.next() orelse return error.Incomplete;
 
-            if (!std.ascii.isDigit(number[0])) continue;
-            if (!mem.startsWith(u8, define, "__NR")) continue;
-            const name = mem.trimLeft(u8, mem.trimLeft(u8, define, "__NR3264_"), "__NR_");
-            if (mem.eql(u8, name, "arch_specific_syscall")) continue;
-            if (mem.eql(u8, name, "syscalls")) break :loop;
+            if (!mem.eql(u8, prefix, "zigsyscall")) continue;
 
-            const fixed_name = if (stdlib_renames.get(name)) |fixed| fixed else name;
-            try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), number });
+            const sys_name = fields.next() orelse return error.Incomplete;
+            const value = fields.rest();
+            const name = (getOverridenNameNew(value) orelse sys_name)["sys_".len..];
+            const fixed_name = if (stdlib_renames_new.get(name)) |f| f else if (stdlib_renames.get(name)) |f| f else name;
+
+            try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), value });
         }
 
-        try writer.writeAll(
-            \\
-            \\    riscv_flush_icache = arch_specific_syscall + 15,
-            \\    riscv_hwprobe = arch_specific_syscall + 14,
-            \\};
-            \\
-        );
+        try writer.writeAll("};\n\n");
     }
     {
-        try writer.writeAll(
-            \\pub const RiscV64 = enum(usize) {
-            \\    pub const arch_specific_syscall = 244;
-            \\
-            \\
-        );
+        try writer.writeAll("pub const RiscV64 = enum(usize) {\n");
 
         const child_args = [_][]const u8{
             zig_exe,
@@ -384,6 +386,7 @@ pub fn main() !void {
             "-Iinclude",
             "-Iinclude/uapi",
             "-Iarch/riscv/include/uapi",
+            "-D __SYSCALL(nr, nm)=zigsyscall nm nr",
             "arch/riscv/include/uapi/asm/unistd.h",
         };
 
@@ -407,30 +410,21 @@ pub fn main() !void {
         };
 
         var lines = mem.tokenizeScalar(u8, defines, '\n');
-        loop: while (lines.next()) |line| {
-            var fields = mem.tokenizeAny(u8, line, " \t");
-            const cmd = fields.next() orelse return error.Incomplete;
-            if (!mem.eql(u8, cmd, "#define")) continue;
-            const define = fields.next() orelse return error.Incomplete;
-            const number = fields.next() orelse continue;
+        while (lines.next()) |line| {
+            var fields = mem.tokenizeAny(u8, line, " ");
+            const prefix = fields.next() orelse return error.Incomplete;
 
-            if (!std.ascii.isDigit(number[0])) continue;
-            if (!mem.startsWith(u8, define, "__NR")) continue;
-            const name = mem.trimLeft(u8, mem.trimLeft(u8, define, "__NR3264_"), "__NR_");
-            if (mem.eql(u8, name, "arch_specific_syscall")) continue;
-            if (mem.eql(u8, name, "syscalls")) break :loop;
+            if (!mem.eql(u8, prefix, "zigsyscall")) continue;
 
-            const fixed_name = if (stdlib_renames.get(name)) |fixed| fixed else name;
-            try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), number });
+            const sys_name = fields.next() orelse return error.Incomplete;
+            const value = fields.rest();
+            const name = (getOverridenNameNew(value) orelse sys_name)["sys_".len..];
+            const fixed_name = if (stdlib_renames_new.get(name)) |f| f else if (stdlib_renames.get(name)) |f| f else name;
+
+            try writer.print("    {p} = {s},\n", .{ zig.fmtId(fixed_name), value });
         }
 
-        try writer.writeAll(
-            \\
-            \\    riscv_flush_icache = arch_specific_syscall + 15,
-            \\    riscv_hwprobe = arch_specific_syscall + 14,
-            \\};
-            \\
-        );
+        try writer.writeAll("};\n\n");
     }
     {
         try writer.writeAll(