Commit 857c1d4ff2

Eric Joldasov <bratishkaerik@landless-city.net>
2024-04-23 20:19:19
std.zig.system: fix ELF file search
* Adjust buffer length a bit. * Fix detecting if file is a script. Logic below was unreachable, because 99% of scripts failed "At least 255 bytes long" check and were detected as ELF files. It should be "At least 4" instead (minimum value of "ELF magic length" and "smallest possible interpreter path length"). * Fix parsing interpreter path, when text after shebang: 1. does not have newline, 2. has leading spaces and tabs, 3. separates interpreter and arguments by tab or NUL. * Remove empty error set from `defaultAbiAndDynamicLinker`. Signed-off-by: Eric Joldasov <bratishkaerik@landless-city.net>
1 parent 7cf3167
Changed files (1)
lib
std
lib/std/zig/system.zig
@@ -983,7 +983,7 @@ fn detectAbiAndDynamicLinker(
 
     // Best case scenario: the executable is dynamically linked, and we can iterate
     // over our own shared objects and find a dynamic linker.
-    const elf_file = blk: {
+    const elf_file = elf_file: {
         // This block looks for a shebang line in /usr/bin/env,
         // if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead,
         // doing the same logic recursively in case it finds another shebang line.
@@ -995,9 +995,22 @@ fn detectAbiAndDynamicLinker(
             // Haiku does not have a /usr root directory.
             .haiku => "/bin/env",
         };
-        // #! (2) + 255 (max length of shebang line since Linux 5.1) + \n (1)
-        var buffer: [258]u8 = undefined;
+
+        // According to `man 2 execve`:
+        //
+        // The kernel imposes a maximum length on the text
+        // that follows the "#!" characters at the start of a script;
+        // characters beyond the limit are ignored.
+        // Before Linux 5.1, the limit is 127 characters.
+        // Since Linux 5.1, the limit is 255 characters.
+        //
+        // Tests show that bash and zsh consider 255 as total limit,
+        // *including* "#!" characters and ignoring newline.
+        // For safety, we set max length as 255 + \n (1).
+        var buffer: [255 + 1]u8 = undefined;
         while (true) {
+            // Interpreter path can be relative on Linux, but
+            // for simplicity we are asserting it is an absolute path.
             const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) {
                 error.NoSpaceLeft => unreachable,
                 error.NameTooLong => unreachable,
@@ -1027,27 +1040,55 @@ fn detectAbiAndDynamicLinker(
 
                 else => |e| return e,
             };
-            errdefer file.close();
-
-            const len = preadAtLeast(file, &buffer, 0, buffer.len) catch |err| switch (err) {
+            var is_elf_file = false;
+            defer if (is_elf_file == false) file.close();
+
+            // Shortest working interpreter path is "#!/i" (4)
+            // (interpreter is "/i", assuming all pathes are absolute, like in above comment).
+            // ELF magic number length is also 4.
+            //
+            // If file is shorter than that, it is definitely not ELF file
+            // nor file with "shebang" line.
+            const min_len: usize = 4;
+
+            const len = preadAtLeast(file, &buffer, 0, min_len) catch |err| switch (err) {
                 error.UnexpectedEndOfFile,
                 error.UnableToReadElfFile,
-                => break :blk file,
+                => return defaultAbiAndDynamicLinker(cpu, os, query),
 
                 else => |e| return e,
             };
-            const newline = mem.indexOfScalar(u8, buffer[0..len], '\n') orelse break :blk file;
-            const line = buffer[0..newline];
-            if (!mem.startsWith(u8, line, "#!")) break :blk file;
-            var it = mem.tokenizeScalar(u8, line[2..], ' ');
-            file_name = it.next() orelse return defaultAbiAndDynamicLinker(cpu, os, query);
-            file.close();
+            const content = buffer[0..len];
+
+            if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
+                // It is very likely ELF file!
+                is_elf_file = true;
+                break :elf_file file;
+            } else if (mem.eql(u8, content[0..2], "#!")) {
+                // We detected shebang, now parse entire line.
+
+                // Trim leading "#!", spaces and tabs.
+                const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });
+
+                // This line can have:
+                // * Interpreter path only,
+                // * Interpreter path and arguments, all separated by space, tab or NUL character.
+                // And optionally newline at the end.
+                const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");
+
+                // Separate path and args.
+                const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
+
+                file_name = path_maybe_args[0..path_end];
+                continue;
+            } else {
+                // Not a ELF file, not a shell script with "shebang line", invalid duck.
+                return defaultAbiAndDynamicLinker(cpu, os, query);
+            }
         }
     };
     defer elf_file.close();
 
-    // If Zig is statically linked, such as via distributed binary static builds, the above
-    // trick (block self_exe) won't work. The next thing we fall back to is the same thing, but for elf_file.
     // TODO: inline this function and combine the buffer we already read above to find
     // the possible shebang line with the buffer we use for the ELF header.
     return abiAndDynamicLinkerFromFile(elf_file, cpu, os, ld_info_list, query) catch |err| switch (err) {
@@ -1075,7 +1116,7 @@ fn detectAbiAndDynamicLinker(
     };
 }
 
-fn defaultAbiAndDynamicLinker(cpu: Target.Cpu, os: Target.Os, query: Target.Query) !Target {
+fn defaultAbiAndDynamicLinker(cpu: Target.Cpu, os: Target.Os, query: Target.Query) Target {
     const abi = query.abi orelse Target.Abi.default(cpu.arch, os);
     return .{
         .cpu = cpu,