Commit 631633b252

Jonathan S <gereeter+code@gmail.com>
2020-03-27 20:28:48
Document and reduce usage of MAX_PATH_BYTES, lifting arbitrary buffer size requirements
1 parent 4c8b937
Changed files (4)
lib/std/debug.zig
@@ -1283,6 +1283,9 @@ pub const DebugInfo = struct {
         const elf_path = if (ctx.name.len > 0)
             ctx.name
         else blk: {
+            // Use of MAX_PATH_BYTES here is valid as the resulting path is immediately
+            // opened with no modification. TODO: Use openSelfExe instead to avoid path
+            // length limitations
             var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
             break :blk try fs.selfExePath(&buf);
         };
lib/std/fs.zig
@@ -33,8 +33,11 @@ pub const GetAppDataDirError = @import("fs/get_app_data_dir.zig").GetAppDataDirE
 
 pub const Watch = @import("fs/watch.zig").Watch;
 
-/// This represents the maximum size of a UTF-8 encoded file path.
-/// All file system operations which return a path are guaranteed to
+/// This represents the maximum size of a UTF-8 encoded file path that the
+/// operating system will accept. Paths, including those returned from file
+/// system operations, may be longer than this length, but such paths cannot
+/// be successfully passed back in other file system operations. However,
+/// all path components returned by file system operations are assumed to
 /// fit into a UTF-8 encoded array of this length.
 /// The byte count includes room for a null sentinel byte.
 pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
@@ -1194,7 +1197,7 @@ pub const Dir = struct {
     /// Read value of a symbolic link.
     /// The return value is a slice of `buffer`, from index `0`.
     /// Asserts that the path parameter has no null bytes.
-    pub fn readLink(self: Dir, sub_path: []const u8, buffer: *[MAX_PATH_BYTES]u8) ![]u8 {
+    pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
         const sub_path_c = try os.toPosixPath(sub_path);
         return self.readLinkZ(&sub_path_c, buffer);
     }
@@ -1202,7 +1205,7 @@ pub const Dir = struct {
     pub const readLinkC = @compileError("deprecated: renamed to readLinkZ");
 
     /// Same as `readLink`, except the `pathname` parameter is null-terminated.
-    pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: *[MAX_PATH_BYTES]u8) ![]u8 {
+    pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
         return os.readlinkatZ(self.fd, sub_path_c, buffer);
     }
 
@@ -1320,6 +1323,9 @@ pub const Dir = struct {
             var cleanup_dir = true;
             defer if (cleanup_dir) dir.close();
 
+            // Likely valid use of MAX_PATH_BYTES, as dir_name_buf will only
+            // ever store a single path component that was returned from the
+            // filesystem.
             var dir_name_buf: [MAX_PATH_BYTES]u8 = undefined;
             var dir_name: []const u8 = sub_path;
 
@@ -1781,6 +1787,8 @@ pub fn openSelfExe() OpenSelfExeError!File {
         const prefixed_path_w = try os.windows.wToPrefixedFileW(wide_slice);
         return cwd().openFileW(prefixed_path_w.span(), .{});
     }
+    // Use of MAX_PATH_BYTES here is valid as the resulting path is immediately
+    // opened with no modification.
     var buf: [MAX_PATH_BYTES]u8 = undefined;
     const self_exe_path = try selfExePath(&buf);
     buf[self_exe_path.len] = 0;
@@ -1792,6 +1800,8 @@ pub const SelfExePathError = os.ReadLinkError || os.SysCtlError;
 /// `selfExePath` except allocates the result on the heap.
 /// Caller owns returned memory.
 pub fn selfExePathAlloc(allocator: *Allocator) ![]u8 {
+    // TODO(#4812): Consider looping with larger and larger buffers to handle
+    // overlong paths.
     var buf: [MAX_PATH_BYTES]u8 = undefined;
     return mem.dupe(allocator, u8, try selfExePath(&buf));
 }
@@ -1806,10 +1816,10 @@ pub fn selfExePathAlloc(allocator: *Allocator) ![]u8 {
 /// On Linux, depends on procfs being mounted. If the currently executing binary has
 /// been deleted, the file path looks something like `/a/b/c/exe (deleted)`.
 /// TODO make the return type of this a null terminated pointer
-pub fn selfExePath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]u8 {
+pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {
     if (is_darwin) {
-        var u32_len: u32 = out_buffer.len;
-        const rc = std.c._NSGetExecutablePath(out_buffer, &u32_len);
+        var u32_len: u32 = @intCast(u32, math.min(out_buffer.len, math.maxInt(u32)));
+        const rc = std.c._NSGetExecutablePath(out_buffer.ptr, &u32_len);
         if (rc != 0) return error.NameTooLong;
         return mem.spanZ(@ptrCast([*:0]u8, out_buffer));
     }
@@ -1818,14 +1828,14 @@ pub fn selfExePath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]u8 {
         .freebsd, .dragonfly => {
             var mib = [4]c_int{ os.CTL_KERN, os.KERN_PROC, os.KERN_PROC_PATHNAME, -1 };
             var out_len: usize = out_buffer.len;
-            try os.sysctl(&mib, out_buffer, &out_len, null, 0);
+            try os.sysctl(&mib, out_buffer.ptr, &out_len, null, 0);
             // TODO could this slice from 0 to out_len instead?
             return mem.spanZ(@ptrCast([*:0]u8, out_buffer));
         },
         .netbsd => {
             var mib = [4]c_int{ os.CTL_KERN, os.KERN_PROC_ARGS, -1, os.KERN_PROC_PATHNAME };
             var out_len: usize = out_buffer.len;
-            try os.sysctl(&mib, out_buffer, &out_len, null, 0);
+            try os.sysctl(&mib, out_buffer.ptr, &out_len, null, 0);
             // TODO could this slice from 0 to out_len instead?
             return mem.spanZ(@ptrCast([*:0]u8, out_buffer));
         },
@@ -1848,13 +1858,15 @@ pub fn selfExePathW() [:0]const u16 {
 /// `selfExeDirPath` except allocates the result on the heap.
 /// Caller owns returned memory.
 pub fn selfExeDirPathAlloc(allocator: *Allocator) ![]u8 {
+    // TODO(#4812): Consider looping with larger and larger buffers to handle
+    // overlong paths.
     var buf: [MAX_PATH_BYTES]u8 = undefined;
     return mem.dupe(allocator, u8, try selfExeDirPath(&buf));
 }
 
 /// Get the directory path that contains the current executable.
 /// Returned value is a slice of out_buffer.
-pub fn selfExeDirPath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]const u8 {
+pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 {
     const self_exe_path = try selfExePath(out_buffer);
     // Assume that the OS APIs return absolute paths, and therefore dirname
     // will not return null.
@@ -1864,6 +1876,12 @@ pub fn selfExeDirPath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]const
 /// `realpath`, except caller must free the returned memory.
 /// TODO integrate with `Dir`
 pub fn realpathAlloc(allocator: *Allocator, pathname: []const u8) ![]u8 {
+    // Use of MAX_PATH_BYTES here is valid as the realpath function does not
+    // have a variant that takes an arbitrary-size buffer.
+    // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008
+    // NULL out parameter (GNU's canonicalize_file_name) to handle overelong
+    // paths. musl supports passing NULL but restricts the output to PATH_MAX
+    // anyway.
     var buf: [MAX_PATH_BYTES]u8 = undefined;
     return mem.dupe(allocator, u8, try os.realpath(pathname, &buf));
 }
lib/std/os.zig
@@ -1236,6 +1236,8 @@ pub fn execvpeZ_expandArg0(
     if (mem.indexOfScalar(u8, file_slice, '/') != null) return execveZ(file, child_argv, envp);
 
     const PATH = getenvZ("PATH") orelse "/usr/local/bin:/bin/:/usr/bin";
+    // Use of MAX_PATH_BYTES here is valid as the path_buf will be passed
+    // directly to the operating system in execveZ.
     var path_buf: [MAX_PATH_BYTES]u8 = undefined;
     var it = mem.tokenize(PATH, ":");
     var seen_eacces = false;
lib/std/process.zig
@@ -15,12 +15,14 @@ pub const changeCurDir = os.chdir;
 pub const changeCurDirC = os.chdirC;
 
 /// The result is a slice of `out_buffer`, from index `0`.
-pub fn getCwd(out_buffer: *[fs.MAX_PATH_BYTES]u8) ![]u8 {
+pub fn getCwd(out_buffer: []u8) ![]u8 {
     return os.getcwd(out_buffer);
 }
 
 /// Caller must free the returned memory.
 pub fn getCwdAlloc(allocator: *Allocator) ![]u8 {
+    // TODO(#4812): Consider looping with larger and larger buffers to handle
+    // overlong paths.
     var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
     return mem.dupe(allocator, u8, try os.getcwd(&buf));
 }