Commit 832f6d8f7f

Nameless <truemedian@gmail.com>
2023-12-18 21:08:38
std.Uri: fix implementation of resolve with trailing slashes
1 parent c5d359e
Changed files (1)
lib
lib/std/Uri.zig
@@ -358,53 +358,111 @@ pub fn parse(text: []const u8) ParseError!Uri {
     return uri;
 }
 
+/// Implementation of RFC 3986, Section 5.2.4. Removes dot segments from a URI path.
+///
+/// `paths` is treated as a single path given in consecutive pieces. Each piece is split on '/':
+/// empty segments are dropped, "." segments are skipped, and ".." discards the most recently kept
+/// segment (or does nothing when none is left). Every kept segment is written with a leading '/',
+/// so the result is either empty or an absolute path.
+///
+/// A trailing slash is kept when the last piece ends in '/' or in a "." / ".." segment, as the RFC
+/// requires (e.g. "/a/b/.." becomes "/a/" and "/a/.." becomes "/"). An empty `paths`, or a last
+/// piece that is empty, adds no trailing slash.
+///
+/// `std.fs.path.resolvePosix` is not sufficient here because it may return relative paths and does not preserve trailing slashes.
+///
+/// Caller owns the returned slice, which is allocated with `allocator`.
+fn removeDotSegments(allocator: std.mem.Allocator, paths: []const []const u8) std.mem.Allocator.Error![]const u8 {
+    var result = std.ArrayList(u8).init(allocator);
+    defer result.deinit();
+
+    for (paths) |p| {
+        var it = std.mem.tokenizeScalar(u8, p, '/');
+        while (it.next()) |component| {
+            if (std.mem.eql(u8, component, ".")) {
+                continue;
+            } else if (std.mem.eql(u8, component, "..")) {
+                // Every kept segment was written as "/segment", so the last '/' marks where the
+                // most recent segment starts. An empty output has no '/', so nothing is dropped.
+                if (std.mem.lastIndexOfScalar(u8, result.items, '/')) |segment_start| {
+                    result.shrinkRetainingCapacity(segment_start);
+                }
+            } else {
+                try result.ensureUnusedCapacity(1 + component.len);
+                result.appendAssumeCapacity('/');
+                result.appendSliceAssumeCapacity(component);
+            }
+        }
+    }
+
+    // Decide whether the output names a "directory": the last piece ends in '/', or its final
+    // segment is a dot segment (RFC 3986 5.2.4 rewrites a trailing "/." or "/.." to "/").
+    // The loop above never leaves a trailing '/', so appending one cannot double it.
+    if (paths.len > 0) {
+        const last_path = paths[paths.len - 1];
+        const last_segment = if (std.mem.lastIndexOfScalar(u8, last_path, '/')) |slash|
+            last_path[slash + 1 ..]
+        else
+            last_path;
+
+        const ends_with_slash = last_path.len > 0 and last_segment.len == 0;
+        const ends_with_dot_segment = std.mem.eql(u8, last_segment, ".") or std.mem.eql(u8, last_segment, "..");
+        if (ends_with_slash or ends_with_dot_segment) {
+            try result.append('/');
+        }
+    }
+
+    return result.toOwnedSlice();
+}
+
 /// Resolves a URI against a base URI, conforming to RFC 3986, Section 5.
-/// arena owns any memory allocated by this function.
-pub fn resolve(Base: Uri, R: Uri, strict: bool, arena: std.mem.Allocator) !Uri {
-    var T: Uri = undefined;
-
-    if (R.scheme.len > 0 and !((!strict) and (std.mem.eql(u8, R.scheme, Base.scheme)))) {
-        T.scheme = R.scheme;
-        T.user = R.user;
-        T.host = R.host;
-        T.port = R.port;
-        T.path = try std.fs.path.resolvePosix(arena, &.{ "/", R.path });
-        T.query = R.query;
+///
+/// Assumes `arena` owns all memory in `base` and `ref`. `arena` will own all memory in the returned URI.
+///
+/// When `strict` is false, a reference whose scheme equals the base scheme is resolved as if it
+/// had no scheme. User, password, host and port are always taken together from the same URI:
+/// from `ref` when it has a scheme (subject to `strict`) or a host, otherwise from `base`.
+/// The fragment always comes from `ref`. The only possible error is `error.OutOfMemory`.
+pub fn resolve(base: Uri, ref: Uri, strict: bool, arena: std.mem.Allocator) std.mem.Allocator.Error!Uri {
+    var target: Uri = Uri{
+        .scheme = "",
+        .user = null,
+        .password = null,
+        .host = null,
+        .port = null,
+        .path = "",
+        .query = null,
+        .fragment = null,
+    };
+
+    if (ref.scheme.len > 0 and (strict or !std.mem.eql(u8, ref.scheme, base.scheme))) {
+        target.scheme = ref.scheme;
+        target.user = ref.user;
+        target.password = ref.password;
+        target.host = ref.host;
+        target.port = ref.port;
+        target.path = try removeDotSegments(arena, &.{ref.path});
+        target.query = ref.query;
     } else {
-        if (R.host) |host| {
-            T.user = R.user;
-            T.host = host;
-            T.port = R.port;
-            T.path = R.path;
-            T.path = try std.fs.path.resolvePosix(arena, &.{ "/", R.path });
-            T.query = R.query;
+        target.scheme = base.scheme;
+        if (ref.host) |host| {
+            target.user = ref.user;
+            target.password = ref.password;
+            target.host = host;
+            target.port = ref.port;
+            target.path = try removeDotSegments(arena, &.{ref.path});
+            target.query = ref.query;
         } else {
-            if (R.path.len == 0) {
-                T.path = Base.path;
-                if (R.query) |query| {
-                    T.query = query;
-                } else {
-                    T.query = Base.query;
-                }
+            if (ref.path.len == 0) {
+                // An empty reference path keeps the base path untouched; only the query may be replaced.
+                target.path = base.path;
+                target.query = ref.query orelse base.query;
             } else {
-                if (R.path[0] == '/') {
-                    T.path = try std.fs.path.resolvePosix(arena, &.{ "/", R.path });
+                if (ref.path[0] == '/') {
+                    target.path = try removeDotSegments(arena, &.{ref.path});
                 } else {
-                    T.path = try std.fs.path.resolvePosix(arena, &.{ "/", Base.path, R.path });
+                    // RFC 3986, Section 5.2.3: merge by dropping everything after the last '/' of the base path.
+                    // `std.fs.path.dirnamePosix` is not used because it ignores a trailing '/' and would
+                    // therefore also drop the final segment of a base path such as "/b/c/".
+                    const base_dir_len = std.mem.lastIndexOfScalar(u8, base.path, '/') orelse 0;
+                    target.path = try removeDotSegments(arena, &.{ base.path[0..base_dir_len], ref.path });
                 }
-                T.query = R.query;
+                target.query = ref.query;
             }
 
-            T.user = Base.user;
-            T.host = Base.host;
-            T.port = Base.port;
+            target.user = base.user;
+            target.password = base.password;
+            target.host = base.host;
+            target.port = base.port;
         }
-        T.scheme = Base.scheme;
     }
 
-    T.fragment = R.fragment;
+    target.fragment = ref.fragment;
+
+    return target;
+}
+
+test resolve {
+    const base = try parse("http://a/b/c/d;p?q");
+
+    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena.deinit();
 
-    return T;
+    // Scheme-less references, each paired with the URI it must resolve to against `base`.
+    const relative_cases = [_][2][]const u8{
+        .{ "blog/", "http://a/b/c/blog/" },
+        .{ "blog/?k", "http://a/b/c/blog/?k" },
+        .{ "../blog/", "http://a/b/blog/" },
+        .{ "../blog", "http://a/b/blog" },
+        .{ "//e", "http://e" },
+    };
+    for (relative_cases) |case| {
+        const expected = try parse(case[1]);
+        const reference = try parseWithoutScheme(case[0]);
+        try std.testing.expectEqualDeep(expected, try base.resolve(reference, true, arena.allocator()));
+    }
+
+    // A reference carrying its own scheme replaces the base entirely.
+    const absolute = "https://a:1/";
+    try std.testing.expectEqualDeep(try parse(absolute), try base.resolve(try parse(absolute), true, arena.allocator()));
 }
 
 const SliceReader = struct {