master
  1//! Uniform Resource Identifier (URI) parsing roughly adhering to
  2//! <https://tools.ietf.org/html/rfc3986>. Does not do perfect grammar and
  3//! character class checking, but should be robust against URIs in the wild.
  4
  5const std = @import("std.zig");
  6const testing = std.testing;
  7const Uri = @This();
  8const Allocator = std.mem.Allocator;
  9const Writer = std.Io.Writer;
 10const HostName = std.Io.net.HostName;
 11
 12scheme: []const u8,
 13user: ?Component = null,
 14password: ?Component = null,
 15/// If non-null, already validated.
 16host: ?Component = null,
 17port: ?u16 = null,
 18path: Component = Component.empty,
 19query: ?Component = null,
 20fragment: ?Component = null,
 21
 22pub const GetHostError = error{UriMissingHost};
 23
 24/// Returned value may point into `buffer` or be the original string.
 25///
 26/// See also:
 27/// * `getHostAlloc`
 28pub fn getHost(uri: Uri, buffer: *[HostName.max_len]u8) GetHostError!HostName {
 29    const component = uri.host orelse return error.UriMissingHost;
 30    const bytes = component.toRaw(buffer) catch |err| switch (err) {
 31        error.NoSpaceLeft => unreachable, // `host` already validated.
 32    };
 33    return .{ .bytes = bytes };
 34}
 35
 36pub const GetHostAllocError = GetHostError || error{OutOfMemory};
 37
 38/// Returned value may point into `buffer` or be the original string.
 39///
 40/// See also:
 41/// * `getHost`
 42pub fn getHostAlloc(uri: Uri, arena: Allocator) GetHostAllocError!HostName {
 43    const component = uri.host orelse return error.UriMissingHost;
 44    const bytes = try component.toRawMaybeAlloc(arena);
 45    return .{ .bytes = bytes };
 46}
 47
 48pub const Component = union(enum) {
 49    /// Invalid characters in this component must be percent encoded
 50    /// before being printed as part of a URI.
 51    raw: []const u8,
 52    /// This component is already percent-encoded, it can be printed
 53    /// directly as part of a URI.
 54    percent_encoded: []const u8,
 55
 56    pub const empty: Component = .{ .percent_encoded = "" };
 57
 58    pub fn isEmpty(component: Component) bool {
 59        return switch (component) {
 60            .raw, .percent_encoded => |string| string.len == 0,
 61        };
 62    }
 63
 64    /// Returned value may point into `buffer` or be the original string.
 65    pub fn toRaw(component: Component, buffer: []u8) error{NoSpaceLeft}![]const u8 {
 66        return switch (component) {
 67            .raw => |raw| raw,
 68            .percent_encoded => |percent_encoded| if (std.mem.indexOfScalar(u8, percent_encoded, '%')) |_|
 69                try std.fmt.bufPrint(buffer, "{f}", .{std.fmt.alt(component, .formatRaw)})
 70            else
 71                percent_encoded,
 72        };
 73    }
 74
 75    /// Allocates the result with `arena` only if needed, so the result should not be freed.
 76    pub fn toRawMaybeAlloc(component: Component, arena: Allocator) Allocator.Error![]const u8 {
 77        return switch (component) {
 78            .raw => |raw| raw,
 79            .percent_encoded => |percent_encoded| if (std.mem.indexOfScalar(u8, percent_encoded, '%')) |_|
 80                try std.fmt.allocPrint(arena, "{f}", .{std.fmt.alt(component, .formatRaw)})
 81            else
 82                percent_encoded,
 83        };
 84    }
 85
 86    pub fn formatRaw(component: Component, w: *Writer) Writer.Error!void {
 87        switch (component) {
 88            .raw => |raw| try w.writeAll(raw),
 89            .percent_encoded => |percent_encoded| {
 90                var start: usize = 0;
 91                var index: usize = 0;
 92                while (std.mem.indexOfScalarPos(u8, percent_encoded, index, '%')) |percent| {
 93                    index = percent + 1;
 94                    if (percent_encoded.len - index < 2) continue;
 95                    const percent_encoded_char =
 96                        std.fmt.parseInt(u8, percent_encoded[index..][0..2], 16) catch continue;
 97                    try w.print("{s}{c}", .{
 98                        percent_encoded[start..percent],
 99                        percent_encoded_char,
100                    });
101                    start = percent + 3;
102                    index = percent + 3;
103                }
104                try w.writeAll(percent_encoded[start..]);
105            },
106        }
107    }
108
109    pub fn formatEscaped(component: Component, w: *Writer) Writer.Error!void {
110        switch (component) {
111            .raw => |raw| try percentEncode(w, raw, isUnreserved),
112            .percent_encoded => |percent_encoded| try w.writeAll(percent_encoded),
113        }
114    }
115
116    pub fn formatUser(component: Component, w: *Writer) Writer.Error!void {
117        switch (component) {
118            .raw => |raw| try percentEncode(w, raw, isUserChar),
119            .percent_encoded => |percent_encoded| try w.writeAll(percent_encoded),
120        }
121    }
122
123    pub fn formatPassword(component: Component, w: *Writer) Writer.Error!void {
124        switch (component) {
125            .raw => |raw| try percentEncode(w, raw, isPasswordChar),
126            .percent_encoded => |percent_encoded| try w.writeAll(percent_encoded),
127        }
128    }
129
130    pub fn formatHost(component: Component, w: *Writer) Writer.Error!void {
131        switch (component) {
132            .raw => |raw| try percentEncode(w, raw, isHostChar),
133            .percent_encoded => |percent_encoded| try w.writeAll(percent_encoded),
134        }
135    }
136
137    pub fn formatPath(component: Component, w: *Writer) Writer.Error!void {
138        switch (component) {
139            .raw => |raw| try percentEncode(w, raw, isPathChar),
140            .percent_encoded => |percent_encoded| try w.writeAll(percent_encoded),
141        }
142    }
143
144    pub fn formatQuery(component: Component, w: *Writer) Writer.Error!void {
145        switch (component) {
146            .raw => |raw| try percentEncode(w, raw, isQueryChar),
147            .percent_encoded => |percent_encoded| try w.writeAll(percent_encoded),
148        }
149    }
150
151    pub fn formatFragment(component: Component, w: *Writer) Writer.Error!void {
152        switch (component) {
153            .raw => |raw| try percentEncode(w, raw, isFragmentChar),
154            .percent_encoded => |percent_encoded| try w.writeAll(percent_encoded),
155        }
156    }
157
158    pub fn percentEncode(w: *Writer, raw: []const u8, comptime isValidChar: fn (u8) bool) Writer.Error!void {
159        var start: usize = 0;
160        for (raw, 0..) |char, index| {
161            if (isValidChar(char)) continue;
162            try w.print("{s}%{X:0>2}", .{ raw[start..index], char });
163            start = index + 1;
164        }
165        try w.writeAll(raw[start..]);
166    }
167};
168
/// Percent decodes all %XX where both X are hexadecimal digits; any other
/// byte, including a malformed escape, is copied through unchanged.
///
/// Decoding walks `input` from its end and fills `output` from its end, so
/// `output` must be at least as long as the decoded result. `output` may
/// alias `input` as long as the write position never gets ahead of the read
/// position, e.g. when both are the same slice (see `percentDecodeInPlace`).
///
/// Mutates and returns a subslice of `output` (its tail).
pub fn percentDecodeBackwards(output: []u8, input: []const u8) []u8 {
    var input_index = input.len;
    var output_index = output.len;
    while (input_index > 0) {
        if (input_index >= 3 and input[input_index - 3] == '%') escape: {
            // Each digit is checked on its own. An integer parser that accepts
            // a sign would treat "%+A" or "%-0" as valid escapes.
            const high = std.fmt.charToDigit(input[input_index - 2], 16) catch break :escape;
            const low = std.fmt.charToDigit(input[input_index - 1], 16) catch break :escape;
            input_index -= 3;
            output_index -= 1;
            output[output_index] = (high << 4) | low;
            continue;
        }
        // Not a valid escape: copy a single byte as-is.
        input_index -= 1;
        output_index -= 1;
        output[output_index] = input[input_index];
    }
    return output[output_index..];
}
193
/// Percent decodes `buffer` onto itself, replacing every %XX where XX is a
/// valid hex number.
/// Mutates `buffer` and returns the decoded bytes, a subslice of `buffer`.
pub fn percentDecodeInPlace(buffer: []u8) []u8 {
    const decoded = percentDecodeBackwards(buffer, buffer);
    return decoded;
}
199
/// Errors produced while parsing or resolving a URI.
pub const ParseError = error{
    /// The scheme was followed by something other than `:`.
    UnexpectedCharacter,
    /// The text does not have the overall shape of a URI.
    InvalidFormat,
    /// The port is not a decimal number that fits in a `u16`.
    InvalidPort,
    /// The host did not pass host name validation.
    InvalidHostName,
};
206
/// Parses everything that follows `scheme:` in a URI, or returns an error.
/// This function is not compliant, but is required to parse some forms of
/// URIs in the wild, such as HTTP Location headers.
/// The returned value contains strings pointing into the original `text`.
/// Each component that is provided will be non-`null`.
pub fn parseAfterScheme(scheme: []const u8, text: []const u8) ParseError!Uri {
    var uri: Uri = .{ .scheme = scheme, .path = undefined };
    var cursor: usize = 0;

    auth: {
        if (!std.mem.startsWith(u8, text, "//")) break :auth;

        cursor = std.mem.indexOfAnyPos(u8, text, 2, &authority_sep) orelse text.len;
        const authority = text[2..cursor];
        if (authority.len == 0) {
            // An empty authority is only accepted when a path follows it.
            if (text.len > 2 and text[2] == '/') break :auth;
            return error.InvalidFormat;
        }

        var host_start: usize = 0;
        if (std.mem.indexOfScalar(u8, authority, '@')) |at| {
            host_start = at + 1;
            const user_info = authority[0..at];

            if (std.mem.indexOfScalar(u8, user_info, ':')) |colon| {
                uri.user = .{ .percent_encoded = user_info[0..colon] };
                // An empty password is also "no password".
                if (colon + 1 < user_info.len) {
                    uri.password = .{ .percent_encoded = user_info[colon + 1 ..] };
                }
            } else {
                uri.user = .{ .percent_encoded = user_info };
                uri.password = null;
            }
        }

        // Only possible if the authority consists of only `userinfo@`.
        if (host_start >= authority.len) break :auth;

        // A host cannot begin with `]`.
        if (authority[host_start] == ']') return error.InvalidFormat;

        var host_end: usize = authority.len;
        const last_colon = std.mem.lastIndexOfScalar(u8, authority, ':');

        if (authority[host_start] == '[') { // IPv6
            const closing = std.mem.lastIndexOfScalar(u8, authority, ']') orelse return error.InvalidFormat;
            host_end = closing + 1;

            if (last_colon) |colon| {
                // A colon inside the brackets belongs to the address, not to a port.
                if (colon >= host_end) {
                    uri.port = std.fmt.parseInt(u16, authority[colon + 1 ..], 10) catch return error.InvalidPort;
                }
            }
        } else if (last_colon) |colon| {
            // A colon before the host belongs to the userinfo, not to a port.
            if (colon >= host_start) {
                host_end = colon;
                uri.port = std.fmt.parseInt(u16, authority[colon + 1 ..], 10) catch return error.InvalidPort;
            }
        }

        if (host_start >= host_end) return error.InvalidFormat;
        uri.host = .{ .percent_encoded = authority[host_start..host_end] };
    }

    const path_start = cursor;
    cursor = std.mem.indexOfAnyPos(u8, text, path_start, &path_sep) orelse text.len;
    uri.path = .{ .percent_encoded = text[path_start..cursor] };

    if (cursor < text.len and text[cursor] == '?') {
        const query_start = cursor + 1;
        cursor = std.mem.indexOfScalarPos(u8, text, query_start, '#') orelse text.len;
        uri.query = .{ .percent_encoded = text[query_start..cursor] };
    }

    if (cursor < text.len and text[cursor] == '#') {
        uri.fragment = .{ .percent_encoded = text[cursor + 1 ..] };
    }

    return uri;
}
286
/// Writes the complete URI (all parts enabled) to `writer`.
pub fn format(uri: *const Uri, writer: *Writer) Writer.Error!void {
    try writeToStream(uri, writer, .all);
}
290
/// Writes the parts of `uri` selected by `flags` to `writer`.
///
/// User and password are only written together with a host. The query and
/// fragment are only written when the path is. An empty path is written as `/`.
pub fn writeToStream(uri: *const Uri, writer: *Writer, flags: Format.Flags) Writer.Error!void {
    if (flags.scheme) {
        try writer.writeAll(uri.scheme);
        try writer.writeByte(':');
        if (flags.authority and uri.host != null) try writer.writeAll("//");
    }

    if (flags.authority) {
        if (uri.host) |host| {
            if (flags.authentication) {
                if (uri.user) |user| {
                    try user.formatUser(writer);
                    if (uri.password) |password| {
                        try writer.writeByte(':');
                        try password.formatPassword(writer);
                    }
                    try writer.writeByte('@');
                }
            }
            try host.formatHost(writer);
            if (flags.port) {
                if (uri.port) |port| try writer.print(":{d}", .{port});
            }
        }
    }

    if (!flags.path) return;

    if (uri.path.isEmpty()) {
        try writer.writeAll("/");
    } else {
        try uri.path.formatPath(writer);
    }
    if (flags.query) {
        if (uri.query) |query| {
            try writer.writeByte('?');
            try query.formatQuery(writer);
        }
    }
    if (flags.fragment) {
        if (uri.fragment) |fragment| {
            try writer.writeByte('#');
            try fragment.formatFragment(writer);
        }
    }
}
333
/// Pairs a URI with the set of parts that should be printed; see `fmt`.
pub const Format = struct {
    /// The URI to print.
    uri: *const Uri,
    /// Which parts of `uri` to print. Only the port flag is on by default.
    flags: Flags = .{},

    /// Selects the parts of a URI that get printed.
    pub const Flags = struct {
        /// When true, include the scheme part of the URI.
        scheme: bool = false,
        /// When true, include the user and password part of the URI. Ignored if `authority` is false.
        authentication: bool = false,
        /// When true, include the authority part of the URI.
        authority: bool = false,
        /// When true, include the path part of the URI.
        path: bool = false,
        /// When true, include the query part of the URI. Ignored when `path` is false.
        query: bool = false,
        /// When true, include the fragment part of the URI. Ignored when `path` is false.
        fragment: bool = false,
        /// When true, include the port part of the URI. Ignored when `port` is null.
        port: bool = true,

        /// Every part enabled.
        pub const all: Flags = .{
            .scheme = true,
            .authentication = true,
            .authority = true,
            .path = true,
            .query = true,
            .fragment = true,
            .port = true,
        };
    };

    /// Prints `f.uri` to `writer`, restricted to the parts enabled in `f.flags`.
    pub fn default(f: Format, writer: *Writer) Writer.Error!void {
        try writeToStream(f.uri, writer, f.flags);
    }
};
369
/// Returns a value that, when printed with `{f}`, writes only the parts of
/// `uri` enabled in `flags`.
pub fn fmt(uri: *const Uri, flags: Format.Flags) std.fmt.Alt(Format, Format.default) {
    const selection: Format = .{ .uri = uri, .flags = flags };
    return .{ .data = selection };
}
373
/// Parses `text` as a URI: a scheme, a `:`, and the remainder, which is handed
/// to `parseAfterScheme`.
/// The return value will contain strings pointing into the original `text`.
/// Each component that is provided will be non-`null`.
pub fn parse(text: []const u8) ParseError!Uri {
    // Length of the leading run of scheme characters.
    var scheme_len: usize = 0;
    while (scheme_len < text.len and isSchemeChar(text[scheme_len])) scheme_len += 1;

    // After the scheme, a ':' must appear.
    if (scheme_len == text.len) return error.InvalidFormat;
    if (text[scheme_len] != ':') return error.UnexpectedCharacter;

    const scheme = text[0..scheme_len];
    const remainder = text[scheme_len + 1 ..];
    return parseAfterScheme(scheme, remainder);
}
385
/// Errors returned by `resolveInPlace`.
pub const ResolveInPlaceError = ParseError || error{NoSpaceLeft};

/// Resolves a URI against a base URI, conforming to
/// [RFC 3986, Section 5](https://www.rfc-editor.org/rfc/rfc3986#section-5)
///
/// The new location must already be copied to the beginning of `aux_buf.*`,
/// occupying its first `new_len` bytes. That text is parsed as a URI (falling
/// back to a scheme-less reference), and its path is then resolved in place.
///
/// If a merge needs to take place, the newly constructed path will be stored
/// in `aux_buf.*` just after the copied location, and `aux_buf.*` will be
/// modified to only contain the remaining unused space.
pub fn resolveInPlace(base: Uri, new_len: usize, aux_buf: *[]u8) ResolveInPlaceError!Uri {
    const location = aux_buf.*[0..new_len];
    const reference = parse(location) catch |parse_err|
        (parseAfterScheme("", location) catch return parse_err);
    aux_buf.* = aux_buf.*[new_len..];
    // `location` is a mutable slice, so the path that points into it can be
    // edited in place.
    const reference_path: []u8 = @constCast(reference.path.percent_encoded);

    // A reference with its own scheme or authority keeps its own authority
    // and path; only a missing scheme is inherited from `base`.
    const has_scheme = reference.scheme.len > 0;
    if (has_scheme or reference.host != null) {
        const host = try validateHostComponent(reference.host);
        return .{
            .scheme = if (has_scheme) reference.scheme else base.scheme,
            .user = reference.user,
            .password = reference.password,
            .host = host,
            .port = reference.port,
            .path = remove_dot_segments(reference_path),
            .query = reference.query,
            .fragment = reference.fragment,
        };
    }

    // Otherwise the authority comes from `base` and the path is resolved
    // relative to the base path.
    var path: Component = base.path;
    var query: ?Component = reference.query;
    if (reference_path.len == 0) {
        query = reference.query orelse base.query;
    } else if (reference_path[0] == '/') {
        path = remove_dot_segments(reference_path);
    } else {
        path = try merge_paths(base.path, reference_path, aux_buf);
    }

    return .{
        .scheme = base.scheme,
        .user = base.user,
        .password = base.password,
        .host = try validateHostComponent(base.host),
        .port = base.port,
        .path = path,
        .query = query,
        .fragment = reference.fragment,
    };
}
448
/// Passes `optional_component` through unchanged after checking that its
/// bytes form a valid host name; `null` is passed through without a check.
fn validateHostComponent(optional_component: ?Component) error{InvalidHostName}!?Component {
    const component = optional_component orelse return null;
    const bytes = switch (component) {
        .raw => |raw| raw,
        // TODO validate decoded name instead
        .percent_encoded => |encoded| encoded,
    };
    HostName.validate(bytes) catch return error.InvalidHostName;
    return component;
}
460
/// In-place implementation of RFC 3986, Section 5.2.4.
///
/// Rewrites `path` so that its front holds the path with `.` and `..`
/// segments removed, and returns that front part.
fn remove_dot_segments(path: []u8) Component {
    // `read` walks the input; `write` is the length of the output so far.
    // The output never grows past the read position.
    var read: usize = 0;
    var write: usize = 0;
    while (read < path.len) {
        const rest = path[read..];
        if (std.mem.startsWith(u8, rest, "./")) {
            read += 2;
        } else if (std.mem.startsWith(u8, rest, "../")) {
            read += 3;
        } else if (std.mem.startsWith(u8, rest, "/./")) {
            read += 2;
        } else if (std.mem.eql(u8, rest, "/.")) {
            // A trailing "/." becomes "/".
            read += 1;
            path[read] = '/';
        } else if (std.mem.startsWith(u8, rest, "/../")) {
            read += 3;
            // Drop the last output segment together with its leading '/'.
            write = std.mem.lastIndexOfScalar(u8, path[0..write], '/') orelse 0;
        } else if (std.mem.eql(u8, rest, "/..")) {
            // A trailing "/.." becomes "/" and drops the last output segment.
            read += 2;
            path[read] = '/';
            write = std.mem.lastIndexOfScalar(u8, path[0..write], '/') orelse 0;
        } else if (std.mem.eql(u8, rest, ".")) {
            read += 1;
        } else if (std.mem.eql(u8, rest, "..")) {
            read += 2;
        } else {
            // Move one segment: the current byte plus everything up to, but
            // not including, the next '/'.
            const segment_end = std.mem.indexOfScalarPos(u8, path, read + 1, '/') orelse path.len;
            const segment_len = segment_end - read;
            std.mem.copyForwards(u8, path[write..][0..segment_len], path[read..segment_end]);
            write += segment_len;
            read = segment_end;
        }
    }
    return .{ .percent_encoded = path[0..write] };
}
503
test remove_dot_segments {
    // The path is edited in place, so it needs a mutable copy.
    var path = "/a/b/c/./../../g".*;
    const cleaned = remove_dot_segments(&path);
    try testing.expectEqualStrings("/a/g", cleaned.percent_encoded);
}
510
/// 5.2.3. Merge Paths
///
/// Builds the merged path in `aux_buf.*`: the base path up to (not including)
/// its last '/', then "/" and `new`, with dot segments removed. On success
/// `aux_buf.*` is advanced past the merged path. When the base path is not
/// empty but contains no '/', `new` itself is cleaned and returned, and
/// `aux_buf.*` is left as it was.
fn merge_paths(base: Component, new: []u8, aux_buf: *[]u8) error{NoSpaceLeft}!Component {
    var scratch: Writer = .fixed(aux_buf.*);
    if (!base.isEmpty()) {
        base.formatPath(&scratch) catch return error.NoSpaceLeft;
        if (std.mem.lastIndexOfScalar(u8, scratch.buffered(), '/')) |last_slash| {
            // Keep only what precedes the last '/' of the base path.
            scratch.end = last_slash;
        } else {
            return remove_dot_segments(new);
        }
    }
    scratch.print("/{s}", .{new}) catch return error.NoSpaceLeft;
    const merged = remove_dot_segments(scratch.buffered());
    const used = merged.percent_encoded.len;
    aux_buf.* = aux_buf.*[used..];
    return merged;
}
523
/// scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
///
/// Returns true when `c` is one of the characters allowed in a scheme.
fn isSchemeChar(c: u8) bool {
    if (std.ascii.isAlphanumeric(c)) return true;
    return c == '+' or c == '-' or c == '.';
}
531
/// sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
///             / "*" / "+" / "," / ";" / "="
///
/// Returns true when `c` is one of the sub-delimiter characters above.
fn isSubLimit(c: u8) bool {
    const sub_delims = "!$&'()*+,;=";
    return std.mem.indexOfScalar(u8, sub_delims, c) != null;
}
540
/// unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
///
/// Returns true when `c` is one of the unreserved characters above.
fn isUnreserved(c: u8) bool {
    if (std.ascii.isAlphanumeric(c)) return true;
    return c == '-' or c == '.' or c == '_' or c == '~';
}
548
/// Returns true when `c` may appear unescaped in the user part:
/// an unreserved or sub-delimiter character.
fn isUserChar(c: u8) bool {
    if (isUnreserved(c)) return true;
    return isSubLimit(c);
}
552
/// Returns true when `c` may appear unescaped in the password part:
/// any user character, or ':'.
fn isPasswordChar(c: u8) bool {
    if (c == ':') return true;
    return isUserChar(c);
}
556
/// Returns true when `c` may appear unescaped in the host part:
/// any password character, or one of the brackets '[' and ']'.
fn isHostChar(c: u8) bool {
    return switch (c) {
        '[', ']' => true,
        else => isPasswordChar(c),
    };
}
560
/// Returns true when `c` may appear unescaped in the path part:
/// any user character, or one of '/', ':' and '@'.
fn isPathChar(c: u8) bool {
    return switch (c) {
        '/', ':', '@' => true,
        else => isUserChar(c),
    };
}
564
/// Returns true when `c` may appear unescaped in the query part:
/// any path character, or '?'.
fn isQueryChar(c: u8) bool {
    if (c == '?') return true;
    return isPathChar(c);
}
568
/// The fragment part accepts the same characters as the query part.
const isFragmentChar = isQueryChar;

/// Bytes that end the authority part of a URI.
const authority_sep = [_]u8{ '/', '?', '#' };
/// Bytes that end the path part of a URI.
const path_sep = [_]u8{ '?', '#' };
573
test "basic" {
    // Scheme, host and path only; no port in the text means a null port.
    const uri = try parse("https://ziglang.org/download");
    try testing.expectEqualStrings("https", uri.scheme);
    try testing.expectEqualStrings("ziglang.org", uri.host.?.percent_encoded);
    try testing.expectEqualStrings("/download", uri.path.percent_encoded);
    try testing.expect(uri.port == null);
}
581
test "with port" {
    // The port is split off the host and parsed as a number.
    const uri = try parse("http://example:1337/");
    try testing.expectEqualStrings("http", uri.scheme);
    try testing.expectEqualStrings("example", uri.host.?.percent_encoded);
    try testing.expectEqualStrings("/", uri.path.percent_encoded);
    try testing.expectEqual(@as(?u16, 1337), uri.port);
}
589
test "should fail gracefully" {
    // An empty authority with no path after it is rejected, not crashed on.
    const result = parse("foobar://");
    try testing.expectError(error.InvalidFormat, result);
}
593
test "file" {
    // Each case: URI text, expected host (null when absent), expected path.
    const Case = struct {
        text: []const u8,
        host: ?[]const u8,
        path: []const u8,
    };
    const cases = [_]Case{
        .{ .text = "file:///", .host = null, .path = "/" },
        .{
            .text = "file:///an/absolute/path/to/something",
            .host = null,
            .path = "/an/absolute/path/to/something",
        },
        .{
            .text = "file://localhost/an/absolute/path/to/another/thing/",
            .host = "localhost",
            .path = "/an/absolute/path/to/another/thing/",
        },
        .{ .text = "file:/an/absolute/path", .host = null, .path = "/an/absolute/path" },
    };
    for (cases) |case| {
        const uri = try parse(case.text);
        try testing.expectEqualStrings("file", uri.scheme);
        if (case.host) |host| {
            try testing.expectEqualStrings(host, uri.host.?.percent_encoded);
        } else {
            try testing.expect(uri.host == null);
        }
        try testing.expectEqualStrings(case.path, uri.path.percent_encoded);
    }
}
615
test "scheme" {
    // Every scheme below must come back unchanged when followed by ":_".
    inline for (.{ "http", "scheme-mee", "a.b.c", "ab+", "X+++", "Y+-." }) |scheme| {
        const uri = try parse(scheme ++ ":_");
        try testing.expectEqualStrings(scheme, uri.scheme);
    }
}
624
test "authority" {
    {
        const uri = try parse("scheme://hostname");
        try testing.expectEqualStrings("hostname", uri.host.?.percent_encoded);
    }
    {
        const uri = try parse("scheme://userinfo@hostname");
        try testing.expectEqualStrings("hostname", uri.host.?.percent_encoded);
        try testing.expectEqualStrings("userinfo", uri.user.?.percent_encoded);
        try testing.expect(uri.password == null);
    }
    {
        // Userinfo without anything after the '@' yields no host.
        const uri = try parse("scheme://userinfo@");
        try testing.expect(uri.host == null);
    }
    {
        const uri = try parse("scheme://user:password@hostname");
        try testing.expectEqualStrings("hostname", uri.host.?.percent_encoded);
        try testing.expectEqualStrings("user", uri.user.?.percent_encoded);
        try testing.expectEqualStrings("password", uri.password.?.percent_encoded);
    }
    {
        const uri = try parse("scheme://hostname:0");
        try testing.expectEqualStrings("hostname", uri.host.?.percent_encoded);
    }
    {
        const uri = try parse("scheme://hostname:1234");
        try testing.expectEqual(@as(u16, 1234), uri.port.?);
    }
    {
        const uri = try parse("scheme://userinfo@hostname:1234");
        try testing.expectEqualStrings("hostname", uri.host.?.percent_encoded);
        try testing.expectEqual(@as(u16, 1234), uri.port.?);
        try testing.expectEqualStrings("userinfo", uri.user.?.percent_encoded);
        try testing.expect(uri.password == null);
    }
    {
        const uri = try parse("scheme://user:password@hostname:1234");
        try testing.expectEqualStrings("hostname", uri.host.?.percent_encoded);
        try testing.expectEqual(@as(u16, 1234), uri.port.?);
        try testing.expectEqualStrings("user", uri.user.?.percent_encoded);
        try testing.expectEqualStrings("password", uri.password.?.percent_encoded);
    }
}
650
test "authority.password" {
    // Each case: URI text and the expected password (null when none).
    // The user is "username" in every case.
    const Case = struct {
        text: []const u8,
        password: ?[]const u8,
    };
    const cases = [_]Case{
        .{ .text = "scheme://username@a", .password = null },
        // An empty password counts as no password.
        .{ .text = "scheme://username:@a", .password = null },
        .{ .text = "scheme://username:password@a", .password = "password" },
        .{ .text = "scheme://username::@a", .password = ":" },
    };
    for (cases) |case| {
        const uri = try parse(case.text);
        try testing.expectEqualStrings("username", uri.user.?.percent_encoded);
        if (case.password) |password| {
            try testing.expectEqualStrings(password, uri.password.?.percent_encoded);
        } else {
            try testing.expect(uri.password == null);
        }
    }
}
664
/// Checks that every entry of `hostlist`, placed after "scheme://", is
/// parsed back as exactly that host.
fn testAuthorityHost(comptime hostlist: anytype) !void {
    inline for (hostlist) |hostname| {
        const uri = try parse("scheme://" ++ hostname);
        const host = uri.host.?;
        try testing.expectEqualStrings(hostname, host.percent_encoded);
    }
}
670
test "authority.dns-names" {
    const dns_names = .{
        "a",
        "a.b",
        "example.com",
        "www.example.com",
        "example.org.",
        "www.example.org.",
        "xn--nw2a.xn--j6w193g", // internationalized URI: 見.香港
        "fe80--1ff-fe23-4567-890as3.ipv6-literal.net",
    };
    try testAuthorityHost(dns_names);
}
683
test "authority.IPv4" {
    const ipv4_hosts = .{
        "127.0.0.1",
        "255.255.255.255",
        "0.0.0.0",
        "8.8.8.8",
        "1.2.3.4",
        "192.168.0.1",
        "10.42.0.0",
    };
    try testAuthorityHost(ipv4_hosts);
}
695
test "authority.IPv6" {
    // Bracketed literals; the colons inside must not be taken for a port separator.
    const ipv6_hosts = .{
        "[2001:db8:0:0:0:0:2:1]",
        "[2001:db8::2:1]",
        "[2001:db8:0000:1:1:1:1:1]",
        "[2001:db8:0:1:1:1:1:1]",
        "[0:0:0:0:0:0:0:0]",
        "[0:0:0:0:0:0:0:1]",
        "[::1]",
        "[::]",
        "[2001:db8:85a3:8d3:1319:8a2e:370:7348]",
        "[fe80::1ff:fe23:4567:890a%25eth2]",
        "[fe80::1ff:fe23:4567:890a]",
        "[fe80::1ff:fe23:4567:890a%253]",
        "[fe80:3::1ff:fe23:4567:890a]",
    };
    try testAuthorityHost(ipv6_hosts);
}
713
test "RFC example 1" {
    const text = "foo://example.com:8042/over/there?name=ferret#nose";
    // The expected components are slices of `text` itself, because the
    // parser returns strings pointing into its input.
    const expected: Uri = .{
        .scheme = text[0..3],
        .user = null,
        .password = null,
        .host = .{ .percent_encoded = text[6..17] },
        .port = 8042,
        .path = .{ .percent_encoded = text[22..33] },
        .query = .{ .percent_encoded = text[34..45] },
        .fragment = .{ .percent_encoded = text[46..50] },
    };
    try testing.expectEqual(expected, try parse(text));
}
727
test "RFC example 2" {
    const text = "urn:example:animal:ferret:nose";
    // Without "//" there is no authority; everything after the scheme is the path.
    const expected: Uri = .{
        .scheme = text[0..3],
        .user = null,
        .password = null,
        .host = null,
        .port = null,
        .path = .{ .percent_encoded = text[4..] },
        .query = null,
        .fragment = null,
    };
    try testing.expectEqual(expected, try parse(text));
}
741
// source:
// https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Examples
test "Examples from wikipedia" {
    // Only checks that each example parses without an error.
    const examples = [_][]const u8{
        "https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top",
        "ldap://[2001:db8::7]/c=GB?objectClass?one",
        "mailto:John.Doe@example.com",
        "news:comp.infosystems.www.servers.unix",
        "tel:+1-816-555-1212",
        "telnet://192.0.2.16:80/",
        "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
        "http://a/b/c/d;p?q",
    };
    for (examples) |example| {
        const uri = try parse(example);
        _ = uri;
    }
}
759
// source:
// https://tools.ietf.org/html/rfc3986#section-5.4.1
test "Examples from RFC3986" {
    // Only checks that each example parses without an error.
    const examples = [_][]const u8{
        "http://a/b/c/g",
        "http://a/b/c/g",
        "http://a/b/c/g/",
        "http://a/g",
        "http://g",
        "http://a/b/c/d;p?y",
        "http://a/b/c/g?y",
        "http://a/b/c/d;p?q#s",
        "http://a/b/c/g#s",
        "http://a/b/c/g?y#s",
        "http://a/b/c/;x",
        "http://a/b/c/g;x",
        "http://a/b/c/g;x?y#s",
        "http://a/b/c/d;p?q",
        "http://a/b/c/",
        "http://a/b/c/",
        "http://a/b/",
        "http://a/b/",
        "http://a/b/g",
        "http://a/",
        "http://a/",
        "http://a/g",
    };
    for (examples) |example| {
        const uri = try parse(example);
        _ = uri;
    }
}
791
test "Special test" {
    // This is for all of you code readers ♥
    const uri = try parse("https://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=youtu.be&t=0");
    _ = uri;
}
796
test "URI percent encoding" {
    // `formatEscaped` leaves only unreserved characters unescaped.
    const component: Component = .{ .raw = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad" };
    const expected = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";
    try testing.expectFmt(expected, "{f}", .{std.fmt.alt(component, .formatEscaped)});
}
807
test "URI percent decoding" {
    // Each case is decoded three ways: through `Component.formatRaw`, into a
    // separate output buffer, and in place.
    {
        const expected = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";
        var input = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad".*;

        try std.testing.expectFmt(expected, "{f}", .{std.fmt.alt(
            @as(Component, .{ .percent_encoded = &input }),
            .formatRaw,
        )});

        var output: [expected.len]u8 = undefined;
        try std.testing.expectEqualStrings(expected, percentDecodeBackwards(&output, &input));

        try std.testing.expectEqualStrings(expected, percentDecodeInPlace(&input));
    }

    {
        // A trailing '%' with nothing after it is not an escape and is kept.
        const expected = "/abc%";
        var input = expected.*;

        try std.testing.expectFmt(expected, "{f}", .{std.fmt.alt(
            @as(Component, .{ .percent_encoded = &input }),
            .formatRaw,
        )});

        var output: [expected.len]u8 = undefined;
        try std.testing.expectEqualStrings(expected, percentDecodeBackwards(&output, &input));

        try std.testing.expectEqualStrings(expected, percentDecodeInPlace(&input));
    }
}
839
test "URI query encoding" {
    const uri = try parse("https://objects.githubusercontent.com/?response-content-type=application%2Foctet-stream");

    // A parsed query is already percent-encoded, so printing path and query
    // must reproduce it unchanged.
    const path_and_query = uri.fmt(.{ .path = true, .query = true });
    try testing.expectFmt(
        "/?response-content-type=application%2Foctet-stream",
        "{f}",
        .{path_and_query},
    );
}
849
test "format" {
    // No host, so no "//" is printed after the scheme.
    const uri: Uri = .{
        .scheme = "file",
        .path = .{ .raw = "/foo/bar/baz" },
    };
    const flags: Format.Flags = .{
        .scheme = true,
        .path = true,
        .query = true,
        .fragment = true,
    };
    try testing.expectFmt("file:/foo/bar/baz", "{f}", .{uri.fmt(flags)});
}
865
test "URI malformed input" {
    // Misplaced brackets in the authority must be reported as an error.
    const malformed = [_][]const u8{
        "http://][",
        "http://]@[",
        "http://lo]s\x85hc@[/8\x10?0Q",
    };
    for (malformed) |text| {
        try testing.expectError(error.InvalidFormat, parse(text));
    }
}