Commit d3ca9d55d9

Ian Johnson <ian@ianjohnson.dev>
2024-03-19 03:12:06
Autodoc: implement Markdown autolinks
Closes #19265. This commit implements support for Markdown autolinks delimited by angle brackets. The precise syntax accepted is documented in the doc comment of `markdown.zig`.
1 parent 13a9d94
Changed files (4)
lib/docs/wasm/markdown/Document.zig
@@ -51,6 +51,8 @@ pub const Node = struct {
         // Inlines
         /// Data is `link`.
         link,
+        /// Data is `text`.
+        autolink,
         /// Data is `link`.
         image,
         /// Data is `container`.
lib/docs/wasm/markdown/Parser.zig
@@ -985,6 +985,7 @@ const InlineParser = struct {
                     ip.pos += 1;
                 },
                 ']' => try ip.parseLink(),
+                '<' => try ip.parseAutolink(),
                 '*', '_' => try ip.parseEmphasis(),
                 '`' => try ip.parseCodeSpan(),
                 else => {},
@@ -1076,6 +1077,52 @@ const InlineParser = struct {
         return @enumFromInt(string_top);
     }
 
+    /// Parses an autolink, starting at the opening `<`. On success, `ip.pos` is
+    /// left at the closing `>`; otherwise it is restored to the opening `<`.
+    fn parseAutolink(ip: *InlineParser) !void {
+        const start = ip.pos; // Index of the opening `<`; restored on failure.
+        ip.pos += 1; // Step past the opening `<`.
+        var state: enum {
+            start, // Expecting the first character of the scheme.
+            scheme, // Inside the scheme, before the `:`.
+            target, // After the `:`, scanning for the closing `>`.
+        } = .start;
+        while (ip.pos < ip.content.len) : (ip.pos += 1) {
+            switch (state) {
+                .start => switch (ip.content[ip.pos]) {
+                    'A'...'Z', 'a'...'z' => state = .scheme,
+                    else => break, // The scheme must begin with an ASCII letter.
+                },
+                .scheme => switch (ip.content[ip.pos]) {
+                    'A'...'Z', 'a'...'z', '0'...'9', '+', '.', '-' => {},
+                    ':' => state = .target, // The `:` ends the scheme.
+                    else => break, // Not a valid scheme character.
+                },
+                .target => switch (ip.content[ip.pos]) {
+                    '<', ' ', '\t', '\n' => break, // Not allowed in autolinks
+                    '>' => {
+                        // Target excludes `<` and `>`; backslash escapes are not recognized.
+                        const target = try ip.parent.addString(ip.content[start + 1 .. ip.pos]);
+                        const node = try ip.parent.addNode(.{
+                            .tag = .autolink,
+                            .data = .{ .text = .{
+                                .content = target,
+                            } },
+                        });
+                        try ip.completed_inlines.append(ip.parent.allocator, .{
+                            .node = node,
+                            .start = start,
+                            .len = ip.pos - start + 1, // Span includes both `<` and `>`.
+                        });
+                        return;
+                    },
+                    else => {}, // Any other byte is accepted as part of the target.
+                },
+            }
+        }
+        ip.pos = start; // Loop ended without a closing `>`: rewind to the opening `<`.
+    }
+
     /// Parses emphasis, starting at the beginning of a run of `*` or `_`
     /// characters. `ip.pos` is left at the last character in the run after
     /// parsing.
lib/docs/wasm/markdown/renderer.zig
@@ -140,6 +140,10 @@ pub fn Renderer(comptime Writer: type, comptime Context: type) type {
                     }
                     try writer.writeAll("</a>");
                 },
+                .autolink => {
+                    const target = doc.string(data.text.content);
+                    try writer.print("<a href=\"{0}\">{0}</a>", .{fmtHtml(target)});
+                },
                 .image => {
                     const target = doc.string(data.link.target);
                     try writer.print("<img src=\"{}\" alt=\"", .{fmtHtml(target)});
@@ -215,7 +219,7 @@ pub fn renderInlineNodeText(
                 try renderInlineNodeText(doc, child, writer);
             }
         },
-        .code_span, .text => {
+        .autolink, .code_span, .text => {
             const content = doc.string(data.text.content);
             try writer.print("{}", .{fmtHtml(content)});
         },
lib/docs/wasm/markdown.zig
@@ -75,6 +75,12 @@
 //!   content. `target` may contain `\`-escaped characters and balanced
 //!   parentheses.
 //!
+//! - **Autolink** - an abbreviated link, of the format `<target>`, where
+//!   `target` serves as both the link target and text. `target` may not
+//!   contain spaces, tabs, newlines, or `<`, and any `\` in it is interpreted
+//!   literally (not as an escape). `target` is expected to be an absolute
+//!   URI: an autolink will not be recognized unless `target` starts with a
+//!   URI scheme followed by a `:`.
+//!
 //! - **Image** - a link directly preceded by a `!`. The link text is
 //!   interpreted as the alt text of the image.
 //!
@@ -710,6 +716,30 @@ test "links" {
     );
 }
 
+test "autolinks" { // Recognized autolinks render as `<a>` elements; any other `<` or `>` is HTML-escaped.
+    try testRender(
+        \\<https://example.com>
+        \\**This is important: <https://example.com/strong>**
+        \\<https://example.com?query=abc.123#page(parens)>
+        \\<placeholder>
+        \\<data:>
+        \\1 < 2
+        \\4 > 3
+        \\Unclosed: <
+        \\
+    ,
+        \\<p><a href="https://example.com">https://example.com</a>
+        \\<strong>This is important: <a href="https://example.com/strong">https://example.com/strong</a></strong>
+        \\<a href="https://example.com?query=abc.123#page(parens)">https://example.com?query=abc.123#page(parens)</a>
+        \\&lt;placeholder&gt;
+        \\<a href="data:">data:</a>
+        \\1 &lt; 2
+        \\4 &gt; 3
+        \\Unclosed: &lt;</p>
+        \\
+    );
+}
+
 test "images" {
     try testRender(
         \\![Alt text](https://example.com/image.png)