master
   1//! A Markdown parser producing `Document`s.
   2//!
   3//! The parser operates at two levels: at the outer level, the parser accepts
   4//! the content of an input document line by line and begins building the _block
   5//! structure_ of the document. This creates a stack of currently open blocks.
   6//!
   7//! When the parser detects the end of a block, it closes the block, popping it
   8//! from the open block stack and completing any additional parsing of the
   9//! block's content. For blocks which contain parseable inline content, this
  10//! invokes the inner level of the parser, handling the _inline structure_ of
  11//! the block.
  12//!
  13//! Inline parsing scans through the collected inline content of a block. When
  14//! it encounters a character that could indicate the beginning of an inline, it
  15//! either handles the inline right away (if possible) or adds it to a pending
  16//! inlines stack. When an inline is completed, it is added to a list of
  17//! completed inlines, which (along with any surrounding text nodes) will become
  18//! the children of the parent inline or the block whose inline content is being
  19//! parsed.
  20
  21const std = @import("std");
  22const mem = std.mem;
  23const assert = std.debug.assert;
  24const isWhitespace = std.ascii.isWhitespace;
  25const Allocator = mem.Allocator;
  26const expectEqual = std.testing.expectEqual;
  27const Document = @import("Document.zig");
  28const Node = Document.Node;
  29const ExtraIndex = Document.ExtraIndex;
  30const ExtraData = Document.ExtraData;
  31const StringIndex = Document.StringIndex;
  32const ArrayList = std.ArrayList;
  33
  34nodes: Node.List = .{},
  35extra: ArrayList(u32) = .empty,
  36scratch_extra: ArrayList(u32) = .empty,
  37string_bytes: ArrayList(u8) = .empty,
  38scratch_string: ArrayList(u8) = .empty,
  39pending_blocks: ArrayList(Block) = .empty,
  40allocator: Allocator,
  41
  42const Parser = @This();
  43
  44/// An arbitrary limit on the maximum number of columns in a table so that
  45/// table-related metadata maintained by the parser does not require dynamic
  46/// memory allocation.
  47const max_table_columns = 128;
  48
  49/// A block element which is still receiving children.
  50const Block = struct {
  51    tag: Tag,
  52    data: Data,
  53    extra_start: usize,
  54    string_start: usize,
  55
  56    const Tag = enum {
  57        /// Data is `list`.
  58        list,
  59        /// Data is `list_item`.
  60        list_item,
  61        /// Data is `table`.
  62        table,
  63        /// Data is `none`.
  64        table_row,
  65        /// Data is `heading`.
  66        heading,
  67        /// Data is `code_block`.
  68        code_block,
  69        /// Data is `none`.
  70        blockquote,
  71        /// Data is `none`.
  72        paragraph,
  73        /// Data is `none`.
  74        thematic_break,
  75    };
  76
  77    const Data = union {
  78        none: void,
  79        list: struct {
  80            marker: ListMarker,
  81            /// Between 0 and 999,999,999, inclusive.
  82            start: u30,
  83            tight: bool,
  84            last_line_blank: bool = false,
  85        },
  86        list_item: struct {
  87            continuation_indent: usize,
  88        },
  89        table: struct {
  90            column_alignments_buffer: [max_table_columns]Node.TableCellAlignment,
  91            column_alignments_len: usize,
  92        },
  93        heading: struct {
  94            /// Between 1 and 6, inclusive.
  95            level: u3,
  96        },
  97        code_block: struct {
  98            tag: StringIndex,
  99            fence_len: usize,
 100            indent: usize,
 101        },
 102
 103        const ListMarker = enum {
 104            @"-",
 105            @"*",
 106            @"+",
 107            number_dot,
 108            number_paren,
 109        };
 110    };
 111
 112    const ContentType = enum {
 113        blocks,
 114        inlines,
 115        raw_inlines,
 116        nothing,
 117    };
 118
 119    fn canAccept(b: Block) ContentType {
 120        return switch (b.tag) {
 121            .list,
 122            .list_item,
 123            .table,
 124            .blockquote,
 125            => .blocks,
 126
 127            .heading,
 128            .paragraph,
 129            => .inlines,
 130
 131            .code_block,
 132            => .raw_inlines,
 133
 134            .table_row,
 135            .thematic_break,
 136            => .nothing,
 137        };
 138    }
 139
 140    /// Attempts to continue `b` using the contents of `line`. If successful,
 141    /// returns the remaining portion of `line` to be considered part of `b`
 142    /// (e.g. for a blockquote, this would be everything except the leading
 143    /// `>`). If unsuccessful, returns null.
 144    fn match(b: Block, line: []const u8) ?[]const u8 {
 145        const unindented = mem.trimStart(u8, line, " \t");
 146        const indent = line.len - unindented.len;
 147        return switch (b.tag) {
 148            .list => line,
 149            .list_item => if (indent >= b.data.list_item.continuation_indent)
 150                line[b.data.list_item.continuation_indent..]
 151            else if (unindented.len == 0)
 152                // Blank lines should not close list items, since there may be
 153                // more indented contents to follow after the blank line.
 154                ""
 155            else
 156                null,
 157            .table => if (unindented.len > 0) line else null,
 158            .table_row => null,
 159            .heading => null,
 160            .code_block => code_block: {
 161                const trimmed = mem.trimEnd(u8, unindented, " \t");
 162                if (mem.indexOfNone(u8, trimmed, "`") != null or trimmed.len != b.data.code_block.fence_len) {
 163                    const effective_indent = @min(indent, b.data.code_block.indent);
 164                    break :code_block line[effective_indent..];
 165                } else {
 166                    break :code_block null;
 167                }
 168            },
 169            .blockquote => if (mem.startsWith(u8, unindented, ">"))
 170                unindented[1..]
 171            else
 172                null,
 173            .paragraph => if (unindented.len > 0) line else null,
 174            .thematic_break => null,
 175        };
 176    }
 177};
 178
 179pub fn init(allocator: Allocator) Allocator.Error!Parser {
 180    var p: Parser = .{ .allocator = allocator };
 181    try p.nodes.append(allocator, .{
 182        .tag = .root,
 183        .data = undefined,
 184    });
 185    try p.string_bytes.append(allocator, 0);
 186    return p;
 187}
 188
 189pub fn deinit(p: *Parser) void {
 190    p.nodes.deinit(p.allocator);
 191    p.extra.deinit(p.allocator);
 192    p.scratch_extra.deinit(p.allocator);
 193    p.string_bytes.deinit(p.allocator);
 194    p.scratch_string.deinit(p.allocator);
 195    p.pending_blocks.deinit(p.allocator);
 196    p.* = undefined;
 197}
 198
 199/// Accepts a single line of content. `line` should not have a trailing line
 200/// ending character.
 201pub fn feedLine(p: *Parser, line: []const u8) Allocator.Error!void {
 202    var rest_line = line;
 203    const first_unmatched = for (p.pending_blocks.items, 0..) |b, i| {
 204        if (b.match(rest_line)) |rest| {
 205            rest_line = rest;
 206        } else {
 207            break i;
 208        }
 209    } else p.pending_blocks.items.len;
 210
 211    const in_code_block = p.pending_blocks.items.len > 0 and
 212        p.pending_blocks.getLast().tag == .code_block;
 213    const code_block_end = in_code_block and
 214        first_unmatched + 1 == p.pending_blocks.items.len;
 215    // New blocks cannot be started if we are actively inside a code block or
 216    // are just closing one (to avoid interpreting the closing ``` as a new code
 217    // block start).
 218    var maybe_block_start = if (!in_code_block or first_unmatched + 2 <= p.pending_blocks.items.len)
 219        try p.startBlock(rest_line)
 220    else
 221        null;
 222
 223    // This is a lazy continuation line if there are no new blocks to open and
 224    // the last open block is a paragraph.
 225    if (maybe_block_start == null and
 226        !isBlank(rest_line) and
 227        p.pending_blocks.items.len > 0 and
 228        p.pending_blocks.getLast().tag == .paragraph)
 229    {
 230        try p.addScratchStringLine(mem.trimStart(u8, rest_line, " \t"));
 231        return;
 232    }
 233
 234    // If a new block needs to be started, any paragraph needs to be closed,
 235    // even though this isn't detected as part of the closing condition for
 236    // paragraphs.
 237    if (maybe_block_start != null and
 238        p.pending_blocks.items.len > 0 and
 239        p.pending_blocks.getLast().tag == .paragraph)
 240    {
 241        try p.closeLastBlock();
 242    }
 243
 244    while (p.pending_blocks.items.len > first_unmatched) {
 245        try p.closeLastBlock();
 246    }
 247
 248    while (maybe_block_start) |block_start| : (maybe_block_start = try p.startBlock(rest_line)) {
 249        try p.appendBlockStart(block_start);
 250        // There may be more blocks to start within the same line.
 251        rest_line = block_start.rest;
 252        // Headings may only contain inline content.
 253        if (block_start.tag == .heading) break;
 254        // An opening code fence does not contain any additional block or inline
 255        // content to process.
 256        if (block_start.tag == .code_block) return;
 257    }
 258
 259    // Do not append the end of a code block (```) as textual content.
 260    if (code_block_end) return;
 261
 262    const can_accept = if (p.pending_blocks.getLastOrNull()) |last_pending_block|
 263        last_pending_block.canAccept()
 264    else
 265        .blocks;
 266    const rest_line_trimmed = mem.trimStart(u8, rest_line, " \t");
 267    switch (can_accept) {
 268        .blocks => {
 269            // If we're inside a list item and the rest of the line is blank, it
 270            // means that any subsequent child of the list item (or subsequent
 271            // item in the list) will cause the containing list to be considered
 272            // loose. However, we can't immediately declare that the list is
 273            // loose, since we might just be looking at a blank line after the
 274            // end of the last item in the list. The final determination will be
 275            // made when appending the next child of the list or list item.
 276            const maybe_containing_list_index = if (p.pending_blocks.items.len > 0 and p.pending_blocks.getLast().tag == .list_item)
 277                p.pending_blocks.items.len - 2
 278            else
 279                null;
 280
 281            if (rest_line_trimmed.len > 0) {
 282                try p.appendBlockStart(.{
 283                    .tag = .paragraph,
 284                    .data = .{ .none = {} },
 285                    .rest = undefined,
 286                });
 287                try p.addScratchStringLine(rest_line_trimmed);
 288            }
 289
 290            if (maybe_containing_list_index) |containing_list_index| {
 291                p.pending_blocks.items[containing_list_index].data.list.last_line_blank = rest_line_trimmed.len == 0;
 292            }
 293        },
 294        .inlines => try p.addScratchStringLine(rest_line_trimmed),
 295        .raw_inlines => try p.addScratchStringLine(rest_line),
 296        .nothing => {},
 297    }
 298}
 299
 300/// Completes processing of the input and returns the parsed document.
 301pub fn endInput(p: *Parser) Allocator.Error!Document {
 302    while (p.pending_blocks.items.len > 0) {
 303        try p.closeLastBlock();
 304    }
 305    // There should be no inline content pending after closing the last open
 306    // block.
 307    assert(p.scratch_string.items.len == 0);
 308
 309    const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items));
 310    p.nodes.items(.data)[0] = .{ .container = .{ .children = children } };
 311    p.scratch_string.items.len = 0;
 312    p.scratch_extra.items.len = 0;
 313
 314    var nodes = p.nodes.toOwnedSlice();
 315    errdefer nodes.deinit(p.allocator);
 316    const extra = try p.extra.toOwnedSlice(p.allocator);
 317    errdefer p.allocator.free(extra);
 318    const string_bytes = try p.string_bytes.toOwnedSlice(p.allocator);
 319    errdefer p.allocator.free(string_bytes);
 320
 321    return .{
 322        .nodes = nodes,
 323        .extra = extra,
 324        .string_bytes = string_bytes,
 325    };
 326}
 327
 328/// Data describing the start of a new block element.
 329const BlockStart = struct {
 330    tag: Tag,
 331    data: Data,
 332    rest: []const u8,
 333
 334    const Tag = enum {
 335        /// Data is `list_item`.
 336        list_item,
 337        /// Data is `table_row`.
 338        table_row,
 339        /// Data is `heading`.
 340        heading,
 341        /// Data is `code_block`.
 342        code_block,
 343        /// Data is `none`.
 344        blockquote,
 345        /// Data is `none`.
 346        paragraph,
 347        /// Data is `none`.
 348        thematic_break,
 349    };
 350
 351    const Data = union {
 352        none: void,
 353        list_item: struct {
 354            marker: Block.Data.ListMarker,
 355            number: u30,
 356            continuation_indent: usize,
 357        },
 358        table_row: struct {
 359            cells_buffer: [max_table_columns][]const u8,
 360            cells_len: usize,
 361        },
 362        heading: struct {
 363            /// Between 1 and 6, inclusive.
 364            level: u3,
 365        },
 366        code_block: struct {
 367            tag: StringIndex,
 368            fence_len: usize,
 369            indent: usize,
 370        },
 371    };
 372};
 373
 374fn appendBlockStart(p: *Parser, block_start: BlockStart) !void {
 375    if (p.pending_blocks.getLastOrNull()) |last_pending_block| {
 376        // Close the last block if it is a list and the new block is not a list item
 377        // or not of the same marker type.
 378        const should_close_list = last_pending_block.tag == .list and
 379            (block_start.tag != .list_item or
 380                block_start.data.list_item.marker != last_pending_block.data.list.marker);
 381        // The last block should also be closed if the new block is not a table
 382        // row, which is the only allowed child of a table.
 383        const should_close_table = last_pending_block.tag == .table and
 384            block_start.tag != .table_row;
 385        if (should_close_list or should_close_table) {
 386            try p.closeLastBlock();
 387        }
 388    }
 389
 390    if (p.pending_blocks.getLastOrNull()) |last_pending_block| {
 391        // If the last block is a list or list item, check for tightness based
 392        // on the last line.
 393        const maybe_containing_list = switch (last_pending_block.tag) {
 394            .list => &p.pending_blocks.items[p.pending_blocks.items.len - 1],
 395            .list_item => &p.pending_blocks.items[p.pending_blocks.items.len - 2],
 396            else => null,
 397        };
 398        if (maybe_containing_list) |containing_list| {
 399            if (containing_list.data.list.last_line_blank) {
 400                containing_list.data.list.tight = false;
 401            }
 402        }
 403    }
 404
 405    // Start a new list if the new block is a list item and there is no
 406    // containing list yet.
 407    if (block_start.tag == .list_item and
 408        (p.pending_blocks.items.len == 0 or p.pending_blocks.getLast().tag != .list))
 409    {
 410        try p.pending_blocks.append(p.allocator, .{
 411            .tag = .list,
 412            .data = .{ .list = .{
 413                .marker = block_start.data.list_item.marker,
 414                .start = block_start.data.list_item.number,
 415                .tight = true,
 416            } },
 417            .string_start = p.scratch_string.items.len,
 418            .extra_start = p.scratch_extra.items.len,
 419        });
 420    }
 421
 422    if (block_start.tag == .table_row) {
 423        // Likewise, table rows start a table implicitly.
 424        if (p.pending_blocks.items.len == 0 or p.pending_blocks.getLast().tag != .table) {
 425            try p.pending_blocks.append(p.allocator, .{
 426                .tag = .table,
 427                .data = .{ .table = .{
 428                    .column_alignments_buffer = undefined,
 429                    .column_alignments_len = 0,
 430                } },
 431                .string_start = p.scratch_string.items.len,
 432                .extra_start = p.scratch_extra.items.len,
 433            });
 434        }
 435
 436        const current_row = p.scratch_extra.items.len - p.pending_blocks.getLast().extra_start;
 437        if (current_row <= 1) {
 438            var buffer: [max_table_columns]Node.TableCellAlignment = undefined;
 439            const table_row = &block_start.data.table_row;
 440            if (parseTableHeaderDelimiter(table_row.cells_buffer[0..table_row.cells_len], &buffer)) |alignments| {
 441                const table = &p.pending_blocks.items[p.pending_blocks.items.len - 1].data.table;
 442                @memcpy(table.column_alignments_buffer[0..alignments.len], alignments);
 443                table.column_alignments_len = alignments.len;
 444                if (current_row == 1) {
 445                    // We need to go back and mark the header row and its column
 446                    // alignments.
 447                    const datas = p.nodes.items(.data);
 448                    const header_data = datas[p.scratch_extra.getLast()];
 449                    for (p.extraChildren(header_data.container.children), 0..) |header_cell, i| {
 450                        const alignment = if (i < alignments.len) alignments[i] else .unset;
 451                        const cell_data = &datas[@intFromEnum(header_cell)].table_cell;
 452                        cell_data.info.alignment = alignment;
 453                        cell_data.info.header = true;
 454                    }
 455                }
 456                return;
 457            }
 458        }
 459    }
 460
 461    const tag: Block.Tag, const data: Block.Data = switch (block_start.tag) {
 462        .list_item => .{ .list_item, .{ .list_item = .{
 463            .continuation_indent = block_start.data.list_item.continuation_indent,
 464        } } },
 465        .table_row => .{ .table_row, .{ .none = {} } },
 466        .heading => .{ .heading, .{ .heading = .{
 467            .level = block_start.data.heading.level,
 468        } } },
 469        .code_block => .{ .code_block, .{ .code_block = .{
 470            .tag = block_start.data.code_block.tag,
 471            .fence_len = block_start.data.code_block.fence_len,
 472            .indent = block_start.data.code_block.indent,
 473        } } },
 474        .blockquote => .{ .blockquote, .{ .none = {} } },
 475        .paragraph => .{ .paragraph, .{ .none = {} } },
 476        .thematic_break => .{ .thematic_break, .{ .none = {} } },
 477    };
 478
 479    try p.pending_blocks.append(p.allocator, .{
 480        .tag = tag,
 481        .data = data,
 482        .string_start = p.scratch_string.items.len,
 483        .extra_start = p.scratch_extra.items.len,
 484    });
 485
 486    if (tag == .table_row) {
 487        // Table rows are unique, since we already have all the children
 488        // available in the BlockStart. We can immediately parse and append
 489        // these children now.
 490        const containing_table = p.pending_blocks.items[p.pending_blocks.items.len - 2];
 491        const table = &containing_table.data.table;
 492        const column_alignments = table.column_alignments_buffer[0..table.column_alignments_len];
 493        const table_row = &block_start.data.table_row;
 494        for (table_row.cells_buffer[0..table_row.cells_len], 0..) |cell_content, i| {
 495            const cell_children = try p.parseInlines(cell_content);
 496            const alignment = if (i < column_alignments.len) column_alignments[i] else .unset;
 497            const cell = try p.addNode(.{
 498                .tag = .table_cell,
 499                .data = .{ .table_cell = .{
 500                    .info = .{
 501                        .alignment = alignment,
 502                        .header = false,
 503                    },
 504                    .children = cell_children,
 505                } },
 506            });
 507            try p.addScratchExtraNode(cell);
 508        }
 509    }
 510}
 511
 512fn startBlock(p: *Parser, line: []const u8) !?BlockStart {
 513    const unindented = mem.trimStart(u8, line, " \t");
 514    const indent = line.len - unindented.len;
 515    if (isThematicBreak(line)) {
 516        // Thematic breaks take precedence over list items.
 517        return .{
 518            .tag = .thematic_break,
 519            .data = .{ .none = {} },
 520            .rest = "",
 521        };
 522    } else if (startListItem(unindented)) |list_item| {
 523        return .{
 524            .tag = .list_item,
 525            .data = .{ .list_item = .{
 526                .marker = list_item.marker,
 527                .number = list_item.number,
 528                .continuation_indent = indent + list_item.marker_len,
 529            } },
 530            .rest = list_item.rest,
 531        };
 532    } else if (startTableRow(unindented)) |table_row| {
 533        return .{
 534            .tag = .table_row,
 535            .data = .{ .table_row = .{
 536                .cells_buffer = table_row.cells_buffer,
 537                .cells_len = table_row.cells_len,
 538            } },
 539            .rest = "",
 540        };
 541    } else if (startHeading(unindented)) |heading| {
 542        return .{
 543            .tag = .heading,
 544            .data = .{ .heading = .{
 545                .level = heading.level,
 546            } },
 547            .rest = heading.rest,
 548        };
 549    } else if (try p.startCodeBlock(unindented)) |code_block| {
 550        return .{
 551            .tag = .code_block,
 552            .data = .{ .code_block = .{
 553                .tag = code_block.tag,
 554                .fence_len = code_block.fence_len,
 555                .indent = indent,
 556            } },
 557            .rest = "",
 558        };
 559    } else if (startBlockquote(unindented)) |rest| {
 560        return .{
 561            .tag = .blockquote,
 562            .data = .{ .none = {} },
 563            .rest = rest,
 564        };
 565    } else {
 566        return null;
 567    }
 568}
 569
 570const ListItemStart = struct {
 571    marker: Block.Data.ListMarker,
 572    number: u30,
 573    marker_len: usize,
 574    rest: []const u8,
 575};
 576
 577fn startListItem(unindented_line: []const u8) ?ListItemStart {
 578    if (mem.startsWith(u8, unindented_line, "- ")) {
 579        return .{
 580            .marker = .@"-",
 581            .number = undefined,
 582            .marker_len = 2,
 583            .rest = unindented_line[2..],
 584        };
 585    } else if (mem.startsWith(u8, unindented_line, "* ")) {
 586        return .{
 587            .marker = .@"*",
 588            .number = undefined,
 589            .marker_len = 2,
 590            .rest = unindented_line[2..],
 591        };
 592    } else if (mem.startsWith(u8, unindented_line, "+ ")) {
 593        return .{
 594            .marker = .@"+",
 595            .number = undefined,
 596            .marker_len = 2,
 597            .rest = unindented_line[2..],
 598        };
 599    }
 600
 601    const number_end = mem.indexOfNone(u8, unindented_line, "0123456789") orelse return null;
 602    const after_number = unindented_line[number_end..];
 603    const marker: Block.Data.ListMarker = if (mem.startsWith(u8, after_number, ". "))
 604        .number_dot
 605    else if (mem.startsWith(u8, after_number, ") "))
 606        .number_paren
 607    else
 608        return null;
 609    const number = std.fmt.parseInt(u30, unindented_line[0..number_end], 10) catch return null;
 610    if (number > 999_999_999) return null;
 611    return .{
 612        .marker = marker,
 613        .number = number,
 614        .marker_len = number_end + 2,
 615        .rest = after_number[2..],
 616    };
 617}
 618
 619const TableRowStart = struct {
 620    cells_buffer: [max_table_columns][]const u8,
 621    cells_len: usize,
 622};
 623
 624fn startTableRow(unindented_line: []const u8) ?TableRowStart {
 625    if (unindented_line.len < 2 or
 626        !mem.startsWith(u8, unindented_line, "|") or
 627        mem.endsWith(u8, unindented_line, "\\|") or
 628        !mem.endsWith(u8, unindented_line, "|")) return null;
 629
 630    var cells_buffer: [max_table_columns][]const u8 = undefined;
 631    var cells: ArrayList([]const u8) = .initBuffer(&cells_buffer);
 632    const table_row_content = unindented_line[1 .. unindented_line.len - 1];
 633    var cell_start: usize = 0;
 634    var i: usize = 0;
 635    while (i < table_row_content.len) : (i += 1) {
 636        switch (table_row_content[i]) {
 637            '\\' => i += 1,
 638            '|' => {
 639                cells.appendBounded(table_row_content[cell_start..i]) catch return null;
 640                cell_start = i + 1;
 641            },
 642            '`' => {
 643                // Ignoring pipes in code spans allows table cells to contain
 644                // code using ||, for example.
 645                const open_start = i;
 646                i = mem.indexOfNonePos(u8, table_row_content, i, "`") orelse return null;
 647                const open_len = i - open_start;
 648                while (mem.indexOfScalarPos(u8, table_row_content, i, '`')) |close_start| {
 649                    i = mem.indexOfNonePos(u8, table_row_content, close_start, "`") orelse return null;
 650                    const close_len = i - close_start;
 651                    if (close_len == open_len) break;
 652                } else return null;
 653            },
 654            else => {},
 655        }
 656    }
 657    cells.appendBounded(table_row_content[cell_start..]) catch return null;
 658
 659    return .{ .cells_buffer = cells_buffer, .cells_len = cells.items.len };
 660}
 661
 662fn parseTableHeaderDelimiter(
 663    row_cells: []const []const u8,
 664    buffer: []Node.TableCellAlignment,
 665) ?[]Node.TableCellAlignment {
 666    var alignments: ArrayList(Node.TableCellAlignment) = .initBuffer(buffer);
 667    for (row_cells) |content| {
 668        const alignment = parseTableHeaderDelimiterCell(content) orelse return null;
 669        alignments.appendAssumeCapacity(alignment);
 670    }
 671    return alignments.items;
 672}
 673
 674fn parseTableHeaderDelimiterCell(content: []const u8) ?Node.TableCellAlignment {
 675    var state: enum {
 676        before_rule,
 677        after_left_anchor,
 678        in_rule,
 679        after_right_anchor,
 680        after_rule,
 681    } = .before_rule;
 682    var left_anchor = false;
 683    var right_anchor = false;
 684    for (content) |c| {
 685        switch (state) {
 686            .before_rule => switch (c) {
 687                ' ' => {},
 688                ':' => {
 689                    left_anchor = true;
 690                    state = .after_left_anchor;
 691                },
 692                '-' => state = .in_rule,
 693                else => return null,
 694            },
 695            .after_left_anchor => switch (c) {
 696                '-' => state = .in_rule,
 697                else => return null,
 698            },
 699            .in_rule => switch (c) {
 700                '-' => {},
 701                ':' => {
 702                    right_anchor = true;
 703                    state = .after_right_anchor;
 704                },
 705                ' ' => state = .after_rule,
 706                else => return null,
 707            },
 708            .after_right_anchor => switch (c) {
 709                ' ' => state = .after_rule,
 710                else => return null,
 711            },
 712            .after_rule => switch (c) {
 713                ' ' => {},
 714                else => return null,
 715            },
 716        }
 717    }
 718
 719    switch (state) {
 720        .before_rule,
 721        .after_left_anchor,
 722        => return null,
 723
 724        .in_rule,
 725        .after_right_anchor,
 726        .after_rule,
 727        => {},
 728    }
 729
 730    return if (left_anchor and right_anchor)
 731        .center
 732    else if (left_anchor)
 733        .left
 734    else if (right_anchor)
 735        .right
 736    else
 737        .unset;
 738}
 739
 740test parseTableHeaderDelimiterCell {
 741    try expectEqual(null, parseTableHeaderDelimiterCell(""));
 742    try expectEqual(null, parseTableHeaderDelimiterCell("   "));
 743    try expectEqual(.unset, parseTableHeaderDelimiterCell("-"));
 744    try expectEqual(.unset, parseTableHeaderDelimiterCell(" - "));
 745    try expectEqual(.unset, parseTableHeaderDelimiterCell("----"));
 746    try expectEqual(.unset, parseTableHeaderDelimiterCell(" ---- "));
 747    try expectEqual(null, parseTableHeaderDelimiterCell(":"));
 748    try expectEqual(null, parseTableHeaderDelimiterCell("::"));
 749    try expectEqual(.left, parseTableHeaderDelimiterCell(":-"));
 750    try expectEqual(.left, parseTableHeaderDelimiterCell(" :----"));
 751    try expectEqual(.center, parseTableHeaderDelimiterCell(":-:"));
 752    try expectEqual(.center, parseTableHeaderDelimiterCell(":----:"));
 753    try expectEqual(.center, parseTableHeaderDelimiterCell("   :----:   "));
 754    try expectEqual(.right, parseTableHeaderDelimiterCell("-:"));
 755    try expectEqual(.right, parseTableHeaderDelimiterCell("----:"));
 756    try expectEqual(.right, parseTableHeaderDelimiterCell("  ----:  "));
 757}
 758
 759const HeadingStart = struct {
 760    level: u3,
 761    rest: []const u8,
 762};
 763
 764fn startHeading(unindented_line: []const u8) ?HeadingStart {
 765    var level: u3 = 0;
 766    return for (unindented_line, 0..) |c, i| {
 767        switch (c) {
 768            '#' => {
 769                if (level == 6) break null;
 770                level += 1;
 771            },
 772            ' ' => {
 773                // We must have seen at least one # by this point, since
 774                // unindented_line has no leading spaces.
 775                assert(level > 0);
 776                break .{
 777                    .level = level,
 778                    .rest = unindented_line[i + 1 ..],
 779                };
 780            },
 781            else => break null,
 782        }
 783    } else null;
 784}
 785
 786const CodeBlockStart = struct {
 787    tag: StringIndex,
 788    fence_len: usize,
 789};
 790
 791fn startCodeBlock(p: *Parser, unindented_line: []const u8) !?CodeBlockStart {
 792    var fence_len: usize = 0;
 793    const tag_bytes = for (unindented_line, 0..) |c, i| {
 794        switch (c) {
 795            '`' => fence_len += 1,
 796            else => break unindented_line[i..],
 797        }
 798    } else "";
 799    // Code block tags may not contain backticks, since that would create
 800    // potential confusion with inline code spans.
 801    if (fence_len < 3 or mem.indexOfScalar(u8, tag_bytes, '`') != null) return null;
 802    return .{
 803        .tag = try p.addString(mem.trim(u8, tag_bytes, " ")),
 804        .fence_len = fence_len,
 805    };
 806}
 807
 808fn startBlockquote(unindented_line: []const u8) ?[]const u8 {
 809    return if (mem.startsWith(u8, unindented_line, ">"))
 810        unindented_line[1..]
 811    else
 812        null;
 813}
 814
 815fn isThematicBreak(line: []const u8) bool {
 816    var char: ?u8 = null;
 817    var count: usize = 0;
 818    for (line) |c| {
 819        switch (c) {
 820            ' ' => {},
 821            '-', '_', '*' => {
 822                if (char != null and c != char.?) return false;
 823                char = c;
 824                count += 1;
 825            },
 826            else => return false,
 827        }
 828    }
 829    return count >= 3;
 830}
 831
 832fn closeLastBlock(p: *Parser) !void {
 833    const b = p.pending_blocks.pop().?;
 834    const node = switch (b.tag) {
 835        .list => list: {
 836            assert(b.string_start == p.scratch_string.items.len);
 837
 838            // Although tightness is parsed as a property of the list, it is
 839            // stored at the list item level to make it possible to render each
 840            // node without any context from its parents.
 841            const list_items = p.scratch_extra.items[b.extra_start..];
 842            const node_datas = p.nodes.items(.data);
 843            if (!b.data.list.tight) {
 844                for (list_items) |list_item| {
 845                    node_datas[list_item].list_item.tight = false;
 846                }
 847            }
 848
 849            const children = try p.addExtraChildren(@ptrCast(list_items));
 850            break :list try p.addNode(.{
 851                .tag = .list,
 852                .data = .{ .list = .{
 853                    .start = switch (b.data.list.marker) {
 854                        .number_dot, .number_paren => @enumFromInt(b.data.list.start),
 855                        .@"-", .@"*", .@"+" => .unordered,
 856                    },
 857                    .children = children,
 858                } },
 859            });
 860        },
 861        .list_item => list_item: {
 862            assert(b.string_start == p.scratch_string.items.len);
 863            const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items[b.extra_start..]));
 864            break :list_item try p.addNode(.{
 865                .tag = .list_item,
 866                .data = .{ .list_item = .{
 867                    .tight = true,
 868                    .children = children,
 869                } },
 870            });
 871        },
 872        .table => table: {
 873            assert(b.string_start == p.scratch_string.items.len);
 874            const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items[b.extra_start..]));
 875            break :table try p.addNode(.{
 876                .tag = .table,
 877                .data = .{ .container = .{
 878                    .children = children,
 879                } },
 880            });
 881        },
 882        .table_row => table_row: {
 883            assert(b.string_start == p.scratch_string.items.len);
 884            const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items[b.extra_start..]));
 885            break :table_row try p.addNode(.{
 886                .tag = .table_row,
 887                .data = .{ .container = .{
 888                    .children = children,
 889                } },
 890            });
 891        },
 892        .heading => heading: {
 893            const children = try p.parseInlines(p.scratch_string.items[b.string_start..]);
 894            break :heading try p.addNode(.{
 895                .tag = .heading,
 896                .data = .{ .heading = .{
 897                    .level = b.data.heading.level,
 898                    .children = children,
 899                } },
 900            });
 901        },
 902        .code_block => code_block: {
 903            const content = try p.addString(p.scratch_string.items[b.string_start..]);
 904            break :code_block try p.addNode(.{
 905                .tag = .code_block,
 906                .data = .{ .code_block = .{
 907                    .tag = b.data.code_block.tag,
 908                    .content = content,
 909                } },
 910            });
 911        },
 912        .blockquote => blockquote: {
 913            assert(b.string_start == p.scratch_string.items.len);
 914            const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items[b.extra_start..]));
 915            break :blockquote try p.addNode(.{
 916                .tag = .blockquote,
 917                .data = .{ .container = .{
 918                    .children = children,
 919                } },
 920            });
 921        },
 922        .paragraph => paragraph: {
 923            const children = try p.parseInlines(p.scratch_string.items[b.string_start..]);
 924            break :paragraph try p.addNode(.{
 925                .tag = .paragraph,
 926                .data = .{ .container = .{
 927                    .children = children,
 928                } },
 929            });
 930        },
 931        .thematic_break => try p.addNode(.{
 932            .tag = .thematic_break,
 933            .data = .{ .none = {} },
 934        }),
 935    };
 936    p.scratch_string.items.len = b.string_start;
 937    p.scratch_extra.items.len = b.extra_start;
 938    try p.addScratchExtraNode(node);
 939}
 940
 941const InlineParser = struct {
 942    parent: *Parser,
 943    content: []const u8,
 944    pos: usize = 0,
 945    pending_inlines: ArrayList(PendingInline) = .empty,
 946    completed_inlines: ArrayList(CompletedInline) = .empty,
 947
 948    const PendingInline = struct {
 949        tag: Tag,
 950        data: Data,
 951        start: usize,
 952
 953        const Tag = enum {
 954            /// Data is `emphasis`.
 955            emphasis,
 956            /// Data is `none`.
 957            link,
 958            /// Data is `none`.
 959            image,
 960        };
 961
 962        const Data = union {
 963            none: void,
 964            emphasis: struct {
 965                underscore: bool,
 966                run_len: usize,
 967            },
 968        };
 969    };
 970
 971    const CompletedInline = struct {
 972        node: Node.Index,
 973        start: usize,
 974        len: usize,
 975    };
 976
 977    fn deinit(ip: *InlineParser) void {
 978        ip.pending_inlines.deinit(ip.parent.allocator);
 979        ip.completed_inlines.deinit(ip.parent.allocator);
 980    }
 981
 982    /// Parses all of `ip.content`, returning the children of the node
 983    /// containing the inline content.
 984    fn parse(ip: *InlineParser) Allocator.Error!ExtraIndex {
 985        while (ip.pos < ip.content.len) : (ip.pos += 1) {
 986            switch (ip.content[ip.pos]) {
 987                '\\' => ip.pos += 1,
 988                '[' => try ip.pending_inlines.append(ip.parent.allocator, .{
 989                    .tag = .link,
 990                    .data = .{ .none = {} },
 991                    .start = ip.pos,
 992                }),
 993                '!' => if (ip.pos + 1 < ip.content.len and ip.content[ip.pos + 1] == '[') {
 994                    try ip.pending_inlines.append(ip.parent.allocator, .{
 995                        .tag = .image,
 996                        .data = .{ .none = {} },
 997                        .start = ip.pos,
 998                    });
 999                    ip.pos += 1;
1000                },
1001                ']' => try ip.parseLink(),
1002                '<' => try ip.parseAutolink(),
1003                '*', '_' => try ip.parseEmphasis(),
1004                '`' => try ip.parseCodeSpan(),
1005                'h' => if (ip.pos == 0 or isPreTextAutolink(ip.content[ip.pos - 1])) {
1006                    try ip.parseTextAutolink();
1007                },
1008                else => {},
1009            }
1010        }
1011
1012        const children = try ip.encodeChildren(0, ip.content.len);
1013        // There may be pending inlines after parsing (e.g. unclosed emphasis
1014        // runs), but there must not be any completed inlines, since those
1015        // should all be part of `children`.
1016        assert(ip.completed_inlines.items.len == 0);
1017        return children;
1018    }
1019
1020    /// Parses a link, starting at the `]` at the end of the link text. `ip.pos`
1021    /// is left at the closing `)` of the link target or at the closing `]` if
1022    /// there is none.
1023    fn parseLink(ip: *InlineParser) !void {
1024        var i = ip.pending_inlines.items.len;
1025        while (i > 0) {
1026            i -= 1;
1027            if (ip.pending_inlines.items[i].tag == .link or
1028                ip.pending_inlines.items[i].tag == .image) break;
1029        } else return;
1030        const opener = ip.pending_inlines.items[i];
1031        ip.pending_inlines.shrinkRetainingCapacity(i);
1032        const text_start = switch (opener.tag) {
1033            .link => opener.start + 1,
1034            .image => opener.start + 2,
1035            else => unreachable,
1036        };
1037
1038        if (ip.pos + 1 >= ip.content.len or ip.content[ip.pos + 1] != '(') return;
1039        const text_end = ip.pos;
1040
1041        const target_start = text_end + 2;
1042        var target_end = target_start;
1043        var nesting_level: usize = 1;
1044        while (target_end < ip.content.len) : (target_end += 1) {
1045            switch (ip.content[target_end]) {
1046                '\\' => target_end += 1,
1047                '(' => nesting_level += 1,
1048                ')' => {
1049                    if (nesting_level == 1) break;
1050                    nesting_level -= 1;
1051                },
1052                else => {},
1053            }
1054        } else return;
1055        ip.pos = target_end;
1056
1057        const children = try ip.encodeChildren(text_start, text_end);
1058        const target = try ip.encodeLinkTarget(target_start, target_end);
1059
1060        const link = try ip.parent.addNode(.{
1061            .tag = switch (opener.tag) {
1062                .link => .link,
1063                .image => .image,
1064                else => unreachable,
1065            },
1066            .data = .{ .link = .{
1067                .target = target,
1068                .children = children,
1069            } },
1070        });
1071        try ip.completed_inlines.append(ip.parent.allocator, .{
1072            .node = link,
1073            .start = opener.start,
1074            .len = ip.pos - opener.start + 1,
1075        });
1076    }
1077
1078    fn encodeLinkTarget(ip: *InlineParser, start: usize, end: usize) !StringIndex {
1079        // For efficiency, we can encode directly into string_bytes rather than
1080        // creating a temporary string and then encoding it, since this process
1081        // is entirely linear.
1082        const string_top = ip.parent.string_bytes.items.len;
1083        errdefer ip.parent.string_bytes.shrinkRetainingCapacity(string_top);
1084
1085        var text_iter: TextIterator = .{ .content = ip.content[start..end] };
1086        while (text_iter.next()) |content| {
1087            switch (content) {
1088                .char => |c| try ip.parent.string_bytes.append(ip.parent.allocator, c),
1089                .text => |s| try ip.parent.string_bytes.appendSlice(ip.parent.allocator, s),
1090                .line_break => try ip.parent.string_bytes.appendSlice(ip.parent.allocator, "\\\n"),
1091            }
1092        }
1093        try ip.parent.string_bytes.append(ip.parent.allocator, 0);
1094        return @enumFromInt(string_top);
1095    }
1096
1097    /// Parses an autolink, starting at the opening `<`. `ip.pos` is left at the
1098    /// closing `>`, or remains unchanged at the opening `<` if there is none.
1099    fn parseAutolink(ip: *InlineParser) !void {
1100        const start = ip.pos;
1101        ip.pos += 1;
1102        var state: enum {
1103            start,
1104            scheme,
1105            target,
1106        } = .start;
1107        while (ip.pos < ip.content.len) : (ip.pos += 1) {
1108            switch (state) {
1109                .start => switch (ip.content[ip.pos]) {
1110                    'A'...'Z', 'a'...'z' => state = .scheme,
1111                    else => break,
1112                },
1113                .scheme => switch (ip.content[ip.pos]) {
1114                    'A'...'Z', 'a'...'z', '0'...'9', '+', '.', '-' => {},
1115                    ':' => state = .target,
1116                    else => break,
1117                },
1118                .target => switch (ip.content[ip.pos]) {
1119                    '<', ' ', '\t', '\n' => break, // Not allowed in autolinks
1120                    '>' => {
1121                        // Backslash escapes are not recognized in autolink targets.
1122                        const target = try ip.parent.addString(ip.content[start + 1 .. ip.pos]);
1123                        const node = try ip.parent.addNode(.{
1124                            .tag = .autolink,
1125                            .data = .{ .text = .{
1126                                .content = target,
1127                            } },
1128                        });
1129                        try ip.completed_inlines.append(ip.parent.allocator, .{
1130                            .node = node,
1131                            .start = start,
1132                            .len = ip.pos - start + 1,
1133                        });
1134                        return;
1135                    },
1136                    else => {},
1137                },
1138            }
1139        }
1140        ip.pos = start;
1141    }
1142
1143    /// Parses a plain text autolink (not delimited by `<>`), starting at the
1144    /// first character in the link (an `h`). `ip.pos` is left at the last
1145    /// character of the link, or remains unchanged if there is no valid link.
1146    fn parseTextAutolink(ip: *InlineParser) !void {
1147        const start = ip.pos;
1148        var state: union(enum) {
1149            /// Inside `http`. Contains the rest of the text to be matched.
1150            http: []const u8,
1151            after_http,
1152            after_https,
1153            /// Inside `://`. Contains the rest of the text to be matched.
1154            authority: []const u8,
1155            /// Inside link content.
1156            content: struct {
1157                start: usize,
1158                paren_nesting: usize,
1159            },
1160        } = .{ .http = "http" };
1161
1162        while (ip.pos < ip.content.len) : (ip.pos += 1) {
1163            switch (state) {
1164                .http => |rest| {
1165                    if (ip.content[ip.pos] != rest[0]) break;
1166                    if (rest.len > 1) {
1167                        state = .{ .http = rest[1..] };
1168                    } else {
1169                        state = .after_http;
1170                    }
1171                },
1172                .after_http => switch (ip.content[ip.pos]) {
1173                    's' => state = .after_https,
1174                    ':' => state = .{ .authority = "//" },
1175                    else => break,
1176                },
1177                .after_https => switch (ip.content[ip.pos]) {
1178                    ':' => state = .{ .authority = "//" },
1179                    else => break,
1180                },
1181                .authority => |rest| {
1182                    if (ip.content[ip.pos] != rest[0]) break;
1183                    if (rest.len > 1) {
1184                        state = .{ .authority = rest[1..] };
1185                    } else {
1186                        state = .{ .content = .{
1187                            .start = ip.pos + 1,
1188                            .paren_nesting = 0,
1189                        } };
1190                    }
1191                },
1192                .content => |*content| switch (ip.content[ip.pos]) {
1193                    ' ', '\t', '\n' => break,
1194                    '(' => content.paren_nesting += 1,
1195                    ')' => if (content.paren_nesting == 0) {
1196                        break;
1197                    } else {
1198                        content.paren_nesting -= 1;
1199                    },
1200                    else => {},
1201                },
1202            }
1203        }
1204
1205        switch (state) {
1206            .http, .after_http, .after_https, .authority => {
1207                ip.pos = start;
1208            },
1209            .content => |content| {
1210                while (ip.pos > content.start and isPostTextAutolink(ip.content[ip.pos - 1])) {
1211                    ip.pos -= 1;
1212                }
1213                if (ip.pos == content.start) {
1214                    ip.pos = start;
1215                    return;
1216                }
1217
1218                const target = try ip.parent.addString(ip.content[start..ip.pos]);
1219                const node = try ip.parent.addNode(.{
1220                    .tag = .autolink,
1221                    .data = .{ .text = .{
1222                        .content = target,
1223                    } },
1224                });
1225                try ip.completed_inlines.append(ip.parent.allocator, .{
1226                    .node = node,
1227                    .start = start,
1228                    .len = ip.pos - start,
1229                });
1230                ip.pos -= 1;
1231            },
1232        }
1233    }
1234
1235    /// Returns whether `c` may appear before a text autolink is recognized.
1236    fn isPreTextAutolink(c: u8) bool {
1237        return switch (c) {
1238            ' ', '\t', '\n', '*', '_', '(' => true,
1239            else => false,
1240        };
1241    }
1242
1243    /// Returns whether `c` is punctuation that may appear after a text autolink
1244    /// and not be considered part of it.
1245    fn isPostTextAutolink(c: u8) bool {
1246        return switch (c) {
1247            '?', '!', '.', ',', ':', '*', '_' => true,
1248            else => false,
1249        };
1250    }
1251
1252    /// Parses emphasis, starting at the beginning of a run of `*` or `_`
1253    /// characters. `ip.pos` is left at the last character in the run after
1254    /// parsing.
1255    fn parseEmphasis(ip: *InlineParser) !void {
1256        const char = ip.content[ip.pos];
1257        var start = ip.pos;
1258        while (ip.pos + 1 < ip.content.len and ip.content[ip.pos + 1] == char) {
1259            ip.pos += 1;
1260        }
1261        var len = ip.pos - start + 1;
1262        const underscore = char == '_';
1263        const space_before = start == 0 or isWhitespace(ip.content[start - 1]);
1264        const space_after = start + len == ip.content.len or isWhitespace(ip.content[start + len]);
1265        const punct_before = start == 0 or isPunctuation(ip.content[start - 1]);
1266        const punct_after = start + len == ip.content.len or isPunctuation(ip.content[start + len]);
1267        // The rules for when emphasis may be closed or opened are stricter for
1268        // underscores to avoid inappropriately interpreting snake_case words as
1269        // containing emphasis markers.
1270        const can_open = if (underscore)
1271            !space_after and (space_before or punct_before)
1272        else
1273            !space_after;
1274        const can_close = if (underscore)
1275            !space_before and (space_after or punct_after)
1276        else
1277            !space_before;
1278
1279        if (can_close and ip.pending_inlines.items.len > 0) {
1280            var i = ip.pending_inlines.items.len;
1281            while (i > 0 and len > 0) {
1282                i -= 1;
1283                const opener = &ip.pending_inlines.items[i];
1284                if (opener.tag != .emphasis or
1285                    opener.data.emphasis.underscore != underscore) continue;
1286
1287                const close_len = @min(opener.data.emphasis.run_len, len);
1288                const opener_end = opener.start + opener.data.emphasis.run_len;
1289
1290                const emphasis = try ip.encodeEmphasis(opener_end, start, close_len);
1291                const emphasis_start = opener_end - close_len;
1292                const emphasis_len = start - emphasis_start + close_len;
1293                try ip.completed_inlines.append(ip.parent.allocator, .{
1294                    .node = emphasis,
1295                    .start = emphasis_start,
1296                    .len = emphasis_len,
1297                });
1298
1299                // There may still be other openers further down in the
1300                // stack to close, or part of this run might serve as an
1301                // opener itself.
1302                start += close_len;
1303                len -= close_len;
1304
1305                // Remove any pending inlines above this on the stack, since
1306                // closing this emphasis will prevent them from being closed.
1307                // Additionally, if this opener is completely consumed by
1308                // being closed, it can be removed.
1309                opener.data.emphasis.run_len -= close_len;
1310                if (opener.data.emphasis.run_len == 0) {
1311                    ip.pending_inlines.shrinkRetainingCapacity(i);
1312                } else {
1313                    ip.pending_inlines.shrinkRetainingCapacity(i + 1);
1314                }
1315            }
1316        }
1317
1318        if (can_open and len > 0) {
1319            try ip.pending_inlines.append(ip.parent.allocator, .{
1320                .tag = .emphasis,
1321                .data = .{ .emphasis = .{
1322                    .underscore = underscore,
1323                    .run_len = len,
1324                } },
1325                .start = start,
1326            });
1327        }
1328    }
1329
1330    /// Encodes emphasis specified by a run of `run_len` emphasis characters,
1331    /// with `start..end` being the range of content contained within the
1332    /// emphasis.
1333    fn encodeEmphasis(ip: *InlineParser, start: usize, end: usize, run_len: usize) !Node.Index {
1334        const children = try ip.encodeChildren(start, end);
1335        var inner = switch (run_len % 3) {
1336            1 => try ip.parent.addNode(.{
1337                .tag = .emphasis,
1338                .data = .{ .container = .{
1339                    .children = children,
1340                } },
1341            }),
1342            2 => try ip.parent.addNode(.{
1343                .tag = .strong,
1344                .data = .{ .container = .{
1345                    .children = children,
1346                } },
1347            }),
1348            0 => strong_emphasis: {
1349                const strong = try ip.parent.addNode(.{
1350                    .tag = .strong,
1351                    .data = .{ .container = .{
1352                        .children = children,
1353                    } },
1354                });
1355                break :strong_emphasis try ip.parent.addNode(.{
1356                    .tag = .emphasis,
1357                    .data = .{ .container = .{
1358                        .children = try ip.parent.addExtraChildren(&.{strong}),
1359                    } },
1360                });
1361            },
1362            else => unreachable,
1363        };
1364
1365        var run_left = run_len;
1366        while (run_left > 3) : (run_left -= 3) {
1367            const strong = try ip.parent.addNode(.{
1368                .tag = .strong,
1369                .data = .{ .container = .{
1370                    .children = try ip.parent.addExtraChildren(&.{inner}),
1371                } },
1372            });
1373            inner = try ip.parent.addNode(.{
1374                .tag = .emphasis,
1375                .data = .{ .container = .{
1376                    .children = try ip.parent.addExtraChildren(&.{strong}),
1377                } },
1378            });
1379        }
1380
1381        return inner;
1382    }
1383
1384    /// Parses a code span, starting at the beginning of the opening backtick
1385    /// run. `ip.pos` is left at the last character in the closing run after
1386    /// parsing.
1387    fn parseCodeSpan(ip: *InlineParser) !void {
1388        const opener_start = ip.pos;
1389        ip.pos = mem.indexOfNonePos(u8, ip.content, ip.pos, "`") orelse ip.content.len;
1390        const opener_len = ip.pos - opener_start;
1391
1392        const start = ip.pos;
1393        const end = while (mem.indexOfScalarPos(u8, ip.content, ip.pos, '`')) |closer_start| {
1394            ip.pos = mem.indexOfNonePos(u8, ip.content, closer_start, "`") orelse ip.content.len;
1395            const closer_len = ip.pos - closer_start;
1396
1397            if (closer_len == opener_len) break closer_start;
1398        } else unterminated: {
1399            ip.pos = ip.content.len;
1400            break :unterminated ip.content.len;
1401        };
1402
1403        var content = if (start < ip.content.len)
1404            ip.content[start..end]
1405        else
1406            "";
1407        // This single space removal rule allows code spans to be written which
1408        // start or end with backticks.
1409        if (mem.startsWith(u8, content, " `")) content = content[1..];
1410        if (mem.endsWith(u8, content, "` ")) content = content[0 .. content.len - 1];
1411
1412        const text = try ip.parent.addNode(.{
1413            .tag = .code_span,
1414            .data = .{ .text = .{
1415                .content = try ip.parent.addString(content),
1416            } },
1417        });
1418        try ip.completed_inlines.append(ip.parent.allocator, .{
1419            .node = text,
1420            .start = opener_start,
1421            .len = ip.pos - opener_start,
1422        });
1423        // Ensure ip.pos is pointing at the last character of the
1424        // closer, not after it.
1425        ip.pos -= 1;
1426    }
1427
1428    /// Encodes children parsed in the content range `start..end`. The children
1429    /// will be text nodes and any completed inlines within the range.
1430    fn encodeChildren(ip: *InlineParser, start: usize, end: usize) !ExtraIndex {
1431        const scratch_extra_top = ip.parent.scratch_extra.items.len;
1432        defer ip.parent.scratch_extra.shrinkRetainingCapacity(scratch_extra_top);
1433
1434        var child_index = ip.completed_inlines.items.len;
1435        while (child_index > 0 and ip.completed_inlines.items[child_index - 1].start >= start) {
1436            child_index -= 1;
1437        }
1438        const start_child_index = child_index;
1439
1440        var pos = start;
1441        while (child_index < ip.completed_inlines.items.len) : (child_index += 1) {
1442            const child_inline = ip.completed_inlines.items[child_index];
1443            // Completed inlines must be strictly nested within the encodable
1444            // content.
1445            assert(child_inline.start >= pos and child_inline.start + child_inline.len <= end);
1446
1447            if (child_inline.start > pos) {
1448                try ip.encodeTextNode(pos, child_inline.start);
1449            }
1450            try ip.parent.addScratchExtraNode(child_inline.node);
1451
1452            pos = child_inline.start + child_inline.len;
1453        }
1454        ip.completed_inlines.shrinkRetainingCapacity(start_child_index);
1455
1456        if (pos < end) {
1457            try ip.encodeTextNode(pos, end);
1458        }
1459
1460        const children = ip.parent.scratch_extra.items[scratch_extra_top..];
1461        return try ip.parent.addExtraChildren(@ptrCast(children));
1462    }
1463
1464    /// Encodes textual content `ip.content[start..end]` to `scratch_extra`. The
1465    /// encoded content may include both `text` and `line_break` nodes.
1466    fn encodeTextNode(ip: *InlineParser, start: usize, end: usize) !void {
1467        // For efficiency, we can encode directly into string_bytes rather than
1468        // creating a temporary string and then encoding it, since this process
1469        // is entirely linear.
1470        const string_top = ip.parent.string_bytes.items.len;
1471        errdefer ip.parent.string_bytes.shrinkRetainingCapacity(string_top);
1472
1473        var string_start = string_top;
1474        var text_iter: TextIterator = .{ .content = ip.content[start..end] };
1475        while (text_iter.next()) |content| {
1476            switch (content) {
1477                .char => |c| try ip.parent.string_bytes.append(ip.parent.allocator, c),
1478                .text => |s| try ip.parent.string_bytes.appendSlice(ip.parent.allocator, s),
1479                .line_break => {
1480                    if (ip.parent.string_bytes.items.len > string_start) {
1481                        try ip.parent.string_bytes.append(ip.parent.allocator, 0);
1482                        try ip.parent.addScratchExtraNode(try ip.parent.addNode(.{
1483                            .tag = .text,
1484                            .data = .{ .text = .{
1485                                .content = @enumFromInt(string_start),
1486                            } },
1487                        }));
1488                        string_start = ip.parent.string_bytes.items.len;
1489                    }
1490                    try ip.parent.addScratchExtraNode(try ip.parent.addNode(.{
1491                        .tag = .line_break,
1492                        .data = .{ .none = {} },
1493                    }));
1494                },
1495            }
1496        }
1497        if (ip.parent.string_bytes.items.len > string_start) {
1498            try ip.parent.string_bytes.append(ip.parent.allocator, 0);
1499            try ip.parent.addScratchExtraNode(try ip.parent.addNode(.{
1500                .tag = .text,
1501                .data = .{ .text = .{
1502                    .content = @enumFromInt(string_start),
1503                } },
1504            }));
1505        }
1506    }
1507
1508    /// An iterator over parts of textual content, handling unescaping of
1509    /// escaped characters and line breaks.
1510    const TextIterator = struct {
1511        content: []const u8,
1512        pos: usize = 0,
1513
1514        const Content = union(enum) {
1515            char: u8,
1516            text: []const u8,
1517            line_break,
1518        };
1519
1520        const replacement = "\u{FFFD}";
1521
1522        fn next(iter: *TextIterator) ?Content {
1523            if (iter.pos >= iter.content.len) return null;
1524            if (iter.content[iter.pos] == '\\') {
1525                iter.pos += 1;
1526                if (iter.pos == iter.content.len) {
1527                    return .{ .char = '\\' };
1528                } else if (iter.content[iter.pos] == '\n') {
1529                    iter.pos += 1;
1530                    return .line_break;
1531                } else if (isPunctuation(iter.content[iter.pos])) {
1532                    const c = iter.content[iter.pos];
1533                    iter.pos += 1;
1534                    return .{ .char = c };
1535                } else {
1536                    return .{ .char = '\\' };
1537                }
1538            }
1539            return iter.nextCodepoint();
1540        }
1541
1542        fn nextCodepoint(iter: *TextIterator) ?Content {
1543            switch (iter.content[iter.pos]) {
1544                0 => {
1545                    iter.pos += 1;
1546                    return .{ .text = replacement };
1547                },
1548                1...127 => |c| {
1549                    iter.pos += 1;
1550                    return .{ .char = c };
1551                },
1552                else => |b| {
1553                    const cp_len = std.unicode.utf8ByteSequenceLength(b) catch {
1554                        iter.pos += 1;
1555                        return .{ .text = replacement };
1556                    };
1557                    const is_valid = iter.pos + cp_len <= iter.content.len and
1558                        std.unicode.utf8ValidateSlice(iter.content[iter.pos..][0..cp_len]);
1559                    const cp_encoded = if (is_valid)
1560                        iter.content[iter.pos..][0..cp_len]
1561                    else
1562                        replacement;
1563                    iter.pos += cp_len;
1564                    return .{ .text = cp_encoded };
1565                },
1566            }
1567        }
1568    };
1569};
1570
1571fn parseInlines(p: *Parser, content: []const u8) !ExtraIndex {
1572    var ip: InlineParser = .{
1573        .parent = p,
1574        .content = mem.trim(u8, content, " \t\n"),
1575    };
1576    defer ip.deinit();
1577    return try ip.parse();
1578}
1579
1580pub fn extraData(p: Parser, comptime T: type, index: ExtraIndex) ExtraData(T) {
1581    const fields = @typeInfo(T).@"struct".fields;
1582    var i: usize = @intFromEnum(index);
1583    var result: T = undefined;
1584    inline for (fields) |field| {
1585        @field(result, field.name) = switch (field.type) {
1586            u32 => p.extra.items[i],
1587            else => @compileError("bad field type"),
1588        };
1589        i += 1;
1590    }
1591    return .{ .data = result, .end = i };
1592}
1593
1594pub fn extraChildren(p: Parser, index: ExtraIndex) []const Node.Index {
1595    const children = p.extraData(Node.Children, index);
1596    return @ptrCast(p.extra.items[children.end..][0..children.data.len]);
1597}
1598
1599fn addNode(p: *Parser, node: Node) !Node.Index {
1600    const index: Node.Index = @enumFromInt(@as(u32, @intCast(p.nodes.len)));
1601    try p.nodes.append(p.allocator, node);
1602    return index;
1603}
1604
1605fn addString(p: *Parser, s: []const u8) !StringIndex {
1606    if (s.len == 0) return .empty;
1607
1608    const index: StringIndex = @enumFromInt(@as(u32, @intCast(p.string_bytes.items.len)));
1609    try p.string_bytes.ensureUnusedCapacity(p.allocator, s.len + 1);
1610    p.string_bytes.appendSliceAssumeCapacity(s);
1611    p.string_bytes.appendAssumeCapacity(0);
1612    return index;
1613}
1614
1615fn addExtraChildren(p: *Parser, nodes: []const Node.Index) !ExtraIndex {
1616    const index: ExtraIndex = @enumFromInt(@as(u32, @intCast(p.extra.items.len)));
1617    try p.extra.ensureUnusedCapacity(p.allocator, nodes.len + 1);
1618    p.extra.appendAssumeCapacity(@intCast(nodes.len));
1619    p.extra.appendSliceAssumeCapacity(@ptrCast(nodes));
1620    return index;
1621}
1622
1623fn addScratchExtraNode(p: *Parser, node: Node.Index) !void {
1624    try p.scratch_extra.append(p.allocator, @intFromEnum(node));
1625}
1626
1627fn addScratchStringLine(p: *Parser, line: []const u8) !void {
1628    try p.scratch_string.ensureUnusedCapacity(p.allocator, line.len + 1);
1629    p.scratch_string.appendSliceAssumeCapacity(line);
1630    p.scratch_string.appendAssumeCapacity('\n');
1631}
1632
1633fn isBlank(line: []const u8) bool {
1634    return mem.indexOfNone(u8, line, " \t") == null;
1635}
1636
1637fn isPunctuation(c: u8) bool {
1638    return switch (c) {
1639        '!',
1640        '"',
1641        '#',
1642        '$',
1643        '%',
1644        '&',
1645        '\'',
1646        '(',
1647        ')',
1648        '*',
1649        '+',
1650        ',',
1651        '-',
1652        '.',
1653        '/',
1654        ':',
1655        ';',
1656        '<',
1657        '=',
1658        '>',
1659        '?',
1660        '@',
1661        '[',
1662        '\\',
1663        ']',
1664        '^',
1665        '_',
1666        '`',
1667        '{',
1668        '|',
1669        '}',
1670        '~',
1671        => true,
1672        else => false,
1673    };
1674}