// zig/lib/compiler/resinator/literals.zig at master

   1const std = @import("std");
   2const code_pages = @import("code_pages.zig");
   3const SupportedCodePage = code_pages.SupportedCodePage;
   4const windows1252 = @import("windows1252.zig");
   5const ErrorDetails = @import("errors.zig").ErrorDetails;
   6const DiagnosticsContext = @import("errors.zig").DiagnosticsContext;
   7const Token = @import("lex.zig").Token;
   8
   9/// rc is maximally liberal in terms of what it accepts as a number literal
  10/// for data values. As long as it starts with a number or - or ~, that's good enough.
  11pub fn isValidNumberDataLiteral(str: []const u8) bool {
  12    if (str.len == 0) return false;
  13    switch (str[0]) {
  14        '~', '-', '0'...'9' => return true,
  15        else => return false,
  16    }
  17}
  18
/// A span of raw source bytes together with the code page used to decode it.
pub const SourceBytes = struct {
    /// The raw bytes. When passed to the string parsing functions in this file,
    /// this is the whole literal including its quotes (and `L` prefix, if any).
    slice: []const u8,
    /// The code page that `slice` is decoded with.
    code_page: SupportedCodePage,
};
  23
/// The kind of a string literal: `.wide` for literals with an `L`/`l` prefix (L""),
/// `.ascii` for literals without one ("").
pub const StringType = enum { ascii, wide };
  25
  26/// Valid escapes:
  27///  "" -> "
  28///  \a, \A => 0x08 (not 0x07 like in C)
  29///  \n => 0x0A
  30///  \r => 0x0D
  31///  \t, \T => 0x09
  32///  \\ => \
  33///  \nnn => byte with numeric value given by nnn interpreted as octal
  34///          (wraps on overflow, number of digits can be 1-3 for ASCII strings
  35///          and 1-7 for wide strings)
  36///  \xhh => byte with numeric value given by hh interpreted as hex
  37///          (number of digits can be 0-2 for ASCII strings and 0-4 for
  38///          wide strings)
  39///  \<\r+> => \
  40///  \<[\r\n\t ]+> => <nothing>
  41///
  42/// Special cases:
  43///  <\t> => 1-8 spaces, dependent on columns in the source rc file itself
  44///  <\r> => <nothing>
  45///  <\n+><\w+?\n?> => <space><\n>
  46///
  47/// Special, especially weird case:
  48///  \"" => "
  49/// NOTE: This leads to footguns because the preprocessor can start parsing things
  50///       out-of-sync with the RC compiler, expanding macros within string literals, etc.
  51///       This parse function handles this case the same as the Windows RC compiler, but
  52///       \" within a string literal is treated as an error by the lexer, so the relevant
  53///       branches should never actually be hit during this function.
  54pub const IterativeStringParser = struct {
  55    source: []const u8,
  56    code_page: SupportedCodePage,
  57    /// The type of the string inferred by the prefix (L"" or "")
  58    /// This is what matters for things like the maximum digits in an
  59    /// escape sequence, whether or not invalid escape sequences are skipped, etc.
  60    declared_string_type: StringType,
  61    pending_codepoint: ?u21 = null,
  62    num_pending_spaces: u8 = 0,
  63    index: usize = 0,
  64    column: usize = 0,
  65    diagnostics: ?DiagnosticsContext = null,
  66    seen_tab: bool = false,
  67
  68    const State = enum {
  69        normal,
  70        quote,
  71        newline,
  72        escaped,
  73        escaped_cr,
  74        escaped_newlines,
  75        escaped_octal,
  76        escaped_hex,
  77    };
  78
  79    pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser {
  80        const declared_string_type: StringType = switch (bytes.slice[0]) {
  81            'L', 'l' => .wide,
  82            else => .ascii,
  83        };
  84        var source = bytes.slice[1 .. bytes.slice.len - 1]; // remove ""
  85        var column = options.start_column + 1; // for the removed "
  86        if (declared_string_type == .wide) {
  87            source = source[1..]; // remove L
  88            column += 1; // for the removed L
  89        }
  90        return .{
  91            .source = source,
  92            .code_page = bytes.code_page,
  93            .declared_string_type = declared_string_type,
  94            .column = column,
  95            .diagnostics = options.diagnostics,
  96        };
  97    }
  98
  99    pub const ParsedCodepoint = struct {
 100        codepoint: u21,
 101        /// Note: If this is true, `codepoint` will have an effective maximum value
 102        /// of 0xFFFF, as `codepoint` is calculated using wrapping arithmetic on a u16.
 103        /// If the value needs to be truncated to a smaller integer (e.g. for ASCII string
 104        /// literals), then that must be done by the caller.
 105        from_escaped_integer: bool = false,
 106        /// Denotes that the codepoint is:
 107        /// - Escaped (has a \ in front of it), and
 108        /// - Has a value >= U+10000, meaning it would be encoded as a surrogate
 109        ///   pair in UTF-16, and
 110        /// - Is part of a wide string literal
 111        ///
 112        /// Normally in wide string literals, invalid escapes are omitted
 113        /// during parsing (the codepoints are not returned at all during
 114        /// the `next` call), but this is a special case in which the
 115        /// escape only applies to the high surrogate pair of the codepoint.
 116        ///
 117        /// TODO: Maybe just return the low surrogate codepoint by itself in this case.
 118        escaped_surrogate_pair: bool = false,
 119    };
 120
 121    pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
 122        const result = try self.nextUnchecked();
 123        if (self.diagnostics != null and result != null and !result.?.from_escaped_integer) {
 124            switch (result.?.codepoint) {
 125                0x0900, 0x0A00, 0x0A0D, 0x2000, 0x0D00 => {
 126                    const err: ErrorDetails.Error = if (result.?.codepoint == 0xD00)
 127                        .rc_would_miscompile_codepoint_skip
 128                    else
 129                        .rc_would_miscompile_codepoint_whitespace;
 130                    try self.diagnostics.?.diagnostics.append(ErrorDetails{
 131                        .err = err,
 132                        .type = .warning,
 133                        .code_page = self.code_page,
 134                        .token = self.diagnostics.?.token,
 135                        .extra = .{ .number = result.?.codepoint },
 136                    });
 137                },
 138                0xFFFE, 0xFFFF => {
 139                    try self.diagnostics.?.diagnostics.append(ErrorDetails{
 140                        .err = .rc_would_miscompile_codepoint_bom,
 141                        .type = .warning,
 142                        .code_page = self.code_page,
 143                        .token = self.diagnostics.?.token,
 144                        .extra = .{ .number = result.?.codepoint },
 145                    });
 146                    try self.diagnostics.?.diagnostics.append(ErrorDetails{
 147                        .err = .rc_would_miscompile_codepoint_bom,
 148                        .type = .note,
 149                        .code_page = self.code_page,
 150                        .token = self.diagnostics.?.token,
 151                        .print_source_line = false,
 152                        .extra = .{ .number = result.?.codepoint },
 153                    });
 154                },
 155                else => {},
 156            }
 157        }
 158        return result;
 159    }
 160
 161    pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
 162        if (self.num_pending_spaces > 0) {
 163            // Ensure that we don't get into this predicament so we can ensure that
 164            // the order of processing any pending stuff doesn't matter
 165            std.debug.assert(self.pending_codepoint == null);
 166            self.num_pending_spaces -= 1;
 167            return .{ .codepoint = ' ' };
 168        }
 169        if (self.pending_codepoint) |pending_codepoint| {
 170            self.pending_codepoint = null;
 171            return .{ .codepoint = pending_codepoint };
 172        }
 173        if (self.index >= self.source.len) return null;
 174
 175        var state: State = .normal;
 176        var string_escape_n: u16 = 0;
 177        var string_escape_i: u8 = 0;
 178        const max_octal_escape_digits: u8 = switch (self.declared_string_type) {
 179            .ascii => 3,
 180            .wide => 7,
 181        };
 182        const max_hex_escape_digits: u8 = switch (self.declared_string_type) {
 183            .ascii => 2,
 184            .wide => 4,
 185        };
 186
 187        var backtrack: bool = undefined;
 188        while (self.code_page.codepointAt(self.index, self.source)) |codepoint| : ({
 189            if (!backtrack) self.index += codepoint.byte_len;
 190        }) {
 191            backtrack = false;
 192            const c = codepoint.value;
 193            defer {
 194                if (!backtrack) {
 195                    if (c == '\t') {
 196                        self.column += columnsUntilTabStop(self.column, 8);
 197                    } else {
 198                        self.column += codepoint.byte_len;
 199                    }
 200                }
 201            }
 202            switch (state) {
 203                .normal => switch (c) {
 204                    '\\' => state = .escaped,
 205                    '"' => state = .quote,
 206                    '\r' => {},
 207                    '\n' => state = .newline,
 208                    '\t' => {
 209                        // Only warn about a tab getting converted to spaces once per string
 210                        if (self.diagnostics != null and !self.seen_tab) {
 211                            try self.diagnostics.?.diagnostics.append(ErrorDetails{
 212                                .err = .tab_converted_to_spaces,
 213                                .type = .warning,
 214                                .code_page = self.code_page,
 215                                .token = self.diagnostics.?.token,
 216                            });
 217                            try self.diagnostics.?.diagnostics.append(ErrorDetails{
 218                                .err = .tab_converted_to_spaces,
 219                                .type = .note,
 220                                .code_page = self.code_page,
 221                                .token = self.diagnostics.?.token,
 222                                .print_source_line = false,
 223                            });
 224                            self.seen_tab = true;
 225                        }
 226                        const cols = columnsUntilTabStop(self.column, 8);
 227                        self.num_pending_spaces = @intCast(cols - 1);
 228                        self.index += codepoint.byte_len;
 229                        return .{ .codepoint = ' ' };
 230                    },
 231                    else => {
 232                        self.index += codepoint.byte_len;
 233                        return .{ .codepoint = c };
 234                    },
 235                },
 236                .quote => switch (c) {
 237                    '"' => {
 238                        // "" => "
 239                        self.index += codepoint.byte_len;
 240                        return .{ .codepoint = '"' };
 241                    },
 242                    else => unreachable, // this is a bug in the lexer
 243                },
 244                .newline => switch (c) {
 245                    '\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {},
 246                    else => {
 247                        // we intentionally avoid incrementing self.index
 248                        // to handle the current char in the next call,
 249                        // and we set backtrack so column count is handled correctly
 250                        backtrack = true;
 251
 252                        // <space><newline>
 253                        self.pending_codepoint = '\n';
 254                        return .{ .codepoint = ' ' };
 255                    },
 256                },
 257                .escaped => switch (c) {
 258                    '\r' => state = .escaped_cr,
 259                    '\n' => state = .escaped_newlines,
 260                    '0'...'7' => {
 261                        string_escape_n = std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
 262                        string_escape_i = 1;
 263                        state = .escaped_octal;
 264                    },
 265                    'x', 'X' => {
 266                        string_escape_n = 0;
 267                        string_escape_i = 0;
 268                        state = .escaped_hex;
 269                    },
 270                    else => {
 271                        switch (c) {
 272                            'a', 'A' => {
 273                                self.index += codepoint.byte_len;
 274                                // might be a bug in RC, but matches its behavior
 275                                return .{ .codepoint = '\x08' };
 276                            },
 277                            'n' => {
 278                                self.index += codepoint.byte_len;
 279                                return .{ .codepoint = '\n' };
 280                            },
 281                            'r' => {
 282                                self.index += codepoint.byte_len;
 283                                return .{ .codepoint = '\r' };
 284                            },
 285                            't', 'T' => {
 286                                self.index += codepoint.byte_len;
 287                                return .{ .codepoint = '\t' };
 288                            },
 289                            '\\' => {
 290                                self.index += codepoint.byte_len;
 291                                return .{ .codepoint = '\\' };
 292                            },
 293                            '"' => {
 294                                // \" is a special case that doesn't get the \ included,
 295                                backtrack = true;
 296                            },
 297                            else => switch (self.declared_string_type) {
 298                                .wide => {
 299                                    // All invalid escape sequences are skipped in wide strings,
 300                                    // but there is a special case around \<tab> where the \
 301                                    // is skipped but the tab character is processed.
 302                                    // It's actually a bit weirder than that, though, since
 303                                    // the preprocessor is the one that does the <tab> -> spaces
 304                                    // conversion, so it goes something like this:
 305                                    //
 306                                    // Before preprocessing: L"\<tab>"
 307                                    // After preprocessing:  L"\     "
 308                                    //
 309                                    // So the parser only sees an escaped space character followed
 310                                    // by some other number of spaces >= 0.
 311                                    //
 312                                    // However, our preprocessor keeps tab characters intact, so we emulate
 313                                    // the above behavior by skipping the \ and then outputting one less
 314                                    // space than normal for the <tab> character.
 315                                    if (c == '\t') {
 316                                        // Only warn about a tab getting converted to spaces once per string
 317                                        if (self.diagnostics != null and !self.seen_tab) {
 318                                            try self.diagnostics.?.diagnostics.append(ErrorDetails{
 319                                                .err = .tab_converted_to_spaces,
 320                                                .type = .warning,
 321                                                .code_page = self.code_page,
 322                                                .token = self.diagnostics.?.token,
 323                                            });
 324                                            try self.diagnostics.?.diagnostics.append(ErrorDetails{
 325                                                .err = .tab_converted_to_spaces,
 326                                                .type = .note,
 327                                                .code_page = self.code_page,
 328                                                .token = self.diagnostics.?.token,
 329                                                .print_source_line = false,
 330                                            });
 331                                            self.seen_tab = true;
 332                                        }
 333
 334                                        const cols = columnsUntilTabStop(self.column, 8);
 335                                        // If the tab character would only be converted to a single space,
 336                                        // then we can just skip both the \ and the <tab> and move on.
 337                                        if (cols > 1) {
 338                                            self.num_pending_spaces = @intCast(cols - 2);
 339                                            self.index += codepoint.byte_len;
 340                                            return .{ .codepoint = ' ' };
 341                                        }
 342                                    }
 343                                    // There's a second special case when the codepoint would be encoded
 344                                    // as a surrogate pair in UTF-16, as the escape 'applies' to the
 345                                    // high surrogate pair only in this instance. This is a side-effect
 346                                    // of the Win32 RC compiler preprocessor outputting UTF-16 and the
 347                                    // compiler itself seemingly working on code units instead of code points
 348                                    // in this particular instance.
 349                                    //
 350                                    // We emulate this behavior by emitting the codepoint, but with a marker
 351                                    // that indicates that it needs to be handled specially.
 352                                    if (c >= 0x10000 and c != code_pages.Codepoint.invalid) {
 353                                        self.index += codepoint.byte_len;
 354                                        return .{ .codepoint = c, .escaped_surrogate_pair = true };
 355                                    }
 356                                },
 357                                .ascii => {
 358                                    // we intentionally avoid incrementing self.index
 359                                    // to handle the current char in the next call,
 360                                    // and we set backtrack so column count is handled correctly
 361                                    backtrack = true;
 362                                    return .{ .codepoint = '\\' };
 363                                },
 364                            },
 365                        }
 366                        state = .normal;
 367                    },
 368                },
 369                .escaped_cr => switch (c) {
 370                    '\r' => {},
 371                    '\n' => state = .escaped_newlines,
 372                    else => {
 373                        // we intentionally avoid incrementing self.index
 374                        // to handle the current char in the next call,
 375                        // and we set backtrack so column count is handled correctly
 376                        backtrack = true;
 377                        return .{ .codepoint = '\\' };
 378                    },
 379                },
 380                .escaped_newlines => switch (c) {
 381                    '\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {},
 382                    else => {
 383                        // backtrack so that we handle the current char properly
 384                        backtrack = true;
 385                        state = .normal;
 386                    },
 387                },
 388                .escaped_octal => switch (c) {
 389                    '0'...'7' => {
 390                        // Note: We use wrapping arithmetic on a u16 here since there's been no observed
 391                        // string parsing scenario where an escaped integer with a value >= the u16
 392                        // max is interpreted as anything but the truncated u16 value.
 393                        string_escape_n *%= 8;
 394                        string_escape_n +%= std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
 395                        string_escape_i += 1;
 396                        if (string_escape_i == max_octal_escape_digits) {
 397                            self.index += codepoint.byte_len;
 398                            return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
 399                        }
 400                    },
 401                    else => {
 402                        // we intentionally avoid incrementing self.index
 403                        // to handle the current char in the next call,
 404                        // and we set backtrack so column count is handled correctly
 405                        backtrack = true;
 406
 407                        // write out whatever byte we have parsed so far
 408                        return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
 409                    },
 410                },
 411                .escaped_hex => switch (c) {
 412                    '0'...'9', 'a'...'f', 'A'...'F' => {
 413                        string_escape_n *= 16;
 414                        string_escape_n += std.fmt.charToDigit(@intCast(c), 16) catch unreachable;
 415                        string_escape_i += 1;
 416                        if (string_escape_i == max_hex_escape_digits) {
 417                            self.index += codepoint.byte_len;
 418                            return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
 419                        }
 420                    },
 421                    else => {
 422                        // we intentionally avoid incrementing self.index
 423                        // to handle the current char in the next call,
 424                        // and we set backtrack so column count is handled correctly
 425                        backtrack = true;
 426
 427                        // write out whatever byte we have parsed so far
 428                        // (even with 0 actual digits, \x alone parses to 0)
 429                        const escaped_value = string_escape_n;
 430                        return .{ .codepoint = escaped_value, .from_escaped_integer = true };
 431                    },
 432                },
 433            }
 434        }
 435
 436        switch (state) {
 437            .normal, .escaped_newlines => {},
 438            .newline => {
 439                // <space><newline>
 440                self.pending_codepoint = '\n';
 441                return .{ .codepoint = ' ' };
 442            },
 443            .escaped, .escaped_cr => return .{ .codepoint = '\\' },
 444            .escaped_octal, .escaped_hex => {
 445                return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
 446            },
 447            .quote => unreachable, // this is a bug in the lexer
 448        }
 449
 450        return null;
 451    }
 452};
 453
/// Options for the string parsing functions in this file.
pub const StringParseOptions = struct {
    /// Column of the first byte of the string literal; used as the starting
    /// point when computing how many spaces a tab character expands to.
    start_column: usize = 0,
    /// When non-null, warnings encountered during parsing are appended to it.
    diagnostics: ?DiagnosticsContext = null,
    /// The code page of the output: selects how non-wide literals are encoded
    /// and how escaped integers are interpreted when a non-wide literal is
    /// parsed into a wide string.
    output_code_page: SupportedCodePage,
};
 459
 460pub fn parseQuotedString(
 461    comptime literal_type: StringType,
 462    allocator: std.mem.Allocator,
 463    bytes: SourceBytes,
 464    options: StringParseOptions,
 465) !(switch (literal_type) {
 466    .ascii => []u8,
 467    .wide => [:0]u16,
 468}) {
 469    const T = if (literal_type == .ascii) u8 else u16;
 470    std.debug.assert(bytes.slice.len >= 2); // must at least have 2 double quote chars
 471
 472    var buf = try std.ArrayList(T).initCapacity(allocator, bytes.slice.len);
 473    errdefer buf.deinit(allocator);
 474
 475    var iterative_parser = IterativeStringParser.init(bytes, options);
 476
 477    while (try iterative_parser.next()) |parsed| {
 478        const c = parsed.codepoint;
 479        switch (literal_type) {
 480            .ascii => switch (options.output_code_page) {
 481                .windows1252 => {
 482                    if (parsed.from_escaped_integer) {
 483                        try buf.append(allocator, @truncate(c));
 484                    } else if (windows1252.bestFitFromCodepoint(c)) |best_fit| {
 485                        try buf.append(allocator, best_fit);
 486                    } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) {
 487                        try buf.append(allocator, '?');
 488                    } else {
 489                        try buf.appendSlice(allocator, "??");
 490                    }
 491                },
 492                .utf8 => {
 493                    var codepoint_to_encode = c;
 494                    if (parsed.from_escaped_integer) {
 495                        codepoint_to_encode = @as(T, @truncate(c));
 496                    }
 497                    const escaped_integer_outside_ascii_range = parsed.from_escaped_integer and codepoint_to_encode > 0x7F;
 498                    if (escaped_integer_outside_ascii_range or c == code_pages.Codepoint.invalid) {
 499                        codepoint_to_encode = '�';
 500                    }
 501                    var utf8_buf: [4]u8 = undefined;
 502                    const utf8_len = std.unicode.utf8Encode(codepoint_to_encode, &utf8_buf) catch unreachable;
 503                    try buf.appendSlice(allocator, utf8_buf[0..utf8_len]);
 504                },
 505            },
 506            .wide => {
 507                // Parsing any string type as a wide string is handled separately, see parseQuotedStringAsWideString
 508                std.debug.assert(iterative_parser.declared_string_type == .wide);
 509                if (parsed.from_escaped_integer) {
 510                    try buf.append(allocator, std.mem.nativeToLittle(u16, @truncate(c)));
 511                } else if (c == code_pages.Codepoint.invalid) {
 512                    try buf.append(allocator, std.mem.nativeToLittle(u16, '�'));
 513                } else if (c < 0x10000) {
 514                    const short: u16 = @intCast(c);
 515                    try buf.append(allocator, std.mem.nativeToLittle(u16, short));
 516                } else {
 517                    if (!parsed.escaped_surrogate_pair) {
 518                        const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800;
 519                        try buf.append(allocator, std.mem.nativeToLittle(u16, high));
 520                    }
 521                    const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00;
 522                    try buf.append(allocator, std.mem.nativeToLittle(u16, low));
 523                }
 524            },
 525        }
 526    }
 527
 528    if (literal_type == .wide) {
 529        return buf.toOwnedSliceSentinel(allocator, 0);
 530    } else {
 531        return buf.toOwnedSlice(allocator);
 532    }
 533}
 534
 535pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
 536    std.debug.assert(bytes.slice.len >= 2); // ""
 537    return parseQuotedString(.ascii, allocator, bytes, options);
 538}
 539
 540pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
 541    std.debug.assert(bytes.slice.len >= 3); // L""
 542    return parseQuotedString(.wide, allocator, bytes, options);
 543}
 544
 545/// Parses any string type into a wide string.
 546/// If the string is declared as a wide string (L""), then it is handled normally.
 547/// Otherwise, things are fairly normal with the exception of escaped integers.
 548/// Escaped integers are handled by:
 549/// - Truncating the escape to a u8
 550/// - Reinterpeting the u8 as a byte from the *output* code page
 551/// - Outputting the codepoint that corresponds to the interpreted byte, or � if no such
 552///   interpretation is possible
 553/// For example, if the code page is UTF-8, then while \x80 is a valid start byte, it's
 554/// interpreted as a single byte, so it ends up being seen as invalid and � is outputted.
 555/// If the code page is Windows-1252, then \x80 is interpreted to be € which has the
 556/// codepoint U+20AC, so the UTF-16 encoding of U+20AC is outputted.
 557pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
 558    std.debug.assert(bytes.slice.len >= 2); // ""
 559
 560    if (bytes.slice[0] == 'l' or bytes.slice[0] == 'L') {
 561        return parseQuotedWideString(allocator, bytes, options);
 562    }
 563
 564    // Note: We're only handling the case of parsing an ASCII string into a wide string from here on out.
 565    // TODO: The logic below is similar to that in AcceleratorKeyCodepointTranslator, might be worth merging the two
 566
 567    var buf = try std.ArrayList(u16).initCapacity(allocator, bytes.slice.len);
 568    errdefer buf.deinit(allocator);
 569
 570    var iterative_parser = IterativeStringParser.init(bytes, options);
 571
 572    while (try iterative_parser.next()) |parsed| {
 573        const c = parsed.codepoint;
 574        if (parsed.from_escaped_integer) {
 575            std.debug.assert(c != code_pages.Codepoint.invalid);
 576            const byte_to_interpret: u8 = @truncate(c);
 577            const code_unit_to_encode: u16 = switch (options.output_code_page) {
 578                .windows1252 => windows1252.toCodepoint(byte_to_interpret),
 579                .utf8 => if (byte_to_interpret > 0x7F) '�' else byte_to_interpret,
 580            };
 581            try buf.append(allocator, std.mem.nativeToLittle(u16, code_unit_to_encode));
 582        } else if (c == code_pages.Codepoint.invalid) {
 583            try buf.append(allocator, std.mem.nativeToLittle(u16, '�'));
 584        } else if (c < 0x10000) {
 585            const short: u16 = @intCast(c);
 586            try buf.append(allocator, std.mem.nativeToLittle(u16, short));
 587        } else {
 588            if (!parsed.escaped_surrogate_pair) {
 589                const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800;
 590                try buf.append(allocator, std.mem.nativeToLittle(u16, high));
 591            }
 592            const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00;
 593            try buf.append(allocator, std.mem.nativeToLittle(u16, low));
 594        }
 595    }
 596
 597    return buf.toOwnedSliceSentinel(allocator, 0);
 598}
 599
 600test "parse quoted ascii string" {
 601    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
 602    defer arena_allocator.deinit();
 603    const arena = arena_allocator.allocator();
 604
 605    try std.testing.expectEqualSlices(u8, "hello", try parseQuotedAsciiString(arena, .{
 606        .slice =
 607        \\"hello"
 608        ,
 609        .code_page = .windows1252,
 610    }, .{
 611        .output_code_page = .windows1252,
 612    }));
 613    // hex with 0 digits
 614    try std.testing.expectEqualSlices(u8, "\x00", try parseQuotedAsciiString(arena, .{
 615        .slice =
 616        \\"\x"
 617        ,
 618        .code_page = .windows1252,
 619    }, .{
 620        .output_code_page = .windows1252,
 621    }));
 622    // hex max of 2 digits
 623    try std.testing.expectEqualSlices(u8, "\xFFf", try parseQuotedAsciiString(arena, .{
 624        .slice =
 625        \\"\XfFf"
 626        ,
 627        .code_page = .windows1252,
 628    }, .{
 629        .output_code_page = .windows1252,
 630    }));
 631    // octal with invalid octal digit
 632    try std.testing.expectEqualSlices(u8, "\x019", try parseQuotedAsciiString(arena, .{
 633        .slice =
 634        \\"\19"
 635        ,
 636        .code_page = .windows1252,
 637    }, .{
 638        .output_code_page = .windows1252,
 639    }));
 640    // escaped quotes
 641    try std.testing.expectEqualSlices(u8, " \" ", try parseQuotedAsciiString(arena, .{
 642        .slice =
 643        \\" "" "
 644        ,
 645        .code_page = .windows1252,
 646    }, .{
 647        .output_code_page = .windows1252,
 648    }));
 649    // backslash right before escaped quotes
 650    try std.testing.expectEqualSlices(u8, "\"", try parseQuotedAsciiString(arena, .{
 651        .slice =
 652        \\"\"""
 653        ,
 654        .code_page = .windows1252,
 655    }, .{
 656        .output_code_page = .windows1252,
 657    }));
 658    // octal overflow
 659    try std.testing.expectEqualSlices(u8, "\x01", try parseQuotedAsciiString(arena, .{
 660        .slice =
 661        \\"\401"
 662        ,
 663        .code_page = .windows1252,
 664    }, .{
 665        .output_code_page = .windows1252,
 666    }));
 667    // escapes
 668    try std.testing.expectEqualSlices(u8, "\x08\n\r\t\\", try parseQuotedAsciiString(arena, .{
 669        .slice =
 670        \\"\a\n\r\t\\"
 671        ,
 672        .code_page = .windows1252,
 673    }, .{
 674        .output_code_page = .windows1252,
 675    }));
 676    // uppercase escapes
 677    try std.testing.expectEqualSlices(u8, "\x08\\N\\R\t\\", try parseQuotedAsciiString(arena, .{
 678        .slice =
 679        \\"\A\N\R\T\\"
 680        ,
 681        .code_page = .windows1252,
 682    }, .{
 683        .output_code_page = .windows1252,
 684    }));
 685    // backslash on its own
 686    try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(arena, .{
 687        .slice =
 688        \\"\"
 689        ,
 690        .code_page = .windows1252,
 691    }, .{
 692        .output_code_page = .windows1252,
 693    }));
 694    // unrecognized escapes
 695    try std.testing.expectEqualSlices(u8, "\\b", try parseQuotedAsciiString(arena, .{
 696        .slice =
 697        \\"\b"
 698        ,
 699        .code_page = .windows1252,
 700    }, .{
 701        .output_code_page = .windows1252,
 702    }));
 703    // escaped carriage returns
 704    try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(
 705        arena,
 706        .{ .slice = "\"\\\r\r\r\r\r\"", .code_page = .windows1252 },
 707        .{ .output_code_page = .windows1252 },
 708    ));
 709    // escaped newlines
 710    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
 711        arena,
 712        .{ .slice = "\"\\\n\n\n\n\n\"", .code_page = .windows1252 },
 713        .{ .output_code_page = .windows1252 },
 714    ));
 715    // escaped CRLF pairs
 716    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
 717        arena,
 718        .{ .slice = "\"\\\r\n\r\n\r\n\r\n\r\n\"", .code_page = .windows1252 },
 719        .{ .output_code_page = .windows1252 },
 720    ));
 721    // escaped newlines with other whitespace
 722    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
 723        arena,
 724        .{ .slice = "\"\\\n    \t\r\n \r\t\n  \t\"", .code_page = .windows1252 },
 725        .{ .output_code_page = .windows1252 },
 726    ));
 727    // literal tab characters get converted to spaces (dependent on source file columns)
 728    try std.testing.expectEqualSlices(u8, "       ", try parseQuotedAsciiString(
 729        arena,
 730        .{ .slice = "\"\t\"", .code_page = .windows1252 },
 731        .{ .output_code_page = .windows1252 },
 732    ));
 733    try std.testing.expectEqualSlices(u8, "abc    ", try parseQuotedAsciiString(
 734        arena,
 735        .{ .slice = "\"abc\t\"", .code_page = .windows1252 },
 736        .{ .output_code_page = .windows1252 },
 737    ));
 738    try std.testing.expectEqualSlices(u8, "abcdefg        ", try parseQuotedAsciiString(
 739        arena,
 740        .{ .slice = "\"abcdefg\t\"", .code_page = .windows1252 },
 741        .{ .output_code_page = .windows1252 },
 742    ));
 743    try std.testing.expectEqualSlices(u8, "\\      ", try parseQuotedAsciiString(
 744        arena,
 745        .{ .slice = "\"\\\t\"", .code_page = .windows1252 },
 746        .{ .output_code_page = .windows1252 },
 747    ));
 748    // literal CR's get dropped
 749    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
 750        arena,
 751        .{ .slice = "\"\r\r\r\r\r\"", .code_page = .windows1252 },
 752        .{ .output_code_page = .windows1252 },
 753    ));
 754    // contiguous newlines and whitespace get collapsed to <space><newline>
 755    try std.testing.expectEqualSlices(u8, " \n", try parseQuotedAsciiString(
 756        arena,
 757        .{ .slice = "\"\n\r\r  \r\n \t  \"", .code_page = .windows1252 },
 758        .{ .output_code_page = .windows1252 },
 759    ));
 760}
 761
 762test "parse quoted ascii string with utf8 code page" {
 763    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
 764    defer arena_allocator.deinit();
 765    const arena = arena_allocator.allocator();
 766
 767    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
 768        arena,
 769        .{ .slice = "\"\"", .code_page = .utf8 },
 770        .{ .output_code_page = .windows1252 },
 771    ));
 772    // Codepoints that don't have a Windows-1252 representation get converted to ?
 773    try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString(
 774        arena,
 775        .{ .slice = "\"кириллица\"", .code_page = .utf8 },
 776        .{ .output_code_page = .windows1252 },
 777    ));
 778    // Codepoints that have a best fit mapping get converted accordingly,
 779    // these are box drawing codepoints
 780    try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString(
 781        arena,
 782        .{ .slice = "\"┌─┐\"", .code_page = .utf8 },
 783        .{ .output_code_page = .windows1252 },
 784    ));
 785    // Invalid UTF-8 gets converted to ? depending on well-formedness
 786    try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString(
 787        arena,
 788        .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
 789        .{ .output_code_page = .windows1252 },
 790    ));
 791    // Codepoints that would require a UTF-16 surrogate pair get converted to ??
 792    try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString(
 793        arena,
 794        .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
 795        .{ .output_code_page = .windows1252 },
 796    ));
 797
 798    // Output code page changes how invalid UTF-8 gets converted, since it
 799    // now encodes the result as UTF-8 so it can write replacement characters.
 800    try std.testing.expectEqualSlices(u8, "����", try parseQuotedAsciiString(
 801        arena,
 802        .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
 803        .{ .output_code_page = .utf8 },
 804    ));
 805    try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString(
 806        arena,
 807        .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
 808        .{ .output_code_page = .utf8 },
 809    ));
 810
 811    // This used to cause integer overflow when reconsuming the 4-byte long codepoint
 812    // after the escaped CRLF pair.
 813    try std.testing.expectEqualSlices(u8, "\u{10348}", try parseQuotedAsciiString(
 814        arena,
 815        .{ .slice = "\"\\\r\n\u{10348}\"", .code_page = .utf8 },
 816        .{ .output_code_page = .utf8 },
 817    ));
 818}
 819
 820test "parse quoted string with different input/output code pages" {
 821    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
 822    defer arena_allocator.deinit();
 823    const arena = arena_allocator.allocator();
 824
 825    try std.testing.expectEqualSlices(u8, "€���\x60\x7F", try parseQuotedAsciiString(
 826        arena,
 827        .{ .slice = "\"\x80\\x8a\\600\\612\\540\\577\"", .code_page = .windows1252 },
 828        .{ .output_code_page = .utf8 },
 829    ));
 830}
 831
 832test "parse quoted wide string" {
 833    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
 834    defer arena_allocator.deinit();
 835    const arena = arena_allocator.allocator();
 836
 837    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("hello"), try parseQuotedWideString(arena, .{
 838        .slice =
 839        \\L"hello"
 840        ,
 841        .code_page = .windows1252,
 842    }, .{
 843        .output_code_page = .windows1252,
 844    }));
 845    // hex with 0 digits
 846    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{
 847        .slice =
 848        \\L"\x"
 849        ,
 850        .code_page = .windows1252,
 851    }, .{
 852        .output_code_page = .windows1252,
 853    }));
 854    // hex max of 4 digits
 855    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0xFFFF), std.mem.nativeToLittle(u16, 'f') }, try parseQuotedWideString(arena, .{
 856        .slice =
 857        \\L"\XfFfFf"
 858        ,
 859        .code_page = .windows1252,
 860    }, .{
 861        .output_code_page = .windows1252,
 862    }));
 863    // octal max of 7 digits
 864    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x9493), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '3') }, try parseQuotedWideString(arena, .{
 865        .slice =
 866        \\L"\111222333"
 867        ,
 868        .code_page = .windows1252,
 869    }, .{
 870        .output_code_page = .windows1252,
 871    }));
 872    // octal overflow
 873    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0xFF01)}, try parseQuotedWideString(arena, .{
 874        .slice =
 875        \\L"\777401"
 876        ,
 877        .code_page = .windows1252,
 878    }, .{
 879        .output_code_page = .windows1252,
 880    }));
 881    // literal tab characters get converted to spaces (dependent on source file columns)
 882    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg       "), try parseQuotedWideString(
 883        arena,
 884        .{ .slice = "L\"abcdefg\t\"", .code_page = .windows1252 },
 885        .{ .output_code_page = .windows1252 },
 886    ));
 887    // Windows-1252 conversion
 888    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try parseQuotedWideString(
 889        arena,
 890        .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 },
 891        .{ .output_code_page = .windows1252 },
 892    ));
 893    // Invalid escape sequences are skipped
 894    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString(
 895        arena,
 896        .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
 897        .{ .output_code_page = .windows1252 },
 898    ));
 899}
 900
 901test "parse quoted wide string with utf8 code page" {
 902    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
 903    defer arena_allocator.deinit();
 904    const arena = arena_allocator.allocator();
 905
 906    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString(
 907        arena,
 908        .{ .slice = "L\"\"", .code_page = .utf8 },
 909        .{ .output_code_page = .windows1252 },
 910    ));
 911    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString(
 912        arena,
 913        .{ .slice = "L\"кириллица\"", .code_page = .utf8 },
 914        .{ .output_code_page = .windows1252 },
 915    ));
 916    // Invalid UTF-8 gets converted to � depending on well-formedness
 917    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("����"), try parseQuotedWideString(
 918        arena,
 919        .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
 920        .{ .output_code_page = .windows1252 },
 921    ));
 922}
 923
 924test "parse quoted ascii string as wide string" {
 925    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
 926    defer arena_allocator.deinit();
 927    const arena = arena_allocator.allocator();
 928
 929    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedStringAsWideString(
 930        arena,
 931        .{ .slice = "\"кириллица\"", .code_page = .utf8 },
 932        .{ .output_code_page = .windows1252 },
 933    ));
 934    // Whether or not invalid escapes are skipped is still determined by the L prefix
 935    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\\H"), try parseQuotedStringAsWideString(
 936        arena,
 937        .{ .slice = "\"\\H\"", .code_page = .windows1252 },
 938        .{ .output_code_page = .windows1252 },
 939    ));
 940    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedStringAsWideString(
 941        arena,
 942        .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
 943        .{ .output_code_page = .windows1252 },
 944    ));
 945    // Maximum escape sequence value is also determined by the L prefix
 946    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x12), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '4') }, try parseQuotedStringAsWideString(
 947        arena,
 948        .{ .slice = "\"\\x1234\"", .code_page = .windows1252 },
 949        .{ .output_code_page = .windows1252 },
 950    ));
 951    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0x1234)}, try parseQuotedStringAsWideString(
 952        arena,
 953        .{ .slice = "L\"\\x1234\"", .code_page = .windows1252 },
 954        .{ .output_code_page = .windows1252 },
 955    ));
 956}
 957
 958pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
 959    // 0 => 8, 1 => 7, 2 => 6, 3 => 5, 4 => 4
 960    // 5 => 3, 6 => 2, 7 => 1, 8 => 8
 961    return tab_columns - (column % tab_columns);
 962}
 963
 964pub fn columnWidth(cur_column: usize, c: u8, tab_columns: usize) usize {
 965    return switch (c) {
 966        '\t' => columnsUntilTabStop(cur_column, tab_columns),
 967        else => 1,
 968    };
 969}
 970
 971pub const Number = struct {
 972    value: u32,
 973    is_long: bool = false,
 974
 975    pub fn asWord(self: Number) u16 {
 976        return @truncate(self.value);
 977    }
 978
 979    pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number {
 980        const result = switch (operator_char) {
 981            '-' => lhs.value -% rhs.value,
 982            '+' => lhs.value +% rhs.value,
 983            '|' => lhs.value | rhs.value,
 984            '&' => lhs.value & rhs.value,
 985            else => unreachable, // invalid operator, this would be a lexer/parser bug
 986        };
 987        return .{
 988            .value = result,
 989            .is_long = lhs.is_long or rhs.is_long,
 990        };
 991    }
 992};
 993
 994/// Assumes that number literals normally rejected by RC's preprocessor
 995/// are similarly rejected before being parsed.
 996///
 997/// Relevant RC preprocessor errors:
 998///  RC2021: expected exponent value, not '<digit>'
 999///   example that is rejected: 1e1
1000///   example that is accepted: 1ea
1001///   (this function will parse the two examples above the same)
1002pub fn parseNumberLiteral(bytes: SourceBytes) Number {
1003    std.debug.assert(bytes.slice.len > 0);
1004    var result = Number{ .value = 0, .is_long = false };
1005    var radix: u8 = 10;
1006    var buf = bytes.slice;
1007
1008    const Prefix = enum { none, minus, complement };
1009    var prefix: Prefix = .none;
1010    switch (buf[0]) {
1011        '-' => {
1012            prefix = .minus;
1013            buf = buf[1..];
1014        },
1015        '~' => {
1016            prefix = .complement;
1017            buf = buf[1..];
1018        },
1019        else => {},
1020    }
1021
1022    if (buf.len > 2 and buf[0] == '0') {
1023        switch (buf[1]) {
1024            'o' => { // octal radix prefix is case-sensitive
1025                radix = 8;
1026                buf = buf[2..];
1027            },
1028            'x', 'X' => {
1029                radix = 16;
1030                buf = buf[2..];
1031            },
1032            else => {},
1033        }
1034    }
1035
1036    var i: usize = 0;
1037    while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) {
1038        const c = codepoint.value;
1039        if (c == 'L' or c == 'l') {
1040            result.is_long = true;
1041            break;
1042        }
1043        const digit = switch (c) {
1044            // On invalid digit for the radix, just stop parsing but don't fail
1045            0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch break,
1046            else => break,
1047        };
1048
1049        if (result.value != 0) {
1050            result.value *%= radix;
1051        }
1052        result.value +%= digit;
1053    }
1054
1055    switch (prefix) {
1056        .none => {},
1057        .minus => result.value = 0 -% result.value,
1058        .complement => result.value = ~result.value,
1059    }
1060
1061    return result;
1062}
1063
test "parse number literal" {
    const Case = struct {
        text: []const u8,
        want: Number,
    };
    const cases = [_]Case{
        .{ .text = "0", .want = .{ .value = 0, .is_long = false } },
        .{ .text = "1", .want = .{ .value = 1, .is_long = false } },
        .{ .text = "1L", .want = .{ .value = 1, .is_long = true } },
        .{ .text = "1l", .want = .{ .value = 1, .is_long = true } },
        .{ .text = "1garbageL", .want = .{ .value = 1, .is_long = false } },
        .{ .text = "4294967295", .want = .{ .value = 4294967295, .is_long = false } },
        .{ .text = "4294967296", .want = .{ .value = 0, .is_long = false } },
        .{ .text = "4294967297L", .want = .{ .value = 1, .is_long = true } },

        // hexadecimal
        .{ .text = "0x20", .want = .{ .value = 0x20, .is_long = false } },
        .{ .text = "0x2AL", .want = .{ .value = 0x2A, .is_long = true } },
        .{ .text = "0x2aL", .want = .{ .value = 0x2A, .is_long = true } },
        .{ .text = "0x2aL", .want = .{ .value = 0x2A, .is_long = true } },

        // octal
        .{ .text = "0o20", .want = .{ .value = 0o20, .is_long = false } },
        .{ .text = "0o20L", .want = .{ .value = 0o20, .is_long = true } },
        .{ .text = "0o29", .want = .{ .value = 0o2, .is_long = false } },
        .{ .text = "0O29", .want = .{ .value = 0, .is_long = false } },

        // minus and complement prefixes
        .{ .text = "-1", .want = .{ .value = 0xFFFFFFFF, .is_long = false } },
        .{ .text = "~1", .want = .{ .value = 0xFFFFFFFE, .is_long = false } },
        .{ .text = "-4294967297L", .want = .{ .value = 0xFFFFFFFF, .is_long = true } },
        .{ .text = "~4294967297L", .want = .{ .value = 0xFFFFFFFE, .is_long = true } },
        .{ .text = "-0X3", .want = .{ .value = 0xFFFFFFFD, .is_long = false } },

        // anything after L is ignored
        .{ .text = "0x2aL5", .want = .{ .value = 0x2A, .is_long = true } },
    };
    for (cases) |case| {
        const got = parseNumberLiteral(.{ .slice = case.text, .code_page = .windows1252 });
        try std.testing.expectEqual(case.want, got);
    }

    // can handle any length of number, wraps on overflow appropriately
    const big_overflow = parseNumberLiteral(.{
        .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001",
        .code_page = .windows1252,
    });
    try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow);
    try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord());
}