Commit 81a61c8ecd

Ryan Liptak <squeek502@hotmail.com>
2023-10-19 03:00:50
Sync resinator with upstream and fix INCLUDE env var handling
The INCLUDE variable being used during `.rc` preprocessing was an accidental regression in https://github.com/ziglang/zig/pull/17412. Closes #17585. resinator changes: source_mapping: Protect against NUL bytes in #line filenames lex: Avoid recalculating column on every tab stop within string literals Proper error handling for failing to open cwd instead of `catch unreachable` Use platform-specific delimiter for INCLUDE env var parsing
1 parent 32bc077
src/resinator/compile.zig
@@ -28,7 +28,6 @@ const windows1252 = @import("windows1252.zig");
 const lang = @import("lang.zig");
 const code_pages = @import("code_pages.zig");
 const errors = @import("errors.zig");
-const introspect = @import("../introspect.zig");
 
 pub const CompileOptions = struct {
     cwd: std.fs.Dir,
@@ -89,10 +88,23 @@ pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, option
         }
     }
     // Re-open the passed in cwd since we want to be able to close it (std.fs.cwd() shouldn't be closed)
-    // `catch unreachable` since `options.cwd` is expected to be a valid dir handle, so opening
-    // a new handle to it should be fine as well.
-    // TODO: Maybe catch and return an error instead
-    const cwd_dir = options.cwd.openDir(".", .{}) catch @panic("unable to open dir");
+    const cwd_dir = options.cwd.openDir(".", .{}) catch |err| {
+        try options.diagnostics.append(.{
+            .err = .failed_to_open_cwd,
+            .token = .{
+                .id = .invalid,
+                .start = 0,
+                .end = 0,
+                .line_number = 1,
+            },
+            .print_source_line = false,
+            .extra = .{ .file_open_error = .{
+                .err = ErrorDetails.FileOpenError.enumFromError(err),
+                .filename_string_index = undefined,
+            } },
+        });
+        return error.CompileError;
+    };
     try search_dirs.append(.{ .dir = cwd_dir, .path = null });
     for (options.extra_include_paths) |extra_include_path| {
         var dir = openSearchPathDir(options.cwd, extra_include_path) catch {
@@ -111,11 +123,16 @@ pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, option
         try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, system_include_path) });
     }
     if (!options.ignore_include_env_var) {
-        const INCLUDE = (introspect.EnvVar.INCLUDE.get(allocator) catch @panic("OOM")) orelse "";
+        const INCLUDE = std.process.getEnvVarOwned(allocator, "INCLUDE") catch "";
         defer allocator.free(INCLUDE);
 
-        // TODO: Should this be platform-specific? How does windres/llvm-rc handle this (if at all)?
-        var it = std.mem.tokenize(u8, INCLUDE, ";");
+        // The only precedence here is llvm-rc which also uses the platform-specific
+        // delimiter. There's no precedence set by `rc.exe` since it's Windows-only.
+        const delimiter = switch (builtin.os.tag) {
+            .windows => ';',
+            else => ':',
+        };
+        var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
         while (it.next()) |search_path| {
             var dir = openSearchPathDir(options.cwd, search_path) catch continue;
             errdefer dir.close();
src/resinator/errors.zig
@@ -395,6 +395,10 @@ pub const ErrorDetails = struct {
         // General (used in various places)
         /// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation
         win32_non_ascii_ordinal,
+
+        // Initialization
+        /// `file_open_error` is populated, but `filename_string_index` is not
+        failed_to_open_cwd,
     };
 
     pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void {
@@ -766,6 +770,9 @@ pub const ErrorDetails = struct {
                 .note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}),
                 .hint => return,
             },
+            .failed_to_open_cwd => {
+                try writer.print("failed to open CWD for compilation: {s}", .{@tagName(self.extra.file_open_error.err)});
+            },
         }
     }
 
@@ -804,7 +811,8 @@ pub const ErrorDetails = struct {
                 .point_offset = self.token.start - source_line_start,
                 .after_len = after: {
                     const end = @min(source_line_end, if (self.token_span_end) |span_end| span_end.end else self.token.end);
-                    if (end == self.token.start) break :after 0;
+                    // end may be less than start when pointing to EOF
+                    if (end <= self.token.start) break :after 0;
                     break :after end - self.token.start - 1;
                 },
             },
@@ -816,13 +824,18 @@ pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_con
     if (err_details.type == .hint) return;
 
     const source_line_start = err_details.token.getLineStart(source);
-    const column = err_details.token.calculateColumn(source, 1, source_line_start);
-
-    // var counting_writer_container = std.io.countingWriter(writer);
-    // const counting_writer = counting_writer_container.writer();
-
-    const corresponding_span: ?SourceMappings.SourceSpan = if (source_mappings) |mappings| mappings.get(err_details.token.line_number) else null;
-    const corresponding_file: ?[]const u8 = if (source_mappings) |mappings| mappings.files.get(corresponding_span.?.filename_offset) else null;
+    // Treat tab stops as 1 column wide for error display purposes,
+    // and add one to get a 1-based column
+    const column = err_details.token.calculateColumn(source, 1, source_line_start) + 1;
+
+    const corresponding_span: ?SourceMappings.SourceSpan = if (source_mappings != null and source_mappings.?.has(err_details.token.line_number))
+        source_mappings.?.get(err_details.token.line_number)
+    else
+        null;
+    const corresponding_file: ?[]const u8 = if (source_mappings != null and corresponding_span != null)
+        source_mappings.?.files.get(corresponding_span.?.filename_offset)
+    else
+        null;
 
     const err_line = if (corresponding_span) |span| span.start_line else err_details.token.line_number;
 
@@ -897,7 +910,7 @@ pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_con
     try writer.writeByte('\n');
     try tty_config.setColor(writer, .reset);
 
-    if (source_mappings) |_| {
+    if (corresponding_span != null and corresponding_file != null) {
         var corresponding_lines = try CorrespondingLines.init(allocator, cwd, err_details, source_line_for_display_buf.items, corresponding_span.?, corresponding_file.?);
         defer corresponding_lines.deinit(allocator);
 
src/resinator/lex.zig
@@ -6,7 +6,7 @@
 
 const std = @import("std");
 const ErrorDetails = @import("errors.zig").ErrorDetails;
-const columnsUntilTabStop = @import("literals.zig").columnsUntilTabStop;
+const columnWidth = @import("literals.zig").columnWidth;
 const code_pages = @import("code_pages.zig");
 const CodePage = code_pages.CodePage;
 const SourceMappings = @import("source_mapping.zig").SourceMappings;
@@ -69,17 +69,14 @@ pub const Token = struct {
         };
     }
 
+    /// Returns 0-based column
     pub fn calculateColumn(token: Token, source: []const u8, tab_columns: usize, maybe_line_start: ?usize) usize {
         const line_start = maybe_line_start orelse token.getLineStart(source);
 
         var i: usize = line_start;
         var column: usize = 0;
         while (i < token.start) : (i += 1) {
-            const c = source[i];
-            switch (c) {
-                '\t' => column += columnsUntilTabStop(column, tab_columns),
-                else => column += 1,
-            }
+            column += columnWidth(column, source[i], tab_columns);
         }
         return column;
     }
@@ -109,6 +106,7 @@ pub const Token = struct {
         const line_start = maybe_line_start orelse token.getLineStart(source);
 
         var line_end = line_start + 1;
+        if (line_end >= source.len or source[line_end] == '\n') return source[line_start..line_start];
         while (line_end < source.len and source[line_end] != '\n') : (line_end += 1) {}
         while (line_end > 0 and source[line_end - 1] == '\r') : (line_end -= 1) {}
 
@@ -404,6 +402,9 @@ pub const Lexer = struct {
         // TODO: Understand this more, bring it more in line with how the Win32 limits work.
         //       Alternatively, do something that makes more sense but may be more permissive.
         var string_literal_length: usize = 0;
+        // Keeping track of the string literal column prevents pathological edge cases when
+        // there are tons of tab stop characters within a string literal.
+        var string_literal_column: usize = 0;
         var string_literal_collapsing_whitespace: bool = false;
         var still_could_have_exponent: bool = true;
         var exponent_index: ?usize = null;
@@ -471,6 +472,14 @@ pub const Lexer = struct {
                         self.at_start_of_line = false;
                         string_literal_collapsing_whitespace = false;
                         string_literal_length = 0;
+
+                        var dummy_token = Token{
+                            .start = self.index,
+                            .end = self.index,
+                            .line_number = self.line_handler.line_number,
+                            .id = .invalid,
+                        };
+                        string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
                     },
                     '+', '&', '|' => {
                         self.index += 1;
@@ -618,6 +627,14 @@ pub const Lexer = struct {
                         state = .quoted_wide_string;
                         string_literal_collapsing_whitespace = false;
                         string_literal_length = 0;
+
+                        var dummy_token = Token{
+                            .start = self.index,
+                            .end = self.index,
+                            .line_number = self.line_handler.line_number,
+                            .id = .invalid,
+                        };
+                        string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
                     },
                     else => {
                         state = .literal;
@@ -695,18 +712,23 @@ pub const Lexer = struct {
                 },
                 .quoted_ascii_string, .quoted_wide_string => switch (c) {
                     '"' => {
+                        string_literal_column += 1;
                         state = if (state == .quoted_ascii_string) .quoted_ascii_string_maybe_end else .quoted_wide_string_maybe_end;
                     },
                     '\\' => {
+                        string_literal_length += 1;
+                        string_literal_column += 1;
                         state = if (state == .quoted_ascii_string) .quoted_ascii_string_escape else .quoted_wide_string_escape;
                     },
                     '\r' => {
+                        string_literal_column = 0;
                         // \r doesn't count towards string literal length
 
                         // Increment line number but don't affect the result token's line number
                         _ = self.incrementLineNumber();
                     },
                     '\n' => {
+                        string_literal_column = 0;
                         // first \n expands to <space><\n>
                         if (!string_literal_collapsing_whitespace) {
                             string_literal_length += 2;
@@ -720,33 +742,17 @@ pub const Lexer = struct {
                     // only \t, space, Vertical Tab, and Form Feed count as whitespace when collapsing
                     '\t', ' ', '\x0b', '\x0c' => {
                         if (!string_literal_collapsing_whitespace) {
-                            if (c == '\t') {
-                                // Literal tab characters are counted as the number of space characters
-                                // needed to reach the next 8-column tab stop.
-                                //
-                                // This implemention is ineffecient but hopefully it's enough of an
-                                // edge case that it doesn't matter too much. Literal tab characters in
-                                // string literals being replaced by a variable number of spaces depending
-                                // on which column the tab character is located in the source .rc file seems
-                                // like it has extremely limited use-cases, so it seems unlikely that it's used
-                                // in real .rc files.
-                                var dummy_token = Token{
-                                    .start = self.index,
-                                    .end = self.index,
-                                    .line_number = self.line_handler.line_number,
-                                    .id = .invalid,
-                                };
-                                dummy_token.start = self.index;
-                                const current_column = dummy_token.calculateColumn(self.buffer, 8, null);
-                                string_literal_length += columnsUntilTabStop(current_column, 8);
-                            } else {
-                                string_literal_length += 1;
-                            }
+                            // Literal tab characters are counted as the number of space characters
+                            // needed to reach the next 8-column tab stop.
+                            const width = columnWidth(string_literal_column, @intCast(c), 8);
+                            string_literal_length += width;
+                            string_literal_column += width;
                         }
                     },
                     else => {
                         string_literal_collapsing_whitespace = false;
                         string_literal_length += 1;
+                        string_literal_column += 1;
                     },
                 },
                 .quoted_ascii_string_escape, .quoted_wide_string_escape => switch (c) {
@@ -760,14 +766,19 @@ pub const Lexer = struct {
                         return error.FoundCStyleEscapedQuote;
                     },
                     else => {
+                        string_literal_length += 1;
+                        string_literal_column += 1;
                         state = if (state == .quoted_ascii_string_escape) .quoted_ascii_string else .quoted_wide_string;
                     },
                 },
                 .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => switch (c) {
                     '"' => {
                         state = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
-                        // Escaped quotes only count as 1 char for string literal length checks,
-                        // so we don't increment string_literal_length here.
+                        // Escaped quotes count as 1 char for string literal length checks.
+                        // Since we did not increment on the first " (because it could have been
+                        // the end of the quoted string), we increment here
+                        string_literal_length += 1;
+                        string_literal_column += 1;
                     },
                     else => {
                         result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
@@ -807,6 +818,8 @@ pub const Lexer = struct {
             }
         }
 
+        result.end = self.index;
+
         if (result.id == .quoted_ascii_string or result.id == .quoted_wide_string) {
             if (string_literal_length > self.max_string_literal_codepoints) {
                 self.error_context_token = result;
@@ -814,7 +827,6 @@ pub const Lexer = struct {
             }
         }
 
-        result.end = self.index;
         return result;
     }
 
@@ -877,6 +889,7 @@ pub const Lexer = struct {
             .end = end,
             .line_number = self.line_handler.line_number,
         };
+        errdefer self.error_context_token = token;
         const full_command = self.buffer[start..end];
         var command = full_command;
 
@@ -901,7 +914,6 @@ pub const Lexer = struct {
         }
 
         if (command.len == 0 or command[0] != '(') {
-            self.error_context_token = token;
             return error.CodePagePragmaMissingLeftParen;
         }
         command = command[1..];
@@ -917,7 +929,6 @@ pub const Lexer = struct {
         }
 
         if (num_str.len == 0) {
-            self.error_context_token = token;
             return error.CodePagePragmaNotInteger;
         }
 
@@ -926,7 +937,6 @@ pub const Lexer = struct {
         }
 
         if (command.len == 0 or command[0] != ')') {
-            self.error_context_token = token;
             return error.CodePagePragmaMissingRightParen;
         }
 
@@ -943,41 +953,26 @@ pub const Lexer = struct {
             //
             // Instead of that, we just have a separate error specifically for overflow.
             const num = parseCodePageNum(num_str) catch |err| switch (err) {
-                error.InvalidCharacter => {
-                    self.error_context_token = token;
-                    return error.CodePagePragmaNotInteger;
-                },
-                error.Overflow => {
-                    self.error_context_token = token;
-                    return error.CodePagePragmaOverflow;
-                },
+                error.InvalidCharacter => return error.CodePagePragmaNotInteger,
+                error.Overflow => return error.CodePagePragmaOverflow,
             };
 
             // Anything that starts with 0 but does not resolve to 0 is treated as invalid, e.g. 01252
             if (num_str[0] == '0' and num != 0) {
-                self.error_context_token = token;
                 return error.CodePagePragmaInvalidCodePage;
             }
             // Anything that resolves to 0 is treated as 'not an integer' by the Win32 implementation.
             else if (num == 0) {
-                self.error_context_token = token;
                 return error.CodePagePragmaNotInteger;
             }
             // Anything above u16 max is not going to be found since our CodePage enum is backed by a u16.
             if (num > std.math.maxInt(u16)) {
-                self.error_context_token = token;
                 return error.CodePagePragmaInvalidCodePage;
             }
 
             break :code_page code_pages.CodePage.getByIdentifierEnsureSupported(@intCast(num)) catch |err| switch (err) {
-                error.InvalidCodePage => {
-                    self.error_context_token = token;
-                    return error.CodePagePragmaInvalidCodePage;
-                },
-                error.UnsupportedCodePage => {
-                    self.error_context_token = token;
-                    return error.CodePagePragmaUnsupportedCodePage;
-                },
+                error.InvalidCodePage => return error.CodePagePragmaInvalidCodePage,
+                error.UnsupportedCodePage => return error.CodePagePragmaUnsupportedCodePage,
             };
         };
 
@@ -990,7 +985,6 @@ pub const Lexer = struct {
         // to still be able to work correctly after this error is returned.
         if (self.source_mappings) |source_mappings| {
             if (!source_mappings.isRootFile(token.line_number)) {
-                self.error_context_token = token;
                 return error.CodePagePragmaInIncludedFile;
             }
         }
src/resinator/literals.zig
@@ -775,6 +775,13 @@ pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
     return tab_columns - (column % tab_columns);
 }
 
+pub fn columnWidth(cur_column: usize, c: u8, tab_columns: usize) usize {
+    return switch (c) {
+        '\t' => columnsUntilTabStop(cur_column, tab_columns),
+        else => 1,
+    };
+}
+
 pub const Number = struct {
     value: u32,
     is_long: bool = false,
src/resinator/preprocess.zig
@@ -1,7 +1,7 @@
 const std = @import("std");
+const builtin = @import("builtin");
 const Allocator = std.mem.Allocator;
 const cli = @import("cli.zig");
-const introspect = @import("../introspect.zig");
 
 pub const IncludeArgs = struct {
     clang_target: ?[]const u8 = null,
@@ -68,10 +68,15 @@ pub fn appendClangArgs(arena: Allocator, argv: *std.ArrayList([]const u8), optio
     }
 
     if (!options.ignore_include_env_var) {
-        const INCLUDE = (introspect.EnvVar.INCLUDE.get(arena) catch @panic("OOM")) orelse "";
+        const INCLUDE = std.process.getEnvVarOwned(arena, "INCLUDE") catch "";
 
-        // TODO: Should this be platform-specific? How does windres/llvm-rc handle this (if at all)?
-        var it = std.mem.tokenize(u8, INCLUDE, ";");
+        // The only precedence here is llvm-rc which also uses the platform-specific
+        // delimiter. There's no precedence set by `rc.exe` since it's Windows-only.
+        const delimiter = switch (builtin.os.tag) {
+            .windows => ';',
+            else => ':',
+        };
+        var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
         while (it.next()) |include_path| {
             try argv.append("-isystem");
             try argv.append(include_path);
src/resinator/source_mapping.zig
@@ -240,6 +240,9 @@ pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current
     };
     defer allocator.free(filename);
 
+    // \x00 bytes in the filename is incompatible with how StringTable works
+    if (std.mem.indexOfScalar(u8, filename, '\x00') != null) return;
+
     current_mapping.line_num = linenum;
     current_mapping.filename.clearRetainingCapacity();
     try current_mapping.filename.appendSlice(allocator, filename);
@@ -441,7 +444,7 @@ pub const SourceMappings = struct {
         ptr.* = span;
     }
 
-    pub fn has(self: *SourceMappings, line_num: usize) bool {
+    pub fn has(self: SourceMappings, line_num: usize) bool {
         return self.mapping.items.len >= line_num;
     }
 
src/Compilation.zig
@@ -4755,6 +4755,10 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
         };
         defer options.deinit();
 
+        // We never want to read the INCLUDE environment variable, so
+        // unconditionally set `ignore_include_env_var` to true
+        options.ignore_include_env_var = true;
+
         var argv = std.ArrayList([]const u8).init(comp.gpa);
         defer argv.deinit();
 
src/introspect.zig
@@ -157,8 +157,6 @@ pub const EnvVar = enum {
     NO_COLOR,
     XDG_CACHE_HOME,
     HOME,
-    /// https://github.com/ziglang/zig/issues/17585
-    INCLUDE,
 
     pub fn isSet(comptime ev: EnvVar) bool {
         return std.process.hasEnvVarConstant(@tagName(ev));