Commit 81a61c8ecd
Changed files (8)
src/resinator/compile.zig
@@ -28,7 +28,6 @@ const windows1252 = @import("windows1252.zig");
const lang = @import("lang.zig");
const code_pages = @import("code_pages.zig");
const errors = @import("errors.zig");
-const introspect = @import("../introspect.zig");
pub const CompileOptions = struct {
cwd: std.fs.Dir,
@@ -89,10 +88,23 @@ pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, option
}
}
// Re-open the passed in cwd since we want to be able to close it (std.fs.cwd() shouldn't be closed)
- // `catch unreachable` since `options.cwd` is expected to be a valid dir handle, so opening
- // a new handle to it should be fine as well.
- // TODO: Maybe catch and return an error instead
- const cwd_dir = options.cwd.openDir(".", .{}) catch @panic("unable to open dir");
+ const cwd_dir = options.cwd.openDir(".", .{}) catch |err| {
+ try options.diagnostics.append(.{
+ .err = .failed_to_open_cwd,
+ .token = .{
+ .id = .invalid,
+ .start = 0,
+ .end = 0,
+ .line_number = 1,
+ },
+ .print_source_line = false,
+ .extra = .{ .file_open_error = .{
+ .err = ErrorDetails.FileOpenError.enumFromError(err),
+ .filename_string_index = undefined,
+ } },
+ });
+ return error.CompileError;
+ };
try search_dirs.append(.{ .dir = cwd_dir, .path = null });
for (options.extra_include_paths) |extra_include_path| {
var dir = openSearchPathDir(options.cwd, extra_include_path) catch {
@@ -111,11 +123,16 @@ pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, option
try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, system_include_path) });
}
if (!options.ignore_include_env_var) {
- const INCLUDE = (introspect.EnvVar.INCLUDE.get(allocator) catch @panic("OOM")) orelse "";
+ const INCLUDE = std.process.getEnvVarOwned(allocator, "INCLUDE") catch "";
defer allocator.free(INCLUDE);
- // TODO: Should this be platform-specific? How does windres/llvm-rc handle this (if at all)?
- var it = std.mem.tokenize(u8, INCLUDE, ";");
+ // The only precedence here is llvm-rc which also uses the platform-specific
+ // delimiter. There's no precedence set by `rc.exe` since it's Windows-only.
+ const delimiter = switch (builtin.os.tag) {
+ .windows => ';',
+ else => ':',
+ };
+ var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
while (it.next()) |search_path| {
var dir = openSearchPathDir(options.cwd, search_path) catch continue;
errdefer dir.close();
src/resinator/errors.zig
@@ -395,6 +395,10 @@ pub const ErrorDetails = struct {
// General (used in various places)
/// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation
win32_non_ascii_ordinal,
+
+ // Initialization
+ /// `file_open_error` is populated, but `filename_string_index` is not
+ failed_to_open_cwd,
};
pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void {
@@ -766,6 +770,9 @@ pub const ErrorDetails = struct {
.note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}),
.hint => return,
},
+ .failed_to_open_cwd => {
+ try writer.print("failed to open CWD for compilation: {s}", .{@tagName(self.extra.file_open_error.err)});
+ },
}
}
@@ -804,7 +811,8 @@ pub const ErrorDetails = struct {
.point_offset = self.token.start - source_line_start,
.after_len = after: {
const end = @min(source_line_end, if (self.token_span_end) |span_end| span_end.end else self.token.end);
- if (end == self.token.start) break :after 0;
+ // end may be less than start when pointing to EOF
+ if (end <= self.token.start) break :after 0;
break :after end - self.token.start - 1;
},
},
@@ -816,13 +824,18 @@ pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_con
if (err_details.type == .hint) return;
const source_line_start = err_details.token.getLineStart(source);
- const column = err_details.token.calculateColumn(source, 1, source_line_start);
-
- // var counting_writer_container = std.io.countingWriter(writer);
- // const counting_writer = counting_writer_container.writer();
-
- const corresponding_span: ?SourceMappings.SourceSpan = if (source_mappings) |mappings| mappings.get(err_details.token.line_number) else null;
- const corresponding_file: ?[]const u8 = if (source_mappings) |mappings| mappings.files.get(corresponding_span.?.filename_offset) else null;
+ // Treat tab stops as 1 column wide for error display purposes,
+ // and add one to get a 1-based column
+ const column = err_details.token.calculateColumn(source, 1, source_line_start) + 1;
+
+ const corresponding_span: ?SourceMappings.SourceSpan = if (source_mappings != null and source_mappings.?.has(err_details.token.line_number))
+ source_mappings.?.get(err_details.token.line_number)
+ else
+ null;
+ const corresponding_file: ?[]const u8 = if (source_mappings != null and corresponding_span != null)
+ source_mappings.?.files.get(corresponding_span.?.filename_offset)
+ else
+ null;
const err_line = if (corresponding_span) |span| span.start_line else err_details.token.line_number;
@@ -897,7 +910,7 @@ pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_con
try writer.writeByte('\n');
try tty_config.setColor(writer, .reset);
- if (source_mappings) |_| {
+ if (corresponding_span != null and corresponding_file != null) {
var corresponding_lines = try CorrespondingLines.init(allocator, cwd, err_details, source_line_for_display_buf.items, corresponding_span.?, corresponding_file.?);
defer corresponding_lines.deinit(allocator);
src/resinator/lex.zig
@@ -6,7 +6,7 @@
const std = @import("std");
const ErrorDetails = @import("errors.zig").ErrorDetails;
-const columnsUntilTabStop = @import("literals.zig").columnsUntilTabStop;
+const columnWidth = @import("literals.zig").columnWidth;
const code_pages = @import("code_pages.zig");
const CodePage = code_pages.CodePage;
const SourceMappings = @import("source_mapping.zig").SourceMappings;
@@ -69,17 +69,14 @@ pub const Token = struct {
};
}
+ /// Returns 0-based column
pub fn calculateColumn(token: Token, source: []const u8, tab_columns: usize, maybe_line_start: ?usize) usize {
const line_start = maybe_line_start orelse token.getLineStart(source);
var i: usize = line_start;
var column: usize = 0;
while (i < token.start) : (i += 1) {
- const c = source[i];
- switch (c) {
- '\t' => column += columnsUntilTabStop(column, tab_columns),
- else => column += 1,
- }
+ column += columnWidth(column, source[i], tab_columns);
}
return column;
}
@@ -109,6 +106,7 @@ pub const Token = struct {
const line_start = maybe_line_start orelse token.getLineStart(source);
var line_end = line_start + 1;
+ if (line_end >= source.len or source[line_end] == '\n') return source[line_start..line_start];
while (line_end < source.len and source[line_end] != '\n') : (line_end += 1) {}
while (line_end > 0 and source[line_end - 1] == '\r') : (line_end -= 1) {}
@@ -404,6 +402,9 @@ pub const Lexer = struct {
// TODO: Understand this more, bring it more in line with how the Win32 limits work.
// Alternatively, do something that makes more sense but may be more permissive.
var string_literal_length: usize = 0;
+ // Keeping track of the string literal column prevents pathological edge cases when
+ // there are tons of tab stop characters within a string literal.
+ var string_literal_column: usize = 0;
var string_literal_collapsing_whitespace: bool = false;
var still_could_have_exponent: bool = true;
var exponent_index: ?usize = null;
@@ -471,6 +472,14 @@ pub const Lexer = struct {
self.at_start_of_line = false;
string_literal_collapsing_whitespace = false;
string_literal_length = 0;
+
+ var dummy_token = Token{
+ .start = self.index,
+ .end = self.index,
+ .line_number = self.line_handler.line_number,
+ .id = .invalid,
+ };
+ string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
},
'+', '&', '|' => {
self.index += 1;
@@ -618,6 +627,14 @@ pub const Lexer = struct {
state = .quoted_wide_string;
string_literal_collapsing_whitespace = false;
string_literal_length = 0;
+
+ var dummy_token = Token{
+ .start = self.index,
+ .end = self.index,
+ .line_number = self.line_handler.line_number,
+ .id = .invalid,
+ };
+ string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
},
else => {
state = .literal;
@@ -695,18 +712,23 @@ pub const Lexer = struct {
},
.quoted_ascii_string, .quoted_wide_string => switch (c) {
'"' => {
+ string_literal_column += 1;
state = if (state == .quoted_ascii_string) .quoted_ascii_string_maybe_end else .quoted_wide_string_maybe_end;
},
'\\' => {
+ string_literal_length += 1;
+ string_literal_column += 1;
state = if (state == .quoted_ascii_string) .quoted_ascii_string_escape else .quoted_wide_string_escape;
},
'\r' => {
+ string_literal_column = 0;
// \r doesn't count towards string literal length
// Increment line number but don't affect the result token's line number
_ = self.incrementLineNumber();
},
'\n' => {
+ string_literal_column = 0;
// first \n expands to <space><\n>
if (!string_literal_collapsing_whitespace) {
string_literal_length += 2;
@@ -720,33 +742,17 @@ pub const Lexer = struct {
// only \t, space, Vertical Tab, and Form Feed count as whitespace when collapsing
'\t', ' ', '\x0b', '\x0c' => {
if (!string_literal_collapsing_whitespace) {
- if (c == '\t') {
- // Literal tab characters are counted as the number of space characters
- // needed to reach the next 8-column tab stop.
- //
- // This implemention is ineffecient but hopefully it's enough of an
- // edge case that it doesn't matter too much. Literal tab characters in
- // string literals being replaced by a variable number of spaces depending
- // on which column the tab character is located in the source .rc file seems
- // like it has extremely limited use-cases, so it seems unlikely that it's used
- // in real .rc files.
- var dummy_token = Token{
- .start = self.index,
- .end = self.index,
- .line_number = self.line_handler.line_number,
- .id = .invalid,
- };
- dummy_token.start = self.index;
- const current_column = dummy_token.calculateColumn(self.buffer, 8, null);
- string_literal_length += columnsUntilTabStop(current_column, 8);
- } else {
- string_literal_length += 1;
- }
+ // Literal tab characters are counted as the number of space characters
+ // needed to reach the next 8-column tab stop.
+ const width = columnWidth(string_literal_column, @intCast(c), 8);
+ string_literal_length += width;
+ string_literal_column += width;
}
},
else => {
string_literal_collapsing_whitespace = false;
string_literal_length += 1;
+ string_literal_column += 1;
},
},
.quoted_ascii_string_escape, .quoted_wide_string_escape => switch (c) {
@@ -760,14 +766,19 @@ pub const Lexer = struct {
return error.FoundCStyleEscapedQuote;
},
else => {
+ string_literal_length += 1;
+ string_literal_column += 1;
state = if (state == .quoted_ascii_string_escape) .quoted_ascii_string else .quoted_wide_string;
},
},
.quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => switch (c) {
'"' => {
state = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
- // Escaped quotes only count as 1 char for string literal length checks,
- // so we don't increment string_literal_length here.
+ // Escaped quotes count as 1 char for string literal length checks.
+ // Since we did not increment on the first " (because it could have been
+ // the end of the quoted string), we increment here
+ string_literal_length += 1;
+ string_literal_column += 1;
},
else => {
result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
@@ -807,6 +818,8 @@ pub const Lexer = struct {
}
}
+ result.end = self.index;
+
if (result.id == .quoted_ascii_string or result.id == .quoted_wide_string) {
if (string_literal_length > self.max_string_literal_codepoints) {
self.error_context_token = result;
@@ -814,7 +827,6 @@ pub const Lexer = struct {
}
}
- result.end = self.index;
return result;
}
@@ -877,6 +889,7 @@ pub const Lexer = struct {
.end = end,
.line_number = self.line_handler.line_number,
};
+ errdefer self.error_context_token = token;
const full_command = self.buffer[start..end];
var command = full_command;
@@ -901,7 +914,6 @@ pub const Lexer = struct {
}
if (command.len == 0 or command[0] != '(') {
- self.error_context_token = token;
return error.CodePagePragmaMissingLeftParen;
}
command = command[1..];
@@ -917,7 +929,6 @@ pub const Lexer = struct {
}
if (num_str.len == 0) {
- self.error_context_token = token;
return error.CodePagePragmaNotInteger;
}
@@ -926,7 +937,6 @@ pub const Lexer = struct {
}
if (command.len == 0 or command[0] != ')') {
- self.error_context_token = token;
return error.CodePagePragmaMissingRightParen;
}
@@ -943,41 +953,26 @@ pub const Lexer = struct {
//
// Instead of that, we just have a separate error specifically for overflow.
const num = parseCodePageNum(num_str) catch |err| switch (err) {
- error.InvalidCharacter => {
- self.error_context_token = token;
- return error.CodePagePragmaNotInteger;
- },
- error.Overflow => {
- self.error_context_token = token;
- return error.CodePagePragmaOverflow;
- },
+ error.InvalidCharacter => return error.CodePagePragmaNotInteger,
+ error.Overflow => return error.CodePagePragmaOverflow,
};
// Anything that starts with 0 but does not resolve to 0 is treated as invalid, e.g. 01252
if (num_str[0] == '0' and num != 0) {
- self.error_context_token = token;
return error.CodePagePragmaInvalidCodePage;
}
// Anything that resolves to 0 is treated as 'not an integer' by the Win32 implementation.
else if (num == 0) {
- self.error_context_token = token;
return error.CodePagePragmaNotInteger;
}
// Anything above u16 max is not going to be found since our CodePage enum is backed by a u16.
if (num > std.math.maxInt(u16)) {
- self.error_context_token = token;
return error.CodePagePragmaInvalidCodePage;
}
break :code_page code_pages.CodePage.getByIdentifierEnsureSupported(@intCast(num)) catch |err| switch (err) {
- error.InvalidCodePage => {
- self.error_context_token = token;
- return error.CodePagePragmaInvalidCodePage;
- },
- error.UnsupportedCodePage => {
- self.error_context_token = token;
- return error.CodePagePragmaUnsupportedCodePage;
- },
+ error.InvalidCodePage => return error.CodePagePragmaInvalidCodePage,
+ error.UnsupportedCodePage => return error.CodePagePragmaUnsupportedCodePage,
};
};
@@ -990,7 +985,6 @@ pub const Lexer = struct {
// to still be able to work correctly after this error is returned.
if (self.source_mappings) |source_mappings| {
if (!source_mappings.isRootFile(token.line_number)) {
- self.error_context_token = token;
return error.CodePagePragmaInIncludedFile;
}
}
src/resinator/literals.zig
@@ -775,6 +775,13 @@ pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
return tab_columns - (column % tab_columns);
}
+pub fn columnWidth(cur_column: usize, c: u8, tab_columns: usize) usize {
+ return switch (c) {
+ '\t' => columnsUntilTabStop(cur_column, tab_columns),
+ else => 1,
+ };
+}
+
pub const Number = struct {
value: u32,
is_long: bool = false,
src/resinator/preprocess.zig
@@ -1,7 +1,7 @@
const std = @import("std");
+const builtin = @import("builtin");
const Allocator = std.mem.Allocator;
const cli = @import("cli.zig");
-const introspect = @import("../introspect.zig");
pub const IncludeArgs = struct {
clang_target: ?[]const u8 = null,
@@ -68,10 +68,15 @@ pub fn appendClangArgs(arena: Allocator, argv: *std.ArrayList([]const u8), optio
}
if (!options.ignore_include_env_var) {
- const INCLUDE = (introspect.EnvVar.INCLUDE.get(arena) catch @panic("OOM")) orelse "";
+ const INCLUDE = std.process.getEnvVarOwned(arena, "INCLUDE") catch "";
- // TODO: Should this be platform-specific? How does windres/llvm-rc handle this (if at all)?
- var it = std.mem.tokenize(u8, INCLUDE, ";");
+ // The only precedence here is llvm-rc which also uses the platform-specific
+ // delimiter. There's no precedence set by `rc.exe` since it's Windows-only.
+ const delimiter = switch (builtin.os.tag) {
+ .windows => ';',
+ else => ':',
+ };
+ var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
while (it.next()) |include_path| {
try argv.append("-isystem");
try argv.append(include_path);
src/resinator/source_mapping.zig
@@ -240,6 +240,9 @@ pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current
};
defer allocator.free(filename);
+ // \x00 bytes in the filename is incompatible with how StringTable works
+ if (std.mem.indexOfScalar(u8, filename, '\x00') != null) return;
+
current_mapping.line_num = linenum;
current_mapping.filename.clearRetainingCapacity();
try current_mapping.filename.appendSlice(allocator, filename);
@@ -441,7 +444,7 @@ pub const SourceMappings = struct {
ptr.* = span;
}
- pub fn has(self: *SourceMappings, line_num: usize) bool {
+ pub fn has(self: SourceMappings, line_num: usize) bool {
return self.mapping.items.len >= line_num;
}
src/Compilation.zig
@@ -4755,6 +4755,10 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
};
defer options.deinit();
+ // We never want to read the INCLUDE environment variable, so
+ // unconditionally set `ignore_include_env_var` to true
+ options.ignore_include_env_var = true;
+
var argv = std.ArrayList([]const u8).init(comp.gpa);
defer argv.deinit();
src/introspect.zig
@@ -157,8 +157,6 @@ pub const EnvVar = enum {
NO_COLOR,
XDG_CACHE_HOME,
HOME,
- /// https://github.com/ziglang/zig/issues/17585
- INCLUDE,
pub fn isSet(comptime ev: EnvVar) bool {
return std.process.hasEnvVarConstant(@tagName(ev));