Commit ebb81ebe59
Changed files (4)
src-self-hosted/ir/text.zig
@@ -19,6 +19,9 @@ pub const Inst = struct {
src: usize,
name: []const u8,
+ /// Slice of the source from just after the = to the start of the next instruction.
+ contents: []const u8,
+
/// These names are used directly as the instruction names in the text format.
pub const Tag = enum {
breakpoint,
@@ -798,11 +801,12 @@ const Parser = struct {
}
fn parseInstruction(self: *Parser, body_ctx: ?*Body, name: []const u8) InnerError!*Inst {
+ const contents_start = self.i;
const fn_name = try skipToAndOver(self, '(');
inline for (@typeInfo(Inst.Tag).Enum.fields) |field| {
if (mem.eql(u8, field.name, fn_name)) {
const tag = @field(Inst.Tag, field.name);
- return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx, name);
+ return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx, name, contents_start);
}
}
return self.fail("unknown instruction '{}'", .{fn_name});
@@ -814,12 +818,14 @@ const Parser = struct {
comptime InstType: type,
body_ctx: ?*Body,
inst_name: []const u8,
+ contents_start: usize,
) InnerError!*Inst {
const inst_specific = try self.arena.allocator.create(InstType);
inst_specific.base = .{
.name = inst_name,
.src = self.i,
.tag = InstType.base_tag,
+ .contents = undefined,
};
if (@hasField(InstType, "ty")) {
@@ -867,6 +873,8 @@ const Parser = struct {
}
try requireEatBytes(self, ")");
+ inst_specific.base.contents = self.source[contents_start..self.i];
+
return &inst_specific.base;
}
@@ -952,6 +960,7 @@ const Parser = struct {
.name = try self.generateName(),
.src = src,
.tag = Inst.Str.base_tag,
+ .contents = undefined,
},
.positionals = .{ .bytes = ident },
.kw_args = .{},
@@ -962,6 +971,7 @@ const Parser = struct {
.name = try self.generateName(),
.src = src,
.tag = Inst.DeclRef.base_tag,
+ .contents = undefined,
},
.positionals = .{ .name = &name.base },
.kw_args = .{},
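
The new `contents` field records the raw source span of each parsed instruction: the parser saves the cursor in `contents_start` before dispatching, then slices the source up to the cursor once the closing paren has been consumed. A minimal sketch of that slice-capture pattern, with illustrative names that are not the parser's actual API; the compiler later hashes this slice to detect which decls changed between updates (see the ir.zig changes below).

```zig
const std = @import("std");

// Sketch of the slice-capture pattern (illustrative, not the parser's API):
// save the cursor before parsing an instruction, then slice the source from
// that mark to the cursor once parsing is done.
const MiniParser = struct {
    source: []const u8,
    i: usize,

    // Stand-in for parsing a full instruction up to and over ')'.
    fn skipPast(self: *MiniParser, byte: u8) void {
        while (self.source[self.i] != byte) self.i += 1;
        self.i += 1;
    }

    fn instructionContents(self: *MiniParser) []const u8 {
        const contents_start = self.i;
        self.skipPast(')');
        return self.source[contents_start..self.i];
    }
};

pub fn main() void {
    var p = MiniParser{ .source = "str(\"hello\") @1 = ...", .i = 0 };
    std.debug.assert(std.mem.eql(u8, p.instructionContents(), "str(\"hello\")"));
}
```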
src-self-hosted/codegen.zig
@@ -33,6 +33,7 @@ pub fn generateSymbol(
var function = Function{
.target = &bin_file.options.target,
+ .bin_file = bin_file,
.mod_fn = module_fn,
.code = code,
.inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(bin_file.allocator),
@@ -144,6 +145,7 @@ pub fn generateSymbol(
}
const Function = struct {
+ bin_file: *link.ElfFile,
target: *const std.Target,
mod_fn: *const ir.Module.Fn,
code: *std.ArrayList(u8),
@@ -160,6 +162,8 @@ const Function = struct {
/// The value is in a target-specific register. The value can
/// be @intToEnum casted to the respective Reg enum.
register: usize,
+ /// The value is in memory at a hard-coded address.
+ memory: u64,
};
fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue {
@@ -375,6 +379,7 @@ const Function = struct {
},
.embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rax = embedded_in_code", .{}),
.register => return self.fail(src, "TODO implement x86_64 genSetReg %rax = register", .{}),
+ .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rax = memory", .{}),
},
.rdx => switch (mcv) {
.none, .unreach => unreachable,
@@ -406,6 +411,7 @@ const Function = struct {
},
.embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = embedded_in_code", .{}),
.register => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = register", .{}),
+ .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = memory", .{}),
},
.rdi => switch (mcv) {
.none, .unreach => unreachable,
@@ -437,10 +443,37 @@ const Function = struct {
},
.embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = embedded_in_code", .{}),
.register => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = register", .{}),
+ .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = memory", .{}),
},
.rsi => switch (mcv) {
.none, .unreach => unreachable,
- .immediate => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = immediate", .{}),
+ .immediate => |x| {
+ // Setting the esi register zeroes the upper part of rsi, so if the number is small
+ // enough, that is preferable.
+ // Best case: zero
+ // 31 f6 xor esi,esi
+ if (x == 0) {
+ return self.code.appendSlice(&[_]u8{ 0x31, 0xf6 });
+ }
+ // Next best case: set esi with 4 bytes
+ // be 40 30 20 10 mov esi,0x10203040
+ if (x <= std.math.maxInt(u32)) {
+ try self.code.resize(self.code.items.len + 5);
+ self.code.items[self.code.items.len - 5] = 0xbe;
+ const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4];
+ mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x));
+ return;
+ }
+ // Worst case: set rsi with 8 bytes
+ // 48 be 80 70 60 50 40 30 20 10 movabs rsi,0x1020304050607080
+
+ try self.code.resize(self.code.items.len + 10);
+ self.code.items[self.code.items.len - 10] = 0x48;
+ self.code.items[self.code.items.len - 9] = 0xbe;
+ const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8];
+ mem.writeIntLittle(u64, imm_ptr, x);
+ return;
+ },
.embedded_in_code => |code_offset| {
// Examples:
// lea rsi, [rip + 0x01020304]
@@ -462,6 +495,21 @@ const Function = struct {
return;
},
.register => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = register", .{}),
+ .memory => |x| {
+ if (x <= std.math.maxInt(u32)) {
+ // 48 8b 34 25 40 30 20 10 mov rsi,QWORD PTR ds:0x10203040
+ try self.code.resize(self.code.items.len + 8);
+ self.code.items[self.code.items.len - 8] = 0x48;
+ self.code.items[self.code.items.len - 7] = 0x8b;
+ self.code.items[self.code.items.len - 6] = 0x34;
+ self.code.items[self.code.items.len - 5] = 0x25;
+ const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4];
+ mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x));
+ return;
+ } else {
+ return self.fail(src, "TODO implement genSetReg for x86_64 setting rsi to 64-bit memory", .{});
+ }
+ },
},
else => return self.fail(src, "TODO implement genSetReg for x86_64 '{}'", .{@tagName(reg)}),
},
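
The three immediate encodings above pick the shortest form that preserves the full value: `xor esi, esi` for zero, a 5-byte `mov esi, imm32` when the value fits in 32 bits (writes to a 32-bit register zero-extend into the full 64-bit register), and the 10-byte `movabs` otherwise. A standalone sketch of that selection logic, written against the same-era Zig std the diff itself uses (`writeIntLittle`, two-argument `@intCast`):

```zig
const std = @import("std");

// Returns the bytes that would be appended to the code buffer for
// "set rsi to the immediate x", using the smallest encoding that fits.
fn encodeMovRsiImm(buf: *[10]u8, x: u64) []const u8 {
    if (x == 0) {
        // 31 f6                xor esi, esi
        buf[0] = 0x31;
        buf[1] = 0xf6;
        return buf[0..2];
    }
    if (x <= std.math.maxInt(u32)) {
        // be XX XX XX XX       mov esi, imm32 (zero-extends into rsi)
        buf[0] = 0xbe;
        std.mem.writeIntLittle(u32, buf[1..5], @intCast(u32, x));
        return buf[0..5];
    }
    // 48 be XX..XX             movabs rsi, imm64
    buf[0] = 0x48;
    buf[1] = 0xbe;
    std.mem.writeIntLittle(u64, buf[2..10], x);
    return buf[0..10];
}

pub fn main() void {
    var buf: [10]u8 = undefined;
    const bytes = encodeMovRsiImm(&buf, 0x10203040);
    std.debug.assert(std.mem.eql(u8, bytes, &[_]u8{ 0xbe, 0x40, 0x30, 0x20, 0x10 }));
}
```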
@@ -493,33 +541,21 @@ const Function = struct {
}
fn genTypedValue(self: *Function, src: usize, typed_value: TypedValue) !MCValue {
+ const ptr_bits = self.target.cpu.arch.ptrBitWidth();
+ const ptr_bytes: u64 = @divExact(ptr_bits, 8);
const allocator = self.code.allocator;
switch (typed_value.ty.zigTypeTag()) {
.Pointer => {
- const ptr_elem_type = typed_value.ty.elemType();
- switch (ptr_elem_type.zigTypeTag()) {
- .Array => {
- // TODO more checks to make sure this can be emitted as a string literal
- const bytes = typed_value.val.toAllocatedBytes(allocator) catch |err| switch (err) {
- error.AnalysisFail => unreachable,
- else => |e| return e,
- };
- defer allocator.free(bytes);
- const smaller_len = std.math.cast(u32, bytes.len) catch
- return self.fail(src, "TODO handle a larger string constant", .{});
-
- // Emit the string literal directly into the code; jump over it.
- try self.genRelativeFwdJump(src, smaller_len);
- const offset = self.code.items.len;
- try self.code.appendSlice(bytes);
- return MCValue{ .embedded_in_code = offset };
- },
- else => |t| return self.fail(src, "TODO implement emitTypedValue for pointer to '{}'", .{@tagName(t)}),
+ if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| {
+ const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?];
+ const decl = payload.decl;
+ const got_addr = got.p_vaddr + decl.link.offset_table_index * ptr_bytes;
+ return MCValue{ .memory = got_addr };
}
+ return self.fail(src, "TODO codegen more kinds of const pointers", .{});
},
.Int => {
const info = typed_value.ty.intInfo(self.target.*);
- const ptr_bits = self.target.cpu.arch.ptrBitWidth();
if (info.bits > ptr_bits or info.signed) {
return self.fail(src, "TODO const int bigger than ptr and signed int", .{});
}
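
With this change, a constant pointer to another decl no longer embeds the pointee in the code stream; it lowers to an `MCValue.memory` holding the address of the decl's GOT slot, computed from the GOT program header's virtual address plus the decl's offset-table index scaled by the pointer size. A version-neutral sketch of that arithmetic (field names mirror the diff; the concrete numbers are made up):

```zig
const std = @import("std");

// Sketch of the GOT slot address computation genTypedValue now performs.
fn gotSlotAddr(got_vaddr: u64, offset_table_index: u32, ptr_bits: u32) u64 {
    const ptr_bytes: u64 = @divExact(ptr_bits, 8);
    return got_vaddr + offset_table_index * ptr_bytes;
}

pub fn main() void {
    // With the new default GOT vaddr of 0x4000000 (see link.zig below), the
    // decl at offset-table index 2 gets slot 0x4000010 on x86_64.
    std.debug.assert(gotSlotAddr(0x4000000, 2, 64) == 0x4000010);
}
```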
src-self-hosted/ir.zig
@@ -292,6 +292,8 @@ pub const Module = struct {
/// TODO look into using a lightweight map/set data structure rather than a linear array.
dependants: ArrayListUnmanaged(*Decl) = ArrayListUnmanaged(*Decl){},
+ contents_hash: Hash,
+
pub fn destroy(self: *Decl, allocator: *Allocator) void {
allocator.free(mem.spanZ(self.name));
if (self.typedValueManaged()) |tvm| {
@@ -465,26 +467,42 @@ pub const Module = struct {
module: *text.Module,
},
status: enum {
- unloaded,
+ never_loaded,
+ unloaded_success,
unloaded_parse_failure,
+ unloaded_sema_failure,
loaded_parse_failure,
loaded_sema_failure,
loaded_success,
},
- pub fn deinit(self: *ZIRModule, allocator: *Allocator) void {
+ pub fn unload(self: *ZIRModule, allocator: *Allocator) void {
switch (self.status) {
- .unloaded,
+ .never_loaded,
.unloaded_parse_failure,
+ .unloaded_sema_failure,
+ .unloaded_success,
=> {},
- .loaded_success, .loaded_sema_failure => {
+
+ .loaded_success => {
+ allocator.free(self.source.bytes);
+ self.contents.module.deinit(allocator);
+ self.status = .unloaded_success;
+ },
+ .loaded_sema_failure => {
allocator.free(self.source.bytes);
self.contents.module.deinit(allocator);
+ self.status = .unloaded_sema_failure;
},
.loaded_parse_failure => {
allocator.free(self.source.bytes);
+ self.status = .unloaded_parse_failure;
},
}
+ }
+
+ pub fn deinit(self: *ZIRModule, allocator: *Allocator) void {
+ self.unload(allocator);
self.* = undefined;
}
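
`unload` takes over from `deinit` as the place where sources and parsed ZIR are freed, and each loaded status now maps to an unloaded counterpart so a later `getTextModule` knows whether reloading is worthwhile. An illustrative sketch of just the status transitions; the real function also frees `source.bytes` and, for the first two cases, the parsed ZIR module:

```zig
const std = @import("std");

// Illustrative mapping of the status transitions unload() performs;
// already-unloaded states are left alone.
const Status = enum {
    never_loaded,
    unloaded_success,
    unloaded_parse_failure,
    unloaded_sema_failure,
    loaded_parse_failure,
    loaded_sema_failure,
    loaded_success,

    fn afterUnload(self: Status) Status {
        return switch (self) {
            .loaded_success => .unloaded_success,
            .loaded_sema_failure => .unloaded_sema_failure,
            .loaded_parse_failure => .unloaded_parse_failure,
            else => self,
        };
    }
};

pub fn main() void {
    std.debug.assert(Status.loaded_success.afterUnload() == .unloaded_success);
}
```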
@@ -623,7 +641,8 @@ pub const Module = struct {
try self.performAllTheWork();
- // TODO unload all the source files from memory
+ // Unload all the source files from memory.
+ self.root_scope.unload(self.allocator);
try self.bin_file.flush();
self.link_error_flags = self.bin_file.error_flags;
@@ -722,8 +741,8 @@ pub const Module = struct {
.success => {},
}
}
- if (!decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits())
- continue;
+
+ assert(decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits());
self.bin_file.updateDecl(self, decl) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
@@ -748,7 +767,7 @@ pub const Module = struct {
fn getTextModule(self: *Module, root_scope: *Scope.ZIRModule) !*text.Module {
switch (root_scope.status) {
- .unloaded => {
+ .never_loaded, .unloaded_success => {
try self.failed_files.ensureCapacity(self.failed_files.size + 1);
var keep_source = false;
@@ -789,6 +808,7 @@ pub const Module = struct {
},
.unloaded_parse_failure,
+ .unloaded_sema_failure,
.loaded_parse_failure,
.loaded_sema_failure,
=> return error.AnalysisFail,
@@ -804,16 +824,62 @@ pub const Module = struct {
// Here we simulate adding a source file which was previously not part of the compilation,
// which means scanning the decls looking for exports.
// TODO also identify decls that need to be deleted.
- const src_module = try self.getTextModule(root_scope);
+ switch (root_scope.status) {
+ .never_loaded => {
+ const src_module = try self.getTextModule(root_scope);
- // Here we ensure enough queue capacity to store all the decls, so that later we can use
- // appendAssumeCapacity.
- try self.work_queue.ensureUnusedCapacity(src_module.decls.len);
+ // Here we ensure enough queue capacity to store all the decls, so that later we can use
+ // appendAssumeCapacity.
+ try self.work_queue.ensureUnusedCapacity(src_module.decls.len);
- for (src_module.decls) |decl| {
- if (decl.cast(text.Inst.Export)) |export_inst| {
- _ = try self.resolveDecl(&root_scope.base, &export_inst.base);
- }
+ for (src_module.decls) |decl| {
+ if (decl.cast(text.Inst.Export)) |export_inst| {
+ _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty);
+ }
+ }
+ },
+
+ .unloaded_parse_failure,
+ .unloaded_sema_failure,
+ .loaded_parse_failure,
+ .loaded_sema_failure,
+ .loaded_success,
+ .unloaded_success,
+ => {
+ const src_module = try self.getTextModule(root_scope);
+
+ // Look for changed decls.
+ for (src_module.decls) |src_decl| {
+ const name_hash = Decl.hashSimpleName(src_decl.name);
+ if (self.decl_table.get(name_hash)) |kv| {
+ const decl = kv.value;
+ const new_contents_hash = Decl.hashSimpleName(src_decl.contents);
+ if (!mem.eql(u8, &new_contents_hash, &decl.contents_hash)) {
+ // TODO recursive dependency management
+ std.debug.warn("noticed that '{}' changed\n", .{src_decl.name});
+ self.decl_table.removeAssertDiscard(name_hash);
+ const saved_link = decl.link;
+ decl.destroy(self.allocator);
+ if (self.export_owners.getValue(decl)) |exports| {
+ @panic("TODO handle updating a decl that does an export");
+ }
+ const new_decl = self.resolveDecl(
+ &root_scope.base,
+ src_decl,
+ saved_link,
+ ) catch |err| switch (err) {
+ error.OutOfMemory => return error.OutOfMemory,
+ error.AnalysisFail => continue,
+ };
+ if (self.decl_exports.remove(decl)) |entry| {
+ self.decl_exports.putAssumeCapacityNoClobber(new_decl, entry.value);
+ }
+ }
+ } else if (src_decl.cast(text.Inst.Export)) |export_inst| {
+ _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty);
+ }
+ }
+ },
}
}
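
Change detection hinges on the `contents` slice captured by the parser: each decl stores a hash of its source span, and an update re-resolves only the decls whose hash differs from the stored one. A sketch of the idea; `Wyhash` stands in for whatever `Decl.hashSimpleName` actually uses, and the diff's 16-byte `Hash` type is not reproduced here:

```zig
const std = @import("std");

// Hash a decl's raw source span so edits can be detected cheaply.
fn contentsHash(contents: []const u8) u64 {
    return std.hash.Wyhash.hash(0, contents);
}

pub fn main() void {
    const old_hash = contentsHash("fn_type([], usize)");
    // Unchanged contents: hashes match, the decl is kept as-is.
    std.debug.assert(contentsHash("fn_type([], usize)") == old_hash);
    // Edited contents: hashes differ, the decl is destroyed and re-resolved.
    std.debug.assert(contentsHash("fn_type([], u8)") != old_hash);
}
```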
@@ -846,11 +912,17 @@ pub const Module = struct {
};
}
- fn resolveDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl {
+ fn resolveDecl(
+ self: *Module,
+ scope: *Scope,
+ old_inst: *text.Inst,
+ bin_file_link: link.ElfFile.Decl,
+ ) InnerError!*Decl {
const hash = Decl.hashSimpleName(old_inst.name);
if (self.decl_table.get(hash)) |kv| {
return kv.value;
} else {
+ std.debug.warn("creating new decl for {}\n", .{old_inst.name});
const new_decl = blk: {
try self.decl_table.ensureCapacity(self.decl_table.size + 1);
const new_decl = try self.allocator.create(Decl);
@@ -863,6 +935,8 @@ pub const Module = struct {
.src = old_inst.src,
.typed_value = .{ .never_succeeded = {} },
.analysis = .initial_in_progress,
+ .contents_hash = Decl.hashSimpleName(old_inst.contents),
+ .link = bin_file_link,
};
self.decl_table.putAssumeCapacityNoClobber(hash, new_decl);
break :blk new_decl;
@@ -887,6 +961,14 @@ pub const Module = struct {
};
const arena_state = try decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State);
+ const has_codegen_bits = typed_value.ty.hasCodeGenBits();
+ if (has_codegen_bits) {
+ // We don't fully codegen the decl until later, but we do need to reserve a global
+ // offset table index for it. This allows us to codegen decls out of dependency order,
+ // increasing how many computations can be done in parallel.
+ try self.bin_file.allocateDeclIndexes(new_decl);
+ }
+
arena_state.* = decl_scope.arena.state;
new_decl.typed_value = .{
@@ -896,14 +978,16 @@ pub const Module = struct {
},
};
new_decl.analysis = .complete;
- // We ensureCapacity when scanning for decls.
- self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl });
+ if (has_codegen_bits) {
+ // We ensureCapacity when scanning for decls.
+ self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl });
+ }
return new_decl;
}
}
fn resolveCompleteDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl {
- const decl = try self.resolveDecl(scope, old_inst);
+ const decl = try self.resolveDecl(scope, old_inst, link.ElfFile.Decl.empty);
switch (decl.analysis) {
.initial_in_progress => unreachable,
.repeat_in_progress => unreachable,
@@ -2088,8 +2172,8 @@ pub fn main() anyerror!void {
const src_path = args[1];
const bin_path = args[2];
- const debug_error_trace = true;
- const output_zir = true;
+ const debug_error_trace = false;
+ const output_zir = false;
const object_format: ?std.builtin.ObjectFormat = null;
const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{});
@@ -2112,7 +2196,7 @@ pub fn main() anyerror!void {
.sub_file_path = root_pkg.root_src_path,
.source = .{ .unloaded = {} },
.contents = .{ .not_available = {} },
- .status = .unloaded,
+ .status = .never_loaded,
};
break :blk Module{
@@ -2132,22 +2216,38 @@ pub fn main() anyerror!void {
};
defer module.deinit();
- try module.update();
+ const stdin = std.io.getStdIn().inStream();
+ const stderr = std.io.getStdErr().outStream();
+ var repl_buf: [1024]u8 = undefined;
- var errors = try module.getAllErrorsAlloc();
- defer errors.deinit(allocator);
+ while (true) {
+ try module.update();
- if (errors.list.len != 0) {
- for (errors.list) |full_err_msg| {
- std.debug.warn("{}:{}:{}: error: {}\n", .{
- full_err_msg.src_path,
- full_err_msg.line + 1,
- full_err_msg.column + 1,
- full_err_msg.msg,
- });
+ var errors = try module.getAllErrorsAlloc();
+ defer errors.deinit(allocator);
+
+ if (errors.list.len != 0) {
+ for (errors.list) |full_err_msg| {
+ std.debug.warn("{}:{}:{}: error: {}\n", .{
+ full_err_msg.src_path,
+ full_err_msg.line + 1,
+ full_err_msg.column + 1,
+ full_err_msg.msg,
+ });
+ }
+ if (debug_error_trace) return error.AnalysisFail;
+ }
+
+ try stderr.print("🦎 ", .{});
+ if (try stdin.readUntilDelimiterOrEof(&repl_buf, '\n')) |line| {
+ if (mem.eql(u8, line, "update")) {
+ continue;
+ } else {
+ try stderr.print("unknown command: {}\n", .{line});
+ }
+ } else {
+ break;
}
- if (debug_error_trace) return error.AnalysisFail;
- std.process.exit(1);
}
if (output_zir) {
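
main() now loops instead of compiling once: each iteration runs `module.update()`, prints any errors, and then blocks on stdin for a command ("update" re-runs the compilation; EOF exits). A stripped-down sketch of just the command loop, using the same era-specific stream API the diff itself calls (`inStream`, `outStream`, `readUntilDelimiterOrEof`):

```zig
const std = @import("std");
const mem = std.mem;

// The real loop re-runs module.update() and reports errors on every
// iteration before printing the prompt.
pub fn main() !void {
    const stdin = std.io.getStdIn().inStream();
    const stderr = std.io.getStdErr().outStream();
    var repl_buf: [1024]u8 = undefined;
    while (true) {
        // ... compile and report errors here ...
        try stderr.print("🦎 ", .{});
        if (try stdin.readUntilDelimiterOrEof(&repl_buf, '\n')) |line| {
            if (mem.eql(u8, line, "update")) continue;
            try stderr.print("unknown command: {}\n", .{line});
        } else break; // EOF exits the loop
    }
}
```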
src-self-hosted/link.zig
@@ -310,7 +310,7 @@ pub const ElfFile = struct {
// TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at.
// we'll need to re-use that function anyway, in case the GOT grows and overlaps something
// else in virtual memory.
- const default_got_addr = 0x80000000;
+ const default_got_addr = 0x4000000;
try self.program_headers.append(self.allocator, .{
.p_type = elf.PT_LOAD,
.p_offset = off,
@@ -755,6 +755,35 @@ pub const ElfFile = struct {
};
}
+ pub fn allocateDeclIndexes(self: *ElfFile, decl: *ir.Module.Decl) !void {
+ if (decl.link.local_sym_index != 0) return;
+
+ try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1);
+ try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1);
+ const local_sym_index = self.local_symbols.items.len;
+ const offset_table_index = self.offset_table.items.len;
+ const phdr = &self.program_headers.items[self.phdr_load_re_index.?];
+
+ self.local_symbols.appendAssumeCapacity(.{
+ .st_name = 0,
+ .st_info = 0,
+ .st_other = 0,
+ .st_shndx = 0,
+ .st_value = phdr.p_vaddr,
+ .st_size = 0,
+ });
+ errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1);
+ self.offset_table.appendAssumeCapacity(0);
+ errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1);
+
+ self.offset_table_count_dirty = true;
+
+ decl.link = .{
+ .local_sym_index = @intCast(u32, local_sym_index),
+ .offset_table_index = @intCast(u32, offset_table_index),
+ };
+ }
+
pub fn updateDecl(self: *ElfFile, module: *ir.Module, decl: *ir.Module.Decl) !void {
var code_buffer = std.ArrayList(u8).init(self.allocator);
defer code_buffer.deinit();
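
`allocateDeclIndexes` reserves a local symbol and a GOT offset-table slot for a decl before any code exists for it, which is what lets `genTypedValue` hand out a fixed `memory` address for decls that have not been codegenned yet. A reserve-then-patch sketch of the pattern (illustrative types, not the linker's API):

```zig
const std = @import("std");

// Every decl gets a stable offset-table index up front; the GOT entry is
// patched whenever codegen for that decl actually finishes, in any order.
const OffsetTable = struct {
    entries: [8]u64,
    len: usize,

    fn reserve(self: *OffsetTable) usize {
        const index = self.len;
        self.entries[index] = 0; // placeholder until the decl is codegenned
        self.len += 1;
        return index;
    }

    fn patch(self: *OffsetTable, index: usize, vaddr: u64) void {
        self.entries[index] = vaddr;
    }
};

pub fn main() void {
    var got = OffsetTable{ .entries = undefined, .len = 0 };
    const a = got.reserve(); // indexes handed out in declaration order...
    const b = got.reserve();
    got.patch(b, 0x4000100); // ...but filled in whatever order codegen runs
    got.patch(a, 0x4000080);
    std.debug.assert(got.entries[a] == 0x4000080);
    std.debug.assert(got.entries[b] == 0x4000100);
}
```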
@@ -781,21 +810,33 @@ pub const ElfFile = struct {
if (decl.link.local_sym_index != 0) {
const local_sym = &self.local_symbols.items[decl.link.local_sym_index];
const existing_block = self.findAllocatedTextBlock(local_sym.*);
- const need_realloc = code.len > existing_block.size_capacity or
+ const need_realloc = local_sym.st_size == 0 or
+ code.len > existing_block.size_capacity or
!mem.isAlignedGeneric(u64, local_sym.st_value, required_alignment);
+ // TODO check for collision with another symbol
const file_offset = if (need_realloc) fo: {
const new_block = try self.allocateTextBlock(code.len, required_alignment);
local_sym.st_value = new_block.vaddr;
- local_sym.st_size = code.len;
+ self.offset_table.items[decl.link.offset_table_index] = new_block.vaddr;
+ //std.debug.warn("{}: writing got index {}=0x{x}\n", .{
+ // decl.name,
+ // decl.link.offset_table_index,
+ // self.offset_table.items[decl.link.offset_table_index],
+ //});
try self.writeOffsetTableEntry(decl.link.offset_table_index);
break :fo new_block.file_offset;
} else existing_block.file_offset;
+ local_sym.st_size = code.len;
local_sym.st_name = try self.updateString(local_sym.st_name, mem.spanZ(decl.name));
local_sym.st_info = (elf.STB_LOCAL << 4) | stt_bits;
+ local_sym.st_other = 0;
+ local_sym.st_shndx = self.text_section_index.?;
// TODO this write could be avoided if no fields of the symbol were changed.
try self.writeSymbol(decl.link.local_sym_index);
+
+ //std.debug.warn("updating {} at vaddr 0x{x}\n", .{ decl.name, local_sym.st_value });
break :blk file_offset;
} else {
try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1);
@@ -829,6 +870,7 @@ pub const ElfFile = struct {
.offset_table_index = @intCast(u32, offset_table_index),
};
+ //std.debug.warn("writing new {} at vaddr 0x{x}\n", .{ decl.name, new_block.vaddr });
break :blk new_block.file_offset;
}
};