Commit bb74f72e97

Luuk de Gram <Luukdegram@users.noreply.github.com>
2021-01-10 18:51:01
stage2: refactor wasm backend - similar to the other backends
1 parent b204ea0
Changed files (2)
src
codegen
link
src/codegen/wasm.zig
@@ -7,136 +7,292 @@ const mem = std.mem;
 
 const Module = @import("../Module.zig");
 const Decl = Module.Decl;
-const Inst = @import("../ir.zig").Inst;
+const ir = @import("../ir.zig");
+const Inst = ir.Inst;
 const Type = @import("../type.zig").Type;
 const Value = @import("../value.zig").Value;
+const Compilation = @import("../Compilation.zig");
 
-fn genValtype(ty: Type) u8 {
+/// Wasm Value, created when generating an instruction.
+/// `Code.genInst` returns one of these for every instruction it handles.
+const WValue = union(enum) {
+    /// The instruction produced no value; `Code.gen` does not record such
+    /// a result in the value table.
+    none: void,
+    /// Index of the local variable
+    local: u32,
+    /// A constant instruction; its bytes are only written once the value
+    /// is passed to `Code.emitWValue`.
+    constant: *Inst,
+    /// Each newly created wasm block has a label
+    /// in the form of an index.
+    block_idx: u32,
+};
+
+pub const ValueTable = std.AutoArrayHashMap(*Inst, WValue);
+
+/// Using a given Zig type, returns the corresponding wasm value type
+/// as the single byte used in the wasm binary format
+/// (0x7F = i32, 0x7E = i64, 0x7D = f32, 0x7C = f64).
+/// Signed and unsigned integers of the same width share one wasm type.
+/// Returns `null` for every other type so the caller can report the failure.
+fn genValtype(ty: Type) ?u8 {
     return switch (ty.tag()) {
-        .u32, .i32 => 0x7F,
-        .u64, .i64 => 0x7E,
         .f32 => 0x7D,
         .f64 => 0x7C,
-        else => @panic("TODO: Implement more types for wasm."),
+        .u32, .i32 => 0x7F,
+        .u64, .i64 => 0x7E,
+        else => null,
     };
 }
 
-pub fn genFunctype(buf: *ArrayList(u8), decl: *Decl) !void {
-    const ty = decl.typed_value.most_recent.typed_value.ty;
-    const writer = buf.writer();
+/// Code represents the `Code` section of wasm that
+/// belongs to a function
+pub const Code = struct {
+    /// Reference to the function declaration the code
+    /// section belongs to
+    decl: *Decl,
+    /// Allocator used for temporary buffers, error messages and
+    /// the `idx_refs` entries recorded while generating calls
+    gpa: *mem.Allocator,
+    /// Table to save `WValue`'s generated by an `Inst`
+    values: ValueTable,
+    /// `bytes` contains the wasm instructions that have been emitted
+    /// this is what will be emitted after codegen to write the wasm binary
+    bytes: ArrayList(u8),
+    /// Contains the generated function type bytecode for the current function
+    func_type_data: ArrayList(u8),
+    /// The index the next local generated will have
+    /// NOTE: arguments share the index with locals therefore the first variable
+    /// will have the index that comes after the last argument's index
+    local_index: u32 = 0,
+    /// The index the next argument generated will have
+    /// NOTE(review): no method in this struct reads or writes this field;
+    /// `genArg` advances `local_index` instead.
+    arg_index: u32 = 0,
+    /// If codegen fails, an error message will be allocated and saved
+    /// in `err_msg`. It is only assigned by `fail`, so it must not be read
+    /// unless `error.CodegenFail` was returned.
+    err_msg: *Compilation.ErrorMsg,
+
+    /// The errors `fail` can return: `CodegenFail` once a message has been
+    /// stored, or `OutOfMemory`.
+    const InnerError = error{
+        OutOfMemory,
+        CodegenFail,
+    };
+
+    /// Creates a formatted error message for source location `src`, stores it
+    /// in `err_msg` and returns `error.CodegenFail`, so that callers can write
+    /// `return self.fail(...)`. If the message itself cannot be created, the
+    /// error from `ErrorMsg.create` is returned instead.
+    fn fail(self: *Code, src: usize, comptime fmt: []const u8, args: anytype) InnerError {
+        const msg = try Compilation.ErrorMsg.create(self.gpa, src, fmt, args);
+        self.err_msg = msg;
+        return error.CodegenFail;
+    }
+
+    /// Looks up the `WValue` that belongs to `inst`.
+    /// Instructions that carry a comptime-known value are never stored in
+    /// `values`; a fresh `.constant` value wrapping `inst` is returned for them.
+    /// Every other instruction must already be present in the table.
+    fn resolveInst(self: Code, inst: *Inst) !WValue {
+        if (inst.value() == null) {
+            return self.values.get(inst).?; // Instruction does not dominate all uses!
+        }
+
+        return WValue{ .constant = inst };
+    }
+
+    /// Writes the bytecode depending on the given `WValue` in `val`.
+    /// A `.local` is pushed onto the stack with `local.get`, a `.constant`
+    /// is written through `emitConstant`.
+    /// `.none` and `.block_idx` hold nothing that can be pushed and must
+    /// never be passed in.
+    fn emitWValue(self: *Code, val: WValue) !void {
+        const writer = self.bytes.writer();
+        switch (val) {
+            .none, .block_idx => unreachable,
+            // loads the local onto the stack at the given index
+            .local => |idx| {
+                // 0x20 is `local.get`; `local.set` is 0x21 (see `genStore`)
+                try writer.writeByte(0x20);
+                try leb.writeULEB128(writer, idx);
+            },
+            // creates a new constant onto the stack
+            .constant => |inst| try self.emitConstant(inst.castTag(.constant).?),
+        }
+    }
 
-    // functype magic
-    try writer.writeByte(0x60);
+    /// Writes the wasm function type of `decl` into `func_type_data`:
+    /// the functype marker byte, the number of parameters followed by their
+    /// value types, and the number of results followed by the result type
+    /// (zero results for `void` and `noreturn`).
+    /// Fails with a TODO message for any type `genValtype` does not handle.
+    /// NOTE(review): the bytes are appended without clearing `func_type_data`
+    /// first; this assumes the caller passes in an empty buffer — confirm.
+    fn genFunctype(self: *Code) !void {
+        const ty = self.decl.typed_value.most_recent.typed_value.ty;
+        const writer = self.func_type_data.writer();
 
-    // param types
-    try leb.writeULEB128(writer, @intCast(u32, ty.fnParamLen()));
-    if (ty.fnParamLen() != 0) {
-        const params = try buf.allocator.alloc(Type, ty.fnParamLen());
-        defer buf.allocator.free(params);
-        ty.fnParamTypes(params);
-        for (params) |param_type| try writer.writeByte(genValtype(param_type));
+        // functype magic
+        try writer.writeByte(0x60);
+
+        // param types
+        try leb.writeULEB128(writer, @intCast(u32, ty.fnParamLen()));
+        if (ty.fnParamLen() != 0) {
+            // temporary buffer that receives the parameter types; freed on exit
+            const params = try self.gpa.alloc(Type, ty.fnParamLen());
+            defer self.gpa.free(params);
+            ty.fnParamTypes(params);
+            for (params) |param_type| {
+                const val_type = genValtype(param_type) orelse
+                    return self.fail(self.decl.src(), "TODO: Wasm generate wasm type value for type '{s}'", .{param_type.tag()});
+                try writer.writeByte(val_type);
+            }
+        }
+
+        // return type
+        const return_type = ty.fnReturnType();
+        switch (return_type.tag()) {
+            .void, .noreturn => try leb.writeULEB128(writer, @as(u32, 0)),
+            else => |ret_type| {
+                try leb.writeULEB128(writer, @as(u32, 1));
+                const val_type = genValtype(return_type) orelse
+                    return self.fail(self.decl.src(), "TODO: Wasm generate wasm return type value for type '{s}'", .{ret_type});
+                try writer.writeByte(val_type);
+            },
+        }
     }
 
-    // return type
-    const return_type = ty.fnReturnType();
-    switch (return_type.tag()) {
-        .void, .noreturn => try leb.writeULEB128(writer, @as(u32, 0)),
-        else => {
+    /// Generates the function type and the wasm bytecode for `decl`.
+    /// `bytes` must be empty on entry. On success it holds, in order:
+    /// 5 bytes with the size of the body, the declarations of the locals,
+    /// the instructions and the closing `end` opcode.
+    /// Returns `error.CodegenFail` (with `err_msg` set) when a type or an
+    /// instruction is not supported yet.
+    pub fn gen(self: *Code) !void {
+        assert(self.bytes.items.len == 0);
+        try self.genFunctype();
+        const writer = self.bytes.writer();
+
+        // Reserve space to write the size after generating the code
+        try self.bytes.resize(5);
+
+        // Write instructions
+        // TODO: check for and handle death of instructions
+        const tv = self.decl.typed_value.most_recent.typed_value;
+        const mod_fn = tv.val.castTag(.function).?.data;
+
+        // Value type of every `alloc` in the body, in instruction order
+        var locals = std.ArrayList(u8).init(self.gpa);
+        defer locals.deinit();
+
+        // First pass: the locals have to be declared before any instruction
+        // is written, so collect them up front.
+        for (mod_fn.body.instructions) |inst| {
+            if (inst.tag != .alloc) continue;
+
+            const alloc: *Inst.NoOp = inst.castTag(.alloc).?;
+            const elem_type = alloc.base.ty.elemType();
+
+            const wasm_type = genValtype(elem_type) orelse
+                return self.fail(inst.src, "TODO: Wasm generate wasm type value for type '{s}'", .{elem_type.tag()});
+
+            try locals.append(wasm_type);
+        }
+
+        // number of local declarations that follow
+        try leb.writeULEB128(writer, @intCast(u32, locals.items.len));
+
+        // every alloc gets its own declaration: a count of 1 and its value type
+        for (locals.items) |local| {
             try leb.writeULEB128(writer, @as(u32, 1));
-            try writer.writeByte(genValtype(return_type));
-        },
+            try leb.writeULEB128(writer, local); // valtype
+        }
+
+        // Second pass: generate each instruction and remember its result so
+        // later instructions can find it through `resolveInst`.
+        for (mod_fn.body.instructions) |inst| {
+            const result = try self.genInst(inst);
+
+            if (result != .none) {
+                try self.values.putNoClobber(inst, result);
+            }
+        }
+
+        // Write 'end' opcode
+        try writer.writeByte(0x0B);
+
+        // Fill in the size of the generated code to the reserved space at the
+        // beginning of the buffer.
+        // The reserved 5 bytes themselves are not counted. Every entry in
+        // `idx_refs` adds 5 bytes — presumably the function index that is
+        // written later for each call (see `genCall`); confirm against
+        // link.Wasm.flush().
+        const size = self.bytes.items.len - 5 + self.decl.fn_link.wasm.?.idx_refs.items.len * 5;
+        leb.writeUnsignedFixed(5, self.bytes.items[0..5], @intCast(u32, size));
     }
-}
 
-pub fn genCode(buf: *ArrayList(u8), decl: *Decl) !void {
-    assert(buf.items.len == 0);
-    const writer = buf.writer();
+    /// Dispatches `inst` to the generator that belongs to its tag and returns
+    /// the resulting `WValue`. Tags without a generator fail with a TODO
+    /// message. Constants are never generated on their own; they are written
+    /// when they are used, so reaching `.constant` here is a bug.
+    fn genInst(self: *Code, inst: *Inst) !WValue {
+        switch (inst.tag) {
+            // nothing is emitted for these
+            .dbg_stmt, .retvoid => return WValue.none,
+            .constant => unreachable,
+            .alloc => return self.genAlloc(inst.castTag(.alloc).?),
+            .arg => return self.genArg(inst.castTag(.arg).?),
+            .call => return self.genCall(inst.castTag(.call).?),
+            .load => return self.genLoad(inst.castTag(.load).?),
+            .ret => return self.genRet(inst.castTag(.ret).?),
+            .store => return self.genStore(inst.castTag(.store).?),
+            else => return self.fail(inst.src, "TODO: Implement wasm inst: {s}", .{inst.tag}),
+        }
+    }
 
-    // Reserve space to write the size after generating the code
-    try buf.resize(5);
+    /// Generates a `ret`: pushes the returned operand onto the stack.
+    /// No explicit `return` opcode is written here.
+    fn genRet(self: *Code, inst: *Inst.UnOp) !WValue {
+        try self.emitWValue(try self.resolveInst(inst.operand));
+        return WValue.none;
+    }
 
-    // Write the size of the locals vec
-    // TODO: implement locals
-    try leb.writeULEB128(writer, @as(u32, 0));
+    /// Generates a call to a function that is known at compile time:
+    /// pushes every argument onto the stack, writes the `call` opcode and
+    /// records where the function index has to be written later.
+    /// NOTE(review): always returns `.none`, so a value returned by the
+    /// callee is not tracked in the value table — confirm this is intended.
+    fn genCall(self: *Code, inst: *Inst.Call) !WValue {
+        const func_inst = inst.func.castTag(.constant).?;
+        const func = func_inst.val.castTag(.function).?.data;
+        const target = func.owner_decl;
+        // NOTE(review): `target_ty` is not used anywhere below.
+        const target_ty = target.typed_value.most_recent.typed_value.ty;
 
-    // Write instructions
-    // TODO: check for and handle death of instructions
-    const tv = decl.typed_value.most_recent.typed_value;
-    const mod_fn = tv.val.castTag(.function).?.data;
-    for (mod_fn.body.instructions) |inst| try genInst(buf, decl, inst);
+        // push the arguments in order
+        for (inst.args) |arg| {
+            const arg_val = try self.resolveInst(arg);
+            try self.emitWValue(arg_val);
+        }
 
-    // Write 'end' opcode
-    try writer.writeByte(0x0B);
+        try self.bytes.append(0x10); // call
 
-    // Fill in the size of the generated code to the reserved space at the
-    // beginning of the buffer.
-    const size = buf.items.len - 5 + decl.fn_link.wasm.?.idx_refs.items.len * 5;
-    leb.writeUnsignedFixed(5, buf.items[0..5], @intCast(u32, size));
-}
+        // The function index immediate argument will be filled in using this data
+        // in link.Wasm.flush().
+        // `offset` is the position in `bytes` directly after the call opcode.
+        try self.decl.fn_link.wasm.?.idx_refs.append(self.gpa, .{
+            .offset = @intCast(u32, self.bytes.items.len),
+            .decl = target,
+        });
 
-fn genInst(buf: *ArrayList(u8), decl: *Decl, inst: *Inst) !void {
-    return switch (inst.tag) {
-        .call => genCall(buf, decl, inst.castTag(.call).?),
-        .constant => genConstant(buf, decl, inst.castTag(.constant).?),
-        .dbg_stmt => {},
-        .ret => genRet(buf, decl, inst.castTag(.ret).?),
-        .retvoid => {},
-        else => error.TODOImplementMoreWasmCodegen,
-    };
-}
+        return WValue.none;
+    }
 
-fn genConstant(buf: *ArrayList(u8), decl: *Decl, inst: *Inst.Constant) !void {
-    const writer = buf.writer();
-    switch (inst.base.ty.tag()) {
-        .u32 => {
-            try writer.writeByte(0x41); // i32.const
-            try leb.writeILEB128(writer, inst.val.toUnsignedInt());
-        },
-        .i32 => {
-            try writer.writeByte(0x41); // i32.const
-            try leb.writeILEB128(writer, inst.val.toSignedInt());
-        },
-        .u64 => {
-            try writer.writeByte(0x42); // i64.const
-            try leb.writeILEB128(writer, inst.val.toUnsignedInt());
-        },
-        .i64 => {
-            try writer.writeByte(0x42); // i64.const
-            try leb.writeILEB128(writer, inst.val.toSignedInt());
-        },
-        .f32 => {
-            try writer.writeByte(0x43); // f32.const
-            // TODO: enforce LE byte order
-            try writer.writeAll(mem.asBytes(&inst.val.toFloat(f32)));
-        },
-        .f64 => {
-            try writer.writeByte(0x44); // f64.const
-            // TODO: enforce LE byte order
-            try writer.writeAll(mem.asBytes(&inst.val.toFloat(f64)));
-        },
-        .void => {},
-        else => return error.TODOImplementMoreWasmCodegen,
+    /// Hands out the next free local index for an `alloc` instruction and
+    /// returns it as a `.local` value. Nothing is written to `bytes` here;
+    /// the declaration of the local is written by `gen`.
+    fn genAlloc(self: *Code, inst: *Inst.NoOp) !WValue {
+        const index = self.local_index;
+        self.local_index += 1;
+        return WValue{ .local = index };
+    }
-}
 
-fn genRet(buf: *ArrayList(u8), decl: *Decl, inst: *Inst.UnOp) !void {
-    try genInst(buf, decl, inst.operand);
-}
+    /// Generates a `store`: pushes the value of `rhs` onto the stack and
+    /// writes a `local.set` for the local that `lhs` resolves to.
+    /// `lhs` is expected to resolve to a `.local` value.
+    fn genStore(self: *Code, inst: *Inst.BinOp) !WValue {
+        const writer = self.bytes.writer();
 
-fn genCall(buf: *ArrayList(u8), decl: *Decl, inst: *Inst.Call) !void {
-    const func_inst = inst.func.castTag(.constant).?;
-    const func = func_inst.val.castTag(.function).?.data;
-    const target = func.owner_decl;
-    const target_ty = target.typed_value.most_recent.typed_value.ty;
+        const lhs = try self.resolveInst(inst.lhs);
 
-    if (inst.args.len != 0) return error.TODOImplementMoreWasmCodegen;
+        // the value has to be on the stack before `local.set` is written
+        const rhs = try self.resolveInst(inst.rhs);
+        try self.emitWValue(rhs);
 
-    try buf.append(0x10); // call
+        try writer.writeByte(0x21); // local.set
+        try leb.writeULEB128(writer, lhs.local);
 
-    // The function index immediate argument will be filled in using this data
-    // in link.Wasm.flush().
-    try decl.fn_link.wasm.?.idx_refs.append(buf.allocator, .{
-        .offset = @intCast(u32, buf.items.len),
-        .decl = target,
-    });
-}
+        return WValue.none;
+    }
+
+    /// Generates a `load`: resolves the operand and returns the local it
+    /// refers to. No bytecode is written here; the local is pushed onto the
+    /// stack once the returned value is passed to `emitWValue`.
+    /// The operand is expected to resolve to a `.local` value.
+    fn genLoad(self: *Code, inst: *Inst.UnOp) !WValue {
+        // `resolveInst` returns an error union and has to be unwrapped with
+        // `try` before the payload can be accessed, as every other caller does.
+        const operand = try self.resolveInst(inst.operand);
+
+        // ensure index to local
+        return WValue{ .local = operand.local };
+    }
+
+    /// Hands out the next free local index for an `arg` instruction and
+    /// returns it as a `.local` value.
+    fn genArg(self: *Code, inst: *Inst.Arg) !WValue {
+        // arguments share the index with locals
+        const index = self.local_index;
+        self.local_index += 1;
+        return WValue{ .local = index };
+    }
+
+    /// Writes the wasm `const` instruction that pushes the value of `inst`
+    /// onto the stack. The opcode is chosen by the type of the constant:
+    /// 32-bit integers use `i32.const`, 64-bit integers `i64.const`, and
+    /// floats `f32.const` / `f64.const`. Nothing is written for `void`.
+    /// Any other type fails with a TODO message.
+    fn emitConstant(self: *Code, inst: *Inst.Constant) !void {
+        const writer = self.bytes.writer();
+        const tag = inst.base.ty.tag();
+        switch (tag) {
+            .void => {},
+            .u32, .u64 => {
+                // i32.const for 32 bits, i64.const for 64 bits
+                const opcode: u8 = if (tag == .u32) 0x41 else 0x42;
+                try writer.writeByte(opcode);
+                try leb.writeILEB128(writer, inst.val.toUnsignedInt());
+            },
+            .i32, .i64 => {
+                // i32.const for 32 bits, i64.const for 64 bits
+                const opcode: u8 = if (tag == .i32) 0x41 else 0x42;
+                try writer.writeByte(opcode);
+                try leb.writeILEB128(writer, inst.val.toSignedInt());
+            },
+            .f32 => {
+                try writer.writeByte(0x43); // f32.const
+                // TODO: enforce LE byte order
+                const value = inst.val.toFloat(f32);
+                try writer.writeAll(mem.asBytes(&value));
+            },
+            .f64 => {
+                try writer.writeByte(0x44); // f64.const
+                // TODO: enforce LE byte order
+                const value = inst.val.toFloat(f64);
+                try writer.writeAll(mem.asBytes(&value));
+            },
+            else => return self.fail(inst.base.src, "Wasm TODO: emitConstant for type {s}", .{tag}),
+        }
+    }
+};
src/link/Wasm.zig
@@ -118,10 +118,29 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
 
     var managed_functype = fn_data.functype.toManaged(self.base.allocator);
     var managed_code = fn_data.code.toManaged(self.base.allocator);
-    try codegen.genFunctype(&managed_functype, decl);
-    try codegen.genCode(&managed_code, decl);
-    fn_data.functype = managed_functype.toUnmanaged();
-    fn_data.code = managed_code.toUnmanaged();
+
+    var code = codegen.Code{
+        .gpa = self.base.allocator,
+        .values = codegen.ValueTable.init(self.base.allocator),
+        .bytes = managed_code,
+        .func_type_data = managed_functype,
+        .decl = decl,
+        .err_msg = undefined,
+    };
+    defer code.values.deinit();
+
+    // generate the 'code' section for the function declaration
+    code.gen() catch |err| switch (err) {
+        error.CodegenFail => {
+            decl.analysis = .codegen_failure;
+            try module.failed_decls.put(module.gpa, decl, code.err_msg);
+            return;
+        },
+        else => |e| return err,
+    };
+
+    fn_data.functype = code.func_type_data.toUnmanaged();
+    fn_data.code = code.bytes.toUnmanaged();
 }
 
 pub fn updateDeclExports(