Commit ad5f74c0b1

kcbanner <kcbanner@gmail.com>
2023-07-05 15:55:54
dwarf: introduce ExpressionContext, add more expression opcodes
1 parent 576ffaa
lib/std/dwarf/call_frame.zig
@@ -218,13 +218,7 @@ pub const Instruction = union(Opcode) {
                 break :blk result;
             },
             Opcode.lo_user...Opcode.hi_user => error.UnimplementedUserOpcode,
-            else => |opcode| blk: {
-
-                // TODO: Remove this
-                std.debug.print("Opcode {x}\n", .{opcode});
-
-                break :blk error.InvalidOpcode;
-            },
+            else => error.InvalidOpcode,
         };
     }
 };
@@ -295,9 +289,7 @@ pub const VirtualMachine = struct {
         pub fn resolveValue(
             self: Column,
             context: *dwarf.UnwindContext,
-            compile_unit: ?*const dwarf.CompileUnit,
-            ucontext: *const std.os.ucontext_t,
-            reg_ctx: abi.RegisterContext,
+            expression_context: dwarf.expressions.ExpressionContext,
             out: []u8,
         ) !void {
             switch (self.rule) {
@@ -311,9 +303,9 @@ pub const VirtualMachine = struct {
                 .same_value => {},
                 .offset => |offset| {
                     if (context.cfa) |cfa| {
-                        const ptr: *const usize = @ptrFromInt(try applyOffset(cfa, offset));
-
-                        // TODO: context.isValidMemory(ptr)
+                        const addr = try applyOffset(cfa, offset);
+                        if (expression_context.isValidMemory) |isValidMemory| if (!isValidMemory(addr)) return error.InvalidAddress;
+                        const ptr: *const usize = @ptrFromInt(addr);
                         mem.writeIntSliceNative(usize, out, ptr.*);
                     } else return error.InvalidCFA;
                 },
@@ -329,7 +321,7 @@ pub const VirtualMachine = struct {
                 },
                 .expression => |expression| {
                     context.stack_machine.reset();
-                    const value = try context.stack_machine.run(expression, context.allocator, compile_unit, ucontext, reg_ctx, context.cfa.?);
+                    const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?);
 
                     if (value != .generic) return error.InvalidExpressionValue;
                     if (!context.isValidMemory(value.generic)) return error.InvalidExpressionAddress;
@@ -339,12 +331,12 @@ pub const VirtualMachine = struct {
                 },
                 .val_expression => |expression| {
                     context.stack_machine.reset();
-                    const value = try context.stack_machine.run(expression, context.allocator, compile_unit, ucontext, reg_ctx, context.cfa.?);
+                    const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?);
 
                     if (value != .generic) return error.InvalidExpressionValue;
                     mem.writeIntSliceNative(usize, out, value.generic);
                 },
-                .architectural => return error.UnimplementedRule,
+                .architectural => return error.UnimplementedRegisterRule,
             }
         }
     };
lib/std/dwarf/expressions.zig
@@ -17,6 +17,24 @@ pub const StackMachineOptions = struct {
     call_frame_mode: bool = false,
 };
 
+/// Expressions can be evaluated in different contexts, each requiring its own set of inputs.
+/// Callers should specify all the fields relevant to their context. If a field is required
+/// by the expression and it isn't in the context, error.IncompleteExpressionContext is returned.
+pub const ExpressionContext = struct {
+    /// If specified, any addresses will pass through this function before being
+    isValidMemory: ?*const fn (address: usize) bool = null,
+
+    /// The compilation unit this expression relates to, if any
+    compile_unit: ?*const dwarf.CompileUnit = null,
+
+    /// Register context
+    ucontext: ?*std.os.ucontext_t,
+    reg_ctx: ?abi.RegisterContext,
+
+    /// Call frame address, if in a CFI context
+    cfa: ?usize,
+};
+
 /// A stack machine that can decode and run DWARF expressions.
 /// Expressions can be decoded for non-native address size and endianness,
 /// but can only be executed if the current target matches the configuration.
@@ -41,6 +59,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
         const Operand = union(enum) {
             generic: addr_type,
             register: u8,
+            type_size: u8,
             base_register: struct {
                 base_register: u8,
                 offset: i64,
@@ -60,22 +79,46 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
             },
             deref_type: struct {
                 size: u8,
-                offset: u64,
+                type_offset: u64,
             },
         };
 
         const Value = union(enum) {
             generic: addr_type,
+
+            // Typed value with a maximum size of a register
             regval_type: struct {
                 // Offset of DW_TAG_base_type DIE
                 type_offset: u64,
+                type_size: u8,
                 value: addr_type,
             },
+
+            // Typed value specified directly in the instruction stream
             const_type: struct {
                 // Offset of DW_TAG_base_type DIE
                 type_offset: u64,
+                // Backed by the instruction stream
                 value_bytes: []const u8,
             },
+
+            pub fn asIntegral(self: Value) !addr_type {
+                return switch (self) {
+                    .generic => |v| v,
+
+                    // TODO: For these two prongs, look up the type and assert it's integral?
+                    .regval_type => |regval_type| regval_type.value,
+                    .const_type => |const_type| {
+                        return switch (const_type.value_bytes.len) {
+                            1 => mem.readIntSliceNative(u8, const_type.value_bytes),
+                            2 => mem.readIntSliceNative(u16, const_type.value_bytes),
+                            4 => mem.readIntSliceNative(u32, const_type.value_bytes),
+                            8 => mem.readIntSliceNative(u64, const_type.value_bytes),
+                            else => return error.InvalidIntegralTypeLength,
+                        };
+                    },
+                };
+            }
         };
 
         stack: std.ArrayListUnmanaged(Value) = .{},
@@ -111,9 +154,10 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
                 => generic(try reader.readInt(addr_type, options.endian)),
                 OP.const1u,
                 OP.pick,
+                => generic(try reader.readByte()),
                 OP.deref_size,
                 OP.xderef_size,
-                => generic(try reader.readByte()),
+                => .{ .type_size = try reader.readByte() },
                 OP.const1s => generic(try reader.readByteSigned()),
                 OP.const2u,
                 OP.call2,
@@ -199,7 +243,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
                 => .{
                     .deref_type = .{
                         .size = try reader.readByte(),
-                        .offset = try leb.readULEB128(u64, reader),
+                        .type_offset = try leb.readULEB128(u64, reader),
                     },
                 },
                 OP.lo_user...OP.hi_user => return error.UnimplementedUserOpcode,
@@ -211,14 +255,12 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
             self: *Self,
             expression: []const u8,
             allocator: std.mem.Allocator,
-            compile_unit: ?*const dwarf.CompileUnit,
-            ucontext: *const std.os.ucontext_t,
-            reg_ctx: abi.RegisterContext,
+            context: ExpressionContext,
             initial_value: usize,
         ) !Value {
             try self.stack.append(allocator, .{ .generic = initial_value });
             var stream = std.io.fixedBufferStream(expression);
-            while (try self.step(&stream, allocator, compile_unit, ucontext, reg_ctx)) {}
+            while (try self.step(&stream, allocator, context)) {}
             if (self.stack.items.len == 0) return error.InvalidExpression;
             return self.stack.items[self.stack.items.len - 1];
         }
@@ -228,9 +270,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
             self: *Self,
             stream: *std.io.FixedBufferStream([]const u8),
             allocator: std.mem.Allocator,
-            compile_unit: ?*const dwarf.CompileUnit,
-            ucontext: *const std.os.ucontext_t,
-            reg_ctx: dwarf.abi.RegisterContext,
+            context: ExpressionContext,
         ) !bool {
             if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != comptime builtin.target.cpu.arch.endian())
                 @compileError("Execution of non-native address sizees / endianness is not supported");
@@ -281,7 +321,9 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
                     } });
                 },
 
-                OP.addrx, OP.constx => {
+                OP.addrx,
+                OP.constx,
+                => {
                     const debug_addr_index = (try readOperand(stream, opcode)).?.generic;
 
                     // TODO: Read item from .debug_addr, this requires need DW_AT_addr_base of the compile unit, push onto stack as generic
@@ -292,13 +334,13 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
 
                 // 2.5.1.2: Register Values
                 OP.fbreg => {
-                    if (compile_unit == null) return error.ExpressionRequiresCompileUnit;
-                    if (compile_unit.?.frame_base == null) return error.ExpressionRequiresFrameBase;
+                    if (context.compile_unit == null) return error.ExpressionRequiresCompileUnit;
+                    if (context.compile_unit.?.frame_base == null) return error.ExpressionRequiresFrameBase;
 
                     const offset: i64 = @intCast((try readOperand(stream, opcode)).?.generic);
                     _ = offset;
 
-                    switch (compile_unit.?.frame_base.?.*) {
+                    switch (context.compile_unit.?.frame_base.?.*) {
                         .ExprLoc => {
                             // TODO: Run this expression in a nested stack machine
                             return error.UnimplementedOpcode;
@@ -314,34 +356,136 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
                         else => return error.InvalidFrameBase,
                     }
                 },
-                OP.breg0...OP.breg31, OP.bregx => {
+                OP.breg0...OP.breg31,
+                OP.bregx,
+                => {
+                    if (context.ucontext == null) return error.IncompleteExpressionContext;
+
                     const base_register = (try readOperand(stream, opcode)).?.base_register;
-                    var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes(ucontext, base_register.base_register, reg_ctx)));
+                    var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes(context.ucontext.?, base_register.base_register, context.reg_ctx)));
                     value += base_register.offset;
                     try self.stack.append(allocator, .{ .generic = @intCast(value) });
                 },
                 OP.regval_type => {
                     const register_type = (try readOperand(stream, opcode)).?.register_type;
-                    const value = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, register_type.register, reg_ctx));
+                    const value = mem.readIntSliceNative(usize, try abi.regBytes(context.ucontext.?, register_type.register, context.reg_ctx));
                     try self.stack.append(allocator, .{
                         .regval_type = .{
-                            .value = value,
                             .type_offset = register_type.type_offset,
+                            .type_size = @sizeOf(addr_type),
+                            .value = value,
                         },
                     });
                 },
 
                 // 2.5.1.3: Stack Operations
+                OP.dup => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    try self.stack.append(allocator, self.stack.items[self.stack.items.len - 1]);
+                },
+                OP.drop => {
+                    _ = self.stack.pop();
+                },
+                OP.pick, OP.over => {
+                    const stack_index = if (opcode == OP.over) 1 else (try readOperand(stream, opcode)).?.generic;
+                    if (stack_index >= self.stack.items.len) return error.InvalidExpression;
+                    try self.stack.append(allocator, self.stack.items[self.stack.items.len - 1 - stack_index]);
+                },
+                OP.swap => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    mem.swap(Value, &self.stack.items[self.stack.items.len - 1], &self.stack.items[self.stack.items.len - 2]);
+                },
+                OP.rot => {
+                    if (self.stack.items.len < 3) return error.InvalidExpression;
+                    const first = self.stack.items[self.stack.items.len - 1];
+                    self.stack.items[self.stack.items.len - 1] = self.stack.items[self.stack.items.len - 2];
+                    self.stack.items[self.stack.items.len - 2] = self.stack.items[self.stack.items.len - 3];
+                    self.stack.items[self.stack.items.len - 3] = first;
+                },
+                OP.deref,
+                OP.xderef,
+                OP.deref_size,
+                OP.xderef_size,
+                OP.deref_type,
+                OP.xderef_type,
+                => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    var addr = try self.stack.pop().asIntegral();
+                    const addr_space_identifier: ?usize = switch (opcode) {
+                        OP.xderef,
+                        OP.xderef_size,
+                        OP.xderef_type,
+                        => try self.stack.pop().asIntegral(),
+                        else => null,
+                    };
 
-                OP.dup => {},
+                    // Usage of addr_space_identifier in the address calculation is implementation defined.
+                    // This code will need to be updated to handle any architectures that utilize this.
+                    _ = addr_space_identifier;
 
-                else => {
-                    std.debug.print("Unimplemented DWARF expression opcode: {x}\n", .{opcode});
-                    unreachable;
+                    if (context.isValidMemory) |isValidMemory| if (!isValidMemory(addr)) return error.InvalidExpression;
+
+                    const operand = try readOperand(stream, opcode);
+                    const size = switch (opcode) {
+                        OP.deref => @sizeOf(addr_type),
+                        OP.deref_size,
+                        OP.xderef_size,
+                        => operand.?.type_size,
+                        OP.deref_type,
+                        OP.xderef_type,
+                        => operand.?.deref_type.size,
+                        else => unreachable,
+                    };
+
+                    const value: u64 = switch (size) {
+                        1 => @as(*const u8, @ptrFromInt(addr)).*,
+                        2 => @as(*const u16, @ptrFromInt(addr)).*,
+                        4 => @as(*const u32, @ptrFromInt(addr)).*,
+                        8 => @as(*const u64, @ptrFromInt(addr)).*,
+                        else => return error.InvalidExpression,
+                    };
+
+                    if (opcode == OP.deref_type) {
+                        try self.stack.append(allocator, .{
+                            .regval_type = .{
+                                .type_offset = operand.?.deref_type.type_offset,
+                                .type_size = operand.?.deref_type.size,
+                                .value = value,
+                            },
+                        });
+                    } else {
+                        try self.stack.append(allocator, .{ .generic = value });
+                    }
+                },
+                OP.push_object_address,
+                OP.form_tls_address,
+                => {
+                    return error.UnimplementedExpressionOpcode;
+                },
+                OP.call_frame_cfa => {
+                    if (context.cfa) |cfa| {
+                        try self.stack.append(allocator, .{ .generic = cfa });
+                    } else return error.IncompleteExpressionContext;
+                },
+
+                // 2.5.1.4: Arithmetic and Logical Operations
+                OP.abs => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    const value: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral());
+                    self.stack.items[self.stack.items.len - 1] = .{ .generic = std.math.absCast(value) };
+                },
+                OP.@"and" => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a = try self.stack.pop().asIntegral();
+                    self.stack.items[self.stack.items.len - 1] = .{ .generic = a & try self.stack.items[self.stack.items.len - 1].asIntegral() };
                 },
 
                 // These have already been handled by readOperand
                 OP.lo_user...OP.hi_user => unreachable,
+                else => {
+                    //std.debug.print("Unimplemented DWARF expression opcode: {x}\n", .{opcode});
+                    return error.UnknownExpressionOpcode;
+                },
             }
 
             return stream.pos < stream.buffer.len;
lib/std/debug.zig
@@ -448,7 +448,7 @@ pub inline fn getContext(context: *StackTraceContext) bool {
     if (native_os == .macos) {
         // TODO: Temp, to discover this size via aarch64 CI
         if (context.mcsize != @sizeOf(std.c.mcontext_t)) {
-            print("context.mcsize does not match! {} vs {}\n", .{ context.mcsize, @sizeOf(std.c.mcontext_t)});
+            print("context.mcsize does not match! {} vs {}\n", .{ context.mcsize, @sizeOf(std.c.mcontext_t) });
         }
 
         assert(context.mcsize == @sizeOf(std.c.mcontext_t));
lib/std/dwarf.zig
@@ -1673,7 +1673,14 @@ pub const DwarfInfo = struct {
             cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE;
         }
 
-        const compile_unit: ?*const CompileUnit = di.findCompileUnit(fde.pc_begin) catch null;
+        var expression_context = .{
+            .isValidMemory = context.isValidMemory,
+            .compile_unit = di.findCompileUnit(fde.pc_begin) catch null,
+            .ucontext = &context.ucontext,
+            .reg_ctx = context.reg_ctx,
+            .cfa = context.cfa,
+        };
+
         context.vm.reset();
         context.reg_ctx.eh_frame = cie.version != 4;
 
@@ -1691,9 +1698,7 @@ pub const DwarfInfo = struct {
                 const value = try context.stack_machine.run(
                     expression,
                     context.allocator,
-                    compile_unit,
-                    &context.ucontext,
-                    context.reg_ctx,
+                    expression_context,
                     context.cfa orelse 0,
                 );
 
@@ -1704,6 +1709,7 @@ pub const DwarfInfo = struct {
         };
 
         if (!context.isValidMemory(context.cfa.?)) return error.InvalidCFA;
+        expression_context.cfa = context.cfa;
 
         // Buffering the modifications is done because copying the ucontext is not portable,
         // some implementations (ie. darwin) use internal pointers to the mcontext.
@@ -1740,9 +1746,7 @@ pub const DwarfInfo = struct {
 
                 try column.resolveValue(
                     context,
-                    compile_unit,
-                    &context.ucontext,
-                    context.reg_ctx,
+                    expression_context,
                     new_value,
                 );
             }