Commit 05032c8693

Sahnvour <sahnvour@pm.me>
2019-07-28 19:03:36
coff & pdb: improved correctness of our implementation, it is now able to handle stage1's pdb and print its stack traces
1 parent d08425a
Changed files (3)
std/coff.zig
@@ -19,6 +19,7 @@ const IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b;
 const IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b;
 
 const IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16;
+const IMAGE_DEBUG_TYPE_CODEVIEW = 2;
 const DEBUG_DIRECTORY = 6;
 
 pub const CoffError = error{
@@ -28,6 +29,7 @@ pub const CoffError = error{
     MissingCoffSection,
 };
 
+// Official documentation of the format: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
 pub const Coff = struct {
     in_file: File,
     allocator: *mem.Allocator,
@@ -120,6 +122,7 @@ pub const Coff = struct {
 
     pub fn getPdbPath(self: *Coff, buffer: []u8) !usize {
         try self.loadSections();
+
         const header = blk: {
             if (self.getSection(".buildid")) |section| {
                 break :blk section.header;
@@ -130,14 +133,32 @@ pub const Coff = struct {
             }
         };
 
-        // The linker puts a chunk that contains the .pdb path right after the
-        // debug_directory.
         const debug_dir = &self.pe_header.data_directory[DEBUG_DIRECTORY];
         const file_offset = debug_dir.virtual_address - header.virtual_address + header.pointer_to_raw_data;
-        try self.in_file.seekTo(file_offset + debug_dir.size);
 
         var file_stream = self.in_file.inStream();
         const in = &file_stream.stream;
+        try self.in_file.seekTo(file_offset);
+
+        // Find the correct DebugDirectoryEntry, and where its data is stored.
+        // It can be in any section.
+        const debug_dir_entry_count = debug_dir.size / @sizeOf(DebugDirectoryEntry);
+        var i: u32 = 0;
+        blk: while (i < debug_dir_entry_count) : (i += 1) {
+            const debug_dir_entry = try in.readStruct(DebugDirectoryEntry);
+            if (debug_dir_entry.type == IMAGE_DEBUG_TYPE_CODEVIEW) {
+                for (self.sections.toSlice()) |*section| {
+                    const section_start = section.header.virtual_address;
+                    const section_size = section.header.misc.virtual_size;
+                    const rva = debug_dir_entry.address_of_raw_data;
+                    const offset = rva - section_start;
+                    if (section_start <= rva and offset < section_size and debug_dir_entry.size_of_data <= section_size - offset) {
+                        try self.in_file.seekTo(section.header.pointer_to_raw_data + offset);
+                        break :blk;
+                    }
+                }
+            }
+        }
 
         var cv_signature: [4]u8 = undefined; // CodeView signature
         try in.readNoEof(cv_signature[0..]);
@@ -149,7 +170,7 @@ pub const Coff = struct {
 
         // Finally read the null-terminated string.
         var byte = try in.readByte();
-        var i: usize = 0;
+        i = 0;
         while (byte != 0 and i < buffer.len) : (i += 1) {
             buffer[i] = byte;
             byte = try in.readByte();
@@ -178,7 +199,7 @@ pub const Coff = struct {
             try self.sections.append(Section{
                 .header = SectionHeader{
                     .name = name,
-                    .misc = SectionHeader.Misc{ .physical_address = try in.readIntLittle(u32) },
+                    .misc = SectionHeader.Misc{ .virtual_size = try in.readIntLittle(u32) },
                     .virtual_address = try in.readIntLittle(u32),
                     .size_of_raw_data = try in.readIntLittle(u32),
                     .pointer_to_raw_data = try in.readIntLittle(u32),
@@ -222,6 +243,17 @@ const OptionalHeader = struct {
     data_directory: [IMAGE_NUMBEROF_DIRECTORY_ENTRIES]DataDirectory,
 };
 
+const DebugDirectoryEntry = packed struct { // One debug directory record, read verbatim with readStruct: keep field order and widths.
+    characteristics: u32, // Reserved according to the PE format documentation.
+    time_date_stamp: u32,
+    major_version: u16,
+    minor_version: u16,
+    @"type": u32, // Debug info format; getPdbPath looks for IMAGE_DEBUG_TYPE_CODEVIEW.
+    size_of_data: u32, // Byte length of the debug data this entry describes.
+    address_of_raw_data: u32, // RVA of the debug data; resolved to a file offset via the section table.
+    pointer_to_raw_data: u32, // File offset of the debug data per the PE docs; not read by getPdbPath.
+};
+
 pub const Section = struct {
     header: SectionHeader,
 };
std/debug.zig
@@ -375,7 +375,7 @@ fn printSourceAtAddressWindows(di: *DebugInfo, out_stream: var, relocated_addres
     const obj_basename = fs.path.basename(mod.obj_file_name);
 
     var symbol_i: usize = 0;
-    const symbol_name = while (symbol_i != mod.symbols.len) {
+    const symbol_name = if (!mod.populated) "???" else while (symbol_i != mod.symbols.len) {
         const prefix = @ptrCast(*pdb.RecordPrefix, &mod.symbols[symbol_i]);
         if (prefix.RecordLen < 2)
             return error.InvalidDebugInfo;
@@ -858,8 +858,10 @@ fn openSelfDebugInfoWindows(allocator: *mem.Allocator) !DebugInfo {
     const age = try pdb_stream.stream.readIntLittle(u32);
     var guid: [16]u8 = undefined;
     try pdb_stream.stream.readNoEof(guid[0..]);
+    if (version != 20000404) // VC70, only value observed by LLVM team
+        return error.UnknownPDBVersion;
     if (!mem.eql(u8, di.coff.guid, guid) or di.coff.age != age)
-        return error.InvalidDebugInfo;
+        return error.PDBMismatch;
     // We validated the executable and pdb match.
 
     const string_table_index = str_tab_index: {
@@ -903,13 +905,18 @@ fn openSelfDebugInfoWindows(allocator: *mem.Allocator) !DebugInfo {
         return error.MissingDebugInfo;
     };
 
-    di.pdb.string_table = di.pdb.getStreamById(string_table_index) orelse return error.InvalidDebugInfo;
+    di.pdb.string_table = di.pdb.getStreamById(string_table_index) orelse return error.MissingDebugInfo;
     di.pdb.dbi = di.pdb.getStream(pdb.StreamType.Dbi) orelse return error.MissingDebugInfo;
 
     const dbi = di.pdb.dbi;
 
     // Dbi Header
     const dbi_stream_header = try dbi.stream.readStruct(pdb.DbiStreamHeader);
+    if (dbi_stream_header.VersionHeader != 19990903) // V70, only value observed by LLVM team
+        return error.UnknownPDBVersion;
+    if (dbi_stream_header.Age != age)
+        return error.UnmatchingPDB;
+
     const mod_info_size = dbi_stream_header.ModInfoSize;
     const section_contrib_size = dbi_stream_header.SectionContributionSize;
 
std/pdb.zig
@@ -499,45 +499,78 @@ const Msf = struct {
 
         const superblock = try in.readStruct(SuperBlock);
 
+        // Sanity checks
         if (!mem.eql(u8, superblock.FileMagic, SuperBlock.file_magic))
             return error.InvalidDebugInfo;
-
+        if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2)
+            return error.InvalidDebugInfo;
+        if (superblock.NumBlocks * superblock.BlockSize != try file.getEndPos())
+            return error.InvalidDebugInfo;
         switch (superblock.BlockSize) {
             // llvm only supports 4096 but we can handle any of these values
             512, 1024, 2048, 4096 => {},
             else => return error.InvalidDebugInfo,
         }
 
-        if (superblock.NumBlocks * superblock.BlockSize != try file.getEndPos())
-            return error.InvalidDebugInfo;
+        const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize);
+        if (dir_block_count > superblock.BlockSize / @sizeOf(u32))
+            return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment.
 
-        self.directory = try MsfStream.init(
+        try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr);
+        var dir_blocks = try allocator.alloc(u32, dir_block_count);
+        for (dir_blocks) |*b| {
+            b.* = try in.readIntLittle(u32);
+        }
+        self.directory = MsfStream.init(
             superblock.BlockSize,
-            blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize),
-            superblock.BlockSize * superblock.BlockMapAddr,
             file,
-            allocator,
+            dir_blocks,
         );
 
+        const begin = self.directory.pos;
         const stream_count = try self.directory.stream.readIntLittle(u32);
-
         const stream_sizes = try allocator.alloc(u32, stream_count);
-        for (stream_sizes) |*s| {
+        defer allocator.free(stream_sizes);
+
+        // Microsoft's implementation uses u32(-1) for nonexistent streams.
+        // These streams are not used, but still participate in the file
+        // and must be taken into account when resolving stream indices.
+        const Nil = 0xFFFFFFFF;
+        for (stream_sizes) |*s, i| {
             const size = try self.directory.stream.readIntLittle(u32);
-            s.* = blockCountFromSize(size, superblock.BlockSize);
+            s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize);
         }
 
         self.streams = try allocator.alloc(MsfStream, stream_count);
         for (self.streams) |*stream, i| {
-            stream.* = try MsfStream.init(
-                superblock.BlockSize,
-                stream_sizes[i],
-                // MsfStream.init expects the file to be at the part where it reads [N]u32
-                try file.getPos(),
-                file,
-                allocator,
-            );
+            const size = stream_sizes[i];
+            if (size == 0) {
+                stream.* = MsfStream{
+                    .blocks = [_]u32{},
+                };
+            } else {
+                var blocks = try allocator.alloc(u32, size);
+                var j: u32 = 0;
+                while (j < size) : (j += 1) {
+                    const block_id = try self.directory.stream.readIntLittle(u32);
+                    const n = (block_id % superblock.BlockSize);
+                    // 0 is for SuperBlock, 1 and 2 for FPMs.
+                    if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > try file.getEndPos())
+                        return error.InvalidBlockIndex;
+                    blocks[j] = block_id;
+                }
+
+                stream.* = MsfStream.init(
+                    superblock.BlockSize,
+                    file,
+                    blocks,
+                );
+            }
         }
+
+        const end = self.directory.pos;
+        if (end - begin != superblock.NumDirectoryBytes)
+            return error.InvalidStreamDirectory;
     }
 };
 
@@ -574,7 +607,6 @@ const SuperBlock = packed struct {
     NumDirectoryBytes: u32,
 
     Unknown: u32,
-
     /// The index of a block within the MSF file. At this block is an array of
     /// ulittle32_t’s listing the blocks that the stream directory resides on.
     /// For large MSF files, the stream directory (which describes the block
@@ -584,45 +616,41 @@ const SuperBlock = packed struct {
     /// and the stream directory itself can be stitched together accordingly.
     /// The number of ulittle32_t’s in this array is given by
     /// ceil(NumDirectoryBytes / BlockSize).
+    // Note: microsoft-pdb code actually suggests this is a variable-length
+    // array. If the indices of blocks occupied by the Stream Directory didn't
+    // fit in one page, there would be more u32 values following it.
+    // This would mean the Stream Directory is bigger than BlockSize / sizeof(u32)
+    // blocks. We're not even close to this with a 1GB pdb file, and LLVM didn't
+    // implement it so we're kind of safe making this assumption for now.
     BlockMapAddr: u32,
 };
 
 const MsfStream = struct {
-    in_file: File,
-    pos: u64,
-    blocks: []u32,
-    block_size: u32,
+    in_file: File = undefined,
+    pos: u64 = undefined,
+    blocks: []u32 = undefined,
+    block_size: u32 = undefined,
 
     /// Implementation of InStream trait for Pdb.MsfStream
-    stream: Stream,
+    stream: Stream = undefined,
 
     pub const Error = @typeOf(read).ReturnType.ErrorSet;
     pub const Stream = io.InStream(Error);
 
-    fn init(block_size: u32, block_count: u32, pos: u64, file: File, allocator: *mem.Allocator) !MsfStream {
-        var stream = MsfStream{
+    fn init(block_size: u32, file: File, blocks: []u32) MsfStream {
+        const stream = MsfStream{
             .in_file = file,
             .pos = 0,
-            .blocks = try allocator.alloc(u32, block_count),
+            .blocks = blocks,
             .block_size = block_size,
             .stream = Stream{ .readFn = readFn },
         };
 
-        var file_stream = file.inStream();
-        const in = &file_stream.stream;
-        try file.seekTo(pos);
-
-        var i: u32 = 0;
-        while (i < block_count) : (i += 1) {
-            stream.blocks[i] = try in.readIntLittle(u32);
-        }
-
         return stream;
     }
 
     fn readNullTermString(self: *MsfStream, allocator: *mem.Allocator) ![]u8 {
         var list = ArrayList(u8).init(allocator);
-        defer list.deinit();
         while (true) {
             const byte = try self.stream.readByte();
             if (byte == 0) {
@@ -633,6 +661,7 @@ const MsfStream = struct {
     }
 
     fn read(self: *MsfStream, buffer: []u8) !usize {
+
         var block_id = @intCast(usize, self.pos / self.block_size);
         var block = self.blocks[block_id];
         var offset = self.pos % self.block_size;