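//! Reader for Microsoft Program Database (PDB) debug information files,
//! layered on the MSF (Multi-Stream Format) container documented at
//! https://llvm.org/docs/PDB/MsfFile.html.
//!
//! Minimal usage sketch (hedged: `pdb_file`, `buffer`, `gpa`, `module_index`,
//! and `address` are caller-supplied placeholders, and obtaining the
//! `File.Reader` this way is only one possibility):
//!
//!     var file_reader = pdb_file.reader(&buffer);
//!     var p = try Pdb.init(gpa, &file_reader);
//!     defer p.deinit();
//!     try p.parseInfoStream();
//!     try p.parseDbiStream();
//!     if (try p.getModule(module_index)) |module| {
//!         const loc = try p.getLineNumberInfo(module, address);
//!         // loc.file_name, loc.line, loc.column
//!     }
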
const std = @import("../std.zig");
const File = std.fs.File;
const Allocator = std.mem.Allocator;
const pdb = std.pdb;
const assert = std.debug.assert;

const Pdb = @This();

file_reader: *File.Reader,
msf: Msf,
allocator: Allocator,
string_table: ?*MsfStream,
dbi: ?*MsfStream,
modules: []Module,
sect_contribs: []pdb.SectionContribEntry,
guid: [16]u8,
age: u32,

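/// A single module (compilation unit / object file) described by the DBI
/// stream's module info substream. Symbol and line information is loaded
/// lazily by `getModule`.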
pub const Module = struct {
    mod_info: pdb.ModInfo,
    module_name: []u8,
    obj_file_name: []u8,
    // The fields below are filled on demand.
    populated: bool,
    symbols: []u8,
    subsect_info: []u8,
    checksum_offset: ?usize,

    pub fn deinit(self: *Module, allocator: Allocator) void {
        allocator.free(self.module_name);
        allocator.free(self.obj_file_name);
        if (self.populated) {
            allocator.free(self.symbols);
            allocator.free(self.subsect_info);
        }
    }
};

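/// Reads the MSF superblock and stream directory from `file_reader`.
/// The returned `Pdb` keeps a reference to `file_reader`, which must remain
/// valid until `deinit` is called.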
pub fn init(gpa: Allocator, file_reader: *File.Reader) !Pdb {
    return .{
        .file_reader = file_reader,
        .allocator = gpa,
        .string_table = null,
        .dbi = null,
        .msf = try Msf.init(gpa, file_reader),
        .modules = &.{},
        .sect_contribs = &.{},
        .guid = undefined,
        .age = undefined,
    };
}

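/// Frees all memory owned by this `Pdb`, including any lazily populated
/// module data. Does not close the underlying file.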
pub fn deinit(self: *Pdb) void {
    const gpa = self.allocator;
    self.msf.deinit(gpa);
    for (self.modules) |*module| {
        module.deinit(gpa);
    }
    gpa.free(self.modules);
    gpa.free(self.sect_contribs);
}

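/// Parses the DBI stream's module info and section contribution substreams,
/// populating `modules` and `sect_contribs`.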
pub fn parseDbiStream(self: *Pdb) !void {
    var stream = self.getStream(pdb.StreamType.dbi) orelse
        return error.InvalidDebugInfo;

    const gpa = self.allocator;
    const reader = &stream.interface;

    const header = try reader.takeStruct(std.pdb.DbiStreamHeader, .little);
    if (header.version_header != 19990903) // V70, only value observed by LLVM team
        return error.UnknownPDBVersion;
    // if (header.Age != age)
    //     return error.UnmatchingPDB;

    const mod_info_size = header.mod_info_size;
    const section_contrib_size = header.section_contribution_size;

    var modules = std.array_list.Managed(Module).init(gpa);
    errdefer modules.deinit();

    // Module Info Substream
    var mod_info_offset: usize = 0;
    while (mod_info_offset != mod_info_size) {
        const mod_info = try reader.takeStruct(pdb.ModInfo, .little);
        var this_record_len: usize = @sizeOf(pdb.ModInfo);

        var module_name: std.Io.Writer.Allocating = .init(gpa);
        defer module_name.deinit();
        this_record_len += try reader.streamDelimiterLimit(&module_name.writer, 0, .limited(1024));
        assert(reader.buffered()[0] == 0); // TODO change streamDelimiterLimit API
        reader.toss(1);
        this_record_len += 1;

        var obj_file_name: std.Io.Writer.Allocating = .init(gpa);
        defer obj_file_name.deinit();
        this_record_len += try reader.streamDelimiterLimit(&obj_file_name.writer, 0, .limited(1024));
        assert(reader.buffered()[0] == 0); // TODO change streamDelimiterLimit API
        reader.toss(1);
        this_record_len += 1;

        if (this_record_len % 4 != 0) {
            const round_to_next_4 = (this_record_len | 0x3) + 1;
            const march_forward_bytes = round_to_next_4 - this_record_len;
            try stream.seekBy(@as(isize, @intCast(march_forward_bytes)));
            this_record_len += march_forward_bytes;
        }

        try modules.append(.{
            .mod_info = mod_info,
            .module_name = try module_name.toOwnedSlice(),
            .obj_file_name = try obj_file_name.toOwnedSlice(),

            .populated = false,
            .symbols = undefined,
            .subsect_info = undefined,
            .checksum_offset = null,
        });

        mod_info_offset += this_record_len;
        if (mod_info_offset > mod_info_size)
            return error.InvalidDebugInfo;
    }

    // Section Contribution Substream
    var sect_contribs = std.array_list.Managed(pdb.SectionContribEntry).init(gpa);
    errdefer sect_contribs.deinit();

    var sect_cont_offset: usize = 0;
    if (section_contrib_size != 0) {
        const version = reader.takeEnum(std.pdb.SectionContrSubstreamVersion, .little) catch |err| switch (err) {
            error.InvalidEnumTag, error.EndOfStream => return error.InvalidDebugInfo,
            error.ReadFailed => return error.ReadFailed,
        };
        _ = version;
        sect_cont_offset += @sizeOf(u32);
    }
    while (sect_cont_offset != section_contrib_size) {
        const entry = try sect_contribs.addOne();
        entry.* = try reader.takeStruct(pdb.SectionContribEntry, .little);
        sect_cont_offset += @sizeOf(pdb.SectionContribEntry);

        if (sect_cont_offset > section_contrib_size)
            return error.InvalidDebugInfo;
    }

    self.modules = try modules.toOwnedSlice();
    self.sect_contribs = try sect_contribs.toOwnedSlice();
}

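/// Parses the PDB info stream, recording `guid` and `age` and locating the
/// "/names" string table stream required by `getLineNumberInfo`.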
pub fn parseInfoStream(self: *Pdb) !void {
    var stream = self.getStream(pdb.StreamType.pdb) orelse return error.InvalidDebugInfo;
    const reader = &stream.interface;

    // Parse the InfoStreamHeader.
    const version = try reader.takeInt(u32, .little);
    const signature = try reader.takeInt(u32, .little);
    _ = signature;
    const age = try reader.takeInt(u32, .little);
    const guid = try reader.takeArray(16);

    if (version != 20000404) // VC70, only value observed by LLVM team
        return error.UnknownPDBVersion;

    self.guid = guid.*;
    self.age = age;

    const gpa = self.allocator;

    // Find the string table.
    const string_table_index = str_tab_index: {
        const name_bytes_len = try reader.takeInt(u32, .little);
        const name_bytes = try reader.readAlloc(gpa, name_bytes_len);
        defer gpa.free(name_bytes);

        const HashTableHeader = extern struct {
            size: u32,
            capacity: u32,

            fn maxLoad(cap: u32) u32 {
                return cap * 2 / 3 + 1;
            }
        };
        const hash_tbl_hdr = try reader.takeStruct(HashTableHeader, .little);
        if (hash_tbl_hdr.capacity == 0)
            return error.InvalidDebugInfo;

        if (hash_tbl_hdr.size > HashTableHeader.maxLoad(hash_tbl_hdr.capacity))
            return error.InvalidDebugInfo;

        const present = try readSparseBitVector(reader, gpa);
        defer gpa.free(present);
        if (present.len != hash_tbl_hdr.size)
            return error.InvalidDebugInfo;
        const deleted = try readSparseBitVector(reader, gpa);
        defer gpa.free(deleted);

        for (present) |_| {
            const name_offset = try reader.takeInt(u32, .little);
            const name_index = try reader.takeInt(u32, .little);
            if (name_offset > name_bytes.len)
                return error.InvalidDebugInfo;
            const name = std.mem.sliceTo(name_bytes[name_offset..], 0);
            if (std.mem.eql(u8, name, "/names")) {
                break :str_tab_index name_index;
            }
        }
        return error.MissingDebugInfo;
    };

    self.string_table = self.getStreamById(string_table_index) orelse
        return error.MissingDebugInfo;
}

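/// Searches the module's symbol records for a procedure symbol whose code
/// range contains `address` and returns its name, or `null` if none matches.
/// Asserts that `module` has been populated via `getModule`.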
pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 {
    _ = self;
    std.debug.assert(module.populated);

    var symbol_i: usize = 0;
    while (symbol_i != module.symbols.len) {
        const prefix: *align(1) pdb.RecordPrefix = @ptrCast(&module.symbols[symbol_i]);
        if (prefix.record_len < 2)
            return null;
        switch (prefix.record_kind) {
            .lproc32, .gproc32 => {
                const proc_sym: *align(1) pdb.ProcSym = @ptrCast(&module.symbols[symbol_i + @sizeOf(pdb.RecordPrefix)]);
                if (address >= proc_sym.code_offset and address < proc_sym.code_offset + proc_sym.code_size) {
                    return std.mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.name[0])), 0);
                }
            },
            else => {},
        }
        symbol_i += prefix.record_len + @sizeOf(u16);
    }

    return null;
}

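/// Resolves `address` to a file name, line, and column using the module's
/// C13 line information and the PDB string table.
/// Asserts that `module` has been populated via `getModule`; `parseInfoStream`
/// must have been called so that the string table is available.
/// The returned file name is allocated with this `Pdb`'s allocator and owned
/// by the caller.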
pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !std.debug.SourceLocation {
    std.debug.assert(module.populated);
    const subsect_info = module.subsect_info;
    const gpa = self.allocator;

    var sect_offset: usize = 0;
    var skip_len: usize = undefined;
    const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo;
    while (sect_offset != subsect_info.len) : (sect_offset += skip_len) {
        const subsect_hdr: *align(1) pdb.DebugSubsectionHeader = @ptrCast(&subsect_info[sect_offset]);
        skip_len = subsect_hdr.length;
        sect_offset += @sizeOf(pdb.DebugSubsectionHeader);

        switch (subsect_hdr.kind) {
            .lines => {
                var line_index = sect_offset;

                const line_hdr: *align(1) pdb.LineFragmentHeader = @ptrCast(&subsect_info[line_index]);
                if (line_hdr.reloc_segment == 0)
                    return error.MissingDebugInfo;
                line_index += @sizeOf(pdb.LineFragmentHeader);
                const frag_vaddr_start = line_hdr.reloc_offset;
                const frag_vaddr_end = frag_vaddr_start + line_hdr.code_size;

                if (address >= frag_vaddr_start and address < frag_vaddr_end) {
                    // From here on there is an unknown number of LineBlockFragmentHeaders (and their
                    // accompanying line and column records). Iterate through them until a matching
                    // SourceLocation is found and returned; otherwise, make sure not to read anything
                    // outside of this subsection.
                    const subsection_end_index = sect_offset + subsect_hdr.length;

                    while (line_index < subsection_end_index) {
                        const block_hdr: *align(1) pdb.LineBlockFragmentHeader = @ptrCast(&subsect_info[line_index]);
                        line_index += @sizeOf(pdb.LineBlockFragmentHeader);
                        const start_line_index = line_index;

                        const has_column = line_hdr.flags.have_columns;

                        // All line entries are stored inside their line block by ascending start address.
                        // Heuristic: we want to find the last line entry
                        // that has a vaddr_start <= address.
                        // This is done with a simple linear search.
                        var line_i: u32 = 0;
                        while (line_i < block_hdr.num_lines) : (line_i += 1) {
                            const line_num_entry: *align(1) pdb.LineNumberEntry = @ptrCast(&subsect_info[line_index]);
                            line_index += @sizeOf(pdb.LineNumberEntry);

                            const vaddr_start = frag_vaddr_start + line_num_entry.offset;
                            if (address < vaddr_start) {
                                break;
                            }
                        }

                        // line_i == 0 would mean that no matching pdb.LineNumberEntry was found.
                        if (line_i > 0) {
                            const subsect_index = checksum_offset + block_hdr.name_index;
                            const chksum_hdr: *align(1) pdb.FileChecksumEntryHeader = @ptrCast(&module.subsect_info[subsect_index]);
                            const strtab_offset = @sizeOf(pdb.StringTableHeader) + chksum_hdr.file_name_offset;
                            try self.string_table.?.seekTo(strtab_offset);
                            const source_file_name = s: {
                                const string_reader = &self.string_table.?.interface;
                                var source_file_name: std.Io.Writer.Allocating = .init(gpa);
                                defer source_file_name.deinit();
                                _ = try string_reader.streamDelimiterLimit(&source_file_name.writer, 0, .limited(1024));
                                assert(string_reader.buffered()[0] == 0); // TODO change streamDelimiterLimit API
                                string_reader.toss(1);
                                break :s try source_file_name.toOwnedSlice();
                            };
                            errdefer gpa.free(source_file_name);

                            const line_entry_idx = line_i - 1;

                            const column = if (has_column) blk: {
                                const start_col_index = start_line_index + @sizeOf(pdb.LineNumberEntry) * block_hdr.num_lines;
                                const col_index = start_col_index + @sizeOf(pdb.ColumnNumberEntry) * line_entry_idx;
                                const col_num_entry: *align(1) pdb.ColumnNumberEntry = @ptrCast(&subsect_info[col_index]);
                                break :blk col_num_entry.start_column;
                            } else 0;

                            const found_line_index = start_line_index + line_entry_idx * @sizeOf(pdb.LineNumberEntry);
                            const line_num_entry: *align(1) pdb.LineNumberEntry = @ptrCast(&subsect_info[found_line_index]);

                            return .{
                                .file_name = source_file_name,
                                .line = line_num_entry.flags.start,
                                .column = column,
                            };
                        }
                    }

                    // Check that we are not reading garbage after the (possibly multiple) block fragments.
                    if (line_index != subsection_end_index) {
                        return error.InvalidDebugInfo;
                    }
                }
            },
            else => {},
        }

        if (sect_offset > subsect_info.len)
            return error.InvalidDebugInfo;
    }

    return error.MissingDebugInfo;
}

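/// Returns the module at `index`, or `null` if `index` is out of range.
/// On first access the module's symbol and C13 subsection data are read from
/// its stream and cached.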
pub fn getModule(self: *Pdb, index: usize) !?*Module {
    if (index >= self.modules.len)
        return null;

    const mod = &self.modules[index];
    if (mod.populated)
        return mod;

    // At most one can be non-zero.
    if (mod.mod_info.c11_byte_size != 0 and mod.mod_info.c13_byte_size != 0)
        return error.InvalidDebugInfo;
    if (mod.mod_info.c13_byte_size == 0)
        return error.InvalidDebugInfo;

    const stream = self.getStreamById(mod.mod_info.module_sym_stream) orelse
        return error.MissingDebugInfo;
    const reader = &stream.interface;

    const signature = try reader.takeInt(u32, .little);
    if (signature != 4)
        return error.InvalidDebugInfo;

    const gpa = self.allocator;

    mod.symbols = try reader.readAlloc(gpa, mod.mod_info.sym_byte_size - 4);
    mod.subsect_info = try reader.readAlloc(gpa, mod.mod_info.c13_byte_size);

    var sect_offset: usize = 0;
    var skip_len: usize = undefined;
    while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) {
        const subsect_hdr: *align(1) pdb.DebugSubsectionHeader = @ptrCast(&mod.subsect_info[sect_offset]);
        skip_len = subsect_hdr.length;
        sect_offset += @sizeOf(pdb.DebugSubsectionHeader);

        switch (subsect_hdr.kind) {
            .file_checksums => {
                mod.checksum_offset = sect_offset;
                break;
            },
            else => {},
        }

        if (sect_offset > mod.subsect_info.len)
            return error.InvalidDebugInfo;
    }

    mod.populated = true;
    return mod;
}

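/// Returns the MSF stream with index `id`, or `null` if the file has no such
/// stream.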
pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream {
    if (id >= self.msf.streams.len) return null;
    return &self.msf.streams[id];
}

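/// Returns the MSF stream corresponding to a fixed-index stream type, or
/// `null` if it is not present.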
pub fn getStream(self: *Pdb, stream: pdb.StreamType) ?*MsfStream {
    const id = @intFromEnum(stream);
    return self.getStreamById(id);
}

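/// The MSF (Multi-Stream Format) container: a block-oriented file holding a
/// stream directory plus the streams themselves, each stored as a list of
/// block indices.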
/// https://llvm.org/docs/PDB/MsfFile.html
const Msf = struct {
    directory: MsfStream,
    streams: []MsfStream,

    fn init(gpa: Allocator, file_reader: *File.Reader) !Msf {
        const superblock = try file_reader.interface.takeStruct(pdb.SuperBlock, .little);

        if (!std.mem.eql(u8, &superblock.file_magic, pdb.SuperBlock.expect_magic))
            return error.InvalidDebugInfo;
        if (superblock.free_block_map_block != 1 and superblock.free_block_map_block != 2)
            return error.InvalidDebugInfo;
        if (superblock.num_blocks * superblock.block_size != try file_reader.getSize())
            return error.InvalidDebugInfo;
        switch (superblock.block_size) {
            // LLVM only supports 4096, but we can handle any of these values.
            512, 1024, 2048, 4096 => {},
            else => return error.InvalidDebugInfo,
        }

        const dir_block_count = blockCountFromSize(superblock.num_directory_bytes, superblock.block_size);
        if (dir_block_count > superblock.block_size / @sizeOf(u32))
            return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment.

        try file_reader.seekTo(superblock.block_size * superblock.block_map_addr);
        const dir_blocks = try gpa.alloc(u32, dir_block_count);
        errdefer gpa.free(dir_blocks);
        for (dir_blocks) |*b| {
            b.* = try file_reader.interface.takeInt(u32, .little);
        }
        var directory_buffer: [64]u8 = undefined;
        var directory = MsfStream.init(superblock.block_size, file_reader, dir_blocks, &directory_buffer);

        const begin = directory.logicalPos();
        const stream_count = try directory.interface.takeInt(u32, .little);
        const stream_sizes = try gpa.alloc(u32, stream_count);
        defer gpa.free(stream_sizes);

        // Microsoft's implementation uses a stream size of 0xFFFFFFFF (-1 as a u32)
        // for nonexistent streams. These streams are not used, but still participate
        // in the file and must be taken into account when resolving stream indices.
        const nil_size = 0xFFFFFFFF;
        for (stream_sizes) |*s| {
            const size = try directory.interface.takeInt(u32, .little);
            s.* = if (size == nil_size) 0 else blockCountFromSize(size, superblock.block_size);
        }

        const streams = try gpa.alloc(MsfStream, stream_count);
        errdefer gpa.free(streams);

        for (streams, stream_sizes) |*stream, size| {
            if (size == 0) {
                stream.* = .empty;
                continue;
            }
            const blocks = try gpa.alloc(u32, size);
            errdefer gpa.free(blocks);
            for (blocks) |*block| {
                const block_id = try directory.interface.takeInt(u32, .little);
                // Index 0 is reserved for the superblock.
                // In theory, every page which is `n * block_size + 1` or `n * block_size + 2`
                // is also reserved, for one of the FPMs. However, LLVM has been observed to map
                // these into actual streams, so allow it for compatibility.
                if (block_id == 0 or block_id >= superblock.num_blocks) return error.InvalidBlockIndex;
                block.* = block_id;
            }
            const buffer = try gpa.alloc(u8, 64);
            errdefer gpa.free(buffer);
            stream.* = .init(superblock.block_size, file_reader, blocks, buffer);
        }

        const end = directory.logicalPos();
        if (end - begin != superblock.num_directory_bytes)
            return error.InvalidStreamDirectory;

        return .{
            .directory = directory,
            .streams = streams,
        };
    }

    fn deinit(self: *Msf, gpa: Allocator) void {
        gpa.free(self.directory.blocks);
        for (self.streams) |*stream| {
            gpa.free(stream.interface.buffer);
            gpa.free(stream.blocks);
        }
        gpa.free(self.streams);
    }
};

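/// A single MSF stream: a logically contiguous byte sequence stored as a
/// series of (not necessarily adjacent) fixed-size blocks in the file.
/// Reading goes through `interface`, a `std.Io.Reader` that follows the
/// block list transparently.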
const MsfStream = struct {
    file_reader: *File.Reader,
    next_read_pos: u64,
    blocks: []u32,
    block_size: u32,
    interface: std.Io.Reader,
    err: ?Error,

    const Error = File.Reader.SeekError;

    const empty: MsfStream = .{
        .file_reader = undefined,
        .next_read_pos = 0,
        .blocks = &.{},
        .block_size = undefined,
        .interface = .ending_instance,
        .err = null,
    };

    fn init(block_size: u32, file_reader: *File.Reader, blocks: []u32, buffer: []u8) MsfStream {
        return .{
            .file_reader = file_reader,
            .next_read_pos = 0,
            .blocks = blocks,
            .block_size = block_size,
            .interface = .{
                .vtable = &.{ .stream = stream },
                .buffer = buffer,
                .seek = 0,
                .end = 0,
            },
            .err = null,
        };
    }

    fn stream(r: *std.Io.Reader, w: *std.Io.Writer, limit: std.Io.Limit) std.Io.Reader.StreamError!usize {
        const ms: *MsfStream = @alignCast(@fieldParentPtr("interface", r));

        var block_id: usize = @intCast(ms.next_read_pos / ms.block_size);
        if (block_id >= ms.blocks.len) return error.EndOfStream;
        var block = ms.blocks[block_id];
        var offset = ms.next_read_pos % ms.block_size;

        ms.file_reader.seekTo(block * ms.block_size + offset) catch |err| {
            ms.err = err;
            return error.ReadFailed;
        };

        var remaining = @intFromEnum(limit);
        while (remaining != 0) {
            const stream_len: usize = @min(remaining, ms.block_size - offset);
            const n = try ms.file_reader.interface.stream(w, .limited(stream_len));
            remaining -= n;
            offset += n;

            // If we're at the end of a block, go to the next one.
            if (offset == ms.block_size) {
                offset = 0;
                block_id += 1;
                if (block_id >= ms.blocks.len) break; // End of Stream
                block = ms.blocks[block_id];
                ms.file_reader.seekTo(block * ms.block_size) catch |err| {
                    ms.err = err;
                    return error.ReadFailed;
                };
            }
        }

        const total = @intFromEnum(limit) - remaining;
        ms.next_read_pos += total;
        return total;
    }

    pub fn logicalPos(ms: *const MsfStream) u64 {
        return ms.next_read_pos - ms.interface.bufferedLen();
    }

    pub fn seekBy(ms: *MsfStream, len: i64) !void {
        ms.next_read_pos = @as(u64, @intCast(@as(i64, @intCast(ms.logicalPos())) + len));
        if (ms.next_read_pos >= ms.blocks.len * ms.block_size) return error.EOF;
        ms.interface.tossBuffered();
    }

    pub fn seekTo(ms: *MsfStream, len: u64) !void {
        ms.next_read_pos = len;
        if (ms.next_read_pos >= ms.blocks.len * ms.block_size) return error.EOF;
        ms.interface.tossBuffered();
    }

    fn getSize(ms: *const MsfStream) u64 {
        return ms.blocks.len * ms.block_size;
    }

    fn getFilePos(ms: *const MsfStream) u64 {
        const pos = ms.logicalPos();
        const block_id = pos / ms.block_size;
        const block = ms.blocks[block_id];
        const offset = pos % ms.block_size;

        return block * ms.block_size + offset;
    }
};

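/// Reads a serialized sparse bit vector (a word count followed by that many
/// 32-bit words) and returns the indices of the set bits.
/// Caller owns the returned slice.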
fn readSparseBitVector(reader: *std.Io.Reader, allocator: Allocator) ![]u32 {
    const num_words = try reader.takeInt(u32, .little);
    var list = std.array_list.Managed(u32).init(allocator);
    errdefer list.deinit();
    var word_i: u32 = 0;
    while (word_i != num_words) : (word_i += 1) {
        const word = try reader.takeInt(u32, .little);
        var bit_i: u5 = 0;
        while (true) : (bit_i += 1) {
            if (word & (@as(u32, 1) << bit_i) != 0) {
                try list.append(word_i * 32 + bit_i);
            }
            if (bit_i == std.math.maxInt(u5)) break;
        }
    }
    return try list.toOwnedSlice();
}

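/// Returns the number of `block_size`-byte blocks needed to hold `size`
/// bytes, rounding up.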
fn blockCountFromSize(size: u32, block_size: u32) u32 {
    return (size + block_size - 1) / block_size;
}