//! Parsing of PDB (Program Database) debug information files.
const std = @import("../std.zig");
const File = std.fs.File;
const Allocator = std.mem.Allocator;
const pdb = std.pdb;
const assert = std.debug.assert;

const Pdb = @This();

/// Positioned reader over the underlying PDB file.
file_reader: *File.Reader,
/// Parsed MSF (Multi-Stream File) container backed by `file_reader`.
msf: Msf,
allocator: Allocator,
/// The "/names" string table stream; set by `parseInfoStream`.
string_table: ?*MsfStream,
/// The DBI stream, if resolved.
/// NOTE(review): never assigned in this file — confirm external use.
dbi: ?*MsfStream,
/// Module list; filled by `parseDbiStream`, freed by `deinit`.
modules: []Module,
/// Section contributions; filled by `parseDbiStream`, freed by `deinit`.
sect_contribs: []pdb.SectionContribEntry,
/// PDB GUID; undefined until `parseInfoStream` runs.
guid: [16]u8,
/// PDB age; undefined until `parseInfoStream` runs.
age: u32,
/// One compiled module (object file) described by the DBI stream.
pub const Module = struct {
    mod_info: pdb.ModInfo,
    /// Allocated; always freed by `deinit`.
    module_name: []u8,
    /// Allocated; always freed by `deinit`.
    obj_file_name: []u8,
    // The fields below are filled on demand.
    /// True once the symbol and subsection data below have been loaded.
    populated: bool,
    /// Raw symbol record bytes; only valid (and owned) when `populated`.
    symbols: []u8,
    /// Raw C13 debug subsection bytes; only valid (and owned) when `populated`.
    subsect_info: []u8,
    /// Offset of the file-checksums subsection within `subsect_info`, if found.
    checksum_offset: ?usize,

    /// Frees all memory owned by this module.
    pub fn deinit(self: *Module, allocator: Allocator) void {
        allocator.free(self.module_name);
        allocator.free(self.obj_file_name);
        if (self.populated) {
            allocator.free(self.symbols);
            allocator.free(self.subsect_info);
        }
    }
};
38
/// Creates a `Pdb` over `file_reader`, parsing the MSF superblock and
/// stream directory up front. The remaining fields are populated later by
/// `parseInfoStream` / `parseDbiStream`.
pub fn init(gpa: Allocator, file_reader: *File.Reader) !Pdb {
    const msf = try Msf.init(gpa, file_reader);
    return .{
        .file_reader = file_reader,
        .msf = msf,
        .allocator = gpa,
        .string_table = null,
        .dbi = null,
        .modules = &.{},
        .sect_contribs = &.{},
        .guid = undefined,
        .age = undefined,
    };
}
52
/// Releases everything owned by this `Pdb`: each module's buffers, the
/// module and section-contribution arrays, and the MSF stream bookkeeping.
pub fn deinit(self: *Pdb) void {
    const gpa = self.allocator;
    for (self.modules) |*mod| mod.deinit(gpa);
    gpa.free(self.modules);
    gpa.free(self.sect_contribs);
    self.msf.deinit(gpa);
}
62
/// Parses the DBI stream: the module-info substream (filling `self.modules`)
/// and the section-contribution substream (filling `self.sect_contribs`).
/// Must succeed before `getModule` is used.
pub fn parseDbiStream(self: *Pdb) !void {
    var stream = self.getStream(pdb.StreamType.dbi) orelse
        return error.InvalidDebugInfo;

    const gpa = self.allocator;
    const reader = &stream.interface;

    const header = try reader.takeStruct(std.pdb.DbiStreamHeader, .little);
    if (header.version_header != 19990903) // V70, only value observed by LLVM team
        return error.UnknownPDBVersion;
    // if (header.Age != age)
    //     return error.UnmatchingPDB;

    const mod_info_size = header.mod_info_size;
    const section_contrib_size = header.section_contribution_size;

    var modules = std.array_list.Managed(Module).init(gpa);
    errdefer modules.deinit();
    // Also free the name strings of modules already appended if a later
    // record fails; runs before `modules.deinit()` (errdefers are LIFO).
    // Without this, `module_name`/`obj_file_name` leaked on the error path.
    errdefer for (modules.items) |*mod| mod.deinit(gpa);

    // Module Info Substream
    var mod_info_offset: usize = 0;
    while (mod_info_offset != mod_info_size) {
        const mod_info = try reader.takeStruct(pdb.ModInfo, .little);
        var this_record_len: usize = @sizeOf(pdb.ModInfo);

        // Each ModInfo is followed by two NUL-terminated strings:
        // module name, then object file name.
        var module_name: std.Io.Writer.Allocating = .init(gpa);
        defer module_name.deinit();
        this_record_len += try reader.streamDelimiterLimit(&module_name.writer, 0, .limited(1024));
        assert(reader.buffered()[0] == 0); // TODO change streamDelimiterLimit API
        reader.toss(1);
        this_record_len += 1;

        var obj_file_name: std.Io.Writer.Allocating = .init(gpa);
        defer obj_file_name.deinit();
        this_record_len += try reader.streamDelimiterLimit(&obj_file_name.writer, 0, .limited(1024));
        assert(reader.buffered()[0] == 0); // TODO change streamDelimiterLimit API
        reader.toss(1);
        this_record_len += 1;

        // Records are padded to a 4-byte boundary; skip the padding bytes.
        if (this_record_len % 4 != 0) {
            const round_to_next_4 = (this_record_len | 0x3) + 1;
            const march_forward_bytes = round_to_next_4 - this_record_len;
            try stream.seekBy(@as(isize, @intCast(march_forward_bytes)));
            this_record_len += march_forward_bytes;
        }

        try modules.append(.{
            .mod_info = mod_info,
            .module_name = try module_name.toOwnedSlice(),
            .obj_file_name = try obj_file_name.toOwnedSlice(),

            .populated = false,
            .symbols = undefined,
            .subsect_info = undefined,
            .checksum_offset = null,
        });

        mod_info_offset += this_record_len;
        if (mod_info_offset > mod_info_size)
            return error.InvalidDebugInfo;
    }

    // Section Contribution Substream
    var sect_contribs = std.array_list.Managed(pdb.SectionContribEntry).init(gpa);
    errdefer sect_contribs.deinit();

    var sect_cont_offset: usize = 0;
    if (section_contrib_size != 0) {
        // The substream begins with a version tag; only its validity matters here.
        const version = reader.takeEnum(std.pdb.SectionContrSubstreamVersion, .little) catch |err| switch (err) {
            error.InvalidEnumTag, error.EndOfStream => return error.InvalidDebugInfo,
            error.ReadFailed => return error.ReadFailed,
        };
        _ = version;
        sect_cont_offset += @sizeOf(u32);
    }
    while (sect_cont_offset != section_contrib_size) {
        const entry = try sect_contribs.addOne();
        entry.* = try reader.takeStruct(pdb.SectionContribEntry, .little);
        sect_cont_offset += @sizeOf(pdb.SectionContribEntry);

        if (sect_cont_offset > section_contrib_size)
            return error.InvalidDebugInfo;
    }

    self.modules = try modules.toOwnedSlice();
    self.sect_contribs = try sect_contribs.toOwnedSlice();
}
150
/// Parses the PDB info stream: validates the version, records the GUID and
/// age on `self`, then walks the named-stream hash table to locate the
/// "/names" string table stream, storing it in `self.string_table`.
pub fn parseInfoStream(self: *Pdb) !void {
    var stream = self.getStream(pdb.StreamType.pdb) orelse return error.InvalidDebugInfo;
    const reader = &stream.interface;

    // Parse the InfoStreamHeader.
    const version = try reader.takeInt(u32, .little);
    const signature = try reader.takeInt(u32, .little);
    _ = signature;
    const age = try reader.takeInt(u32, .little);
    const guid = try reader.takeArray(16);

    if (version != 20000404) // VC70, only value observed by LLVM team
        return error.UnknownPDBVersion;

    self.guid = guid.*;
    self.age = age;

    const gpa = self.allocator;

    // Find the string table.
    const string_table_index = str_tab_index: {
        // The named stream map: a length-prefixed blob of NUL-terminated
        // names, followed by a serialized hash table of (offset, index) pairs.
        const name_bytes_len = try reader.takeInt(u32, .little);
        const name_bytes = try reader.readAlloc(gpa, name_bytes_len);
        defer gpa.free(name_bytes);

        const HashTableHeader = extern struct {
            size: u32,
            capacity: u32,

            fn maxLoad(cap: u32) u32 {
                return cap * 2 / 3 + 1;
            }
        };
        const hash_tbl_hdr = try reader.takeStruct(HashTableHeader, .little);
        if (hash_tbl_hdr.capacity == 0)
            return error.InvalidDebugInfo;

        if (hash_tbl_hdr.size > HashTableHeader.maxLoad(hash_tbl_hdr.capacity))
            return error.InvalidDebugInfo;

        // Bit vectors of present and deleted buckets precede the bucket data.
        const present = try readSparseBitVector(reader, gpa);
        defer gpa.free(present);
        if (present.len != hash_tbl_hdr.size)
            return error.InvalidDebugInfo;
        const deleted = try readSparseBitVector(reader, gpa);
        defer gpa.free(deleted);

        // One (name_offset, stream_index) pair per present bucket.
        for (present) |_| {
            const name_offset = try reader.takeInt(u32, .little);
            const name_index = try reader.takeInt(u32, .little);
            if (name_offset > name_bytes.len)
                return error.InvalidDebugInfo;
            const name = std.mem.sliceTo(name_bytes[name_offset..], 0);
            if (std.mem.eql(u8, name, "/names")) {
                break :str_tab_index name_index;
            }
        }
        return error.MissingDebugInfo;
    };

    self.string_table = self.getStreamById(string_table_index) orelse
        return error.MissingDebugInfo;
}
214
/// Scans the module's symbol records for a procedure symbol whose code range
/// contains `address` and returns its name, or null if none matches.
/// The module must already be populated (see `getModule`).
pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 {
    _ = self;
    std.debug.assert(module.populated);

    var offset: usize = 0;
    while (offset != module.symbols.len) {
        const prefix: *align(1) pdb.RecordPrefix = @ptrCast(&module.symbols[offset]);
        // A record length below 2 cannot hold a record kind; bail out.
        if (prefix.record_len < 2)
            return null;
        if (prefix.record_kind == .lproc32 or prefix.record_kind == .gproc32) {
            const proc_sym: *align(1) pdb.ProcSym = @ptrCast(&module.symbols[offset + @sizeOf(pdb.RecordPrefix)]);
            const code_start = proc_sym.code_offset;
            if (address >= code_start and address < code_start + proc_sym.code_size) {
                return std.mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.name[0])), 0);
            }
        }
        // record_len does not include the length field itself (u16).
        offset += prefix.record_len + @sizeOf(u16);
    }

    return null;
}
238
/// Looks up the source file, line, and column for `address` in the module's
/// C13 line-number subsections. The module must already be populated.
/// On success the caller owns the returned `file_name` slice.
pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !std.debug.SourceLocation {
    std.debug.assert(module.populated);
    const subsect_info = module.subsect_info;
    const gpa = self.allocator;

    // Walk each debug subsection; `skip_len` advances past the current one.
    var sect_offset: usize = 0;
    var skip_len: usize = undefined;
    const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo;
    while (sect_offset != subsect_info.len) : (sect_offset += skip_len) {
        const subsect_hdr: *align(1) pdb.DebugSubsectionHeader = @ptrCast(&subsect_info[sect_offset]);
        skip_len = subsect_hdr.length;
        sect_offset += @sizeOf(pdb.DebugSubsectionHeader);

        switch (subsect_hdr.kind) {
            .lines => {
                var line_index = sect_offset;

                const line_hdr: *align(1) pdb.LineFragmentHeader = @ptrCast(&subsect_info[line_index]);
                if (line_hdr.reloc_segment == 0)
                    return error.MissingDebugInfo;
                line_index += @sizeOf(pdb.LineFragmentHeader);
                const frag_vaddr_start = line_hdr.reloc_offset;
                const frag_vaddr_end = frag_vaddr_start + line_hdr.code_size;

                if (address >= frag_vaddr_start and address < frag_vaddr_end) {
                    // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records)
                    // from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in,
                    // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection.
                    const subsection_end_index = sect_offset + subsect_hdr.length;

                    while (line_index < subsection_end_index) {
                        const block_hdr: *align(1) pdb.LineBlockFragmentHeader = @ptrCast(&subsect_info[line_index]);
                        line_index += @sizeOf(pdb.LineBlockFragmentHeader);
                        const start_line_index = line_index;

                        const has_column = line_hdr.flags.have_columns;

                        // All line entries are stored inside their line block by ascending start address.
                        // Heuristic: we want to find the last line entry
                        // that has a vaddr_start <= address.
                        // This is done with a simple linear search.
                        var line_i: u32 = 0;
                        while (line_i < block_hdr.num_lines) : (line_i += 1) {
                            const line_num_entry: *align(1) pdb.LineNumberEntry = @ptrCast(&subsect_info[line_index]);
                            line_index += @sizeOf(pdb.LineNumberEntry);

                            const vaddr_start = frag_vaddr_start + line_num_entry.offset;
                            if (address < vaddr_start) {
                                break;
                            }
                        }

                        // line_i == 0 would mean that no matching pdb.LineNumberEntry was found.
                        if (line_i > 0) {
                            // Resolve the source file name through the checksum
                            // subsection, then through the "/names" string table.
                            const subsect_index = checksum_offset + block_hdr.name_index;
                            const chksum_hdr: *align(1) pdb.FileChecksumEntryHeader = @ptrCast(&module.subsect_info[subsect_index]);
                            const strtab_offset = @sizeOf(pdb.StringTableHeader) + chksum_hdr.file_name_offset;
                            try self.string_table.?.seekTo(strtab_offset);
                            const source_file_name = s: {
                                const string_reader = &self.string_table.?.interface;
                                var source_file_name: std.Io.Writer.Allocating = .init(gpa);
                                defer source_file_name.deinit();
                                _ = try string_reader.streamDelimiterLimit(&source_file_name.writer, 0, .limited(1024));
                                assert(string_reader.buffered()[0] == 0); // TODO change streamDelimiterLimit API
                                string_reader.toss(1);
                                break :s try source_file_name.toOwnedSlice();
                            };
                            errdefer gpa.free(source_file_name);

                            const line_entry_idx = line_i - 1;

                            // Column records, when present, follow all line records
                            // in the block, in the same order.
                            const column = if (has_column) blk: {
                                const start_col_index = start_line_index + @sizeOf(pdb.LineNumberEntry) * block_hdr.num_lines;
                                const col_index = start_col_index + @sizeOf(pdb.ColumnNumberEntry) * line_entry_idx;
                                const col_num_entry: *align(1) pdb.ColumnNumberEntry = @ptrCast(&subsect_info[col_index]);
                                break :blk col_num_entry.start_column;
                            } else 0;

                            const found_line_index = start_line_index + line_entry_idx * @sizeOf(pdb.LineNumberEntry);
                            const line_num_entry: *align(1) pdb.LineNumberEntry = @ptrCast(&subsect_info[found_line_index]);

                            return .{
                                .file_name = source_file_name,
                                .line = line_num_entry.flags.start,
                                .column = column,
                            };
                        }
                    }

                    // Checking that we are not reading garbage after the (possibly) multiple block fragments.
                    if (line_index != subsection_end_index) {
                        return error.InvalidDebugInfo;
                    }
                }
            },
            else => {},
        }

        if (sect_offset > subsect_info.len)
            return error.InvalidDebugInfo;
    }

    return error.MissingDebugInfo;
}
343
/// Returns the module at `index`, lazily loading its symbol records and C13
/// subsection data from the module's debug stream on first access.
/// Returns null if `index` is out of range.
pub fn getModule(self: *Pdb, index: usize) !?*Module {
    if (index >= self.modules.len)
        return null;

    const mod = &self.modules[index];
    if (mod.populated)
        return mod;

    // At most one can be non-zero.
    if (mod.mod_info.c11_byte_size != 0 and mod.mod_info.c13_byte_size != 0)
        return error.InvalidDebugInfo;
    // Only C13-style debug info is handled here.
    if (mod.mod_info.c13_byte_size == 0)
        return error.InvalidDebugInfo;

    const stream = self.getStreamById(mod.mod_info.module_sym_stream) orelse
        return error.MissingDebugInfo;
    const reader = &stream.interface;

    // The symbol substream starts with a 4-byte signature; 4 is the only
    // value accepted here.
    const signature = try reader.takeInt(u32, .little);
    if (signature != 4)
        return error.InvalidDebugInfo;

    const gpa = self.allocator;

    // sym_byte_size includes the 4-byte signature consumed above.
    mod.symbols = try reader.readAlloc(gpa, mod.mod_info.sym_byte_size - 4);
    // If the next allocation fails, free `symbols`: `populated` is still
    // false, so `Module.deinit` would not free it (previously leaked).
    errdefer gpa.free(mod.symbols);
    mod.subsect_info = try reader.readAlloc(gpa, mod.mod_info.c13_byte_size);

    // Locate the file-checksums subsection for later line-number lookups.
    var sect_offset: usize = 0;
    var skip_len: usize = undefined;
    while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) {
        const subsect_hdr: *align(1) pdb.DebugSubsectionHeader = @ptrCast(&mod.subsect_info[sect_offset]);
        skip_len = subsect_hdr.length;
        sect_offset += @sizeOf(pdb.DebugSubsectionHeader);

        switch (subsect_hdr.kind) {
            .file_checksums => {
                mod.checksum_offset = sect_offset;
                break;
            },
            else => {},
        }

        if (sect_offset > mod.subsect_info.len)
            return error.InvalidDebugInfo;
    }

    mod.populated = true;
    return mod;
}
393
/// Returns a pointer to the MSF stream with index `id`, or null when the
/// index is beyond the stream directory.
pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream {
    return if (id < self.msf.streams.len) &self.msf.streams[id] else null;
}
398
/// Returns the fixed-index MSF stream corresponding to the well-known
/// stream type `stream`, or null if the file does not contain it.
pub fn getStream(self: *Pdb, stream: pdb.StreamType) ?*MsfStream {
    return self.getStreamById(@intFromEnum(stream));
}
403
/// https://llvm.org/docs/PDB/MsfFile.html
const Msf = struct {
    /// The stream directory: describes which blocks belong to every stream.
    directory: MsfStream,
    /// One entry per stream; nonexistent streams are `.empty`.
    streams: []MsfStream,

    /// Validates the superblock, then reads the stream directory to build
    /// the per-stream block lists and read buffers (all heap-allocated and
    /// released by `deinit`).
    fn init(gpa: Allocator, file_reader: *File.Reader) !Msf {
        const superblock = try file_reader.interface.takeStruct(pdb.SuperBlock, .little);

        if (!std.mem.eql(u8, &superblock.file_magic, pdb.SuperBlock.expect_magic))
            return error.InvalidDebugInfo;
        if (superblock.free_block_map_block != 1 and superblock.free_block_map_block != 2)
            return error.InvalidDebugInfo;
        if (superblock.num_blocks * superblock.block_size != try file_reader.getSize())
            return error.InvalidDebugInfo;
        switch (superblock.block_size) {
            // llvm only supports 4096 but we can handle any of these values
            512, 1024, 2048, 4096 => {},
            else => return error.InvalidDebugInfo,
        }

        // The directory's block list must fit in a single block (BlockMapAddr).
        const dir_block_count = blockCountFromSize(superblock.num_directory_bytes, superblock.block_size);
        if (dir_block_count > superblock.block_size / @sizeOf(u32))
            return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment.

        try file_reader.seekTo(superblock.block_size * superblock.block_map_addr);
        const dir_blocks = try gpa.alloc(u32, dir_block_count);
        errdefer gpa.free(dir_blocks);
        for (dir_blocks) |*b| {
            b.* = try file_reader.interface.takeInt(u32, .little);
        }
        // NOTE(review): this buffer lives on init's stack frame, so the
        // returned `directory` stream must not be read after `init`
        // returns — confirm no caller does so.
        var directory_buffer: [64]u8 = undefined;
        var directory = MsfStream.init(superblock.block_size, file_reader, dir_blocks, &directory_buffer);

        const begin = directory.logicalPos();
        const stream_count = try directory.interface.takeInt(u32, .little);
        const stream_sizes = try gpa.alloc(u32, stream_count);
        defer gpa.free(stream_sizes);

        // Microsoft's implementation uses @as(u32, -1) for inexistent streams.
        // These streams are not used, but still participate in the file
        // and must be taken into account when resolving stream indices.
        const nil_size = 0xFFFFFFFF;
        for (stream_sizes) |*s| {
            const size = try directory.interface.takeInt(u32, .little);
            s.* = if (size == nil_size) 0 else blockCountFromSize(size, superblock.block_size);
        }

        const streams = try gpa.alloc(MsfStream, stream_count);
        errdefer gpa.free(streams);

        for (streams, stream_sizes) |*stream, size| {
            if (size == 0) {
                stream.* = .empty;
                continue;
            }
            const blocks = try gpa.alloc(u32, size);
            errdefer gpa.free(blocks);
            for (blocks) |*block| {
                const block_id = try directory.interface.takeInt(u32, .little);
                // Index 0 is reserved for the superblock.
                // In theory, every page which is `n * block_size + 1` or `n * block_size + 2`
                // is also reserved, for one of the FPMs. However, LLVM has been observed to map
                // these into actual streams, so allow it for compatibility.
                if (block_id == 0 or block_id >= superblock.num_blocks) return error.InvalidBlockIndex;
                block.* = block_id;
            }
            const buffer = try gpa.alloc(u8, 64);
            errdefer gpa.free(buffer);
            stream.* = .init(superblock.block_size, file_reader, blocks, buffer);
        }

        // The directory must contain exactly num_directory_bytes of data.
        const end = directory.logicalPos();
        if (end - begin != superblock.num_directory_bytes)
            return error.InvalidStreamDirectory;

        return .{
            .directory = directory,
            .streams = streams,
        };
    }

    /// Frees every stream's buffer and block list, plus the directory's blocks.
    fn deinit(self: *Msf, gpa: Allocator) void {
        gpa.free(self.directory.blocks);
        for (self.streams) |*stream| {
            gpa.free(stream.interface.buffer);
            gpa.free(stream.blocks);
        }
        gpa.free(self.streams);
    }
};
494
/// A single MSF stream: a logically contiguous byte sequence stored in
/// (possibly non-adjacent) fixed-size blocks of the PDB file. Reading goes
/// through the `std.Io.Reader` exposed as `interface`.
const MsfStream = struct {
    file_reader: *File.Reader,
    /// Logical position of the next underlying read. Data still buffered in
    /// `interface` is not subtracted here; see `logicalPos`.
    next_read_pos: u64,
    /// File block indices making up this stream, in logical order.
    blocks: []u32,
    block_size: u32,
    interface: std.Io.Reader,
    /// Stores the underlying seek error, since the reader vtable can only
    /// report `error.ReadFailed`.
    err: ?Error,

    const Error = File.Reader.SeekError;

    /// A zero-length stream (used for directory entries with nil size).
    const empty: MsfStream = .{
        .file_reader = undefined,
        .next_read_pos = 0,
        .blocks = &.{},
        .block_size = undefined,
        .interface = .ending_instance,
        .err = null,
    };

    fn init(block_size: u32, file_reader: *File.Reader, blocks: []u32, buffer: []u8) MsfStream {
        return .{
            .file_reader = file_reader,
            .next_read_pos = 0,
            .blocks = blocks,
            .block_size = block_size,
            .interface = .{
                .vtable = &.{ .stream = stream },
                .buffer = buffer,
                .seek = 0,
                .end = 0,
            },
            .err = null,
        };
    }

    /// Reader vtable implementation: translates the logical position into
    /// (block, offset) pairs and streams up to `limit` bytes, hopping to the
    /// next block whenever a block boundary is reached.
    fn stream(r: *std.Io.Reader, w: *std.Io.Writer, limit: std.Io.Limit) std.Io.Reader.StreamError!usize {
        const ms: *MsfStream = @alignCast(@fieldParentPtr("interface", r));

        var block_id: usize = @intCast(ms.next_read_pos / ms.block_size);
        if (block_id >= ms.blocks.len) return error.EndOfStream;
        var block = ms.blocks[block_id];
        var offset = ms.next_read_pos % ms.block_size;

        ms.file_reader.seekTo(block * ms.block_size + offset) catch |err| {
            ms.err = err;
            return error.ReadFailed;
        };

        var remaining = @intFromEnum(limit);
        while (remaining != 0) {
            // Read at most to the end of the current block.
            const stream_len: usize = @min(remaining, ms.block_size - offset);
            const n = try ms.file_reader.interface.stream(w, .limited(stream_len));
            remaining -= n;
            offset += n;

            // If we're at the end of a block, go to the next one.
            if (offset == ms.block_size) {
                offset = 0;
                block_id += 1;
                if (block_id >= ms.blocks.len) break; // End of Stream
                block = ms.blocks[block_id];
                ms.file_reader.seekTo(block * ms.block_size) catch |err| {
                    ms.err = err;
                    return error.ReadFailed;
                };
            }
        }

        const total = @intFromEnum(limit) - remaining;
        ms.next_read_pos += total;
        return total;
    }

    /// Current logical read position, accounting for bytes still buffered
    /// in `interface` but not yet consumed.
    pub fn logicalPos(ms: *const MsfStream) u64 {
        return ms.next_read_pos - ms.interface.bufferedLen();
    }

    /// Seeks relative to the current logical position, discarding any
    /// buffered data. Errors with `error.EOF` when past the stream end.
    pub fn seekBy(ms: *MsfStream, len: i64) !void {
        ms.next_read_pos = @as(u64, @intCast(@as(i64, @intCast(ms.logicalPos())) + len));
        if (ms.next_read_pos >= ms.blocks.len * ms.block_size) return error.EOF;
        ms.interface.tossBuffered();
    }

    /// Seeks to an absolute logical position, discarding any buffered data.
    /// Errors with `error.EOF` when past the stream end.
    pub fn seekTo(ms: *MsfStream, len: u64) !void {
        ms.next_read_pos = len;
        if (ms.next_read_pos >= ms.blocks.len * ms.block_size) return error.EOF;
        ms.interface.tossBuffered();
    }

    /// Total capacity in bytes (whole blocks; not the exact payload size).
    fn getSize(ms: *const MsfStream) u64 {
        return ms.blocks.len * ms.block_size;
    }

    /// Maps the current logical position to its absolute file offset.
    fn getFilePos(ms: *const MsfStream) u64 {
        const pos = ms.logicalPos();
        const block_id = pos / ms.block_size;
        const block = ms.blocks[block_id];
        const offset = pos % ms.block_size;

        return block * ms.block_size + offset;
    }
};
597
/// Reads a serialized sparse bit vector (u32 word count, then that many
/// little-endian words) and returns the indices of all set bits, in
/// ascending order. Caller owns the returned slice.
fn readSparseBitVector(reader: *std.Io.Reader, allocator: Allocator) ![]u32 {
    const num_words = try reader.takeInt(u32, .little);
    var set_bits = std.array_list.Managed(u32).init(allocator);
    errdefer set_bits.deinit();
    for (0..num_words) |word_index| {
        const word = try reader.takeInt(u32, .little);
        for (0..32) |bit_index| {
            const mask = @as(u32, 1) << @intCast(bit_index);
            if (word & mask != 0) {
                try set_bits.append(@intCast(word_index * 32 + bit_index));
            }
        }
    }
    return try set_bits.toOwnedSlice();
}
615
/// Number of MSF blocks required to hold `size` bytes when each block holds
/// `block_size` bytes (i.e. `size / block_size` rounded up).
/// Uses `std.math.divCeil` instead of `(size + block_size - 1) / block_size`
/// to avoid u32 overflow in the intermediate sum for sizes near maxInt(u32).
fn blockCountFromSize(size: u32, block_size: u32) u32 {
    // block_size is validated nonzero (512/1024/2048/4096) by Msf.init.
    return std.math.divCeil(u32, size, block_size) catch unreachable;
}