//! Parsing of GNU ld linker scripts: OUTPUT_FORMAT, INPUT, GROUP and AS_NEEDED.
/// Path of the script file itself; kept for error reporting.
path: Path,
/// Architecture selected by the script's OUTPUT_FORMAT command, or null
/// when the script has no OUTPUT_FORMAT command.
cpu_arch: ?std.Target.Cpu.Arch,
/// Input-file entries named by INPUT/GROUP, in script order. The slice is
/// owned by this struct and freed in `deinit`.
args: []const Arg,

/// One input-file entry from an INPUT or GROUP command.
pub const Arg = struct {
    /// False when the entry appeared inside an AS_NEEDED(...) list.
    needed: bool = false,
    /// Borrowed slice into the script source buffer passed to `parse`.
    path: []const u8,
};
9
/// Releases the `args` slice and invalidates `ls`. The `Arg.path` strings
/// are not freed here: they are slices into the script source buffer,
/// which is owned elsewhere.
pub fn deinit(ls: *LdScript, gpa: Allocator) void {
    const args = ls.args;
    ls.* = undefined;
    gpa.free(args);
}
14
/// Error set surfaced to callers of `parse`.
pub const Error = error{
    /// A tokenization or parse failure; the details are reported through
    /// `Diags` (presumably what `diags.failParse` returns — confirm).
    LinkFailure,
    /// OUTPUT_FORMAT named a BFD target this parser does not recognize.
    UnknownCpuArch,
    OutOfMemory,
};
20
/// Tokenizes and parses an LD script. On success the returned `LdScript`
/// owns its `args` slice (free with `deinit`); the `Arg.path` strings
/// borrow from `data`. Tokenization and parse errors are reported through
/// `diags` with a 0-based line:column position.
pub fn parse(
    gpa: Allocator,
    diags: *Diags,
    /// For error reporting.
    path: Path,
    data: []const u8,
) Error!LdScript {
    var tokens: std.ArrayList(Token) = .empty;
    defer tokens.deinit(gpa);
    var positions: std.ArrayList(LineColumn) = .empty;
    defer positions.deinit(gpa);

    // Tokenize the entire script up front, recording a line/column for every
    // token so that parse errors below can point at the offending spot.
    var tokenizer = Tokenizer{ .source = data };
    var current_line: usize = 0;
    var line_start: usize = 0;
    tokenize: while (true) {
        const token = tokenizer.next();
        try tokens.append(gpa, token);
        const col = token.start - line_start;
        try positions.append(gpa, .{ .line = current_line, .column = col });
        switch (token.id) {
            .invalid => return diags.failParse(path, "invalid token in LD script: '{f}' ({d}:{d})", .{
                std.ascii.hexEscape(token.get(data), .lower), current_line, col,
            }),
            .new_line => {
                current_line += 1;
                line_start = token.end;
            },
            .eof => break :tokenize,
            else => {},
        }
    }

    var it: TokenIterator = .{ .tokens = tokens.items };
    var parser: Parser = .{
        .gpa = gpa,
        .source = data,
        .it = &it,
        .args = .empty,
        .cpu_arch = null,
    };
    defer parser.args.deinit(gpa);

    parser.start() catch |err| switch (err) {
        error.UnexpectedToken => {
            // The iterator has already consumed the offending token, so the
            // token just behind the cursor is the one to report.
            const bad_index = parser.it.pos - 1;
            const bad_token = parser.it.get(bad_index);
            const where = positions.items[bad_index];
            return diags.failParse(path, "unexpected token in LD script: {s}: '{s}' ({d}:{d})", .{
                @tagName(bad_token.id),
                bad_token.get(data),
                where.line,
                where.column,
            });
        },
        else => |e| return e,
    };
    return .{
        .path = path,
        .cpu_arch = parser.cpu_arch,
        .args = try parser.args.toOwnedSlice(gpa),
    };
}
87
/// 0-based source position of a token, used only for parse-error reporting.
const LineColumn = struct {
    line: usize,
    column: usize,
};
92
/// Script commands this parser understands. In the script itself they are
/// spelled in upper case (e.g. `OUTPUT_FORMAT`, `AS_NEEDED`).
const Command = enum {
    output_format,
    input,
    group,
    as_needed,

    /// Maps the upper-case script spelling (e.g. "GROUP") to its enum tag.
    /// Returns null for anything else, including lower-case spellings.
    fn fromString(s: []const u8) ?Command {
        inline for (@typeInfo(Command).@"enum".fields) |field| {
            const matched = blk: {
                if (s.len != field.name.len) break :blk false;
                for (field.name, s) |lower, actual| {
                    if (std.ascii.toUpper(lower) != actual) break :blk false;
                }
                break :blk true;
            };
            if (matched) return @field(Command, field.name);
        }
        return null;
    }
};
113
/// Recursive-descent parser over the token stream produced by `Tokenizer`.
/// Recognized top-level commands are OUTPUT_FORMAT, INPUT and GROUP (with
/// AS_NEEDED allowed inside INPUT/GROUP); anything else is an error.
const Parser = struct {
    gpa: Allocator,
    /// The raw script text; token slices are resolved against it.
    source: []const u8,
    /// Shared cursor into the token list. Backtracking is done by rewinding
    /// it via seekTo/seekBy; the cursor position at the moment of an
    /// error.UnexpectedToken is what `parse` uses for error reporting.
    it: *TokenIterator,

    /// Set by OUTPUT_FORMAT; stays null when the command never appears.
    cpu_arch: ?std.Target.Cpu.Arch,
    /// Accumulated input-file entries from INPUT/GROUP/AS_NEEDED.
    args: std.ArrayList(Arg),

    /// Entry point: parses top-level commands until the stream is exhausted,
    /// then requires the next token to be EOF.
    fn start(parser: *Parser) !void {
        while (true) {
            // Comments and blank lines are insignificant between commands.
            parser.skipAny(&.{ .comment, .new_line });

            if (parser.maybe(.command)) |cmd_id| {
                const cmd = parser.getCommand(cmd_id);
                switch (cmd) {
                    .output_format => parser.cpu_arch = try parser.outputFormat(),
                    // TODO we should verify that group only contains libraries
                    .input, .group => try parser.group(),
                    else => return error.UnexpectedToken,
                }
            } else break;
        }

        if (parser.it.next()) |tok| switch (tok.id) {
            .eof => {},
            else => return error.UnexpectedToken,
        };
    }

    /// Parses `OUTPUT_FORMAT(<bfd-name>)`, or the brace form where a
    /// newline then `{ <bfd-name> }` follows the command, and maps the BFD
    /// target name to a CPU architecture. Unknown names are an error.
    fn outputFormat(p: *Parser) !std.Target.Cpu.Arch {
        const value = value: {
            if (p.skip(&.{.lparen})) {
                const value_id = try p.require(.literal);
                const value = p.it.get(value_id);
                _ = try p.require(.rparen);
                break :value value.get(p.source);
            } else if (p.skip(&.{ .new_line, .lbrace })) {
                const value_id = try p.require(.literal);
                const value = p.it.get(value_id);
                _ = p.skip(&.{.new_line});
                _ = try p.require(.rbrace);
                break :value value.get(p.source);
            } else return error.UnexpectedToken;
        };
        if (std.mem.eql(u8, value, "elf64-x86-64")) return .x86_64;
        if (std.mem.eql(u8, value, "elf64-littleaarch64")) return .aarch64;
        return error.UnknownCpuArch;
    }

    /// Parses the parenthesized body of INPUT/GROUP: literal paths (recorded
    /// as needed) possibly mixed with AS_NEEDED(...) sub-lists.
    fn group(p: *Parser) !void {
        const gpa = p.gpa;
        if (!p.skip(&.{.lparen})) return error.UnexpectedToken;

        while (true) {
            if (p.maybe(.literal)) |tok_id| {
                const tok = p.it.get(tok_id);
                const path = tok.get(p.source);
                try p.args.append(gpa, .{ .path = path, .needed = true });
            } else if (p.maybe(.command)) |cmd_id| {
                const cmd = p.getCommand(cmd_id);
                switch (cmd) {
                    .as_needed => try p.asNeeded(),
                    else => return error.UnexpectedToken,
                }
            } else break;
        }

        _ = try p.require(.rparen);
    }

    /// Parses `AS_NEEDED(<paths>)`; entries are recorded with needed = false.
    fn asNeeded(p: *Parser) !void {
        const gpa = p.gpa;
        if (!p.skip(&.{.lparen})) return error.UnexpectedToken;

        while (p.maybe(.literal)) |tok_id| {
            const tok = p.it.get(tok_id);
            const path = tok.get(p.source);
            try p.args.append(gpa, .{ .path = path, .needed = false });
        }

        _ = try p.require(.rparen);
    }

    /// Consumes the exact token-id sequence `ids`. On an id mismatch the
    /// cursor is rewound to the starting position and false is returned;
    /// if the stream runs out mid-sequence, false is returned without a
    /// rewind (the cursor is already at the end).
    fn skip(p: *Parser, comptime ids: []const Token.Id) bool {
        const pos = p.it.pos;
        inline for (ids) |id| {
            const tok = p.it.next() orelse return false;
            if (tok.id != id) {
                p.it.seekTo(pos);
                return false;
            }
        }
        return true;
    }

    /// Consumes any run of tokens whose ids appear in `ids`, stopping at
    /// the first token that matches none of them (left unconsumed).
    fn skipAny(p: *Parser, comptime ids: []const Token.Id) void {
        outer: while (p.it.next()) |tok| {
            inline for (ids) |id| {
                if (id == tok.id) continue :outer;
            }
            // Step back so the non-matching token remains available.
            break p.it.seekBy(-1);
        }
    }

    /// Consumes the next token and returns its index if it has id `id`;
    /// otherwise rewinds one token and returns null.
    fn maybe(p: *Parser, comptime id: Token.Id) ?Token.Index {
        const pos = p.it.pos;
        const tok = p.it.next() orelse return null;
        if (tok.id == id) return pos;
        p.it.seekBy(-1);
        return null;
    }

    /// Like `maybe`, but a mismatch is a parse error.
    fn require(p: *Parser, comptime id: Token.Id) !Token.Index {
        return p.maybe(id) orelse return error.UnexpectedToken;
    }

    /// Resolves a token already known to be a `.command` into its Command.
    /// The unwrap cannot fail: the tokenizer only emits `.command` for
    /// spellings that `Command.fromString` accepts.
    fn getCommand(p: *Parser, index: Token.Index) Command {
        const tok = p.it.get(index);
        assert(tok.id == .command);
        return Command.fromString(tok.get(p.source)).?;
    }
};
236
/// A single lexeme: its kind plus the [start, end) byte range in the source.
const Token = struct {
    id: Id,
    start: usize,
    end: usize,

    const Id = enum {
        eof,
        invalid,

        new_line,
        lparen, // (
        rparen, // )
        lbrace, // {
        rbrace, // }

        comment, // /* */

        command, // literal with special meaning, see Command
        literal,
    };

    /// Index of a token within the token list built by `parse`.
    const Index = usize;

    /// Returns the slice of `source` this token covers.
    fn get(tok: Token, source: []const u8) []const u8 {
        return source[tok.start..tok.end];
    }
};
264
/// Hand-written scanner that splits an LD script into `Token`s. Spaces and
/// tabs are skipped; newlines, parens, braces and /* */ comments become
/// their own tokens; every other run of characters is a literal, upgraded
/// to `.command` when it spells a known `Command`.
const Tokenizer = struct {
    source: []const u8,
    index: usize = 0,

    /// Returns true when `slice` starts with the comptime-known `pattern`.
    fn matchesPattern(comptime pattern: []const u8, slice: []const u8) bool {
        comptime var count: usize = 0;
        inline while (count < pattern.len) : (count += 1) {
            if (count >= slice.len) return false;
            const c = slice[count];
            if (pattern[count] != c) return false;
        }
        return true;
    }

    /// Returns true when the remaining input starts with `pattern`.
    fn matches(tok: Tokenizer, comptime pattern: []const u8) bool {
        return matchesPattern(pattern, tok.source[tok.index..]);
    }

    /// Returns true when source[start..end] spells a known command keyword.
    fn isCommand(tok: Tokenizer, start: usize, end: usize) bool {
        return if (Command.fromString(tok.source[start..end]) == null) false else true;
    }

    /// Scans and returns the next token, advancing `index`. Once the input
    /// is exhausted, returns an `.eof` token. A bare '\r' not followed by
    /// '\n' produces `.invalid`.
    ///
    /// NOTE(review): a literal that runs to end-of-input falls out of the
    /// loop with `result.id` still `.eof`, so a trailing unterminated
    /// literal is silently dropped by the parser — confirm this is intended.
    fn next(tok: *Tokenizer) Token {
        var result = Token{
            .id = .eof,
            .start = tok.index,
            .end = undefined,
        };

        var state: enum {
            start,
            comment,
            literal,
        } = .start;

        while (tok.index < tok.source.len) : (tok.index += 1) {
            const c = tok.source[tok.index];
            switch (state) {
                .start => switch (c) {
                    // Skip leading blanks by sliding the token start forward.
                    ' ', '\t' => result.start += 1,

                    '\n' => {
                        result.id = .new_line;
                        tok.index += 1;
                        break;
                    },

                    '\r' => {
                        if (tok.matches("\r\n")) {
                            result.id = .new_line;
                            tok.index += "\r\n".len;
                        } else {
                            result.id = .invalid;
                            tok.index += 1;
                        }
                        break;
                    },

                    // '/' opens a comment only when followed by '*';
                    // otherwise it begins a literal (e.g. a path).
                    '/' => if (tok.matches("/*")) {
                        state = .comment;
                        tok.index += "/*".len;
                    } else {
                        state = .literal;
                    },

                    '(' => {
                        result.id = .lparen;
                        tok.index += 1;
                        break;
                    },

                    ')' => {
                        result.id = .rparen;
                        tok.index += 1;
                        break;
                    },

                    '{' => {
                        result.id = .lbrace;
                        tok.index += 1;
                        break;
                    },

                    '}' => {
                        result.id = .rbrace;
                        tok.index += 1;
                        break;
                    },

                    else => state = .literal,
                },

                .comment => switch (c) {
                    '*' => if (tok.matches("*/")) {
                        result.id = .comment;
                        tok.index += "*/".len;
                        break;
                    },
                    else => {},
                },

                .literal => switch (c) {
                    // Terminators left unconsumed; they start the next token.
                    ' ', '(', '\n' => {
                        if (tok.isCommand(result.start, tok.index)) {
                            result.id = .command;
                        } else {
                            result.id = .literal;
                        }
                        break;
                    },

                    ')' => {
                        result.id = .literal;
                        break;
                    },

                    '\r' => {
                        if (tok.matches("\r\n")) {
                            if (tok.isCommand(result.start, tok.index)) {
                                result.id = .command;
                            } else {
                                result.id = .literal;
                            }
                        } else {
                            // Lone CR: the whole span including it is invalid.
                            result.id = .invalid;
                            tok.index += 1;
                        }
                        break;
                    },

                    else => {},
                },
            }
        }

        result.end = tok.index;
        return result;
    }
};
404
/// Cursor over the parsed token slice, with support for arbitrary rewinds.
const TokenIterator = struct {
    tokens: []const Token,
    pos: Token.Index = 0,

    /// Returns the token at the cursor and advances past it, or null when
    /// the end has been reached.
    fn next(it: *TokenIterator) ?Token {
        if (it.peek()) |tok| {
            it.pos += 1;
            return tok;
        }
        return null;
    }

    /// Returns the token at the cursor without advancing, or null at the end.
    fn peek(it: TokenIterator) ?Token {
        return if (it.pos < it.tokens.len) it.tokens[it.pos] else null;
    }

    /// Rewinds the cursor to the first token.
    fn reset(it: *TokenIterator) void {
        it.pos = 0;
    }

    /// Moves the cursor to an absolute position.
    fn seekTo(it: *TokenIterator, pos: Token.Index) void {
        it.pos = pos;
    }

    /// Moves the cursor by a signed offset, clamping at the beginning.
    fn seekBy(it: *TokenIterator, offset: isize) void {
        const target = @as(isize, @bitCast(it.pos)) + offset;
        it.pos = if (target < 0) 0 else @intCast(target);
    }

    /// Returns the token at `pos`, which must be in bounds.
    fn get(it: *TokenIterator, pos: Token.Index) Token {
        assert(pos < it.tokens.len);
        return it.tokens[pos];
    }
};
442
// The file itself is the LdScript struct (see the fields at the top).
const LdScript = @This();
const Diags = @import("../link.zig").Diags;

const std = @import("std");
const assert = std.debug.assert;
const Path = std.Build.Cache.Path;
const Allocator = std.mem.Allocator;