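//! Parser for a small subset of GNU ld linker scripts: the OUTPUT_FORMAT,
//! INPUT, GROUP, and AS_NEEDED commands. Parsing collects the referenced
//! input paths and the CPU architecture implied by the output format.
//! For example, a script of the form
//!
//!     OUTPUT_FORMAT(elf64-x86-64)
//!     GROUP ( libc.so.6 libc_nonshared.a AS_NEEDED ( ld-linux-x86-64.so.2 ) )
//!
//! produces three `Arg`s (the last with `needed = false`) and a `cpu_arch`
//! of `.x86_64`.
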
path: Path,
cpu_arch: ?std.Target.Cpu.Arch,
args: []const Arg,

pub const Arg = struct {
    needed: bool = false,
    path: []const u8,
};

pub fn deinit(ls: *LdScript, gpa: Allocator) void {
    gpa.free(ls.args);
    ls.* = undefined;
}

pub const Error = error{
    LinkFailure,
    UnknownCpuArch,
    OutOfMemory,
};

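/// Tokenizes and parses `data` as a linker script, reporting invalid or
/// unexpected tokens through `diags` together with the offending token's
/// line and column. The returned `LdScript` owns `args` and must be
/// released with `deinit`.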
pub fn parse(
    gpa: Allocator,
    diags: *Diags,
    /// For error reporting.
    path: Path,
    data: []const u8,
) Error!LdScript {
    var tokenizer = Tokenizer{ .source = data };
    var tokens: std.ArrayList(Token) = .empty;
    defer tokens.deinit(gpa);
    var line_col: std.ArrayList(LineColumn) = .empty;
    defer line_col.deinit(gpa);

    var line: usize = 0;
    var prev_line_last_col: usize = 0;

    while (true) {
        const tok = tokenizer.next();
        try tokens.append(gpa, tok);
        const column = tok.start - prev_line_last_col;
        try line_col.append(gpa, .{ .line = line, .column = column });
        switch (tok.id) {
            .invalid => {
                return diags.failParse(path, "invalid token in LD script: '{f}' ({d}:{d})", .{
                    std.ascii.hexEscape(tok.get(data), .lower), line, column,
                });
            },
            .new_line => {
                line += 1;
                prev_line_last_col = tok.end;
            },
            .eof => break,
            else => {},
        }
    }

    var it: TokenIterator = .{ .tokens = tokens.items };
    var parser: Parser = .{
        .gpa = gpa,
        .source = data,
        .it = &it,
        .args = .empty,
        .cpu_arch = null,
    };
    defer parser.args.deinit(gpa);

    parser.start() catch |err| switch (err) {
        error.UnexpectedToken => {
            const last_token_id = parser.it.pos - 1;
            const last_token = parser.it.get(last_token_id);
            const lcol = line_col.items[last_token_id];
            return diags.failParse(path, "unexpected token in LD script: {s}: '{s}' ({d}:{d})", .{
                @tagName(last_token.id),
                last_token.get(data),
                lcol.line,
                lcol.column,
            });
        },
        else => |e| return e,
    };
    return .{
        .path = path,
        .cpu_arch = parser.cpu_arch,
        .args = try parser.args.toOwnedSlice(gpa),
    };
}

const LineColumn = struct {
    line: usize,
    column: usize,
};

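/// Linker script commands understood by the parser; anything else in
/// command position is reported as an unexpected token.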
const Command = enum {
    output_format,
    input,
    group,
    as_needed,

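    /// Matches `s` against the upper-cased name of each enum field,
    /// e.g. "OUTPUT_FORMAT" maps to `.output_format`.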
    fn fromString(s: []const u8) ?Command {
        inline for (@typeInfo(Command).@"enum".fields) |field| {
            const upper_name = n: {
                comptime var buf: [field.name.len]u8 = undefined;
                inline for (field.name, 0..) |c, i| {
                    buf[i] = comptime std.ascii.toUpper(c);
                }
                break :n buf;
            };
            if (std.mem.eql(u8, &upper_name, s)) return @field(Command, field.name);
        }
        return null;
    }
};

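/// Hand-written descent parser over the token stream produced by
/// `Tokenizer`. Accumulates input file arguments in `args` and records the
/// CPU architecture implied by an OUTPUT_FORMAT command, if any.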
const Parser = struct {
    gpa: Allocator,
    source: []const u8,
    it: *TokenIterator,

    cpu_arch: ?std.Target.Cpu.Arch,
    args: std.ArrayList(Arg),

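    /// Top-level loop: skips comments and newlines, dispatches on the
    /// recognized commands, and requires the script to end at EOF.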
    fn start(parser: *Parser) !void {
        while (true) {
            parser.skipAny(&.{ .comment, .new_line });

            if (parser.maybe(.command)) |cmd_id| {
                const cmd = parser.getCommand(cmd_id);
                switch (cmd) {
                    .output_format => parser.cpu_arch = try parser.outputFormat(),
                    // TODO we should verify that group only contains libraries
                    .input, .group => try parser.group(),
                    else => return error.UnexpectedToken,
                }
            } else break;
        }

        if (parser.it.next()) |tok| switch (tok.id) {
            .eof => {},
            else => return error.UnexpectedToken,
        };
    }

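    /// Parses `OUTPUT_FORMAT(<bfdname>)`, or the form where the name appears
    /// between braces on the following line, and maps the known BFD names
    /// (elf64-x86-64, elf64-littleaarch64) to a CPU architecture.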
    fn outputFormat(p: *Parser) !std.Target.Cpu.Arch {
        const value = value: {
            if (p.skip(&.{.lparen})) {
                const value_id = try p.require(.literal);
                const value = p.it.get(value_id);
                _ = try p.require(.rparen);
                break :value value.get(p.source);
            } else if (p.skip(&.{ .new_line, .lbrace })) {
                const value_id = try p.require(.literal);
                const value = p.it.get(value_id);
                _ = p.skip(&.{.new_line});
                _ = try p.require(.rbrace);
                break :value value.get(p.source);
            } else return error.UnexpectedToken;
        };
        if (std.mem.eql(u8, value, "elf64-x86-64")) return .x86_64;
        if (std.mem.eql(u8, value, "elf64-littleaarch64")) return .aarch64;
        return error.UnknownCpuArch;
    }

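    /// Parses the parenthesized argument list of an INPUT or GROUP command:
    /// plain literals are recorded as needed inputs, and nested AS_NEEDED
    /// lists are handled by `asNeeded`.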
    fn group(p: *Parser) !void {
        const gpa = p.gpa;
        if (!p.skip(&.{.lparen})) return error.UnexpectedToken;

        while (true) {
            if (p.maybe(.literal)) |tok_id| {
                const tok = p.it.get(tok_id);
                const path = tok.get(p.source);
                try p.args.append(gpa, .{ .path = path, .needed = true });
            } else if (p.maybe(.command)) |cmd_id| {
                const cmd = p.getCommand(cmd_id);
                switch (cmd) {
                    .as_needed => try p.asNeeded(),
                    else => return error.UnexpectedToken,
                }
            } else break;
        }

        _ = try p.require(.rparen);
    }

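    /// Parses `AS_NEEDED(...)`, recording its members with `needed = false`.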
    fn asNeeded(p: *Parser) !void {
        const gpa = p.gpa;
        if (!p.skip(&.{.lparen})) return error.UnexpectedToken;

        while (p.maybe(.literal)) |tok_id| {
            const tok = p.it.get(tok_id);
            const path = tok.get(p.source);
            try p.args.append(gpa, .{ .path = path, .needed = false });
        }

        _ = try p.require(.rparen);
    }

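    /// Consumes the exact sequence of token ids, or rewinds and returns
    /// false if the upcoming tokens do not match.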
    fn skip(p: *Parser, comptime ids: []const Token.Id) bool {
        const pos = p.it.pos;
        inline for (ids) |id| {
            const tok = p.it.next() orelse return false;
            if (tok.id != id) {
                p.it.seekTo(pos);
                return false;
            }
        }
        return true;
    }

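    /// Consumes any run of tokens whose ids appear in `ids`, stopping at the
    /// first token that does not match.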
    fn skipAny(p: *Parser, comptime ids: []const Token.Id) void {
        outer: while (p.it.next()) |tok| {
            inline for (ids) |id| {
                if (id == tok.id) continue :outer;
            }
            break p.it.seekBy(-1);
        }
    }

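    /// Consumes the next token and returns its index if it has the given id;
    /// otherwise rewinds and returns null.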
    fn maybe(p: *Parser, comptime id: Token.Id) ?Token.Index {
        const pos = p.it.pos;
        const tok = p.it.next() orelse return null;
        if (tok.id == id) return pos;
        p.it.seekBy(-1);
        return null;
    }

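    /// Like `maybe`, but failing to match is an error.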
    fn require(p: *Parser, comptime id: Token.Id) !Token.Index {
        return p.maybe(id) orelse return error.UnexpectedToken;
    }

    fn getCommand(p: *Parser, index: Token.Index) Command {
        const tok = p.it.get(index);
        assert(tok.id == .command);
        return Command.fromString(tok.get(p.source)).?;
    }
};

const Token = struct {
    id: Id,
    start: usize,
    end: usize,

    const Id = enum {
        eof,
        invalid,

        new_line,
        lparen, // (
        rparen, // )
        lbrace, // {
        rbrace, // }

        comment, // /* */

        command, // literal with special meaning, see Command
        literal,
    };

    const Index = usize;

    fn get(tok: Token, source: []const u8) []const u8 {
        return source[tok.start..tok.end];
    }
};

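/// Splits the linker script into tokens. Spaces and tabs are skipped, but
/// comments and newlines are emitted as tokens of their own.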
const Tokenizer = struct {
    source: []const u8,
    index: usize = 0,

    fn matchesPattern(comptime pattern: []const u8, slice: []const u8) bool {
        comptime var count: usize = 0;
        inline while (count < pattern.len) : (count += 1) {
            if (count >= slice.len) return false;
            const c = slice[count];
            if (pattern[count] != c) return false;
        }
        return true;
    }

    fn matches(tok: Tokenizer, comptime pattern: []const u8) bool {
        return matchesPattern(pattern, tok.source[tok.index..]);
    }

    fn isCommand(tok: Tokenizer, start: usize, end: usize) bool {
        return Command.fromString(tok.source[start..end]) != null;
    }

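    /// Scans and returns the next token, classifying literals that name a
    /// `Command` as `.command`. Returns a token with id `.eof` at the end of
    /// input.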
    fn next(tok: *Tokenizer) Token {
        var result = Token{
            .id = .eof,
            .start = tok.index,
            .end = undefined,
        };

        var state: enum {
            start,
            comment,
            literal,
        } = .start;

        while (tok.index < tok.source.len) : (tok.index += 1) {
            const c = tok.source[tok.index];
            switch (state) {
                .start => switch (c) {
                    ' ', '\t' => result.start += 1,

                    '\n' => {
                        result.id = .new_line;
                        tok.index += 1;
                        break;
                    },

                    '\r' => {
                        if (tok.matches("\r\n")) {
                            result.id = .new_line;
                            tok.index += "\r\n".len;
                        } else {
                            result.id = .invalid;
                            tok.index += 1;
                        }
                        break;
                    },

                    '/' => if (tok.matches("/*")) {
                        state = .comment;
                        tok.index += "/*".len;
                    } else {
                        state = .literal;
                    },

                    '(' => {
                        result.id = .lparen;
                        tok.index += 1;
                        break;
                    },

                    ')' => {
                        result.id = .rparen;
                        tok.index += 1;
                        break;
                    },

                    '{' => {
                        result.id = .lbrace;
                        tok.index += 1;
                        break;
                    },

                    '}' => {
                        result.id = .rbrace;
                        tok.index += 1;
                        break;
                    },

                    else => state = .literal,
                },

                .comment => switch (c) {
                    '*' => if (tok.matches("*/")) {
                        result.id = .comment;
                        tok.index += "*/".len;
                        break;
                    },
                    else => {},
                },

                .literal => switch (c) {
                    ' ', '(', '\n' => {
                        if (tok.isCommand(result.start, tok.index)) {
                            result.id = .command;
                        } else {
                            result.id = .literal;
                        }
                        break;
                    },

                    ')' => {
                        result.id = .literal;
                        break;
                    },

                    '\r' => {
                        if (tok.matches("\r\n")) {
                            if (tok.isCommand(result.start, tok.index)) {
                                result.id = .command;
                            } else {
                                result.id = .literal;
                            }
                        } else {
                            result.id = .invalid;
                            tok.index += 1;
                        }
                        break;
                    },

                    else => {},
                },
            }
        }

        result.end = tok.index;
        return result;
    }
};

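/// Cursor over the token slice with simple seek/rewind support, used by
/// `Parser` for backtracking.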
const TokenIterator = struct {
    tokens: []const Token,
    pos: Token.Index = 0,

    fn next(it: *TokenIterator) ?Token {
        const token = it.peek() orelse return null;
        it.pos += 1;
        return token;
    }

    fn peek(it: TokenIterator) ?Token {
        if (it.pos >= it.tokens.len) return null;
        return it.tokens[it.pos];
    }

    fn reset(it: *TokenIterator) void {
        it.pos = 0;
    }

    fn seekTo(it: *TokenIterator, pos: Token.Index) void {
        it.pos = pos;
    }

    fn seekBy(it: *TokenIterator, offset: isize) void {
        const new_pos = @as(isize, @bitCast(it.pos)) + offset;
        if (new_pos < 0) {
            it.pos = 0;
        } else {
            it.pos = @as(usize, @intCast(new_pos));
        }
    }

    fn get(it: *TokenIterator, pos: Token.Index) Token {
        assert(pos < it.tokens.len);
        return it.tokens[pos];
    }
};

const LdScript = @This();
const Diags = @import("../link.zig").Diags;

const std = @import("std");
const assert = std.debug.assert;
const Path = std.Build.Cache.Path;
const Allocator = std.mem.Allocator;