//! Doc-source preprocessor: copies content through while extracting
//! `{#code_begin#}`…`{#code_end#}` regions into standalone Zig files.
  1const std = @import("std");
  2const builtin = @import("builtin");
  3const fs = std.fs;
  4const print = std.debug.print;
  5const mem = std.mem;
  6const testing = std.testing;
  7const Allocator = std.mem.Allocator;
  8const max_doc_file_size = 10 * 1024 * 1024;
  9const fatal = std.process.fatal;
 10
 11pub fn main() !void {
 12    var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
 13    defer arena_instance.deinit();
 14    const arena = arena_instance.allocator();
 15
 16    const gpa = arena;
 17
 18    const args = try std.process.argsAlloc(arena);
 19    const input_file = args[1];
 20    const output_file = args[2];
 21
 22    var threaded: std.Io.Threaded = .init(gpa);
 23    defer threaded.deinit();
 24    const io = threaded.io();
 25
 26    var in_file = try fs.cwd().openFile(input_file, .{ .mode = .read_only });
 27    defer in_file.close();
 28
 29    var out_file = try fs.cwd().createFile(output_file, .{});
 30    defer out_file.close();
 31    var out_file_buffer: [4096]u8 = undefined;
 32    var out_file_writer = out_file.writer(&out_file_buffer);
 33
 34    var out_dir = try fs.cwd().openDir(fs.path.dirname(output_file).?, .{});
 35    defer out_dir.close();
 36
 37    var in_file_reader = in_file.reader(io, &.{});
 38    const input_file_bytes = try in_file_reader.interface.allocRemaining(arena, .unlimited);
 39
 40    var tokenizer = Tokenizer.init(input_file, input_file_bytes);
 41
 42    try walk(arena, &tokenizer, out_dir, &out_file_writer.interface);
 43
 44    try out_file_writer.end();
 45}
 46
/// One lexical token produced by `Tokenizer.next`. The token's text is the
/// half-open byte range `buffer[start..end]` of the tokenizer's buffer.
const Token = struct {
    // Which kind of token this is.
    id: Id,
    // Byte offset into the source buffer where the token begins (inclusive).
    start: usize,
    // Byte offset where the token ends (exclusive).
    end: usize,

    const Id = enum {
        // Input ended while inside a tag (see `Tokenizer.next`'s else branch).
        invalid,
        // Plain document text outside any `{#...#}` tag.
        content,
        // The `{#` opening of a tag.
        bracket_open,
        // Text inside a tag (a tag name or argument).
        tag_content,
        // The `|` separating tag arguments.
        separator,
        // The `#}` closing a tag.
        bracket_close,
        // End of input.
        eof,
    };
};
 62
/// Hand-rolled state machine that lexes a doc source into `Token`s:
/// plain `content` runs and the pieces of `{#name|arg|...#}` tags
/// (`bracket_open`, `tag_content`, `separator`, `bracket_close`).
/// Call `next` repeatedly until it returns a token with `.id == .eof`.
const Tokenizer = struct {
    // The full source being tokenized; tokens index into this.
    buffer: []const u8,
    // Current scan position (byte offset into `buffer`).
    index: usize,
    // Lexer state carried across calls to `next`.
    state: State,
    // Used only for error reporting (file:line:col prefixes).
    source_file_name: []const u8,

    const State = enum {
        // Outside any tag.
        start,
        // Just saw `{`; deciding whether a `{#` tag starts.
        l_bracket,
        // Inside a tag, just saw `#`; deciding whether `#}` closes it.
        hash,
        // Inside a tag body.
        tag_name,
        // Input exhausted; `next` must not be called again.
        eof,
    };

    fn init(source_file_name: []const u8, buffer: []const u8) Tokenizer {
        return Tokenizer{
            .buffer = buffer,
            .index = 0,
            .state = .start,
            .source_file_name = source_file_name,
        };
    }

    /// Returns the next token. A token is accumulated by advancing through
    /// the buffer until a state transition ends it; `result.id` doubles as
    /// a flag (`.eof` means "nothing accumulated yet"), which is why the
    /// two-character delimiters rewind `self.index` by one when they must
    /// first terminate a pending token.
    fn next(self: *Tokenizer) Token {
        var result = Token{
            .id = .eof,
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (self.state) {
                .start => switch (c) {
                    '{' => {
                        self.state = .l_bracket;
                    },
                    else => {
                        result.id = .content;
                    },
                },
                .l_bracket => switch (c) {
                    '#' => {
                        if (result.id != .eof) {
                            // A content token is pending: end it just before
                            // the `{` and re-lex the `{#` on the next call.
                            self.index -= 1;
                            self.state = .start;
                            break;
                        } else {
                            // Emit `{#` as a bracket_open token.
                            result.id = .bracket_open;
                            self.index += 1;
                            self.state = .tag_name;
                            break;
                        }
                    },
                    else => {
                        // Lone `{` — it is ordinary content.
                        result.id = .content;
                        self.state = .start;
                    },
                },
                .tag_name => switch (c) {
                    '|' => {
                        if (result.id != .eof) {
                            // End the pending tag_content; the `|` itself is
                            // lexed on the next call.
                            break;
                        } else {
                            result.id = .separator;
                            self.index += 1;
                            break;
                        }
                    },
                    '#' => {
                        self.state = .hash;
                    },
                    else => {
                        result.id = .tag_content;
                    },
                },
                .hash => switch (c) {
                    '}' => {
                        if (result.id != .eof) {
                            // End the pending tag_content just before the `#`
                            // and re-lex `#}` on the next call.
                            self.index -= 1;
                            self.state = .tag_name;
                            break;
                        } else {
                            result.id = .bracket_close;
                            self.index += 1;
                            self.state = .start;
                            break;
                        }
                    },
                    else => {
                        // The `#` did not close the tag; it belongs to the
                        // tag_content (as does this character).
                        result.id = .tag_content;
                        self.state = .tag_name;
                    },
                },
                .eof => unreachable,
            }
        } else {
            // Ran off the end of the buffer: reaching EOF mid-tag is invalid.
            switch (self.state) {
                .start, .l_bracket, .eof => {},
                else => {
                    result.id = .invalid;
                },
            }
            self.state = .eof;
        }
        result.end = self.index;
        return result;
    }

    /// 0-based line/column of a token plus the byte range of the line
    /// containing it, for diagnostics.
    const Location = struct {
        line: usize,
        column: usize,
        line_start: usize,
        line_end: usize,
    };

    /// Computes the location of `token.start` by scanning the buffer from the
    /// beginning — O(token.start), fine for the error path (only `parseError`
    /// calls this). NOTE(review): if `token.start` equals `buffer.len` (e.g.
    /// an eof token) the early-return is never taken and `line_end` stays 0.
    fn getTokenLocation(self: *Tokenizer, token: Token) Location {
        var loc = Location{
            .line = 0,
            .column = 0,
            .line_start = 0,
            .line_end = 0,
        };
        for (self.buffer, 0..) |c, i| {
            if (i == token.start) {
                // Extend line_end to the end of the current line.
                loc.line_end = i;
                while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
                return loc;
            }
            if (c == '\n') {
                loc.line += 1;
                loc.column = 0;
                loc.line_start = i + 1;
            } else {
                loc.column += 1;
            }
        }
        return loc;
    }
};
202
/// Prints a compiler-style diagnostic for `token` — a `file:line:col: error:`
/// header, the offending source line, and a `~` underline — then returns
/// `error.ParseError` so callers can `return parseError(...)`.
fn parseError(tokenizer: *Tokenizer, token: Token, comptime fmt: []const u8, args: anytype) anyerror {
    const location = tokenizer.getTokenLocation(token);
    const prefix_args = .{ tokenizer.source_file_name, location.line + 1, location.column + 1 };
    print("{s}:{d}:{d}: error: " ++ fmt ++ "\n", prefix_args ++ args);
    if (location.line_start <= location.line_end) {
        // Echo the source line containing the token.
        print("{s}\n", .{tokenizer.buffer[location.line_start..location.line_end]});
        // Pad up to the token's column, then underline it (the underline is
        // clipped at the end of the line).
        for (0..location.column) |_| print(" ", .{});
        const underline_len = @min(token.end, location.line_end) - token.start;
        for (0..underline_len) |_| print("~", .{});
        print("\n", .{});
    }
    return error.ParseError;
}
226
/// Succeeds when `token` has the expected id; otherwise reports a parse
/// error naming both the expected and the actual token kind.
fn assertToken(tokenizer: *Tokenizer, token: Token, id: Token.Id) !void {
    if (token.id == id) return;
    return parseError(tokenizer, token, "expected {s}, found {s}", .{ @tagName(id), @tagName(token.id) });
}
232
/// Consumes the next token and returns it, failing with a parse error
/// unless it has the expected id.
fn eatToken(tokenizer: *Tokenizer, id: Token.Id) !Token {
    const tok = tokenizer.next();
    try assertToken(tokenizer, tok, id);
    return tok;
}
238
/// Expected result of an `exe` code block; set from the `exe`, `exe_err`,
/// and `exe_build_err` kinds parsed in `walk`.
const ExpectedOutcome = enum {
    succeed,
    fail,
    build_fail,
};
244
/// Metadata for one extracted code block. NOTE(review): within this file only
/// `Code.Id` is constructed (in `walk`); the remaining fields mirror the
/// per-block options `walk` parses and are presumably consumed elsewhere.
const Code = struct {
    id: Id,
    name: []const u8,
    source_token: Token,
    just_check_syntax: bool,
    mode: std.builtin.OptimizeMode,
    link_objects: []const []const u8,
    target_str: ?[]const u8,
    link_libc: bool,
    link_mode: ?std.builtin.LinkMode,
    disable_cache: bool,
    verbose_cimport: bool,
    additional_options: []const []const u8,

    /// Kind of artifact the block builds; payloads carry the expected
    /// error text (for *_err kinds) or the expected run outcome.
    const Id = union(enum) {
        @"test",
        // Expected error text for a failing test.
        test_error: []const u8,
        // Expected safety-check text.
        test_safety: []const u8,
        // Executable with an expected outcome.
        exe: ExpectedOutcome,
        // Object file; payload is the expected build-error text, if any.
        obj: ?[]const u8,
        lib,
    };
};
268
/// Consumes the token stream and writes the transformed document to `w`.
/// Plain content is copied through verbatim. Each
/// `{#code_begin|<kind>|<name>[|<error>]#}` ... `{#code_end#}` region is
/// extracted into `<name>.zig` inside `out_dir` (annotated with `//`
/// directives describing kind, optimize mode, target, etc.) and replaced in
/// the output by a `{#code|<name>.zig#}` tag. Any other `{#...#}` tag is
/// copied through unchanged.
fn walk(arena: Allocator, tokenizer: *Tokenizer, out_dir: std.fs.Dir, w: anytype) !void {
    while (true) {
        const token = tokenizer.next();
        switch (token.id) {
            .eof => break,
            .content,
            => {
                try w.writeAll(tokenizer.buffer[token.start..token.end]);
            },
            .bracket_open => {
                const tag_token = try eatToken(tokenizer, .tag_content);
                const tag_name = tokenizer.buffer[tag_token.start..tag_token.end];

                if (mem.eql(u8, tag_name, "code_begin")) {
                    _ = try eatToken(tokenizer, .separator);
                    const code_kind_tok = try eatToken(tokenizer, .tag_content);
                    _ = try eatToken(tokenizer, .separator);
                    const name_tok = try eatToken(tokenizer, .tag_content);
                    const name = tokenizer.buffer[name_tok.start..name_tok.end];
                    // Optional third argument: expected error text for the
                    // *_err / test_safety kinds.
                    var error_str: []const u8 = "";
                    const maybe_sep = tokenizer.next();
                    switch (maybe_sep.id) {
                        .separator => {
                            const error_tok = try eatToken(tokenizer, .tag_content);
                            error_str = tokenizer.buffer[error_tok.start..error_tok.end];
                            _ = try eatToken(tokenizer, .bracket_close);
                        },
                        .bracket_close => {},
                        // Fix: point the diagnostic at the unexpected token,
                        // not at the tag's opening bracket.
                        else => return parseError(tokenizer, maybe_sep, "invalid token", .{}),
                    }
                    const code_kind_str = tokenizer.buffer[code_kind_tok.start..code_kind_tok.end];
                    var code_kind_id: Code.Id = undefined;
                    var just_check_syntax = false;
                    if (mem.eql(u8, code_kind_str, "exe")) {
                        code_kind_id = Code.Id{ .exe = .succeed };
                    } else if (mem.eql(u8, code_kind_str, "exe_err")) {
                        code_kind_id = Code.Id{ .exe = .fail };
                    } else if (mem.eql(u8, code_kind_str, "exe_build_err")) {
                        code_kind_id = Code.Id{ .exe = .build_fail };
                    } else if (mem.eql(u8, code_kind_str, "test")) {
                        code_kind_id = .@"test";
                    } else if (mem.eql(u8, code_kind_str, "test_err")) {
                        code_kind_id = Code.Id{ .test_error = error_str };
                    } else if (mem.eql(u8, code_kind_str, "test_safety")) {
                        code_kind_id = Code.Id{ .test_safety = error_str };
                    } else if (mem.eql(u8, code_kind_str, "obj")) {
                        code_kind_id = Code.Id{ .obj = null };
                    } else if (mem.eql(u8, code_kind_str, "obj_err")) {
                        code_kind_id = Code.Id{ .obj = error_str };
                    } else if (mem.eql(u8, code_kind_str, "lib")) {
                        code_kind_id = Code.Id.lib;
                    } else if (mem.eql(u8, code_kind_str, "syntax")) {
                        code_kind_id = Code.Id{ .obj = null };
                        just_check_syntax = true;
                    } else {
                        return parseError(tokenizer, code_kind_tok, "unrecognized code kind: {s}", .{code_kind_str});
                    }

                    // Per-block build options, toggled by the modifier tags
                    // that may appear before {#code_end#}.
                    var mode: std.builtin.OptimizeMode = .Debug;
                    var link_objects = std.array_list.Managed([]const u8).init(arena);
                    var target_str: ?[]const u8 = null;
                    var link_libc = false;
                    var link_mode: ?std.builtin.LinkMode = null;
                    var disable_cache = false;
                    var verbose_cimport = false;
                    var additional_options = std.array_list.Managed([]const u8).init(arena);

                    // Each iteration consumes one content run plus the
                    // modifier tag that follows it; the loop ends at
                    // {#code_end#}, yielding the last content token.
                    const source_token = while (true) {
                        const content_tok = try eatToken(tokenizer, .content);
                        _ = try eatToken(tokenizer, .bracket_open);
                        const end_code_tag = try eatToken(tokenizer, .tag_content);
                        const end_tag_name = tokenizer.buffer[end_code_tag.start..end_code_tag.end];
                        if (mem.eql(u8, end_tag_name, "code_release_fast")) {
                            mode = .ReleaseFast;
                        } else if (mem.eql(u8, end_tag_name, "code_release_safe")) {
                            mode = .ReleaseSafe;
                        } else if (mem.eql(u8, end_tag_name, "code_disable_cache")) {
                            disable_cache = true;
                        } else if (mem.eql(u8, end_tag_name, "code_verbose_cimport")) {
                            verbose_cimport = true;
                        } else if (mem.eql(u8, end_tag_name, "code_link_object")) {
                            _ = try eatToken(tokenizer, .separator);
                            const obj_tok = try eatToken(tokenizer, .tag_content);
                            try link_objects.append(tokenizer.buffer[obj_tok.start..obj_tok.end]);
                        } else if (mem.eql(u8, end_tag_name, "target_windows")) {
                            target_str = "x86_64-windows";
                        } else if (mem.eql(u8, end_tag_name, "target_linux_x86_64")) {
                            target_str = "x86_64-linux";
                        } else if (mem.eql(u8, end_tag_name, "target_linux_riscv64")) {
                            target_str = "riscv64-linux";
                        } else if (mem.eql(u8, end_tag_name, "target_wasm")) {
                            target_str = "wasm32-freestanding";
                        } else if (mem.eql(u8, end_tag_name, "target_wasi")) {
                            target_str = "wasm32-wasi";
                        } else if (mem.eql(u8, end_tag_name, "link_libc")) {
                            link_libc = true;
                        } else if (mem.eql(u8, end_tag_name, "link_mode_dynamic")) {
                            link_mode = .dynamic;
                        } else if (mem.eql(u8, end_tag_name, "additonal_option") or
                            mem.eql(u8, end_tag_name, "additional_option"))
                        {
                            // Fix: also accept the correct spelling; the
                            // historical misspelling "additonal_option" is
                            // kept for backward compatibility (note the
                            // emitted directive below already says
                            // "additional_option").
                            _ = try eatToken(tokenizer, .separator);
                            const option = try eatToken(tokenizer, .tag_content);
                            try additional_options.append(tokenizer.buffer[option.start..option.end]);
                        } else if (mem.eql(u8, end_tag_name, "code_end")) {
                            _ = try eatToken(tokenizer, .bracket_close);
                            break content_tok;
                        } else {
                            return parseError(
                                tokenizer,
                                end_code_tag,
                                "invalid token inside code_begin: {s}",
                                .{end_tag_name},
                            );
                        }
                        _ = try eatToken(tokenizer, .bracket_close);
                    };

                    const basename = try std.fmt.allocPrint(arena, "{s}.zig", .{name});

                    var file = out_dir.createFile(basename, .{ .exclusive = true }) catch |err| {
                        // Fix: report the file name actually being created
                        // (with the .zig extension), not the bare block name.
                        fatal("unable to create file '{s}': {s}", .{ basename, @errorName(err) });
                    };
                    defer file.close();
                    var file_buffer: [1024]u8 = undefined;
                    var file_writer = file.writer(&file_buffer);
                    const code = &file_writer.interface;

                    // Skip the first byte (the char right after the opening
                    // tag, presumably a newline — content tokens are never
                    // empty, so [1..] is in bounds) and trim surrounding
                    // whitespace.
                    const source = tokenizer.buffer[source_token.start..source_token.end];
                    try code.writeAll(std.mem.trim(u8, source[1..], " \t\r\n"));
                    try code.writeAll("\n\n");

                    // Emit the build-manifest directives for this block.
                    if (just_check_syntax) {
                        try code.print("// syntax\n", .{});
                    } else switch (code_kind_id) {
                        .@"test" => try code.print("// test\n", .{}),
                        .lib => try code.print("// lib\n", .{}),
                        .test_error => |s| try code.print("// test_error={s}\n", .{s}),
                        .test_safety => |s| try code.print("// test_safety={s}\n", .{s}),
                        .exe => |s| try code.print("// exe={s}\n", .{@tagName(s)}),
                        .obj => |opt| if (opt) |s| {
                            try code.print("// obj={s}\n", .{s});
                        } else {
                            try code.print("// obj\n", .{});
                        },
                    }

                    if (mode != .Debug)
                        try code.print("// optimize={s}\n", .{@tagName(mode)});

                    for (link_objects.items) |link_object| {
                        try code.print("// link_object={s}\n", .{link_object});
                    }

                    if (target_str) |s|
                        try code.print("// target={s}\n", .{s});

                    if (link_libc) try code.print("// link_libc\n", .{});
                    if (disable_cache) try code.print("// disable_cache\n", .{});
                    if (verbose_cimport) try code.print("// verbose_cimport\n", .{});

                    if (link_mode) |m|
                        try code.print("// link_mode={s}\n", .{@tagName(m)});

                    for (additional_options.items) |o| {
                        try code.print("// additional_option={s}\n", .{o});
                    }
                    try code.flush();
                    // Replace the whole region with a reference to the
                    // extracted file.
                    try w.print("{{#code|{s}#}}\n", .{basename});
                } else {
                    // Not a code_begin tag: copy the entire tag through
                    // unchanged, from `{#` up to and including `#}`.
                    const close_bracket = while (true) {
                        const next = tokenizer.next();
                        if (next.id == .bracket_close) break next;
                    };
                    try w.writeAll(tokenizer.buffer[token.start..close_bracket.end]);
                }
            },
            else => return parseError(tokenizer, token, "invalid token", .{}),
        }
    }
}
448
/// Converts `input` into a URL-friendly slug: letters, digits, `_` and `-`
/// are kept, spaces become `-`, and every other byte is dropped.
/// Caller owns the returned slice (allocated with `allocator`).
fn urlize(allocator: Allocator, input: []const u8) ![]u8 {
    var result = std.array_list.Managed(u8).init(allocator);
    errdefer result.deinit();

    for (input) |byte| switch (byte) {
        'a'...'z', 'A'...'Z', '0'...'9', '_', '-' => try result.append(byte),
        ' ' => try result.append('-'),
        else => {},
    };
    return result.toOwnedSlice();
}