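//! Documentation preprocessor: tokenizes `{#tag#}` markup in the input file,
//! extracts every `{#code_begin|<kind>|<name>#}`...`{#code_end#}` block into
//! a standalone `<name>.zig` file next to the output file, and replaces the
//! block with a `{#code|<name>.zig#}` reference. All other `{#...#}` tags are
//! copied through unchanged.
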
const std = @import("std");
const builtin = @import("builtin");
const fs = std.fs;
const print = std.debug.print;
const mem = std.mem;
const testing = std.testing;
const Allocator = std.mem.Allocator;
const max_doc_file_size = 10 * 1024 * 1024;
const fatal = std.process.fatal;

pub fn main() !void {
    var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_instance.deinit();
    const arena = arena_instance.allocator();

    const gpa = arena;

    const args = try std.process.argsAlloc(arena);
    // Guard against missing CLI arguments before indexing into them.
    if (args.len < 3) fatal("expected input and output file arguments", .{});
    const input_file = args[1];
    const output_file = args[2];

    var threaded: std.Io.Threaded = .init(gpa);
    defer threaded.deinit();
    const io = threaded.io();

    var in_file = try fs.cwd().openFile(input_file, .{ .mode = .read_only });
    defer in_file.close();

    var out_file = try fs.cwd().createFile(output_file, .{});
    defer out_file.close();
    var out_file_buffer: [4096]u8 = undefined;
    var out_file_writer = out_file.writer(&out_file_buffer);

    var out_dir = try fs.cwd().openDir(fs.path.dirname(output_file).?, .{});
    defer out_dir.close();

    var in_file_reader = in_file.reader(io, &.{});
    const input_file_bytes = try in_file_reader.interface.allocRemaining(arena, .unlimited);

    var tokenizer = Tokenizer.init(input_file, input_file_bytes);

    try walk(arena, &tokenizer, out_dir, &out_file_writer.interface);

    try out_file_writer.end();
}

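/// One span of the input: either literal document content or a single piece
/// of `{#...#}` tag syntax, identified by byte offsets into the source
/// buffer.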
const Token = struct {
    id: Id,
    start: usize,
    end: usize,

    const Id = enum {
        invalid,
        content,
        bracket_open,
        tag_content,
        separator,
        bracket_close,
        eof,
    };
};

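/// A state machine that splits the input into `content`, `bracket_open`
/// (`{#`), `tag_content`, `separator` (`|`), and `bracket_close` (`#}`)
/// tokens.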
const Tokenizer = struct {
    buffer: []const u8,
    index: usize,
    state: State,
    source_file_name: []const u8,

    const State = enum {
        start,
        l_bracket,
        hash,
        tag_name,
        eof,
    };

    fn init(source_file_name: []const u8, buffer: []const u8) Tokenizer {
        return Tokenizer{
            .buffer = buffer,
            .index = 0,
            .state = .start,
            .source_file_name = source_file_name,
        };
    }

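    /// Returns the next token and advances the tokenizer. Yields an `eof`
    /// token once the buffer is exhausted; a tag left unterminated at the
    /// end of the buffer yields `invalid`.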
    fn next(self: *Tokenizer) Token {
        var result = Token{
            .id = .eof,
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (self.state) {
                .start => switch (c) {
                    '{' => {
                        self.state = .l_bracket;
                    },
                    else => {
                        result.id = .content;
                    },
                },
                .l_bracket => switch (c) {
                    '#' => {
                        if (result.id != .eof) {
                            self.index -= 1;
                            self.state = .start;
                            break;
                        } else {
                            result.id = .bracket_open;
                            self.index += 1;
                            self.state = .tag_name;
                            break;
                        }
                    },
                    else => {
                        result.id = .content;
                        self.state = .start;
                    },
                },
                .tag_name => switch (c) {
                    '|' => {
                        if (result.id != .eof) {
                            break;
                        } else {
                            result.id = .separator;
                            self.index += 1;
                            break;
                        }
                    },
                    '#' => {
                        self.state = .hash;
                    },
                    else => {
                        result.id = .tag_content;
                    },
                },
                .hash => switch (c) {
                    '}' => {
                        if (result.id != .eof) {
                            self.index -= 1;
                            self.state = .tag_name;
                            break;
                        } else {
                            result.id = .bracket_close;
                            self.index += 1;
                            self.state = .start;
                            break;
                        }
                    },
                    else => {
                        result.id = .tag_content;
                        self.state = .tag_name;
                    },
                },
                .eof => unreachable,
            }
        } else {
            switch (self.state) {
                .start, .l_bracket, .eof => {},
                else => {
                    result.id = .invalid;
                },
            }
            self.state = .eof;
        }
        result.end = self.index;
        return result;
    }

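    /// Zero-based line/column of a token's start, plus the byte range of the
    /// line it appears on, for error reporting.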
    const Location = struct {
        line: usize,
        column: usize,
        line_start: usize,
        line_end: usize,
    };

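    /// Scans the buffer from the beginning to locate `token.start`, so the
    /// cost is linear in the token's offset; only used on error paths.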
    fn getTokenLocation(self: *Tokenizer, token: Token) Location {
        var loc = Location{
            .line = 0,
            .column = 0,
            .line_start = 0,
            .line_end = 0,
        };
        for (self.buffer, 0..) |c, i| {
            if (i == token.start) {
                loc.line_end = i;
                while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
                return loc;
            }
            if (c == '\n') {
                loc.line += 1;
                loc.column = 0;
                loc.line_start = i + 1;
            } else {
                loc.column += 1;
            }
        }
        return loc;
    }
};

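/// Reports a compiler-style `file:line:col: error:` message, echoes the
/// offending source line, underlines the token with `~`, and returns
/// `error.ParseError`.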
fn parseError(tokenizer: *Tokenizer, token: Token, comptime fmt: []const u8, args: anytype) anyerror {
    const loc = tokenizer.getTokenLocation(token);
    const args_prefix = .{ tokenizer.source_file_name, loc.line + 1, loc.column + 1 };
    print("{s}:{d}:{d}: error: " ++ fmt ++ "\n", args_prefix ++ args);
    if (loc.line_start <= loc.line_end) {
        print("{s}\n", .{tokenizer.buffer[loc.line_start..loc.line_end]});
        {
            var i: usize = 0;
            while (i < loc.column) : (i += 1) {
                print(" ", .{});
            }
        }
        {
            const caret_count = @min(token.end, loc.line_end) - token.start;
            var i: usize = 0;
            while (i < caret_count) : (i += 1) {
                print("~", .{});
            }
        }
        print("\n", .{});
    }
    return error.ParseError;
}

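/// Returns a parse error unless `token` has the expected id.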
fn assertToken(tokenizer: *Tokenizer, token: Token, id: Token.Id) !void {
    if (token.id != id) {
        return parseError(tokenizer, token, "expected {s}, found {s}", .{ @tagName(id), @tagName(token.id) });
    }
}

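/// Consumes the next token, asserting that it has the expected id.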
fn eatToken(tokenizer: *Tokenizer, id: Token.Id) !Token {
    const token = tokenizer.next();
    try assertToken(tokenizer, token, id);
    return token;
}

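/// What running an `exe` example is expected to do: run successfully, fail
/// at runtime, or fail to build.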
const ExpectedOutcome = enum {
    succeed,
    fail,
    build_fail,
};

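/// Build settings for one extracted code example. `walk` serializes these as
/// trailing `// key=value` comments in the generated `.zig` file.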
const Code = struct {
    id: Id,
    name: []const u8,
    source_token: Token,
    just_check_syntax: bool,
    mode: std.builtin.OptimizeMode,
    link_objects: []const []const u8,
    target_str: ?[]const u8,
    link_libc: bool,
    link_mode: ?std.builtin.LinkMode,
    disable_cache: bool,
    verbose_cimport: bool,
    additional_options: []const []const u8,

    const Id = union(enum) {
        @"test",
        test_error: []const u8,
        test_safety: []const u8,
        exe: ExpectedOutcome,
        obj: ?[]const u8,
        lib,
    };
};

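/// Copies the document from `tokenizer` to `w`, extracting each
/// `{#code_begin|<kind>|<name>#}`...`{#code_end#}` block into `<name>.zig`
/// inside `out_dir` and emitting a `{#code|<name>.zig#}` reference in its
/// place. Any other `{#...#}` tag is copied through unchanged.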
fn walk(arena: Allocator, tokenizer: *Tokenizer, out_dir: std.fs.Dir, w: anytype) !void {
    while (true) {
        const token = tokenizer.next();
        switch (token.id) {
            .eof => break,
            .content,
            => {
                try w.writeAll(tokenizer.buffer[token.start..token.end]);
            },
            .bracket_open => {
                const tag_token = try eatToken(tokenizer, .tag_content);
                const tag_name = tokenizer.buffer[tag_token.start..tag_token.end];

                if (mem.eql(u8, tag_name, "code_begin")) {
                    _ = try eatToken(tokenizer, .separator);
                    const code_kind_tok = try eatToken(tokenizer, .tag_content);
                    _ = try eatToken(tokenizer, .separator);
                    const name_tok = try eatToken(tokenizer, .tag_content);
                    const name = tokenizer.buffer[name_tok.start..name_tok.end];
                    var error_str: []const u8 = "";
                    const maybe_sep = tokenizer.next();
                    switch (maybe_sep.id) {
                        .separator => {
                            const error_tok = try eatToken(tokenizer, .tag_content);
                            error_str = tokenizer.buffer[error_tok.start..error_tok.end];
                            _ = try eatToken(tokenizer, .bracket_close);
                        },
                        .bracket_close => {},
                        else => return parseError(tokenizer, token, "invalid token", .{}),
                    }
                    const code_kind_str = tokenizer.buffer[code_kind_tok.start..code_kind_tok.end];
                    var code_kind_id: Code.Id = undefined;
                    var just_check_syntax = false;
                    if (mem.eql(u8, code_kind_str, "exe")) {
                        code_kind_id = Code.Id{ .exe = .succeed };
                    } else if (mem.eql(u8, code_kind_str, "exe_err")) {
                        code_kind_id = Code.Id{ .exe = .fail };
                    } else if (mem.eql(u8, code_kind_str, "exe_build_err")) {
                        code_kind_id = Code.Id{ .exe = .build_fail };
                    } else if (mem.eql(u8, code_kind_str, "test")) {
                        code_kind_id = .@"test";
                    } else if (mem.eql(u8, code_kind_str, "test_err")) {
                        code_kind_id = Code.Id{ .test_error = error_str };
                    } else if (mem.eql(u8, code_kind_str, "test_safety")) {
                        code_kind_id = Code.Id{ .test_safety = error_str };
                    } else if (mem.eql(u8, code_kind_str, "obj")) {
                        code_kind_id = Code.Id{ .obj = null };
                    } else if (mem.eql(u8, code_kind_str, "obj_err")) {
                        code_kind_id = Code.Id{ .obj = error_str };
                    } else if (mem.eql(u8, code_kind_str, "lib")) {
                        code_kind_id = Code.Id.lib;
                    } else if (mem.eql(u8, code_kind_str, "syntax")) {
                        code_kind_id = Code.Id{ .obj = null };
                        just_check_syntax = true;
                    } else {
                        return parseError(tokenizer, code_kind_tok, "unrecognized code kind: {s}", .{code_kind_str});
                    }

                    var mode: std.builtin.OptimizeMode = .Debug;
                    var link_objects = std.array_list.Managed([]const u8).init(arena);
                    var target_str: ?[]const u8 = null;
                    var link_libc = false;
                    var link_mode: ?std.builtin.LinkMode = null;
                    var disable_cache = false;
                    var verbose_cimport = false;
                    var additional_options = std.array_list.Managed([]const u8).init(arena);

                    const source_token = while (true) {
                        const content_tok = try eatToken(tokenizer, .content);
                        _ = try eatToken(tokenizer, .bracket_open);
                        const end_code_tag = try eatToken(tokenizer, .tag_content);
                        const end_tag_name = tokenizer.buffer[end_code_tag.start..end_code_tag.end];
                        if (mem.eql(u8, end_tag_name, "code_release_fast")) {
                            mode = .ReleaseFast;
                        } else if (mem.eql(u8, end_tag_name, "code_release_safe")) {
                            mode = .ReleaseSafe;
                        } else if (mem.eql(u8, end_tag_name, "code_disable_cache")) {
                            disable_cache = true;
                        } else if (mem.eql(u8, end_tag_name, "code_verbose_cimport")) {
                            verbose_cimport = true;
                        } else if (mem.eql(u8, end_tag_name, "code_link_object")) {
                            _ = try eatToken(tokenizer, .separator);
                            const obj_tok = try eatToken(tokenizer, .tag_content);
                            try link_objects.append(tokenizer.buffer[obj_tok.start..obj_tok.end]);
                        } else if (mem.eql(u8, end_tag_name, "target_windows")) {
                            target_str = "x86_64-windows";
                        } else if (mem.eql(u8, end_tag_name, "target_linux_x86_64")) {
                            target_str = "x86_64-linux";
                        } else if (mem.eql(u8, end_tag_name, "target_linux_riscv64")) {
                            target_str = "riscv64-linux";
                        } else if (mem.eql(u8, end_tag_name, "target_wasm")) {
                            target_str = "wasm32-freestanding";
                        } else if (mem.eql(u8, end_tag_name, "target_wasi")) {
                            target_str = "wasm32-wasi";
                        } else if (mem.eql(u8, end_tag_name, "link_libc")) {
                            link_libc = true;
                        } else if (mem.eql(u8, end_tag_name, "link_mode_dynamic")) {
                            link_mode = .dynamic;
                        } else if (mem.eql(u8, end_tag_name, "additonal_option")) {
                            _ = try eatToken(tokenizer, .separator);
                            const option = try eatToken(tokenizer, .tag_content);
                            try additional_options.append(tokenizer.buffer[option.start..option.end]);
                        } else if (mem.eql(u8, end_tag_name, "code_end")) {
                            _ = try eatToken(tokenizer, .bracket_close);
                            break content_tok;
                        } else {
                            return parseError(
                                tokenizer,
                                end_code_tag,
                                "invalid token inside code_begin: {s}",
                                .{end_tag_name},
                            );
                        }
                        _ = try eatToken(tokenizer, .bracket_close);
                    };

                    const basename = try std.fmt.allocPrint(arena, "{s}.zig", .{name});

                    var file = out_dir.createFile(basename, .{ .exclusive = true }) catch |err| {
                        fatal("unable to create file '{s}': {s}", .{ name, @errorName(err) });
                    };
                    defer file.close();
                    var file_buffer: [1024]u8 = undefined;
                    var file_writer = file.writer(&file_buffer);
                    const code = &file_writer.interface;

                    const source = tokenizer.buffer[source_token.start..source_token.end];
                    try code.writeAll(std.mem.trim(u8, source[1..], " \t\r\n"));
                    try code.writeAll("\n\n");

                    if (just_check_syntax) {
                        try code.print("// syntax\n", .{});
                    } else switch (code_kind_id) {
                        .@"test" => try code.print("// test\n", .{}),
                        .lib => try code.print("// lib\n", .{}),
                        .test_error => |s| try code.print("// test_error={s}\n", .{s}),
                        .test_safety => |s| try code.print("// test_safety={s}\n", .{s}),
                        .exe => |s| try code.print("// exe={s}\n", .{@tagName(s)}),
                        .obj => |opt| if (opt) |s| {
                            try code.print("// obj={s}\n", .{s});
                        } else {
                            try code.print("// obj\n", .{});
                        },
                    }

                    if (mode != .Debug)
                        try code.print("// optimize={s}\n", .{@tagName(mode)});

                    for (link_objects.items) |link_object| {
                        try code.print("// link_object={s}\n", .{link_object});
                    }

                    if (target_str) |s|
                        try code.print("// target={s}\n", .{s});

                    if (link_libc) try code.print("// link_libc\n", .{});
                    if (disable_cache) try code.print("// disable_cache\n", .{});
                    if (verbose_cimport) try code.print("// verbose_cimport\n", .{});

                    if (link_mode) |m|
                        try code.print("// link_mode={s}\n", .{@tagName(m)});

                    for (additional_options.items) |o| {
                        try code.print("// additional_option={s}\n", .{o});
                    }
                    try code.flush();
                    try w.print("{{#code|{s}#}}\n", .{basename});
                } else {
                    const close_bracket = while (true) {
                        const next = tokenizer.next();
                        if (next.id == .bracket_close) break next;
                        // Guard against a missing `#}`: at the end of the
                        // buffer the tokenizer returns eof tokens forever,
                        // which would make this loop spin indefinitely.
                        if (next.id == .eof) return parseError(tokenizer, token, "unexpected EOF inside tag", .{});
                    };
                    try w.writeAll(tokenizer.buffer[token.start..close_bracket.end]);
                }
            },
            else => return parseError(tokenizer, token, "invalid token", .{}),
        }
    }
}

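/// Converts `input` to a URL-friendly slug: keeps ASCII alphanumerics, `_`,
/// and `-`, maps spaces to `-`, and drops everything else. Caller owns the
/// returned memory.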
fn urlize(allocator: Allocator, input: []const u8) ![]u8 {
    var buf = std.array_list.Managed(u8).init(allocator);
    defer buf.deinit();

    const out = buf.writer();
    for (input) |c| {
        switch (c) {
            'a'...'z', 'A'...'Z', '_', '-', '0'...'9' => {
                try out.writeByte(c);
            },
            ' ' => {
                try out.writeByte('-');
            },
            else => {},
        }
    }
    return try buf.toOwnedSlice();
}