Commit 2b45e23477

Vexu <git@vexu.eu>
2020-08-18 14:33:11
stage2: character literals and multiline strings
1 parent e0b01bd
Changed files (4)
lib
src-self-hosted
test
lib/std/zig.zig
@@ -80,6 +80,107 @@ pub fn binNameAlloc(
     }
 }
 
+/// Parses a character literal token, surrounding single quotes included, and
+/// returns the code point it denotes.
+/// Only validates escape sequence characters.
+/// Slice must be valid utf8 starting and ending with "'" and exactly one codepoint in between.
+///
+/// Recognized escapes: `\n`, `\r`, `\t`, `\\`, `\'`, `\"`, `\xNN` (exactly two
+/// hex digits) and `\u{N...}` (one or more hex digits, value at most 0x10ffff).
+///
+/// On `error.InvalidCharacter`, `bad_index` holds the index into `slice` of the
+/// byte the error is reported at. It is left untouched on success.
+pub fn parseCharLiteral(
+    slice: []const u8,
+    bad_index: *usize, // populated if error.InvalidCharacter is returned
+) error{InvalidCharacter}!u32 {
+    std.debug.assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\'');
+
+    if (slice[1] == '\\') {
+        switch (slice[2]) {
+            'n' => return '\n',
+            'r' => return '\r',
+            '\\' => return '\\',
+            't' => return '\t',
+            '\'' => return '\'',
+            '"' => return '"',
+            'x' => {
+                // Exactly two hex digits are required: '\xNN' is 6 bytes long.
+                if (slice.len != 6) {
+                    bad_index.* = slice.len - 2;
+                    return error.InvalidCharacter;
+                }
+
+                const digits_start = 3;
+                var value: u32 = 0;
+                for (slice[digits_start..5]) |c, i| {
+                    const digit = std.fmt.charToDigit(c, 16) catch {
+                        // `i` is relative to the digits; report an index into `slice`.
+                        bad_index.* = digits_start + i;
+                        return error.InvalidCharacter;
+                    };
+                    value = value * 16 + digit;
+                }
+                return value;
+            },
+            'u' => {
+                // The shortest well-formed escape is '\u{0}'; the braces must
+                // enclose everything between "\u" and the closing quote.
+                if (slice.len < "'\\u{0}'".len or slice[3] != '{' or slice[slice.len - 2] != '}') {
+                    bad_index.* = 2;
+                    return error.InvalidCharacter;
+                }
+
+                const digits_start = 4;
+                var value: u32 = 0;
+                for (slice[digits_start .. slice.len - 2]) |c, i| {
+                    const digit = std.fmt.charToDigit(c, 16) catch {
+                        bad_index.* = digits_start + i;
+                        return error.InvalidCharacter;
+                    };
+                    value = value * 16 + digit;
+                    // Checking after every digit keeps `value` small enough that
+                    // the next multiplication cannot overflow a u32.
+                    if (value > 0x10ffff) {
+                        bad_index.* = digits_start + i;
+                        return error.InvalidCharacter;
+                    }
+                }
+                return value;
+            },
+            else => {
+                bad_index.* = 2;
+                return error.InvalidCharacter;
+            },
+        }
+    }
+    // Not an escape: the caller guarantees a single valid utf8 codepoint.
+    return std.unicode.utf8Decode(slice[1 .. slice.len - 1]) catch unreachable;
+}
+
+test "parseCharLiteral" {
+    var bad_index: usize = undefined;
+    std.testing.expectEqual(@as(u32, 'a'), try parseCharLiteral("'a'", &bad_index));
+    std.testing.expectEqual(@as(u32, 'ä'), try parseCharLiteral("'ä'", &bad_index));
+    std.testing.expectEqual(@as(u32, 0x3041), try parseCharLiteral("'ぁ'", &bad_index));
+    std.testing.expectEqual(@as(u32, '\n'), try parseCharLiteral("'\\n'", &bad_index));
+    std.testing.expectEqual(@as(u32, '\''), try parseCharLiteral("'\\''", &bad_index));
+
+    // \x escapes, including hex letters in both cases.
+    std.testing.expectEqual(@as(u32, 0), try parseCharLiteral("'\\x00'", &bad_index));
+    std.testing.expectEqual(@as(u32, 0x4f), try parseCharLiteral("'\\x4f'", &bad_index));
+    std.testing.expectEqual(@as(u32, 0x4f), try parseCharLiteral("'\\x4F'", &bad_index));
+    std.testing.expectEqual(@as(u32, 0xff), try parseCharLiteral("'\\xff'", &bad_index));
+
+    // \u escapes, including hex letters and the largest valid code point.
+    std.testing.expectEqual(@as(u32, 0x3041), try parseCharLiteral("'\\u{3041}'", &bad_index));
+    std.testing.expectEqual(@as(u32, 0x1f4a9), try parseCharLiteral("'\\u{1f4a9}'", &bad_index));
+    std.testing.expectEqual(@as(u32, 0x1f4a9), try parseCharLiteral("'\\u{1F4A9}'", &bad_index));
+    std.testing.expectEqual(@as(u32, 0x10ffff), try parseCharLiteral("'\\u{10ffff}'", &bad_index));
+
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\x0'", &bad_index));
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\y'", &bad_index));
+    std.testing.expectEqual(@as(usize, 2), bad_index);
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u'", &bad_index));
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{}'", &bad_index));
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{41'", &bad_index));
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{FFFFFF}'", &bad_index));
+
+    // bad_index is an index into the whole literal, not into the digit run.
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\xzz'", &bad_index));
+    std.testing.expectEqual(@as(usize, 3), bad_index);
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\x0g'", &bad_index));
+    std.testing.expectEqual(@as(usize, 4), bad_index);
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{12z}'", &bad_index));
+    std.testing.expectEqual(@as(usize, 6), bad_index);
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{110000}'", &bad_index));
+    std.testing.expectEqual(@as(usize, 9), bad_index);
+}
+
 test "" {
     @import("std").meta.refAllDecls(@This());
 }
src-self-hosted/astgen.zig
@@ -131,6 +131,8 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr
         .ArrayType => return rlWrap(mod, scope, rl, try arrayType(mod, scope, node.castTag(.ArrayType).?)),
         .ArrayTypeSentinel => return rlWrap(mod, scope, rl, try arrayTypeSentinel(mod, scope, node.castTag(.ArrayTypeSentinel).?)),
         .EnumLiteral => return rlWrap(mod, scope, rl, try enumLiteral(mod, scope, node.castTag(.EnumLiteral).?)),
+        .MultilineStringLiteral => return rlWrap(mod, scope, rl, try multilineStrLiteral(mod, scope, node.castTag(.MultilineStringLiteral).?)),
+        .CharLiteral => return rlWrap(mod, scope, rl, try charLiteral(mod, scope, node.castTag(.CharLiteral).?)),
 
         .Defer => return mod.failNode(scope, node, "TODO implement astgen.expr for .Defer", .{}),
         .Catch => return mod.failNode(scope, node, "TODO implement astgen.expr for .Catch", .{}),
@@ -159,8 +161,6 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr
         .ErrorType => return mod.failNode(scope, node, "TODO implement astgen.expr for .ErrorType", .{}),
         .FnProto => return mod.failNode(scope, node, "TODO implement astgen.expr for .FnProto", .{}),
         .AnyFrameType => return mod.failNode(scope, node, "TODO implement astgen.expr for .AnyFrameType", .{}),
-        .MultilineStringLiteral => return mod.failNode(scope, node, "TODO implement astgen.expr for .MultilineStringLiteral", .{}),
-        .CharLiteral => return mod.failNode(scope, node, "TODO implement astgen.expr for .CharLiteral", .{}),
         .ErrorSetDecl => return mod.failNode(scope, node, "TODO implement astgen.expr for .ErrorSetDecl", .{}),
         .ContainerDecl => return mod.failNode(scope, node, "TODO implement astgen.expr for .ContainerDecl", .{}),
         .Comptime => return mod.failNode(scope, node, "TODO implement astgen.expr for .Comptime", .{}),
@@ -497,6 +497,7 @@ fn arrayType(mod: *Module, scope: *Scope, node: *ast.Node.ArrayType) !*zir.Inst
         .val = Value.initTag(.usize_type),
     });
 
+    // TODO check for [_]T
     const len = try expr(mod, scope, .{ .ty = usize_type }, node.len_expr);
     const child_type = try expr(mod, scope, .{ .ty = meta_type }, node.rhs);
 
@@ -515,6 +516,7 @@ fn arrayTypeSentinel(mod: *Module, scope: *Scope, node: *ast.Node.ArrayTypeSenti
         .val = Value.initTag(.usize_type),
     });
 
+    // TODO check for [_]T
     const len = try expr(mod, scope, .{ .ty = usize_type }, node.len_expr);
     const sentinel_uncasted = try expr(mod, scope, .none, node.sentinel);
     const elem_type = try expr(mod, scope, .{ .ty = meta_type }, node.rhs);
@@ -1120,6 +1122,53 @@ fn stringLiteral(mod: *Module, scope: *Scope, str_lit: *ast.Node.OneToken) Inner
     return addZIRInst(mod, scope, src, zir.Inst.Str, .{ .bytes = bytes }, .{});
 }
 
+/// Lowers a multiline string literal node into a single `zir.Inst.Str`.
+/// The first two bytes of every line token are dropped and the remaining
+/// pieces are joined with '\n'. The joined bytes are allocated from the
+/// scope's arena. The instruction's source offset is the start of the
+/// first line token.
+fn multilineStrLiteral(mod: *Module, scope: *Scope, node: *ast.Node.MultilineStringLiteral) !*zir.Inst {
+    const tree = scope.tree();
+    const lines = node.linesConst();
+    const src = tree.token_locs[lines[0]].start;
+
+    // One '\n' between each pair of neighbouring lines, plus the length of
+    // every line token minus its two leading bytes.
+    // NOTE(review): presumably those two bytes are the `\\` prefix; this also
+    // assumes the token slice carries no trailing newline - confirm against
+    // the tokenizer.
+    var total_len = lines.len - 1;
+    for (lines) |line_token| {
+        total_len += tree.tokenSlice(line_token).len - 2;
+    }
+
+    const bytes = try scope.arena().alloc(u8, total_len);
+    var offset: usize = 0;
+    for (lines) |line_token, index| {
+        // Every line except the first is preceded by a separator.
+        if (index > 0) {
+            bytes[offset] = '\n';
+            offset += 1;
+        }
+        const text = tree.tokenSlice(line_token)[2..];
+        mem.copy(u8, bytes[offset..], text);
+        offset += text.len;
+    }
+
+    return addZIRInst(mod, scope, src, zir.Inst.Str, .{ .bytes = bytes }, .{});
+}
+
+/// Lowers a character literal token into a `comptime_int` constant
+/// instruction holding the value returned by `std.zig.parseCharLiteral`.
+/// When parsing fails, a compile error is reported at the token's start
+/// offset plus the `bad_index` filled in by the parser.
+fn charLiteral(mod: *Module, scope: *Scope, node: *ast.Node.OneToken) !*zir.Inst {
+    const tree = scope.tree();
+    const src = tree.token_locs[node.token].start;
+    const token_bytes = tree.tokenSlice(node.token);
+
+    // Only written by parseCharLiteral on the error path.
+    var bad_index: usize = undefined;
+    const codepoint = std.zig.parseCharLiteral(token_bytes, &bad_index) catch |err| switch (err) {
+        error.InvalidCharacter => {
+            return mod.fail(scope, src + bad_index, "invalid character: '{c}'\n", .{token_bytes[bad_index]});
+        },
+    };
+
+    // The payload is created in the scope's arena.
+    const payload = try scope.arena().create(Value.Payload.Int_u64);
+    payload.* = .{ .int = codepoint };
+
+    return addZIRInstConst(mod, scope, src, .{
+        .ty = Type.initTag(.comptime_int),
+        .val = Value.initPayload(&payload.base),
+    });
+}
+
 fn integerLiteral(mod: *Module, scope: *Scope, int_lit: *ast.Node.OneToken) InnerError!*zir.Inst {
     const arena = scope.arena();
     const tree = scope.tree();
src-self-hosted/zir_sema.zig
@@ -365,6 +365,7 @@ fn analyzeInstEnsureResultNonError(mod: *Module, scope: *Scope, inst: *zir.Inst.
 
 fn analyzeInstAlloc(mod: *Module, scope: *Scope, inst: *zir.Inst.UnOp) InnerError!*Inst {
     const var_type = try resolveType(mod, scope, inst.positionals.operand);
+    // TODO this should happen only for var allocs
     if (!var_type.isValidVarType()) {
         return mod.fail(scope, inst.base.src, "variable of type '{}' must be const or comptime", .{var_type});
     }
test/stage2/compare_output.zig
@@ -543,6 +543,38 @@ pub fn addCases(ctx: *TestContext) !void {
         ,
             "",
         );
+
+        case.addCompareOutput(
+            \\export fn _start() noreturn {
+            \\    const ignore = 
+            \\        \\ cool thx
+            \\        \\
+            \\    ;
+            \\    add('ぁ', '\x03');
+            \\
+            \\    exit();
+            \\}
+            \\
+            \\fn add(a: u32, b: u32) void {
+            \\    assert(a + b == 12356);
+            \\}
+            \\
+            \\pub fn assert(ok: bool) void {
+            \\    if (!ok) unreachable; // assertion failure
+            \\}
+            \\
+            \\fn exit() noreturn {
+            \\    asm volatile ("syscall"
+            \\        :
+            \\        : [number] "{rax}" (231),
+            \\          [arg1] "{rdi}" (0)
+            \\        : "rcx", "r11", "memory"
+            \\    );
+            \\    unreachable;
+            \\}
+        ,
+            "",
+        );
     }
 
     {