Commit 4055e6055b

zooster <r00ster91@proton.me>
2022-08-18 18:54:51
AstGen: disallow leading zeroes in int literals and int types
This makes integer literals such as `0123` and integer type names such as `u0123` compile errors. I'm now confident that this is a good change because it caught two C header translation mistakes in `haiku.zig`. `0123` is octal in C, which is easy to misread as decimal; Zig already requires the `0o` prefix for octal literals, and now also disallows leading zeroes in decimal integer literals, which makes such values easier to read. For consistency, and because it looks odd, leading zeroes are disallowed in integer type names too (e.g. `u0123`). Fixes #11963. Fixes #12417.
1 parent 2523b44
lib/std/c/haiku.zig
@@ -702,7 +702,7 @@ pub const T = struct {
     pub const CSETAF = 0x8002;
     pub const CSETAW = 0x8003;
     pub const CWAITEVENT = 0x8004;
-    pub const CSBRK = 08005;
+    pub const CSBRK = 0x8005;
     pub const CFLSH = 0x8006;
     pub const CXONC = 0x8007;
     pub const CQUERYCONNECTED = 0x8008;
@@ -874,7 +874,7 @@ pub const S = struct {
     pub const IFDIR = 0o040000;
     pub const IFCHR = 0o020000;
     pub const IFIFO = 0o010000;
-    pub const INDEX_DIR = 04000000000;
+    pub const INDEX_DIR = 0o4000000000;
 
     pub const IUMSK = 0o7777;
     pub const ISUID = 0o4000;
lib/std/zig/parser_test.zig
@@ -4254,10 +4254,10 @@ test "zig fmt: integer literals with underscore separators" {
         \\const
         \\ x     =
         \\ 1_234_567
-        \\ + (0b0_1-0o7_0+0xff_FF ) +  0_0;
+        \\ + (0b0_1-0o7_0+0xff_FF ) +  1_0;
     ,
         \\const x =
-        \\    1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 0_0;
+        \\    1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 1_0;
         \\
     );
 }
@@ -4266,7 +4266,7 @@ test "zig fmt: hex literals with underscore separators" {
     try testTransform(
         \\pub fn orMask(a: [ 1_000 ]u64, b: [  1_000]  u64) [1_000]u64 {
         \\    var c: [1_000]u64 =  [1]u64{ 0xFFFF_FFFF_FFFF_FFFF}**1_000;
-        \\    for (c [ 0_0 .. ]) |_, i| {
+        \\    for (c [ 1_0 .. ]) |_, i| {
         \\        c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
         \\    }
         \\    return c;
@@ -4276,7 +4276,7 @@ test "zig fmt: hex literals with underscore separators" {
     ,
         \\pub fn orMask(a: [1_000]u64, b: [1_000]u64) [1_000]u64 {
         \\    var c: [1_000]u64 = [1]u64{0xFFFF_FFFF_FFFF_FFFF} ** 1_000;
-        \\    for (c[0_0..]) |_, i| {
+        \\    for (c[1_0..]) |_, i| {
         \\        c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
         \\    }
         \\    return c;
@@ -4288,14 +4288,14 @@ test "zig fmt: hex literals with underscore separators" {
 test "zig fmt: decimal float literals with underscore separators" {
     try testTransform(
         \\pub fn main() void {
-        \\    const a:f64=(10.0e-0+(10.0e+0))+10_00.00_00e-2+00_00.00_10e+4;
-        \\    const b:f64=010.0--0_10.0+0_1_0.0_0+1e2;
+        \\    const a:f64=(10.0e-0+(10.0e+0))+10_00.00_00e-2+20_00.00_10e+4;
+        \\    const b:f64=1_0.0--10_10.0+1_0_0.0_0+1e2;
         \\    std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
         \\}
     ,
         \\pub fn main() void {
-        \\    const a: f64 = (10.0e-0 + (10.0e+0)) + 10_00.00_00e-2 + 00_00.00_10e+4;
-        \\    const b: f64 = 010.0 - -0_10.0 + 0_1_0.0_0 + 1e2;
+        \\    const a: f64 = (10.0e-0 + (10.0e+0)) + 10_00.00_00e-2 + 20_00.00_10e+4;
+        \\    const b: f64 = 1_0.0 - -10_10.0 + 1_0_0.0_0 + 1e2;
         \\    std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
         \\}
         \\
src/AstGen.zig
@@ -4088,6 +4088,13 @@ fn testDecl(
                             true => .signed,
                             false => .unsigned,
                         };
+                        if (ident_name_raw.len >= 3 and ident_name_raw[1] == '0') {
+                            return astgen.failTok(
+                                test_name_token,
+                                "primitive integer type '{s}' has leading zero",
+                                .{ident_name_raw},
+                            );
+                        }
                         _ = parseBitCount(ident_name_raw[1..]) catch |err| switch (err) {
                             error.Overflow => return astgen.failTok(
                                 test_name_token,
@@ -6791,6 +6798,13 @@ fn identifier(
                     true => .signed,
                     false => .unsigned,
                 };
+                if (ident_name_raw.len >= 3 and ident_name_raw[1] == '0') {
+                    return astgen.failNode(
+                        ident,
+                        "primitive integer type '{s}' has leading zero",
+                        .{ident_name_raw},
+                    );
+                }
                 const bit_count = parseBitCount(ident_name_raw[1..]) catch |err| switch (err) {
                     error.Overflow => return astgen.failNode(
                         ident,
@@ -7021,17 +7035,6 @@ fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: Ast.Node.Index) InnerError!Z
     const main_tokens = tree.nodes.items(.main_token);
     const int_token = main_tokens[node];
     const prefixed_bytes = tree.tokenSlice(int_token);
-    if (std.fmt.parseInt(u64, prefixed_bytes, 0)) |small_int| {
-        const result: Zir.Inst.Ref = switch (small_int) {
-            0 => .zero,
-            1 => .one,
-            else => try gz.addInt(small_int),
-        };
-        return rvalue(gz, rl, result, node);
-    } else |err| switch (err) {
-        error.InvalidCharacter => unreachable, // Caught by the parser.
-        error.Overflow => {},
-    }
 
     var base: u8 = 10;
     var non_prefixed: []const u8 = prefixed_bytes;
@@ -7046,6 +7049,24 @@ fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: Ast.Node.Index) InnerError!Z
         non_prefixed = prefixed_bytes[2..];
     }
 
+    if (base == 10 and prefixed_bytes.len >= 2 and prefixed_bytes[0] == '0') {
+        return astgen.failNodeNotes(node, "integer literal '{s}' has leading zero", .{prefixed_bytes}, &.{
+            try astgen.errNoteNode(node, "use '0o' prefix for octal literals", .{}),
+        });
+    }
+
+    if (std.fmt.parseUnsigned(u64, non_prefixed, base)) |small_int| {
+        const result: Zir.Inst.Ref = switch (small_int) {
+            0 => .zero,
+            1 => .one,
+            else => try gz.addInt(small_int),
+        };
+        return rvalue(gz, rl, result, node);
+    } else |err| switch (err) {
+        error.InvalidCharacter => unreachable, // Caught by the parser.
+        error.Overflow => {},
+    }
+
     const gpa = astgen.gpa;
     var big_int = try std.math.big.int.Managed.init(gpa);
     defer big_int.deinit();
test/behavior/math.zig
@@ -239,10 +239,9 @@ test "quad hex float literal parsing in range" {
 }
 
 test "underscore separator parsing" {
-    try expect(0_0_0_0 == 0);
     try expect(1_234_567 == 1234567);
-    try expect(001_234_567 == 1234567);
-    try expect(0_0_1_2_3_4_5_6_7 == 1234567);
+    try expect(1_234_567 == 1234567);
+    try expect(1_2_3_4_5_6_7 == 1234567);
 
     try expect(0b0_0_0_0 == 0);
     try expect(0b1010_1010 == 0b10101010);
@@ -260,7 +259,7 @@ test "underscore separator parsing" {
     try expect(0x1_0_1_0_1_0_1_0 == 0x10101010);
 
     try expect(123_456.789_000e1_0 == 123456.789000e10);
-    try expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10);
+    try expect(1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10);
 
     try expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10);
     try expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10);
test/cases/compile_errors/invalid_underscore_placement_in_int_literal-1.zig
@@ -1,5 +1,5 @@
 fn main() void {
-    var bad: u128 = 0010_;
+    var bad: u128 = 10_;
     _ = bad;
 }
 
@@ -8,4 +8,4 @@ fn main() void {
 // target=native
 //
 // :2:21: error: expected expression, found 'invalid bytes'
-// :2:26: note: invalid byte: ';'
+// :2:24: note: invalid byte: ';'
test/cases/compile_errors/leading_zero_in_integer.zig
@@ -0,0 +1,27 @@
+export fn entry1() void {
+    const T = u000123; // expected error: integer type name with leading zeroes
+    _ = T;
+}
+export fn entry2() void {
+    _ = i0; // no error expected: a lone `0` bit count is not a leading zero
+    _ = u0; // no error expected, same as `i0`
+    var x: i01 = 1; // expected error: bit count `01` has a leading zero
+    _ = x;
+}
+export fn entry3() void {
+    _ = 000123; // expected error plus a note suggesting the '0o' prefix
+}
+export fn entry4() void {
+    _ = 01; // expected error: a two-character literal still has a leading zero
+}
+
+// error
+// backend=llvm
+// target=native
+//
+// :2:15: error: primitive integer type 'u000123' has leading zero
+// :8:12: error: primitive integer type 'i01' has leading zero
+// :12:9: error: integer literal '000123' has leading zero
+// :12:9: note: use '0o' prefix for octal literals
+// :15:9: error: integer literal '01' has leading zero
+// :15:9: note: use '0o' prefix for octal literals