Commit 715abe8ebe

Andrew Kelley <andrew@ziglang.org>
2021-04-23 08:47:31
AstGen: implement integers bigger than u64
also get rid of the `optional_type_from_ptr_elem` instruction.
1 parent 2d290d6
src/AstGen.zig
@@ -1735,6 +1735,7 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: ast.Node.Index) Inner
             .func,
             .func_inferred,
             .int,
+            .int_big,
             .float,
             .float128,
             .intcast,
@@ -1762,7 +1763,6 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: ast.Node.Index) Inner
             .typeof_elem,
             .xor,
             .optional_type,
-            .optional_type_from_ptr_elem,
             .optional_payload_safe,
             .optional_payload_unsafe,
             .optional_payload_safe_ptr,
@@ -3874,18 +3874,9 @@ fn orelseCatchExpr(
     block_scope.setBreakResultLoc(rl);
     defer block_scope.instructions.deinit(astgen.gpa);
 
-    // TODO get rid of optional_type_from_ptr_elem
     const operand_rl: ResultLoc = switch (block_scope.break_result_loc) {
         .ref => .ref,
-        .discard, .none, .none_or_ref, .block_ptr, .inferred_ptr => .none,
-        .ty => |elem_ty| blk: {
-            const wrapped_ty = try block_scope.addUnNode(.optional_type, elem_ty, node);
-            break :blk .{ .ty = wrapped_ty };
-        },
-        .ptr => |ptr_ty| blk: {
-            const wrapped_ty = try block_scope.addUnNode(.optional_type_from_ptr_elem, ptr_ty, node);
-            break :blk .{ .ty = wrapped_ty };
-        },
+        else => .none,
     };
     block_scope.break_count += 1;
     // This could be a pointer or value depending on the `operand_rl` parameter.
@@ -5755,10 +5746,37 @@ fn integerLiteral(
             else => try gz.addInt(small_int),
         };
         return rvalue(gz, scope, rl, result, node);
-    } else |err| {
-        assert(err != error.InvalidCharacter);
-        return gz.astgen.failNode(node, "TODO implement int literals that don't fit in a u64", .{});
+    } else |err| switch (err) {
+        error.InvalidCharacter => unreachable, // Caught by the parser.
+        error.Overflow => {},
+    }
+
+    var base: u8 = 10;
+    var non_prefixed: []const u8 = prefixed_bytes;
+    if (mem.startsWith(u8, prefixed_bytes, "0x")) {
+        base = 16;
+        non_prefixed = prefixed_bytes[2..];
+    } else if (mem.startsWith(u8, prefixed_bytes, "0o")) {
+        base = 8;
+        non_prefixed = prefixed_bytes[2..];
+    } else if (mem.startsWith(u8, prefixed_bytes, "0b")) {
+        base = 2;
+        non_prefixed = prefixed_bytes[2..];
     }
+
+    const gpa = astgen.gpa;
+    var big_int = try std.math.big.int.Managed.init(gpa);
+    defer big_int.deinit();
+    big_int.setString(base, non_prefixed) catch |err| switch (err) {
+        error.InvalidCharacter => unreachable, // caught by parser
+        error.InvalidBase => unreachable, // we only pass 10, 16, 8, 2, see above
+        error.OutOfMemory => return error.OutOfMemory,
+    };
+
+    const limbs = big_int.limbs[0..big_int.len()];
+    assert(big_int.isPositive());
+    const result = try gz.addIntBig(limbs);
+    return rvalue(gz, scope, rl, result, node);
 }
 
 fn floatLiteral(
src/Module.zig
@@ -1423,6 +1423,26 @@ pub const Scope = struct {
             });
         }
 
+        pub fn addIntBig(gz: *GenZir, limbs: []const std.math.big.Limb) !Zir.Inst.Ref { // Emits an `int_big` instruction; the limbs are stored as raw bytes in `string_bytes`.
+            const astgen = gz.astgen;
+            const gpa = astgen.gpa;
+            try gz.instructions.ensureUnusedCapacity(gpa, 1); // Reserve all capacity up front so the `AssumeCapacity` appends below cannot fail.
+            try astgen.instructions.ensureUnusedCapacity(gpa, 1);
+            try astgen.string_bytes.ensureUnusedCapacity(gpa, @sizeOf(std.math.big.Limb) * limbs.len);
+
+            const new_index = @intCast(Zir.Inst.Index, astgen.instructions.len);
+            astgen.instructions.appendAssumeCapacity(.{
+                .tag = .int_big,
+                .data = .{ .str = .{
+                    .start = @intCast(u32, astgen.string_bytes.items.len), // Byte offset at which the limbs are appended below.
+                    .len = @intCast(u32, limbs.len), // Number of limbs, not number of bytes.
+                } },
+            });
+            gz.instructions.appendAssumeCapacity(new_index);
+            astgen.string_bytes.appendSliceAssumeCapacity(mem.sliceAsBytes(limbs));
+            return gz.indexToRef(new_index);
+        }
+
         pub fn addFloat(gz: *GenZir, number: f32, src_node: ast.Node.Index) !Zir.Inst.Ref {
             return gz.add(.{
                 .tag = .float,
@@ -1683,22 +1703,6 @@ pub const Scope = struct {
             return gz.indexToRef(new_index);
         }
 
-        /// Asserts that `str` is 8 or fewer bytes.
-        pub fn addSmallStr(
-            gz: *GenZir,
-            tag: Zir.Inst.Tag,
-            str: []const u8,
-        ) !Zir.Inst.Ref {
-            var buf: [9]u8 = undefined;
-            mem.copy(u8, &buf, str);
-            buf[str.len] = 0;
-
-            return gz.add(.{
-                .tag = tag,
-                .data = .{ .small_str = .{ .bytes = buf[0..8].* } },
-            });
-        }
-
         /// Note that this returns a `Zir.Inst.Index` not a ref.
         /// Does *not* append the block instruction to the scope.
         /// Leaves the `payload_index` field undefined.
src/Sema.zig
@@ -200,6 +200,7 @@ pub fn analyzeBody(
             .import                       => try sema.zirImport(block, inst),
             .indexable_ptr_len            => try sema.zirIndexablePtrLen(block, inst),
             .int                          => try sema.zirInt(block, inst),
+            .int_big                      => try sema.zirIntBig(block, inst),
             .float                        => try sema.zirFloat(block, inst),
             .float128                     => try sema.zirFloat128(block, inst),
             .int_type                     => try sema.zirIntType(block, inst),
@@ -219,7 +220,6 @@ pub fn analyzeBody(
             .optional_payload_unsafe      => try sema.zirOptionalPayload(block, inst, false),
             .optional_payload_unsafe_ptr  => try sema.zirOptionalPayloadPtr(block, inst, false),
             .optional_type                => try sema.zirOptionalType(block, inst),
-            .optional_type_from_ptr_elem  => try sema.zirOptionalTypeFromPtrElem(block, inst),
             .param_type                   => try sema.zirParamType(block, inst),
             .ptr_type                     => try sema.zirPtrType(block, inst),
             .ptr_type_simple              => try sema.zirPtrTypeSimple(block, inst),
@@ -1479,6 +1479,23 @@ fn zirInt(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*In
     return sema.mod.constIntUnsigned(sema.arena, .unneeded, Type.initTag(.comptime_int), int);
 }
 
+fn zirIntBig(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*Inst { // Analyzes an `int_big` instruction into a `comptime_int` constant.
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    const arena = sema.arena;
+    const int = sema.code.instructions.items(.data)[inst].str;
+    const byte_count = int.len * @sizeOf(std.math.big.Limb); // `int.len` is a limb count; convert it to a byte count.
+    const limb_bytes = sema.code.string_bytes[int.start..][0..byte_count];
+    const limbs = try arena.alloc(std.math.big.Limb, int.len); // Copy the limbs out of `string_bytes` into an arena-allocated `Limb` slice.
+    mem.copy(u8, mem.sliceAsBytes(limbs), limb_bytes);
+
+    return sema.mod.constInst(arena, .unneeded, .{
+        .ty = Type.initTag(.comptime_int),
+        .val = try Value.Tag.int_big_positive.create(arena, limbs), // Always tagged positive; no sign is read from the instruction data.
+    });
+}
+
 fn zirFloat(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*Inst {
     const arena = sema.arena;
     const inst_data = sema.code.instructions.items(.data)[inst].float;
@@ -2120,18 +2137,6 @@ fn zirOptionalType(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) Inner
     return sema.mod.constType(sema.arena, src, opt_type);
 }
 
-fn zirOptionalTypeFromPtrElem(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*Inst {
-    const tracy = trace(@src());
-    defer tracy.end();
-
-    const inst_data = sema.code.instructions.items(.data)[inst].un_node;
-    const ptr = try sema.resolveInst(inst_data.operand);
-    const elem_ty = ptr.ty.elemType();
-    const opt_ty = try sema.mod.optionalType(sema.arena, elem_ty);
-
-    return sema.mod.constType(sema.arena, inst_data.src(), opt_ty);
-}
-
 fn zirElemType(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*Inst {
     const inst_data = sema.code.instructions.items(.data)[inst].un_node;
     const src = inst_data.src();
src/Zir.zig
@@ -374,8 +374,10 @@ pub const Inst = struct {
         /// Implements the `@import` builtin.
         /// Uses the `str_tok` field.
         import,
-        /// Integer literal that fits in a u64. Uses the int union value.
+        /// Integer literal that fits in a u64. Uses the `int` union field.
         int,
+        /// Arbitrary sized integer literal. Uses the `str` union field.
+        int_big,
         /// A float literal that fits in a f32. Uses the float union value.
         float,
         /// A float literal that fits in a f128. Uses the `pl_node` union value.
@@ -540,10 +542,6 @@ pub const Inst = struct {
         /// Create an optional type '?T'
         /// Uses the `un_node` field.
         optional_type,
-        /// Create an optional type '?T'. The operand is a pointer value. The optional type will
-        /// be the type of the pointer element, wrapped in an optional.
-        /// Uses the `un_node` field.
-        optional_type_from_ptr_elem,
         /// ?T => T with safety.
         /// Given an optional value, returns the payload value, with a safety check that
         /// the value is non-null. Used for `orelse`, `if` and `while`.
@@ -1030,6 +1028,7 @@ pub const Inst = struct {
                 .func_inferred,
                 .has_decl,
                 .int,
+                .int_big,
                 .float,
                 .float128,
                 .intcast,
@@ -1061,7 +1060,6 @@ pub const Inst = struct {
                 .typeof_elem,
                 .xor,
                 .optional_type,
-                .optional_type_from_ptr_elem,
                 .optional_payload_safe,
                 .optional_payload_unsafe,
                 .optional_payload_safe_ptr,
@@ -1700,17 +1698,6 @@ pub const Inst = struct {
                 return code.string_bytes[self.start..][0..self.len];
             }
         },
-        /// Strings 8 or fewer bytes which may not contain null bytes.
-        small_str: struct {
-            bytes: [8]u8,
-
-            pub fn get(self: @This()) []const u8 {
-                const end = for (self.bytes) |byte, i| {
-                    if (byte == 0) break i;
-                } else self.bytes.len;
-                return self.bytes[0..end];
-            }
-        },
         str_tok: struct {
             /// Offset into `string_bytes`. Null-terminated.
             start: u32,
@@ -2324,7 +2311,6 @@ const Writer = struct {
             .ret_node,
             .resolve_inferred_alloc,
             .optional_type,
-            .optional_type_from_ptr_elem,
             .optional_payload_safe,
             .optional_payload_unsafe,
             .optional_payload_safe_ptr,
@@ -2405,6 +2391,7 @@ const Writer = struct {
             .ptr_type_simple => try self.writePtrTypeSimple(stream, inst),
             .ptr_type => try self.writePtrType(stream, inst),
             .int => try self.writeInt(stream, inst),
+            .int_big => try self.writeIntBig(stream, inst),
             .float => try self.writeFloat(stream, inst),
             .float128 => try self.writeFloat128(stream, inst),
             .str => try self.writeStr(stream, inst),
@@ -2710,15 +2697,30 @@ const Writer = struct {
         try stream.writeAll("TODO)");
     }
 
-    fn writeInt(
-        self: *Writer,
-        stream: anytype,
-        inst: Inst.Index,
-    ) (@TypeOf(stream).Error || error{OutOfMemory})!void {
+    fn writeInt(self: *Writer, stream: anytype, inst: Inst.Index) !void {
         const inst_data = self.code.instructions.items(.data)[inst].int;
         try stream.print("{d})", .{inst_data});
     }
 
+    fn writeIntBig(self: *Writer, stream: anytype, inst: Inst.Index) !void { // Prints the limbs of an `int_big` instruction as a base-10 number followed by `)`.
+        const inst_data = self.code.instructions.items(.data)[inst].str;
+        const byte_count = inst_data.len * @sizeOf(std.math.big.Limb); // `inst_data.len` is a limb count; convert it to a byte count.
+        const limb_bytes = self.code.string_bytes[inst_data.start..][0..byte_count];
+        // `limb_bytes` is not guaranteed to be aligned for `Limb`, so we allocate a
+        // properly aligned limb buffer and copy the bytes into it before use.
+        const limbs = try self.gpa.alloc(std.math.big.Limb, inst_data.len);
+        defer self.gpa.free(limbs);
+
+        mem.copy(u8, mem.sliceAsBytes(limbs), limb_bytes);
+        const big_int: std.math.big.int.Const = .{
+            .limbs = limbs,
+            .positive = true,
+        };
+        const as_string = try big_int.toStringAlloc(self.gpa, 10, false); // Temporary base-10 rendering; freed when this function returns.
+        defer self.gpa.free(as_string);
+        try stream.print("{s})", .{as_string});
+    }
+
     fn writeFloat(self: *Writer, stream: anytype, inst: Inst.Index) !void {
         const inst_data = self.code.instructions.items(.data)[inst].float;
         const src = inst_data.src();