Commit 0fc7c9f57c

Andrew Kelley <andrew@ziglang.org>
2025-02-26 02:26:19
switch from "id" to "nonce"
mainly this addresses the following use case: 1. Someone creates a template with build.zig.zon, id field included (note that zig init does not create this problem since it generates fresh id every time it runs). 2. User A uses the template, changing package name to "example" but not id field. 3. User B uses the same template, changing package name also to "example", also not changing the id field. Here, both packages have unintentional conflicting logical ids. By making the field a combination of name checksum + random id, this accident is avoided. "nonce" is an OK name for this. Also relaxes errors on remote packages when using `zig fetch`.
1 parent a70307e
doc/build.zig.zon.md
@@ -22,21 +22,30 @@ Zig package namespace.
 
 Must be a valid bare Zig identifier (don't `@` me), limited to 32 bytes.
 
-### `id`
+### `nonce`
 
 Together with name, this represents a globally unique package identifier. This
-field should be initialized with a 16-bit random number when the package is
-first created, and then *never change*. This allows Zig to unambiguously detect
-when one package is an updated version of another.
+field is auto-initialized by the toolchain when the package is first created,
+and then *never changes*. This allows Zig to unambiguously detect when one
+package is an updated version of another.
 
-When forking a Zig project, this id should be regenerated with a new random
-number if the upstream project is still maintained. Otherwise, the fork is
-*hostile*, attempting to take control over the original project's identity.
+When forking a Zig project, this nonce should be regenerated if the upstream
+project is still maintained. Otherwise, the fork is *hostile*, attempting to
+take control over the original project's identity. The nonce can be regenerated
+by deleting the field and running `zig build`.
 
-`0x0000` is invalid because it obviously means a random number wasn't used.
+This 64-bit integer is the combination of a 16-bit id component, a 32-bit
+checksum, and 16 bits of reserved zeroes.
+
+The id component within the nonce has these restrictions:
+
+`0x0000` is reserved for legacy packages.
 
 `0xffff` is reserved to represent "naked" packages.
 
+The checksum is computed from `name` and serves to protect Zig users from
+accidental id collisions.
+
 ### `version`
 
 String. Required.
lib/init/build.zig.zon
@@ -13,17 +13,18 @@
     .version = "0.0.0",
 
     // Together with name, this represents a globally unique package
-    // identifier. This field should be initialized with a 16-bit random number
-    // when the package is first created, and then *never change*. This allows
-    // unambiguous detection when one package is an updated version of another.
+    // identifier. This field is generated by the Zig toolchain when the
+    // package is first created, and then *never changes*. This allows
+    // unambiguous detection of one package being an updated version of
+    // another.
     //
-    // When forking a Zig project, this id should be regenerated with a new
-    // random number if the upstream project is still maintained. Otherwise,
-    // the fork is *hostile*, attempting to take control over the original
-    // project's identity. Thus it is recommended to leave the comment on the
-    // following line intact, so that it shows up in code reviews that modify
-    // the field.
-    .id = $i, // Changing this has security and trust implications.
+    // When forking a Zig project, this id should be regenerated (delete the
+    // field and run `zig build`) if the upstream project is still maintained.
+    // Otherwise, the fork is *hostile*, attempting to take control over the
+    // original project's identity. Thus it is recommended to leave the comment
+    // on the following line intact, so that it shows up in code reviews that
+    // modify the field.
+    .nonce = $i, // Changing this has security and trust implications.
 
     // Tracks the earliest Zig version that the package considers to be a
     // supported use case.
src/Package/Fetch.zig
@@ -44,6 +44,8 @@ omit_missing_hash_error: bool,
 /// which specifies inclusion rules. This is intended to be true for the first
 /// fetch task and false for the recursive dependencies.
 allow_missing_paths_field: bool,
+allow_missing_nonce: bool,
+allow_name_string: bool,
 /// If true and URL points to a Git repository, will use the latest commit.
 use_latest_commit: bool,
 
@@ -372,7 +374,7 @@ pub fn run(f: *Fetch) RunError!void {
     };
 
     if (remote.hash) |expected_hash| {
-        var prefixed_pkg_sub_path_buffer: [100]u8 = undefined;
+        var prefixed_pkg_sub_path_buffer: [Package.Hash.max_len + 2]u8 = undefined;
         prefixed_pkg_sub_path_buffer[0] = 'p';
         prefixed_pkg_sub_path_buffer[1] = fs.path.sep;
         const hash_slice = expected_hash.toSlice();
@@ -647,8 +649,8 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void {
 
     f.manifest = try Manifest.parse(arena, ast.*, .{
         .allow_missing_paths_field = f.allow_missing_paths_field,
-        .allow_missing_id = f.allow_missing_paths_field,
-        .allow_name_string = f.allow_missing_paths_field,
+        .allow_missing_nonce = f.allow_missing_nonce,
+        .allow_name_string = f.allow_name_string,
     });
     const manifest = &f.manifest.?;
 
@@ -750,6 +752,8 @@ fn queueJobsForDeps(f: *Fetch) RunError!void {
                 .job_queue = f.job_queue,
                 .omit_missing_hash_error = false,
                 .allow_missing_paths_field = true,
+                .allow_missing_nonce = true,
+                .allow_name_string = true,
                 .use_latest_commit = false,
 
                 .package_root = undefined,
@@ -2319,6 +2323,8 @@ const TestFetchBuilder = struct {
             .job_queue = &self.job_queue,
             .omit_missing_hash_error = true,
             .allow_missing_paths_field = false,
+            .allow_missing_nonce = true, // so we can keep using the old testdata .tar.gz
+            .allow_name_string = true, // so we can keep using the old testdata .tar.gz
             .use_latest_commit = true,
 
             .package_root = undefined,
src/Package/Manifest.zig
@@ -52,7 +52,7 @@ pub const ParseOptions = struct {
     /// Deprecated, to be removed after 0.14.0 is tagged.
     allow_name_string: bool = true,
     /// Deprecated, to be removed after 0.14.0 is tagged.
-    allow_missing_id: bool = true,
+    allow_missing_nonce: bool = true,
 };
 
 pub const Error = Allocator.Error;
@@ -81,7 +81,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest {
         .paths = .{},
         .allow_missing_paths_field = options.allow_missing_paths_field,
         .allow_name_string = options.allow_name_string,
-        .allow_missing_id = options.allow_missing_id,
+        .allow_missing_nonce = options.allow_missing_nonce,
         .minimum_zig_version = null,
         .buf = .{},
     };
@@ -157,7 +157,7 @@ const Parse = struct {
     paths: std.StringArrayHashMapUnmanaged(void),
     allow_missing_paths_field: bool,
     allow_name_string: bool,
-    allow_missing_id: bool,
+    allow_missing_nonce: bool,
     minimum_zig_version: ?std.SemanticVersion,
 
     const InnerError = error{ ParseFailure, OutOfMemory };
@@ -175,7 +175,7 @@ const Parse = struct {
         var have_name = false;
         var have_version = false;
         var have_included_paths = false;
-        var have_id = false;
+        var nonce: ?Package.Nonce = null;
 
         for (struct_init.ast.fields) |field_init| {
             const name_token = ast.firstToken(field_init) - 2;
@@ -192,9 +192,8 @@ const Parse = struct {
             } else if (mem.eql(u8, field_name, "name")) {
                 p.name = try parseName(p, field_init);
                 have_name = true;
-            } else if (mem.eql(u8, field_name, "id")) {
-                p.id = try parseId(p, field_init);
-                have_id = true;
+            } else if (mem.eql(u8, field_name, "nonce")) {
+                nonce = try parseNonce(p, field_init);
             } else if (mem.eql(u8, field_name, "version")) {
                 p.version_node = field_init;
                 const version_text = try parseString(p, field_init);
@@ -218,14 +217,23 @@ const Parse = struct {
             }
         }
 
-        if (!have_id and !p.allow_missing_id) {
-            try appendError(p, main_token, "missing top-level 'id' field; suggested value: 0x{x}", .{
-                Package.randomId(),
-            });
-        }
-
         if (!have_name) {
             try appendError(p, main_token, "missing top-level 'name' field", .{});
+        } else {
+            if (nonce) |n| {
+                if (!n.validate(p.name)) {
+                    return fail(p, main_token, "invalid nonce: 0x{x}; if this is a new or forked package, use this value: 0x{x}", .{
+                        n.int(), Package.Nonce.generate(p.name).int(),
+                    });
+                }
+                p.id = n.id;
+            } else if (!p.allow_missing_nonce) {
+                try appendError(p, main_token, "missing top-level 'nonce' field; suggested value: 0x{x}", .{
+                    Package.Nonce.generate(p.name).int(),
+                });
+            } else {
+                p.id = 0;
+            }
         }
 
         if (!have_version) {
@@ -377,7 +385,7 @@ const Parse = struct {
         }
     }
 
-    fn parseId(p: *Parse, node: Ast.Node.Index) !u16 {
+    fn parseNonce(p: *Parse, node: Ast.Node.Index) !Package.Nonce {
         const ast = p.ast;
         const node_tags = ast.nodes.items(.tag);
         const main_tokens = ast.nodes.items(.main_token);
@@ -387,20 +395,12 @@ const Parse = struct {
         }
         const token_bytes = ast.tokenSlice(main_token);
         const parsed = std.zig.parseNumberLiteral(token_bytes);
-        const n = switch (parsed) {
-            .int => |n| n,
-            .big_int, .float => return fail(p, main_token, "expected u16 integer literal, found {s}", .{
+        switch (parsed) {
+            .int => |n| return @bitCast(n),
+            .big_int, .float => return fail(p, main_token, "expected u64 integer literal, found {s}", .{
                 @tagName(parsed),
             }),
             .failure => |err| return fail(p, main_token, "bad integer literal: {s}", .{@tagName(err)}),
-        };
-        const casted = std.math.cast(u16, n) orelse
-            return fail(p, main_token, "integer value {d} does not fit into u16", .{n});
-        switch (casted) {
-            0x0000, 0xffff => return fail(p, main_token, "id value 0x{x} reserved; use 0x{x} instead", .{
-                casted, Package.randomId(),
-            }),
-            else => return casted,
         }
     }
 
src/main.zig
@@ -4752,10 +4752,10 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void {
     };
     var ok_count: usize = 0;
 
-    const id = Package.randomId();
+    const nonce: Package.Nonce = .generate(sanitized_root_name);
 
     for (template_paths) |template_path| {
-        if (templates.write(arena, fs.cwd(), sanitized_root_name, template_path, id)) |_| {
+        if (templates.write(arena, fs.cwd(), sanitized_root_name, template_path, nonce)) |_| {
             std.log.info("created {s}", .{template_path});
             ok_count += 1;
         } else |err| switch (err) {
@@ -5225,6 +5225,8 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void {
                     .job_queue = &job_queue,
                     .omit_missing_hash_error = true,
                     .allow_missing_paths_field = false,
+                    .allow_missing_nonce = false,
+                    .allow_name_string = false,
                     .use_latest_commit = false,
 
                     .package_root = undefined,
@@ -7125,6 +7127,8 @@ fn cmdFetch(
         .job_queue = &job_queue,
         .omit_missing_hash_error = true,
         .allow_missing_paths_field = false,
+        .allow_missing_nonce = true,
+        .allow_name_string = true,
         .use_latest_commit = true,
 
         .package_root = undefined,
@@ -7464,10 +7468,10 @@ fn loadManifest(
             0,
         ) catch |err| switch (err) {
             error.FileNotFound => {
-                const id = Package.randomId();
+                const nonce: Package.Nonce = .generate(options.root_name);
                 var templates = findTemplates(gpa, arena);
                 defer templates.deinit();
-                templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, id) catch |e| {
+                templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, nonce) catch |e| {
                     fatal("unable to write {s}: {s}", .{
                         Package.Manifest.basename, @errorName(e),
                     });
@@ -7525,7 +7529,7 @@ const Templates = struct {
         out_dir: fs.Dir,
         root_name: []const u8,
         template_path: []const u8,
-        id: u16,
+        nonce: Package.Nonce,
     ) !void {
         if (fs.path.dirname(template_path)) |dirname| {
             out_dir.makePath(dirname) catch |err| {
@@ -7551,7 +7555,7 @@ const Templates = struct {
                     state = .start;
                 },
                 'i' => {
-                    try templates.buffer.writer().print("0x{x}", .{id});
+                    try templates.buffer.writer().print("0x{x}", .{nonce.int()});
                     state = .start;
                 },
                 'v' => {
src/Package.zig
@@ -10,9 +10,29 @@ pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
 pub const multihash_hex_digest_len = 2 * multihash_len;
 pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
 
-pub fn randomId() u16 {
-    return std.crypto.random.intRangeLessThan(u16, 0x0001, 0xffff);
-}
+pub const Nonce = packed struct(u64) {
+    id: u16,
+    reserved: u16 = 0,
+    checksum: u32,
+
+    pub fn generate(name: []const u8) Nonce {
+        return .{
+            .id = std.crypto.random.intRangeLessThan(u16, 0x0001, 0xffff),
+            .checksum = std.hash.Crc32.hash(name),
+        };
+    }
+
+    pub fn validate(n: Nonce, name: []const u8) bool {
+        switch (n.id) {
+            0x0000, 0xffff => return false,
+            else => return std.hash.Crc32.hash(name) == n.checksum,
+        }
+    }
+
+    pub fn int(n: Nonce) u64 {
+        return @bitCast(n);
+    }
+};
 
 /// A user-readable, file system safe hash that identifies an exact package
 /// snapshot, including file contents.
@@ -72,9 +92,10 @@ pub const Hash = struct {
     }
 
     /// Produces "$name-$semver-$hashplus".
-    /// * name is the name field from build.zig.zon, truncated at 32 bytes and must
-    ///   be a valid zig identifier
-    /// * semver is the version field from build.zig.zon, truncated at 32 bytes
+    /// * name is the name field from build.zig.zon, asserted to be at most 32
+    ///   bytes and assumed be a valid zig identifier
+    /// * semver is the version field from build.zig.zon, asserted to be at
+    ///   most 32 bytes
     /// * hashplus is the following 39-byte array, base64 encoded using -_ to make
     ///   it filesystem safe:
     ///   - (2 bytes) LE u16 Package ID
build.zig.zon
@@ -12,5 +12,5 @@
         },
     },
     .paths = .{""},
-    .id = 0x1cb6,
+    .nonce = 0xc1ce10810000f013,
 }