master
//! To get started, run this tool with no args and read the help message.
//!
//! The build systems of glibc, musl, FreeBSD, and NetBSD require specifying a single target
//! architecture. Meanwhile, Zig supports out-of-the-box cross compilation for
//! every target. So the process to create libc headers that Zig ships is to use
//! this tool.
//!
//! First, use the glibc, musl, FreeBSD, and NetBSD build systems to create installations of all the
//! targets in the `glibc_targets`, `musl_targets`, `freebsd_targets`, and `netbsd_targets`
//! variables. Next, run this tool to create a new directory which puts .h files into
//! <arch> subdirectories, with `generic` being files that apply to all architectures.
//! You'll then have to manually update the Zig source repo with these new files.
13
14const std = @import("std");
15const Arch = std.Target.Cpu.Arch;
16const Abi = std.Target.Abi;
17const OsTag = std.Target.Os.Tag;
18const assert = std.debug.assert;
19const Blake3 = std.crypto.hash.Blake3;
20
/// One libc installation to scan, identified by CPU architecture and ABI.
const LibCTarget = struct {
    /// CPU architecture of the installation.
    arch: Arch,
    /// ABI of the installation.
    abi: Abi,
    /// Optional output directory name. When set, it is used instead of the
    /// `<arch>-<os>-<abi>` name that `main` would otherwise build, which lets
    /// several targets share a single output directory.
    dest: ?[]const u8 = null,
};
26
/// Targets scanned when `--abi glibc` is selected.
///
/// Entries that share a `dest` are written into the same output directory.
/// Order matters: when two entries with the same `dest` provide the same
/// relative path, the entry listed first is the one recorded for that directory.
const glibc_targets = [_]LibCTarget{
    .{ .arch = .arc, .abi = .gnu },

    // arm / armeb
    .{ .arch = .arm, .abi = .gnueabi, .dest = "arm-linux-gnu" },
    .{ .arch = .arm, .abi = .gnueabihf, .dest = "arm-linux-gnu" },
    .{ .arch = .armeb, .abi = .gnueabi, .dest = "arm-linux-gnu" },
    .{ .arch = .armeb, .abi = .gnueabihf, .dest = "arm-linux-gnu" },

    // aarch64 / aarch64_be
    .{ .arch = .aarch64, .abi = .gnu, .dest = "aarch64-linux-gnu" },
    .{ .arch = .aarch64_be, .abi = .gnu, .dest = "aarch64-linux-gnu" },

    // csky
    .{ .arch = .csky, .abi = .gnueabi, .dest = "csky-linux-gnu" },
    .{ .arch = .csky, .abi = .gnueabihf, .dest = "csky-linux-gnu" },

    // loongarch64
    .{ .arch = .loongarch64, .abi = .gnu, .dest = "loongarch-linux-gnu" },
    .{ .arch = .loongarch64, .abi = .gnusf, .dest = "loongarch-linux-gnu" },

    .{ .arch = .m68k, .abi = .gnu },

    // mips family
    .{ .arch = .mips, .abi = .gnueabi, .dest = "mips-linux-gnu" },
    .{ .arch = .mips, .abi = .gnueabihf, .dest = "mips-linux-gnu" },
    .{ .arch = .mipsel, .abi = .gnueabi, .dest = "mips-linux-gnu" },
    .{ .arch = .mipsel, .abi = .gnueabihf, .dest = "mips-linux-gnu" },
    .{ .arch = .mips64, .abi = .gnuabi64, .dest = "mips-linux-gnu" },
    .{ .arch = .mips64, .abi = .gnuabin32, .dest = "mips-linux-gnu" },
    .{ .arch = .mips64el, .abi = .gnuabi64, .dest = "mips-linux-gnu" },
    .{ .arch = .mips64el, .abi = .gnuabin32, .dest = "mips-linux-gnu" },

    // powerpc family
    .{ .arch = .powerpc, .abi = .gnueabi, .dest = "powerpc-linux-gnu" },
    .{ .arch = .powerpc, .abi = .gnueabihf, .dest = "powerpc-linux-gnu" },
    .{ .arch = .powerpc64, .abi = .gnu, .dest = "powerpc-linux-gnu" },
    .{ .arch = .powerpc64le, .abi = .gnu, .dest = "powerpc-linux-gnu" },

    // riscv
    .{ .arch = .riscv32, .abi = .gnu, .dest = "riscv-linux-gnu" },
    .{ .arch = .riscv64, .abi = .gnu, .dest = "riscv-linux-gnu" },

    .{ .arch = .s390x, .abi = .gnu },

    // sparc
    .{ .arch = .sparc, .abi = .gnu, .dest = "sparc-linux-gnu" },
    .{ .arch = .sparc64, .abi = .gnu, .dest = "sparc-linux-gnu" },

    // x86 family
    .{ .arch = .x86, .abi = .gnu, .dest = "x86-linux-gnu" },
    .{ .arch = .x86_64, .abi = .gnu, .dest = "x86-linux-gnu" },
    .{ .arch = .x86_64, .abi = .gnux32, .dest = "x86-linux-gnu" },
};
61
/// Targets scanned when `--abi musl` is selected.
///
/// No entry sets `dest`, so each target gets its own `<arch>-linux-<abi>`
/// output directory.
const musl_targets = [_]LibCTarget{
    .{ .arch = .arm, .abi = .musl },
    .{ .arch = .aarch64, .abi = .musl },
    .{ .arch = .hexagon, .abi = .musl },
    .{ .arch = .loongarch64, .abi = .musl },
    .{ .arch = .m68k, .abi = .musl },

    // mips
    .{ .arch = .mips, .abi = .musl },
    .{ .arch = .mips64, .abi = .musl },
    .{ .arch = .mips64, .abi = .muslabin32 },

    // powerpc
    .{ .arch = .powerpc, .abi = .musl },
    .{ .arch = .powerpc64, .abi = .musl },

    // riscv
    .{ .arch = .riscv32, .abi = .musl },
    .{ .arch = .riscv64, .abi = .musl },

    .{ .arch = .s390x, .abi = .musl },

    // x86
    .{ .arch = .x86, .abi = .musl },
    .{ .arch = .x86_64, .abi = .musl },
    .{ .arch = .x86_64, .abi = .muslx32 },
};
80
/// Targets scanned when `--abi freebsd` is selected.
///
/// Every architecture listed here must have a matching prong in the FreeBSD
/// directory-name switch in `main`; other architectures hit `unreachable` there.
const freebsd_targets = [_]LibCTarget{
    // Hard-float EABI targets.
    .{ .arch = .arm, .abi = .eabihf },
    .{ .arch = .aarch64, .abi = .none },
    .{ .arch = .powerpc, .abi = .eabihf },

    .{ .arch = .powerpc64, .abi = .none },
    .{ .arch = .riscv64, .abi = .none },
    .{ .arch = .x86, .abi = .none },
    .{ .arch = .x86_64, .abi = .none },
};
90
/// Targets scanned when `--abi netbsd` is selected.
///
/// The soft- and hard-float variants of arm, mips, and powerpc share one
/// output directory through `dest`.
const netbsd_targets = [_]LibCTarget{
    // arm
    .{ .arch = .arm, .abi = .eabi, .dest = "arm-netbsd-eabi" },
    .{ .arch = .arm, .abi = .eabihf, .dest = "arm-netbsd-eabi" },

    .{ .arch = .aarch64, .abi = .none },
    .{ .arch = .m68k, .abi = .none },

    // mips
    .{ .arch = .mips, .abi = .eabi, .dest = "mips-netbsd-eabi" },
    .{ .arch = .mips, .abi = .eabihf, .dest = "mips-netbsd-eabi" },

    // powerpc
    .{ .arch = .powerpc, .abi = .eabi, .dest = "powerpc-netbsd-eabi" },
    .{ .arch = .powerpc, .abi = .eabihf, .dest = "powerpc-netbsd-eabi" },

    .{ .arch = .sparc, .abi = .none },
    .{ .arch = .sparc64, .abi = .none },
    .{ .arch = .x86, .abi = .none },
    .{ .arch = .x86_64, .abi = .none },
};
105
/// Bookkeeping for one distinct header body found while scanning.
const Contents = struct {
    /// The bytes that will be written to the output tree.
    bytes: []const u8,
    /// How many scanned files produced this exact entry.
    hit_count: usize,
    /// The hash this entry is stored under.
    hash: []const u8,
    /// Set once the entry has been written to the generic directory.
    is_generic: bool,

    /// Ascending `hit_count` ordering, suitable as a `std.mem.sort` comparator.
    fn hitCountLessThan(context: void, lhs: *const Contents, rhs: *const Contents) bool {
        _ = context;
        const left_hits = lhs.hit_count;
        const right_hits = rhs.hit_count;
        return right_hits > left_hits;
    }
};
117
/// Maps a content hash to the `Contents` record stored for it.
const HashToContents = std.StringHashMap(Contents);

/// Maps an output target directory name to the content hash chosen for it.
/// Insertion order is preserved.
const TargetToHash = std.StringArrayHashMap([]const u8);

/// Maps a header path (relative to the include directory) to its per-target hashes.
const PathTable = std.StringHashMap(*TargetToHash);
121
/// The libc flavors this tool can process. The tag names are exactly the
/// values accepted by the `--abi` command-line option.
const LibCVendor = enum { musl, glibc, freebsd, netbsd };
128
/// Collects libc headers from per-target installations and writes a
/// deduplicated header tree.
///
/// Command line: `[--search-path <dir>]... --out <dir> --abi <glibc|musl|freebsd|netbsd>`.
///
/// For every target of the selected vendor, the search paths are tried in
/// order and the first one whose include directory can be walked is used.
/// Each file is identified by a BLAKE3 hash of its relative path followed by
/// its whitespace-trimmed contents. Afterwards, for each relative path, the
/// contents with the highest hit count are written below
/// `<out>/generic-<abi>/` (only when seen more than once), and every other
/// variant is written below `<out>/<dest target>/`.
///
/// Progress messages, warnings, and a size summary are printed with
/// `std.debug.print`. An invalid command line prints the usage text and exits
/// with status 1. File system and allocation errors are returned to the caller.
pub fn main() !void {
    // The arena is never freed; everything allocated here stays alive until
    // the process ends.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    const allocator = arena.allocator();
    const args = try std.process.argsAlloc(allocator);
    var search_paths = std.array_list.Managed([]const u8).init(allocator);
    var opt_out_dir: ?[]const u8 = null;
    var opt_abi: ?[]const u8 = null;

    // Every option except `--help` takes exactly one value, so each iteration
    // consumes two arguments: one at the bottom of the body and one in the
    // loop's continue expression.
    var arg_i: usize = 1;
    while (arg_i < args.len) : (arg_i += 1) {
        const arg = args[arg_i];
        if (std.mem.eql(u8, arg, "--help"))
            usageAndExit(args[0]);
        if (arg_i + 1 >= args.len) {
            std.debug.print("expected argument after '{s}'\n", .{arg});
            usageAndExit(args[0]);
        }
        const value = args[arg_i + 1];

        if (std.mem.eql(u8, arg, "--search-path")) {
            try search_paths.append(value);
        } else if (std.mem.eql(u8, arg, "--out")) {
            // User input must not be validated with `assert`: a failed assert
            // is undefined behaviour in unsafe release builds.
            if (opt_out_dir != null) {
                std.debug.print("duplicate argument: {s}\n", .{arg});
                usageAndExit(args[0]);
            }
            opt_out_dir = value;
        } else if (std.mem.eql(u8, arg, "--abi")) {
            if (opt_abi != null) {
                std.debug.print("duplicate argument: {s}\n", .{arg});
                usageAndExit(args[0]);
            }
            opt_abi = value;
        } else {
            std.debug.print("unrecognized argument: {s}\n", .{arg});
            usageAndExit(args[0]);
        }

        arg_i += 1;
    }

    const out_dir = opt_out_dir orelse usageAndExit(args[0]);
    const abi_name = opt_abi orelse usageAndExit(args[0]);
    const vendor = std.meta.stringToEnum(LibCVendor, abi_name) orelse {
        std.debug.print("unrecognized C ABI: {s}\n", .{abi_name});
        usageAndExit(args[0]);
    };

    const generic_name = try std.fmt.allocPrint(allocator, "generic-{s}", .{abi_name});
    // The tables have different lengths, so view them through a common slice type.
    const libc_targets: []const LibCTarget = switch (vendor) {
        .glibc => &glibc_targets,
        .musl => &musl_targets,
        .freebsd => &freebsd_targets,
        .netbsd => &netbsd_targets,
    };

    var path_table = PathTable.init(allocator);
    var hash_to_contents = HashToContents.init(allocator);
    var max_bytes_saved: usize = 0;
    var total_bytes: usize = 0;

    for (libc_targets) |libc_target| {
        // Name of this target's subdirectory inside a search path.
        const libc_dir = switch (vendor) {
            .glibc => try std.zig.target.glibcRuntimeTriple(allocator, libc_target.arch, .linux, libc_target.abi),
            .musl => std.zig.target.muslArchName(libc_target.arch, libc_target.abi),
            .freebsd => switch (libc_target.arch) {
                .arm => "armv7",
                .x86 => "i386",
                .x86_64 => "amd64",

                .aarch64,
                .powerpc,
                .powerpc64,
                .riscv64,
                => |a| @tagName(a),

                // Only architectures listed in `freebsd_targets` reach this switch.
                else => unreachable,
            },
            .netbsd => switch (libc_target.arch) {
                .arm => if (libc_target.abi == .eabihf) "evbarmv7hf" else "evbarmv7",
                .aarch64 => "evbarm64",
                .m68k => "mac68k",
                .mips => if (libc_target.abi == .eabihf) "evbmips" else "evbmipssf",
                .powerpc => if (libc_target.abi == .eabihf) "evbppc" else "evbppcsf",
                .x86 => "i386",
                .x86_64 => "amd64",

                .sparc,
                .sparc64,
                => |a| @tagName(a),

                // Only architectures listed in `netbsd_targets` reach this switch.
                else => unreachable,
            },
        };

        // Name of the output directory for headers specific to this target.
        const dest_target = if (libc_target.dest) |dest| dest else try std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{
            @tagName(libc_target.arch),
            switch (vendor) {
                .musl, .glibc => "linux",
                .freebsd => "freebsd",
                .netbsd => "netbsd",
            },
            @tagName(libc_target.abi),
        });

        search: for (search_paths.items) |search_path| {
            const sub_path = switch (vendor) {
                .glibc,
                .freebsd,
                .netbsd,
                => &[_][]const u8{ search_path, libc_dir, "usr", "include" },
                .musl => &[_][]const u8{ search_path, libc_dir, "usr", "local", "musl", "include" },
            };
            const target_include_dir = try std.fs.path.join(allocator, sub_path);

            // Depth-first walk of the include directory using an explicit stack.
            var dir_stack = std.array_list.Managed([]const u8).init(allocator);
            try dir_stack.append(target_include_dir);

            while (dir_stack.pop()) |full_dir_name| {
                // A missing or unreadable directory abandons this search path
                // and moves on to the next one.
                var dir = std.fs.cwd().openDir(full_dir_name, .{ .iterate = true }) catch |err| switch (err) {
                    error.FileNotFound => continue :search,
                    error.AccessDenied => continue :search,
                    else => return err,
                };
                defer dir.close();

                var dir_it = dir.iterate();

                while (try dir_it.next()) |entry| {
                    const full_path = try std.fs.path.join(allocator, &[_][]const u8{ full_dir_name, entry.name });
                    switch (entry.kind) {
                        .directory => try dir_stack.append(full_path),
                        .file, .sym_link => {
                            const rel_path = try std.fs.path.relative(allocator, target_include_dir, full_path);
                            const max_size = 2 * 1024 * 1024 * 1024;
                            const raw_bytes = try std.fs.cwd().readFileAlloc(full_path, allocator, .limited(max_size));
                            const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t");
                            total_bytes += raw_bytes.len;

                            // The relative path is part of the hash, so identical
                            // bytes at different paths are distinct entries.
                            const hash = try allocator.alloc(u8, Blake3.digest_length);
                            var hasher = Blake3.init(.{});
                            hasher.update(rel_path);
                            hasher.update(trimmed);
                            hasher.final(hash);

                            const gop = try hash_to_contents.getOrPut(hash);
                            if (gop.found_existing) {
                                max_bytes_saved += raw_bytes.len;
                                gop.value_ptr.hit_count += 1;
                                std.debug.print("duplicate: {s} {s} ({B})\n", .{
                                    libc_dir,
                                    rel_path,
                                    raw_bytes.len,
                                });
                            } else {
                                gop.value_ptr.* = Contents{
                                    .bytes = trimmed,
                                    .hit_count = 1,
                                    .hash = hash,
                                    .is_generic = false,
                                };
                            }

                            const path_gop = try path_table.getOrPut(rel_path);
                            const target_to_hash = if (path_gop.found_existing) path_gop.value_ptr.* else blk: {
                                const ptr = try allocator.create(TargetToHash);
                                ptr.* = TargetToHash.init(allocator);
                                path_gop.value_ptr.* = ptr;
                                break :blk ptr;
                            };

                            // When `dest` is set, there are a few rare cases where we expect to overwrite a header. For
                            // example, `bits/long-double.h` differs very slightly between `powerpc64le-linux-gnu` and
                            // other `powerpc*-linux-gnu` targets, and we unify those targets as `powerpc-linux-gnu`. In
                            // such cases, we manually patch the affected header after processing, so it's fine that
                            // only one header wins here.
                            if (libc_target.dest != null) {
                                const hash_gop = try target_to_hash.getOrPut(dest_target);
                                if (hash_gop.found_existing) {
                                    // The previously recorded hash is left in place;
                                    // only the message is emitted.
                                    std.debug.print("overwrote: {s} {s} {s}\n", .{
                                        libc_dir,
                                        rel_path,
                                        dest_target,
                                    });
                                } else {
                                    hash_gop.value_ptr.* = hash;
                                }
                            } else {
                                try target_to_hash.putNoClobber(dest_target, hash);
                            }
                        },
                        else => std.debug.print("warning: weird file: {s}\n", .{full_path}),
                    }
                }
            }
            // This search path was walked to completion; do not try the others.
            break;
        } else {
            // Runs only when no search path was walked to completion.
            std.debug.print("warning: libc target not found: {s}\n", .{libc_dir});
        }
    }
    std.debug.print("summary: {B} could be reduced to {B}\n", .{
        total_bytes,
        total_bytes - max_bytes_saved,
    });
    try std.fs.cwd().makePath(out_dir);

    var missed_opportunity_bytes: usize = 0;
    // iterate path_table. for each path, put all the hashes into a list. sort by hit_count.
    // the hash with the highest hit_count gets to be the "generic" one. everybody else
    // gets their header in a separate arch directory.
    var path_it = path_table.iterator();
    while (path_it.next()) |path_kv| {
        var contents_list = std.array_list.Managed(*Contents).init(allocator);
        {
            var hash_it = path_kv.value_ptr.*.iterator();
            while (hash_it.next()) |hash_kv| {
                const contents = hash_to_contents.getPtr(hash_kv.value_ptr.*).?;
                try contents_list.append(contents);
            }
        }
        // Ascending sort, so the most frequently seen contents end up last.
        std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
        // Every path in the table has at least one target entry, so the list is non-empty.
        const best_contents = contents_list.pop().?;
        if (best_contents.hit_count > 1) {
            // worth it to make it generic
            const full_path = try std.fs.path.join(allocator, &[_][]const u8{ out_dir, generic_name, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(.{ .sub_path = full_path, .data = best_contents.bytes });
            best_contents.is_generic = true;
            // Report runner-up variants that were also shared by more than one file.
            while (contents_list.pop()) |contender| {
                if (contender.hit_count > 1) {
                    const this_missed_bytes = contender.hit_count * contender.bytes.len;
                    missed_opportunity_bytes += this_missed_bytes;
                    std.debug.print("Missed opportunity ({B}): {s}\n", .{
                        this_missed_bytes,
                        path_kv.key_ptr.*,
                    });
                } else break;
            }
        }

        // Write every non-generic variant into its own target directory.
        var hash_it = path_kv.value_ptr.*.iterator();
        while (hash_it.next()) |hash_kv| {
            const contents = hash_to_contents.get(hash_kv.value_ptr.*).?;
            if (contents.is_generic) continue;

            const dest_target = hash_kv.key_ptr.*;
            const full_path = try std.fs.path.join(allocator, &[_][]const u8{ out_dir, dest_target, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(.{ .sub_path = full_path, .data = contents.bytes });
        }
    }
}
365
/// Prints the command-line usage text with `std.debug.print`, then terminates
/// the process with exit status 1. `arg0` is shown as the program name.
fn usageAndExit(arg0: []const u8) noreturn {
    std.debug.print("Usage: {s} [--search-path <dir>] --out <dir> --abi <name>\n", .{arg0});

    // The remaining lines are fixed text, emitted one print call per line.
    const option_notes = [_][]const u8{
        "--search-path can be used any number of times.\n",
        " subdirectories of search paths look like, e.g. x86_64-linux-gnu\n",
        "--out is a dir that will be created, and populated with the results\n",
        "--abi is either glibc, musl, freebsd, or netbsd\n",
    };
    for (option_notes) |note| {
        std.debug.print("{s}", .{note});
    }

    std.process.exit(1);
}