master
1//! To get started, run this tool with no args and read the help message.
2//!
3//! The build system of Linux requires specifying a single target
4//! architecture. Meanwhile, Zig supports out-of-the-box cross compilation for
5//! every target. So the process to create libc headers that Zig ships is to use
6//! this tool.
7//!
//! First, use the Linux build system to create installations of all the
//! targets in the `linux_targets` variable, so that each one ends up at
//! `<search-path>/<name>/include`.
10//!
11//! Next, run this tool to create a new directory which puts .h files into
12//! <arch> subdirectories, with `any-linux-any` being files that apply to
13//! all architectures.
14//!
//! You'll then have to manually update the Zig source repo with these new files.
16
17const std = @import("std");
18const Arch = std.Target.Cpu.Arch;
19const Abi = std.Target.Abi;
20const assert = std.debug.assert;
21const Blake3 = std.crypto.hash.Blake3;
22
/// One Linux architecture whose installed headers are scanned by `main`.
const LibCTarget = struct {
    /// Directory name looked up beneath every search path
    /// (`<search-path>/<name>/include`).
    name: []const u8,
    /// Architecture key that decides which output subdirectory receives the
    /// headers that are not shared with other targets.
    arch: MultiArch,
};
27
/// Architecture key for a set of headers: either one of the named
/// architecture families or a single, specific `Arch`.
const MultiArch = union(enum) {
    arm,
    arm64,
    loongarch,
    mips,
    powerpc,
    riscv,
    sparc,
    x86,
    specific: Arch,

    /// Returns true when both values hold the same variant and, for
    /// `specific`, also carry the same `Arch` payload.
    fn eql(a: MultiArch, b: MultiArch) bool {
        return switch (a) {
            // The payload is only read once `b` is known to be `specific` too.
            .specific => |lhs_arch| b == .specific and lhs_arch == b.specific,
            // Payload-free variants are equal exactly when their tags match.
            else => @intFromEnum(a) == @intFromEnum(b),
        };
    }
};
47
/// Every Linux architecture this tool scans. `name` is the directory searched
/// for under each search path; `arch` determines the `<arch>-linux-any` output
/// subdirectory used for headers that are not shared.
const linux_targets = [_]LibCTarget{
    .{ .name = "arc", .arch = .{ .specific = .arc } },
    .{ .name = "arm", .arch = .arm },
    .{ .name = "arm64", .arch = .{ .specific = .aarch64 } },
    .{ .name = "csky", .arch = .{ .specific = .csky } },
    .{ .name = "hexagon", .arch = .{ .specific = .hexagon } },
    .{ .name = "m68k", .arch = .{ .specific = .m68k } },
    .{ .name = "loongarch", .arch = .loongarch },
    .{ .name = "mips", .arch = .mips },
    .{ .name = "powerpc", .arch = .powerpc },
    .{ .name = "riscv", .arch = .riscv },
    .{ .name = "s390", .arch = .{ .specific = .s390x } },
    .{ .name = "sparc", .arch = .{ .specific = .sparc } },
    .{ .name = "x86", .arch = .x86 },
    .{ .name = "xtensa", .arch = .{ .specific = .xtensa } },
};
106
/// Key identifying which output architecture a header version belongs to.
const DestTarget = struct {
    arch: MultiArch,

    /// Hash and equality callbacks; used as the context type of `TargetToHash`.
    const HashContext = struct {
        /// Hashes the target's `arch` with Wyhash (seed 0) and keeps the low
        /// 32 bits of the result.
        pub fn hash(_: @This(), target: DestTarget) u32 {
            var wyhash = std.hash.Wyhash.init(0);
            std.hash.autoHash(&wyhash, target.arch);
            const digest: u64 = wyhash.final();
            return @truncate(digest);
        }

        /// Two targets are equal when their `arch` values are equal; the
        /// index argument is not used.
        pub fn eql(_: @This(), a: DestTarget, b: DestTarget, _: usize) bool {
            return a.arch.eql(b.arch);
        }
    };
};
125
/// One distinct version of a header file, shared by every scanned file that
/// produced the same hash.
const Contents = struct {
    /// File bytes with leading and trailing whitespace trimmed.
    bytes: []const u8,
    /// Number of scanned files that hashed to this entry.
    hit_count: usize,
    /// Digest under which this entry is stored in `HashToContents`.
    hash: []const u8,
    /// Set once these bytes have been written to the generic directory.
    is_generic: bool,

    /// Sort predicate: true when `lhs` was hit fewer times than `rhs`.
    fn hitCountLessThan(_: void, lhs: *const Contents, rhs: *const Contents) bool {
        return rhs.hit_count > lhs.hit_count;
    }
};
137
/// Maps a file digest to the deduplicated contents it identifies.
const HashToContents = std.StringHashMap(Contents);
/// Maps each destination target to the digest of its version of one header.
const TargetToHash = std.ArrayHashMap(DestTarget, []const u8, DestTarget.HashContext, true);
/// Maps a header path (relative to the include directory) to its per-target digests.
const PathTable = std.StringHashMap(*TargetToHash);
141
/// Entry point.
///
/// 1. Parses `--search-path <dir>` (repeatable) and `--out <dir>`.
/// 2. For every entry of `linux_targets`, walks `<search-path>/<name>/include`
///    in the first search path where that directory can be opened, hashing
///    each file (relative path + whitespace-trimmed contents).
/// 3. For every relative path, writes the version shared by the most targets
///    to `<out>/any-linux-any/` when it is shared by more than one target,
///    and every other version to `<out>/<arch>-linux-any/`.
/// 4. Deletes a fixed list of unwanted files from the generic directory.
///
/// Progress and diagnostics are printed with `std.debug.print`. Returns any
/// allocation or file system error that is not explicitly handled below.
pub fn main() !void {
    // Everything is allocated from this arena, which is never torn down
    // before the process exits; nothing below is freed individually.
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    const arena = arena_state.allocator();
    const args = try std.process.argsAlloc(arena);
    var search_paths = std.array_list.Managed([]const u8).init(arena);
    var opt_out_dir: ?[]const u8 = null;

    var arg_i: usize = 1;
    while (arg_i < args.len) : (arg_i += 1) {
        if (std.mem.eql(u8, args[arg_i], "--help"))
            usageAndExit(args[0]);
        // Every remaining option takes a value.
        if (arg_i + 1 >= args.len) {
            std.debug.print("expected argument after '{s}'\n", .{args[arg_i]});
            usageAndExit(args[0]);
        }

        if (std.mem.eql(u8, args[arg_i], "--search-path")) {
            try search_paths.append(args[arg_i + 1]);
        } else if (std.mem.eql(u8, args[arg_i], "--out")) {
            // This is user input, so report it instead of asserting.
            if (opt_out_dir != null) {
                std.debug.print("--out may only be specified once\n", .{});
                usageAndExit(args[0]);
            }
            opt_out_dir = args[arg_i + 1];
        } else {
            std.debug.print("unrecognized argument: {s}\n", .{args[arg_i]});
            usageAndExit(args[0]);
        }

        // Skip the value that was just consumed.
        arg_i += 1;
    }

    const out_dir = opt_out_dir orelse usageAndExit(args[0]);
    const generic_name = "any-linux-any";

    var path_table = PathTable.init(arena);
    var hash_to_contents = HashToContents.init(arena);
    var max_bytes_saved: usize = 0;
    var total_bytes: usize = 0;

    for (linux_targets) |linux_target| {
        const dest_target = DestTarget{
            .arch = linux_target.arch,
        };
        search: for (search_paths.items) |search_path| {
            const target_include_dir = try std.fs.path.join(arena, &.{
                search_path, linux_target.name, "include",
            });
            // Depth-first traversal driven by an explicit stack of directories.
            var dir_stack = std.array_list.Managed([]const u8).init(arena);
            try dir_stack.append(target_include_dir);

            while (dir_stack.pop()) |full_dir_name| {
                var dir = std.fs.cwd().openDir(full_dir_name, .{ .iterate = true }) catch |err| switch (err) {
                    // Not available here: try the next search path.
                    error.FileNotFound => continue :search,
                    error.AccessDenied => continue :search,
                    else => return err,
                };
                defer dir.close();

                var dir_it = dir.iterate();

                while (try dir_it.next()) |entry| {
                    const full_path = try std.fs.path.join(arena, &[_][]const u8{ full_dir_name, entry.name });
                    switch (entry.kind) {
                        .directory => try dir_stack.append(full_path),
                        .file => {
                            const rel_path = try std.fs.path.relative(arena, target_include_dir, full_path);
                            const max_size = 2 * 1024 * 1024 * 1024;
                            const raw_bytes = try std.fs.cwd().readFileAlloc(full_path, arena, .limited(max_size));
                            const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t");
                            total_bytes += raw_bytes.len;

                            // The digest covers the relative path as well as the
                            // contents, so it identifies one version of one path.
                            const hash = try arena.alloc(u8, 32);
                            var hasher = Blake3.init(.{});
                            hasher.update(rel_path);
                            hasher.update(trimmed);
                            hasher.final(hash);

                            const gop = try hash_to_contents.getOrPut(hash);
                            if (gop.found_existing) {
                                max_bytes_saved += raw_bytes.len;
                                gop.value_ptr.hit_count += 1;
                                std.debug.print("duplicate: {s} {s} ({B})\n", .{
                                    linux_target.name,
                                    rel_path,
                                    raw_bytes.len,
                                });
                            } else {
                                gop.value_ptr.* = Contents{
                                    .bytes = trimmed,
                                    .hit_count = 1,
                                    .hash = hash,
                                    .is_generic = false,
                                };
                            }

                            const path_gop = try path_table.getOrPut(rel_path);
                            const target_to_hash = if (path_gop.found_existing) path_gop.value_ptr.* else blk: {
                                const ptr = try arena.create(TargetToHash);
                                ptr.* = TargetToHash.init(arena);
                                path_gop.value_ptr.* = ptr;
                                break :blk ptr;
                            };
                            try target_to_hash.putNoClobber(dest_target, hash);
                        },
                        else => std.debug.print("warning: weird file: {s}\n", .{full_path}),
                    }
                }
            }
            // The target was found in this search path; do not look further.
            break;
        } else {
            std.debug.print("warning: libc target not found: {s}\n", .{linux_target.name});
        }
    }
    std.debug.print("summary: {B} could be reduced to {B}\n", .{
        total_bytes,
        total_bytes - max_bytes_saved,
    });
    try std.fs.cwd().makePath(out_dir);

    var missed_opportunity_bytes: usize = 0;
    // iterate path_table. for each path, put all the hashes into a list. sort by hit_count.
    // the hash with the highest hit_count gets to be the "generic" one. everybody else
    // gets their header in a separate arch directory.
    var path_it = path_table.iterator();
    while (path_it.next()) |path_kv| {
        var contents_list = std.array_list.Managed(*Contents).init(arena);
        {
            var hash_it = path_kv.value_ptr.*.iterator();
            while (hash_it.next()) |hash_kv| {
                const contents = hash_to_contents.getPtr(hash_kv.value_ptr.*).?;
                try contents_list.append(contents);
            }
        }
        std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
        const best_contents = contents_list.pop().?;
        if (best_contents.hit_count > 1) {
            // worth it to make it generic
            const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, generic_name, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(.{ .sub_path = full_path, .data = best_contents.bytes });
            best_contents.is_generic = true;

            // `contents_list` holds one entry per target, so the same contents
            // can show up several times. Report each distinct contender once,
            // and never report the version that was just made generic.
            var reported = std.array_list.Managed(*Contents).init(arena);
            while (contents_list.pop()) |contender| {
                // Sorted ascending and popped from the end: once an unshared
                // entry is reached, all remaining ones are unshared as well.
                if (contender.hit_count <= 1) break;
                if (contender == best_contents) continue;
                const already_reported = for (reported.items) |seen| {
                    if (seen == contender) break true;
                } else false;
                if (already_reported) continue;
                try reported.append(contender);

                const this_missed_bytes = contender.hit_count * contender.bytes.len;
                missed_opportunity_bytes += this_missed_bytes;
                std.debug.print("Missed opportunity ({B}): {s}\n", .{
                    this_missed_bytes,
                    path_kv.key_ptr.*,
                });
            }
        }

        // Every version that did not become generic goes to its own arch directory.
        var hash_it = path_kv.value_ptr.*.iterator();
        while (hash_it.next()) |hash_kv| {
            const contents = hash_to_contents.get(hash_kv.value_ptr.*).?;
            if (contents.is_generic) continue;

            const dest_target = hash_kv.key_ptr.*;
            const arch_name = switch (dest_target.arch) {
                .specific => |a| @tagName(a),
                else => @tagName(dest_target.arch),
            };
            const out_subpath = try std.fmt.allocPrint(arena, "{s}-linux-any", .{arch_name});
            const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, out_subpath, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(.{ .sub_path = full_path, .data = contents.bytes });
        }
    }
    std.debug.print("total missed opportunity: {B}\n", .{missed_opportunity_bytes});

    // NOTE(review): the reason these files are unwanted is not stated in this
    // file; presumably they collide with lowercase-named siblings on
    // case-insensitive file systems. Confirm before changing the list.
    const bad_files = [_][]const u8{
        "any-linux-any/linux/netfilter/xt_CONNMARK.h",
        "any-linux-any/linux/netfilter/xt_DSCP.h",
        "any-linux-any/linux/netfilter/xt_MARK.h",
        "any-linux-any/linux/netfilter/xt_RATEEST.h",
        "any-linux-any/linux/netfilter/xt_TCPMSS.h",
        "any-linux-any/linux/netfilter_ipv4/ipt_ECN.h",
        "any-linux-any/linux/netfilter_ipv4/ipt_TTL.h",
        "any-linux-any/linux/netfilter_ipv6/ip6t_HL.h",
    };
    for (bad_files) |bad_file| {
        const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, bad_file });
        // A file is only present here if it was emitted as generic above, which
        // depends on which targets were found; a missing file is not fatal.
        std.fs.cwd().deleteFile(full_path) catch |err| switch (err) {
            error.FileNotFound => std.debug.print("warning: bad file not found: {s}\n", .{full_path}),
            else => return err,
        };
    }
}
323
/// Prints the command-line usage to stderr and terminates the process with
/// exit status 1.
///
/// `arg0` is the program name shown in the usage line. Only the options that
/// `main` actually parses are listed.
fn usageAndExit(arg0: []const u8) noreturn {
    std.debug.print(
        \\Usage: {s} [--search-path <dir>] --out <dir>
        \\--search-path can be used any number of times.
        \\    each search path is expected to contain <target>/include, e.g. x86/include
        \\--out is a dir that will be created, and populated with the results
        \\
    , .{arg0});
    std.process.exit(1);
}