master
  1//! To get started, run this tool with no args and read the help message.
  2//!
  3//! The build system of Linux requires specifying a single target
  4//! architecture. Meanwhile, Zig supports out-of-the-box cross compilation for
  5//! every target. So the process to create libc headers that Zig ships is to use
  6//! this tool.
  7//!
//! First, use the Linux build system to create installations of all the
//! targets in the `linux_targets` variable.
 10//!
 11//! Next, run this tool to create a new directory which puts .h files into
 12//! <arch> subdirectories, with `any-linux-any` being files that apply to
 13//! all architectures.
 14//!
//! You'll then have to manually update the Zig source repo with these new files.
 16
 17const std = @import("std");
 18const Arch = std.Target.Cpu.Arch;
 19const Abi = std.Target.Abi;
 20const assert = std.debug.assert;
 21const Blake3 = std.crypto.hash.Blake3;
 22
 23const LibCTarget = struct {
 24    name: []const u8,
 25    arch: MultiArch,
 26};
 27
 28const MultiArch = union(enum) {
 29    arm,
 30    arm64,
 31    loongarch,
 32    mips,
 33    powerpc,
 34    riscv,
 35    sparc,
 36    x86,
 37    specific: Arch,
 38
 39    fn eql(a: MultiArch, b: MultiArch) bool {
 40        if (@intFromEnum(a) != @intFromEnum(b))
 41            return false;
 42        if (a != .specific)
 43            return true;
 44        return a.specific == b.specific;
 45    }
 46};
 47
 48const linux_targets = [_]LibCTarget{
 49    LibCTarget{
 50        .name = "arc",
 51        .arch = MultiArch{ .specific = Arch.arc },
 52    },
 53    LibCTarget{
 54        .name = "arm",
 55        .arch = .arm,
 56    },
 57    LibCTarget{
 58        .name = "arm64",
 59        .arch = .{ .specific = .aarch64 },
 60    },
 61    LibCTarget{
 62        .name = "csky",
 63        .arch = .{ .specific = .csky },
 64    },
 65    LibCTarget{
 66        .name = "hexagon",
 67        .arch = .{ .specific = .hexagon },
 68    },
 69    LibCTarget{
 70        .name = "m68k",
 71        .arch = .{ .specific = .m68k },
 72    },
 73    LibCTarget{
 74        .name = "loongarch",
 75        .arch = .loongarch,
 76    },
 77    LibCTarget{
 78        .name = "mips",
 79        .arch = .mips,
 80    },
 81    LibCTarget{
 82        .name = "powerpc",
 83        .arch = .powerpc,
 84    },
 85    LibCTarget{
 86        .name = "riscv",
 87        .arch = .riscv,
 88    },
 89    LibCTarget{
 90        .name = "s390",
 91        .arch = .{ .specific = .s390x },
 92    },
 93    LibCTarget{
 94        .name = "sparc",
 95        .arch = .{ .specific = .sparc },
 96    },
 97    LibCTarget{
 98        .name = "x86",
 99        .arch = .x86,
100    },
101    LibCTarget{
102        .name = "xtensa",
103        .arch = .{ .specific = .xtensa },
104    },
105};
106
/// Key type of `TargetToHash`: the architecture a header variant is written for.
const DestTarget = struct {
    arch: MultiArch,

    /// Hashing/equality adapter handed to `std.ArrayHashMap`.
    const HashContext = struct {
        /// Hashes the `arch` field with Wyhash (seed 0) and keeps the low
        /// 32 bits of the 64-bit result.
        pub fn hash(_: @This(), a: DestTarget) u32 {
            var wyhash = std.hash.Wyhash.init(0);
            std.hash.autoHash(&wyhash, a.arch);
            const digest: u64 = wyhash.final();
            return @truncate(digest);
        }

        /// Two keys are equal when their `arch` fields are equal according to
        /// `MultiArch.eql`; the index argument is not used.
        pub fn eql(_: @This(), a: DestTarget, b: DestTarget, _: usize) bool {
            return a.arch.eql(b.arch);
        }
    };
};
125
/// One distinct (relative path, trimmed contents) pair discovered by `main`.
const Contents = struct {
    /// File contents with leading/trailing whitespace trimmed.
    bytes: []const u8,
    /// How many scanned files produced this same hash.
    hit_count: usize,
    /// Digest under which this entry is stored in `HashToContents`.
    hash: []const u8,
    /// Set once these contents have been written to the generic directory.
    is_generic: bool,

    /// Ordering predicate for `std.mem.sort`: true when `lhs` was seen
    /// strictly fewer times than `rhs`.
    fn hitCountLessThan(_: void, lhs: *const Contents, rhs: *const Contents) bool {
        return rhs.hit_count > lhs.hit_count;
    }
};
137
/// Digest bytes -> the distinct file contents that produced that digest.
const HashToContents = std.StringHashMap(Contents);

/// Target -> digest of the variant of one header that the target uses.
/// Insertion-ordered, with key hashes stored alongside the entries.
const TargetToHash = std.ArrayHashMap(DestTarget, []const u8, DestTarget.HashContext, true);

/// Header path relative to the include directory -> per-target digest table.
const PathTable = std.StringHashMap(*TargetToHash);
141
/// Collects the headers of every entry in `linux_targets`, deduplicates them
/// by content, and writes the result below the `--out` directory.
///
/// Steps:
/// 1. Parse `--search-path <dir>` (repeatable) and `--out <dir>` (required,
///    at most once). Any usage problem prints a message and exits.
/// 2. For each target, walk `<search-path>/<target name>/include` using the
///    first search path where it can be opened, and hash every regular file
///    (relative path + whitespace-trimmed contents) with Blake3.
/// 3. For each relative path, the variant shared by the most targets is
///    written to `any-linux-any/` if more than one target shares it; every
///    other variant is written to `<arch>-linux-any/`.
/// 4. Delete a fixed list of generated files from the output directory.
///
/// All allocations come from a single arena that lives until process exit.
/// Errors from the filesystem or the allocator are returned to the caller.
pub fn main() !void {
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    const arena = arena_state.allocator();
    const args = try std.process.argsAlloc(arena);
    var search_paths = std.array_list.Managed([]const u8).init(arena);
    var opt_out_dir: ?[]const u8 = null;

    var arg_i: usize = 1;
    while (arg_i < args.len) : (arg_i += 1) {
        const arg = args[arg_i];
        if (std.mem.eql(u8, arg, "--help"))
            usageAndExit(args[0]);

        // Recognize the option first so that an unknown trailing flag is
        // reported as unrecognized rather than as "missing a value".
        const is_search_path = std.mem.eql(u8, arg, "--search-path");
        const is_out = std.mem.eql(u8, arg, "--out");
        if (!is_search_path and !is_out) {
            std.debug.print("unrecognized argument: {s}\n", .{arg});
            usageAndExit(args[0]);
        }
        if (arg_i + 1 >= args.len) {
            std.debug.print("expected argument after '{s}'\n", .{arg});
            usageAndExit(args[0]);
        }
        const value = args[arg_i + 1];

        if (is_search_path) {
            try search_paths.append(value);
        } else {
            // User input must not be validated with `assert`: report it.
            if (opt_out_dir != null) {
                std.debug.print("--out may only be specified once\n", .{});
                usageAndExit(args[0]);
            }
            opt_out_dir = value;
        }

        // Skip the value that was just consumed.
        arg_i += 1;
    }

    const out_dir = opt_out_dir orelse usageAndExit(args[0]);
    const generic_name = "any-linux-any";

    var path_table = PathTable.init(arena);
    var hash_to_contents = HashToContents.init(arena);
    var max_bytes_saved: usize = 0;
    var total_bytes: usize = 0;

    for (linux_targets) |linux_target| {
        const dest_target = DestTarget{
            .arch = linux_target.arch,
        };
        search: for (search_paths.items) |search_path| {
            const target_include_dir = try std.fs.path.join(arena, &.{
                search_path, linux_target.name, "include",
            });
            // Depth-first walk using an explicit stack of directories.
            var dir_stack = std.array_list.Managed([]const u8).init(arena);
            try dir_stack.append(target_include_dir);

            while (dir_stack.pop()) |full_dir_name| {
                var dir = std.fs.cwd().openDir(full_dir_name, .{ .iterate = true }) catch |err| switch (err) {
                    // Not available in this search path: try the next one.
                    error.FileNotFound => continue :search,
                    error.AccessDenied => continue :search,
                    else => return err,
                };
                defer dir.close();

                var dir_it = dir.iterate();

                while (try dir_it.next()) |entry| {
                    const full_path = try std.fs.path.join(arena, &[_][]const u8{ full_dir_name, entry.name });
                    switch (entry.kind) {
                        .directory => try dir_stack.append(full_path),
                        .file => {
                            const rel_path = try std.fs.path.relative(arena, target_include_dir, full_path);
                            const max_size = 2 * 1024 * 1024 * 1024;
                            const raw_bytes = try std.fs.cwd().readFileAlloc(full_path, arena, .limited(max_size));
                            const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t");
                            total_bytes += raw_bytes.len;

                            // The digest covers the relative path as well, so
                            // identical contents at different paths stay distinct.
                            const hash = try arena.alloc(u8, 32);
                            var hasher = Blake3.init(.{});
                            hasher.update(rel_path);
                            hasher.update(trimmed);
                            hasher.final(hash);

                            const gop = try hash_to_contents.getOrPut(hash);
                            if (gop.found_existing) {
                                max_bytes_saved += raw_bytes.len;
                                gop.value_ptr.hit_count += 1;
                                std.debug.print("duplicate: {s} {s} ({B})\n", .{
                                    linux_target.name,
                                    rel_path,
                                    raw_bytes.len,
                                });
                            } else {
                                gop.value_ptr.* = Contents{
                                    .bytes = trimmed,
                                    .hit_count = 1,
                                    .hash = hash,
                                    .is_generic = false,
                                };
                            }
                            const path_gop = try path_table.getOrPut(rel_path);
                            const target_to_hash = if (path_gop.found_existing) path_gop.value_ptr.* else blk: {
                                const ptr = try arena.create(TargetToHash);
                                ptr.* = TargetToHash.init(arena);
                                path_gop.value_ptr.* = ptr;
                                break :blk ptr;
                            };
                            try target_to_hash.putNoClobber(dest_target, hash);
                        },
                        else => std.debug.print("warning: weird file: {s}\n", .{full_path}),
                    }
                }
            }
            // This search path provided the target; do not consult the rest.
            break;
        } else {
            std.debug.print("warning: libc target not found: {s}\n", .{linux_target.name});
        }
    }
    std.debug.print("summary: {B} could be reduced to {B}\n", .{
        total_bytes,
        total_bytes - max_bytes_saved,
    });
    try std.fs.cwd().makePath(out_dir);

    // Accumulated for diagnostics; not currently reported anywhere.
    var missed_opportunity_bytes: usize = 0;
    // iterate path_table. for each path, put all the hashes into a list. sort by hit_count.
    // the hash with the highest hit_count gets to be the "generic" one. everybody else
    // gets their header in a separate arch directory.
    var path_it = path_table.iterator();
    while (path_it.next()) |path_kv| {
        // One entry per target, so contents shared by N targets appear N times.
        var contents_list = std.array_list.Managed(*Contents).init(arena);
        {
            var hash_it = path_kv.value_ptr.*.iterator();
            while (hash_it.next()) |hash_kv| {
                const contents = hash_to_contents.getPtr(hash_kv.value_ptr.*).?;
                try contents_list.append(contents);
            }
        }
        std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
        const best_contents = contents_list.pop().?;
        if (best_contents.hit_count > 1) {
            // worth it to make it generic
            const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, generic_name, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(.{ .sub_path = full_path, .data = best_contents.bytes });
            best_contents.is_generic = true;

            // Report every *other* shared variant exactly once. The list still
            // holds the remaining per-target copies of `best_contents` and
            // repeats each contender, so both must be filtered out.
            var reported = std.array_list.Managed(*Contents).init(arena);
            while (contents_list.pop()) |contender| {
                // Sorted ascending and popped from the end: nothing after
                // this point is shared by more than one target.
                if (contender.hit_count <= 1) break;
                if (contender == best_contents) continue;
                const already_reported = for (reported.items) |seen| {
                    if (seen == contender) break true;
                } else false;
                if (already_reported) continue;
                try reported.append(contender);

                const this_missed_bytes = contender.hit_count * contender.bytes.len;
                missed_opportunity_bytes += this_missed_bytes;
                std.debug.print("Missed opportunity ({B}): {s}\n", .{
                    this_missed_bytes,
                    path_kv.key_ptr.*,
                });
            }
        }
        var hash_it = path_kv.value_ptr.*.iterator();
        while (hash_it.next()) |hash_kv| {
            const contents = hash_to_contents.get(hash_kv.value_ptr.*).?;
            // Already emitted under the generic directory.
            if (contents.is_generic) continue;

            const dest_target = hash_kv.key_ptr.*;
            const arch_name = switch (dest_target.arch) {
                .specific => |a| @tagName(a),
                else => @tagName(dest_target.arch),
            };
            const out_subpath = try std.fmt.allocPrint(arena, "{s}-linux-any", .{arch_name});
            const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, out_subpath, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(.{ .sub_path = full_path, .data = contents.bytes });
        }
    }

    // Generated files that are removed again from the output.
    // NOTE(review): presumably these collide with lowercase-named siblings on
    // case-insensitive filesystems — confirm before changing the list.
    const bad_files = [_][]const u8{
        "any-linux-any/linux/netfilter/xt_CONNMARK.h",
        "any-linux-any/linux/netfilter/xt_DSCP.h",
        "any-linux-any/linux/netfilter/xt_MARK.h",
        "any-linux-any/linux/netfilter/xt_RATEEST.h",
        "any-linux-any/linux/netfilter/xt_TCPMSS.h",
        "any-linux-any/linux/netfilter_ipv4/ipt_ECN.h",
        "any-linux-any/linux/netfilter_ipv4/ipt_TTL.h",
        "any-linux-any/linux/netfilter_ipv6/ip6t_HL.h",
    };
    for (bad_files) |bad_file| {
        const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, bad_file });
        try std.fs.cwd().deleteFile(full_path);
    }
}
323
/// Prints the command-line usage to stderr and terminates the process with
/// exit status 1. `arg0` is the program name shown in the usage line.
///
/// Only options that `main` actually parses are listed here.
fn usageAndExit(arg0: []const u8) noreturn {
    std.debug.print("Usage: {s} [--search-path <dir>] --out <dir>\n", .{arg0});
    std.debug.print("--search-path can be used any number of times.\n", .{});
    std.debug.print("    subdirectories of search paths look like, e.g. x86/include\n", .{});
    std.debug.print("--out is a dir that will be created, and populated with the results\n", .{});
    std.process.exit(1);
}