Commit a337046832

Andrew Kelley <andrew@ziglang.org>
2020-09-26 05:52:02
stage2: properly handle zig cc used as a preprocessor
This cleans up how the CLI parses and handles -E, -S, and -c. Compilation explicitly acknowledges when it is being used to do C preprocessing. -S is properly translated to -fno-emit-bin -femit-asm, but Compilation does not yet handle -femit-asm. There is not yet a mechanism for skipping the linking step when there is only a single object file, so to make `zig cc -E -o <path>` work, link.flush() copies the preprocessed file from zig-cache into the output directory.
1 parent aded86e
src/link/Elf.zig
@@ -1401,10 +1401,7 @@ fn linkWithLLD(self: *Elf, comp: *Compilation) !void {
         try argv.append("-pie");
     }
 
-    const full_out_path = if (directory.path) |dir_path|
-        try fs.path.join(arena, &[_][]const u8{dir_path, self.base.options.sub_path})
-    else 
-        self.base.options.sub_path;
+    const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.sub_path});
     try argv.append("-o");
     try argv.append(full_out_path);
 
src/clang_options_data.zig
@@ -7,7 +7,7 @@ flagpd1("CC"),
 .{
     .name = "E",
     .syntax = .flag,
-    .zig_equivalent = .pp_or_asm,
+    .zig_equivalent = .preprocess_only,
     .pd1 = true,
     .pd2 = false,
     .psl = false,
@@ -95,7 +95,7 @@ flagpd1("Qy"),
 .{
     .name = "S",
     .syntax = .flag,
-    .zig_equivalent = .pp_or_asm,
+    .zig_equivalent = .asm_only,
     .pd1 = true,
     .pd2 = false,
     .psl = false,
@@ -196,7 +196,7 @@ sepd1("Zlinker-input"),
 .{
     .name = "E",
     .syntax = .flag,
-    .zig_equivalent = .pp_or_asm,
+    .zig_equivalent = .preprocess_only,
     .pd1 = true,
     .pd2 = false,
     .psl = true,
@@ -1477,7 +1477,7 @@ flagpsl("MT"),
 .{
     .name = "assemble",
     .syntax = .flag,
-    .zig_equivalent = .pp_or_asm,
+    .zig_equivalent = .asm_only,
     .pd1 = false,
     .pd2 = true,
     .psl = false,
@@ -1805,7 +1805,7 @@ flagpsl("MT"),
 .{
     .name = "preprocess",
     .syntax = .flag,
-    .zig_equivalent = .pp_or_asm,
+    .zig_equivalent = .preprocess_only,
     .pd1 = false,
     .pd2 = true,
     .psl = false,
@@ -3406,6 +3406,8 @@ flagpd1("mlong-double-128"),
 flagpd1("mlong-double-64"),
 flagpd1("mlong-double-80"),
 flagpd1("mlongcall"),
+flagpd1("mlvi-cfi"),
+flagpd1("mlvi-hardening"),
 flagpd1("mlwp"),
 flagpd1("mlzcnt"),
 flagpd1("mmadd4"),
@@ -3499,6 +3501,8 @@ flagpd1("mno-ldc1-sdc1"),
 flagpd1("mno-local-sdata"),
 flagpd1("mno-long-calls"),
 flagpd1("mno-longcall"),
+flagpd1("mno-lvi-cfi"),
+flagpd1("mno-lvi-hardening"),
 flagpd1("mno-lwp"),
 flagpd1("mno-lzcnt"),
 flagpd1("mno-madd4"),
src/Compilation.zig
@@ -49,6 +49,7 @@ sanitize_c: bool,
 /// Otherwise we attempt to parse the error messages and expose them via the Compilation API.
 /// This is `true` for `zig cc`, `zig c++`, and `zig translate-c`.
 clang_passthrough_mode: bool,
+clang_preprocessor_mode: ClangPreprocessorMode,
 /// Whether to print clang argvs to stdout.
 verbose_cc: bool,
 verbose_tokenize: bool,
@@ -271,6 +272,14 @@ pub const EmitLoc = struct {
     basename: []const u8,
 };
 
+pub const ClangPreprocessorMode = enum {
+    no,
+    /// This means we are doing `zig cc -E -o <path>`.
+    yes,
+    /// This means we are doing `zig cc -E`.
+    stdout,
+};
+
 pub const InitOptions = struct {
     zig_lib_directory: Directory,
     local_cache_directory: Directory,
@@ -285,6 +294,8 @@ pub const InitOptions = struct {
     emit_bin: ?EmitLoc,
     /// `null` means to not emit a C header file.
     emit_h: ?EmitLoc = null,
+    /// `null` means to not emit assembly.
+    emit_asm: ?EmitLoc = null,
     link_mode: ?std.builtin.LinkMode = null,
     dll_export_fns: ?bool = false,
     /// Normally when using LLD to link, Zig uses a file named "lld.id" in the
@@ -349,6 +360,7 @@ pub const InitOptions = struct {
     version: ?std.builtin.Version = null,
     libc_installation: ?*const LibCInstallation = null,
     machine_code_model: std.builtin.CodeModel = .default,
+    clang_preprocessor_mode: ClangPreprocessorMode = .no,
     /// This is for stage1 and should be deleted upon completion of self-hosting.
     color: @import("main.zig").Color = .Auto,
     test_filter: ?[]const u8 = null,
@@ -478,6 +490,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation {
         } else must_pic;
 
         if (options.emit_h != null) fatal("-femit-h not supported yet", .{}); // TODO
+        if (options.emit_asm != null) fatal("-femit-asm not supported yet", .{}); // TODO
 
         const emit_bin = options.emit_bin orelse fatal("-fno-emit-bin not supported yet", .{}); // TODO
 
@@ -750,6 +763,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation {
             .sanitize_c = sanitize_c,
             .rand = options.rand,
             .clang_passthrough_mode = options.clang_passthrough_mode,
+            .clang_preprocessor_mode = options.clang_preprocessor_mode,
             .verbose_cc = options.verbose_cc,
             .verbose_tokenize = options.verbose_tokenize,
             .verbose_ast = options.verbose_ast,
@@ -1215,7 +1229,6 @@ fn obtainCObjectCacheManifest(comp: *Compilation) Cache.Manifest {
     // Only things that need to be added on top of the base hash, and only things
     // that apply both to @cImport and compiling C objects. No linking stuff here!
     // Also nothing that applies only to compiling .zig code.
-
     man.hash.add(comp.sanitize_c);
     man.hash.addListOfBytes(comp.clang_argv);
     man.hash.add(comp.bin_file.options.link_libcpp);
@@ -1381,6 +1394,8 @@ fn updateCObject(comp: *Compilation, c_object: *CObject) !void {
     var man = comp.obtainCObjectCacheManifest();
     defer man.deinit();
 
+    man.hash.add(comp.clang_preprocessor_mode);
+
     _ = try man.addFile(c_object.src.src_path, null);
     {
         // Hash the extra flags, with special care to call addFile for file parameters.
@@ -1424,7 +1439,7 @@ fn updateCObject(comp: *Compilation, c_object: *CObject) !void {
         var zig_cache_tmp_dir = try comp.local_cache_directory.handle.makeOpenPath("tmp", .{});
         defer zig_cache_tmp_dir.close();
 
-        try argv.appendSlice(&[_][]const u8{ self_exe_path, "clang", "-c" });
+        try argv.appendSlice(&[_][]const u8{ self_exe_path, "clang" });
 
         const ext = classifyFileExt(c_object.src.src_path);
         const out_dep_path: ?[]const u8 = if (comp.disable_c_depfile or !ext.clangSupportsDepFile())
@@ -1433,8 +1448,12 @@ fn updateCObject(comp: *Compilation, c_object: *CObject) !void {
             try std.fmt.allocPrint(arena, "{}.d", .{out_obj_path});
         try comp.addCCArgs(arena, &argv, ext, out_dep_path);
 
-        try argv.append("-o");
-        try argv.append(out_obj_path);
+        try argv.ensureCapacity(argv.items.len + 3);
+        switch (comp.clang_preprocessor_mode) {
+            .no => argv.appendSliceAssumeCapacity(&[_][]const u8{"-c", "-o", out_obj_path}),
+            .yes => argv.appendSliceAssumeCapacity(&[_][]const u8{"-E", "-o", out_obj_path}),
+            .stdout => argv.appendAssumeCapacity("-E"),
+        }
 
         try argv.append(c_object.src.src_path);
         try argv.appendSlice(c_object.src.extra_flags);
@@ -1460,6 +1479,8 @@ fn updateCObject(comp: *Compilation, c_object: *CObject) !void {
                         // TODO https://github.com/ziglang/zig/issues/6342
                         std.process.exit(1);
                     }
+                    if (comp.clang_preprocessor_mode == .stdout)
+                        std.process.exit(0);
                 },
                 else => std.process.exit(1),
             }
@@ -1522,14 +1543,11 @@ fn updateCObject(comp: *Compilation, c_object: *CObject) !void {
         break :blk digest;
     };
 
-    const components = if (comp.local_cache_directory.path) |p|
-        &[_][]const u8{ p, "o", &digest, o_basename }
-    else
-        &[_][]const u8{ "o", &digest, o_basename };
-
     c_object.status = .{
         .success = .{
-            .object_path = try std.fs.path.join(comp.gpa, components),
+            .object_path = try comp.local_cache_directory.join(comp.gpa, &[_][]const u8{
+                "o", &digest, o_basename,
+            }),
             .lock = man.toOwnedLock(),
         },
     };
src/link.zig
@@ -1,16 +1,18 @@
 const std = @import("std");
 const mem = std.mem;
 const Allocator = std.mem.Allocator;
+const fs = std.fs;
+const log = std.log.scoped(.link);
+const assert = std.debug.assert;
+
 const Compilation = @import("Compilation.zig");
 const Module = @import("Module.zig");
-const fs = std.fs;
 const trace = @import("tracy.zig").trace;
 const Package = @import("Package.zig");
 const Type = @import("type.zig").Type;
 const Cache = @import("Cache.zig");
 const build_options = @import("build_options");
 const LibCInstallation = @import("libc_installation.zig").LibCInstallation;
-const log = std.log.scoped(.link);
 
 pub const producer_string = if (std.builtin.is_test) "zig test" else "zig " ++ build_options.version;
 
@@ -303,6 +305,21 @@ pub const File = struct {
     /// Commit pending changes and write headers. Takes into account final output mode
     /// and `use_lld`, not only `effectiveOutputMode`.
     pub fn flush(base: *File, comp: *Compilation) !void {
+        if (comp.clang_preprocessor_mode == .yes) {
+            // TODO: avoid extra link step when it's just 1 object file (the `zig cc -c` case)
+            // Until then, we do `lld -r -o output.o input.o` even though the output is the same
+            // as the input. For the preprocessing case (`zig cc -E -o foo`) we copy the file
+            // to the final location.
+            const full_out_path = try base.options.directory.join(comp.gpa, &[_][]const u8{
+                base.options.sub_path,
+            });
+            defer comp.gpa.free(full_out_path);
+            assert(comp.c_object_table.count() == 1);
+            const the_entry = comp.c_object_table.items()[0];
+            const cached_pp_file_path = the_entry.key.status.success.object_path;
+            try fs.cwd().copyFile(cached_pp_file_path, fs.cwd(), full_out_path, .{});
+            return;
+        }
         const use_lld = build_options.have_llvm and base.options.use_lld;
         if (use_lld and base.options.output_mode == .Lib and base.options.link_mode == .Static and
             !base.options.target.isWasm())
src/main.zig
@@ -327,6 +327,7 @@ pub fn buildOutputType(
     var time_report = false;
     var show_builtin = false;
     var emit_bin: Emit = .yes_default_path;
+    var emit_asm: Emit = .no;
     var emit_zir: Emit = .no;
     var target_arch_os_abi: []const u8 = "native";
     var target_mcpu: ?[]const u8 = null;
@@ -345,7 +346,6 @@ pub fn buildOutputType(
     var want_stack_check: ?bool = null;
     var want_valgrind: ?bool = null;
     var rdynamic: bool = false;
-    var only_pp_or_asm = false;
     var linker_script: ?[]const u8 = null;
     var version_script: ?[]const u8 = null;
     var disable_c_depfile = false;
@@ -371,6 +371,7 @@ pub fn buildOutputType(
     var override_global_cache_dir: ?[]const u8 = null;
     var override_lib_dir: ?[]const u8 = null;
     var main_pkg_path: ?[]const u8 = null;
+    var clang_preprocessor_mode: Compilation.ClangPreprocessorMode = .no;
 
     var system_libs = std.ArrayList([]const u8).init(gpa);
     defer system_libs.deinit();
@@ -752,7 +753,14 @@ pub fn buildOutputType(
             ensure_libcpp_on_non_freestanding = arg_mode == .cpp;
             want_native_include_dirs = true;
 
-            var c_arg = false;
+            const COutMode = enum {
+                link,
+                object,
+                assembly,
+                preprocessor,
+            };
+            var c_out_mode: COutMode = .link;
+            var out_path: ?[]const u8 = null;
             var is_shared_lib = false;
             var linker_args = std.ArrayList([]const u8).init(arena);
             var it = ClangArgIterator.init(arena, all_args);
@@ -762,12 +770,10 @@ pub fn buildOutputType(
                 };
                 switch (it.zig_equivalent) {
                     .target => target_arch_os_abi = it.only_arg, // example: -target riscv64-linux-unknown
-                    .o => {
-                        // -o
-                        emit_bin = .{ .yes = it.only_arg };
-                        enable_cache = true;
-                    },
-                    .c => c_arg = true, // -c
+                    .o => out_path = it.only_arg, // -o
+                    .c => c_out_mode = .object, // -c
+                    .asm_only => c_out_mode = .assembly, // -S
+                    .preprocess_only => c_out_mode = .preprocessor, // -E
                     .other => {
                         try clang_argv.appendSlice(it.other_args);
                     },
@@ -813,11 +819,6 @@ pub fn buildOutputType(
                             try linker_args.append(linker_arg);
                         }
                     },
-                    .pp_or_asm => {
-                        // This handles both -E and -S.
-                        only_pp_or_asm = true;
-                        try clang_argv.appendSlice(it.other_args);
-                    },
                     .optimize => {
                         // Alright, what release mode do they want?
                         if (mem.eql(u8, it.only_arg, "Os")) {
@@ -999,32 +1000,43 @@ pub fn buildOutputType(
                 }
             }
 
-            if (only_pp_or_asm) {
-                output_mode = .Obj;
-                fatal("TODO implement using zig cc as a preprocessor", .{});
-                //// Transfer "link_objects" into c_source_files so that all those
-                //// args make it onto the command line.
-                //try c_source_files.appendSlice(link_objects.items);
-                //for (c_source_files.items) |c_source_file| {
-                //    const src_path = switch (emit_bin) {
-                //        .yes => |p| p,
-                //        else => c_source_file.source_path,
-                //    };
-                //    const basename = fs.path.basename(src_path);
-                //    c_source_file.preprocessor_only_basename = basename;
-                //}
-                //emit_bin = .no;
-            } else if (!c_arg) {
-                output_mode = if (is_shared_lib) .Lib else .Exe;
-                switch (emit_bin) {
-                    .no, .yes_default_path => {
-                        emit_bin = .{ .yes = "a.out" };
-                        enable_cache = true;
-                    },
-                    .yes => {},
-                }
-            } else {
-                output_mode = .Obj;
+            switch (c_out_mode) {
+                .link => {
+                    output_mode = if (is_shared_lib) .Lib else .Exe;
+                    emit_bin = .{ .yes = out_path orelse "a.out" };
+                    enable_cache = true;
+                },
+                .object => {
+                    output_mode = .Obj;
+                    if (out_path) |p| {
+                        emit_bin = .{ .yes = p };
+                    } else {
+                        emit_bin = .yes_default_path;
+                    }
+                },
+                .assembly => {
+                    output_mode = .Obj;
+                    emit_bin = .no;
+                    if (out_path) |p| {
+                        emit_asm = .{ .yes = p };
+                    } else {
+                        emit_asm = .yes_default_path;
+                    }
+                },
+                .preprocessor => {
+                    output_mode = .Obj;
+                    // An error message is generated when there is more than 1 C source file.
+                    if (c_source_files.items.len != 1) {
+                        // For example `zig cc` and no args should print the "no input files" message.
+                        return punt_to_clang(arena, all_args);
+                    }
+                    if (out_path) |p| {
+                        emit_bin = .{ .yes = p };
+                        clang_preprocessor_mode = .yes;
+                    } else {
+                        clang_preprocessor_mode = .stdout;
+                    }
+                },
             }
             if (c_source_files.items.len == 0 and link_objects.items.len == 0) {
                 // For example `zig cc` and no args should print the "no input files" message.
@@ -1407,6 +1419,7 @@ pub fn buildOutputType(
         .self_exe_path = self_exe_path,
         .rand = &default_prng.random,
         .clang_passthrough_mode = arg_mode != .build,
+        .clang_preprocessor_mode = clang_preprocessor_mode,
         .version = optional_version,
         .libc_installation = if (libc_installation) |*lci| lci else null,
         .verbose_cc = verbose_cc,
@@ -1453,11 +1466,6 @@ pub fn buildOutputType(
 
     try updateModule(gpa, comp, zir_out_path, hook);
 
-    if (build_options.have_llvm and only_pp_or_asm) {
-        // this may include dumping the output to stdout
-        fatal("TODO: implement `zig cc` when using it as a preprocessor", .{});
-    }
-
     if (build_options.is_stage1 and comp.stage1_lock != null and watch) {
         std.log.warn("--watch is not recommended with the stage1 backend; it leaks memory and is not capable of incremental compilation", .{});
     }
@@ -2436,7 +2444,8 @@ pub const ClangArgIterator = struct {
         shared,
         rdynamic,
         wl,
-        pp_or_asm,
+        preprocess_only,
+        asm_only,
         optimize,
         debug,
         sanitize,
tools/update_clang_options.zig
@@ -116,19 +116,19 @@ const known_options = [_]KnownOpt{
     },
     .{
         .name = "E",
-        .ident = "pp_or_asm",
+        .ident = "preprocess_only",
     },
     .{
         .name = "preprocess",
-        .ident = "pp_or_asm",
+        .ident = "preprocess_only",
     },
     .{
         .name = "S",
-        .ident = "pp_or_asm",
+        .ident = "asm_only",
     },
     .{
         .name = "assemble",
-        .ident = "pp_or_asm",
+        .ident = "asm_only",
     },
     .{
         .name = "O1",
@@ -346,7 +346,7 @@ pub fn main() anyerror!void {
             for (blacklisted_options) |blacklisted_key| {
                 if (std.mem.eql(u8, blacklisted_key, kv.key)) continue :it_map;
             }
-            if (kv.value.Object.get("Name").?.value.String.len == 0) continue;
+            if (kv.value.Object.get("Name").?.String.len == 0) continue;
             try all_objects.append(&kv.value.Object);
         }
     }
@@ -365,11 +365,11 @@ pub fn main() anyerror!void {
     );
 
     for (all_objects.span()) |obj| {
-        const name = obj.get("Name").?.value.String;
+        const name = obj.get("Name").?.String;
         var pd1 = false;
         var pd2 = false;
         var pslash = false;
-        for (obj.get("Prefixes").?.value.Array.span()) |prefix_json| {
+        for (obj.get("Prefixes").?.Array.span()) |prefix_json| {
             const prefix = prefix_json.String;
             if (std.mem.eql(u8, prefix, "-")) {
                 pd1 = true;
@@ -465,7 +465,7 @@ const Syntax = union(enum) {
         self: Syntax,
         comptime fmt: []const u8,
         options: std.fmt.FormatOptions,
-        out_stream: var,
+        out_stream: anytype,
     ) !void {
         switch (self) {
             .multi_arg => |n| return out_stream.print(".{{.{}={}}}", .{ @tagName(self), n }),
@@ -475,8 +475,8 @@ const Syntax = union(enum) {
 };
 
 fn objSyntax(obj: *json.ObjectMap) Syntax {
-    const num_args = @intCast(u8, obj.get("NumArgs").?.value.Integer);
-    for (obj.get("!superclasses").?.value.Array.span()) |superclass_json| {
+    const num_args = @intCast(u8, obj.get("NumArgs").?.Integer);
+    for (obj.get("!superclasses").?.Array.span()) |superclass_json| {
         const superclass = superclass_json.String;
         if (std.mem.eql(u8, superclass, "Joined")) {
             return .joined;
@@ -510,19 +510,19 @@ fn objSyntax(obj: *json.ObjectMap) Syntax {
             return .{ .multi_arg = num_args };
         }
     }
-    const name = obj.get("Name").?.value.String;
+    const name = obj.get("Name").?.String;
     if (std.mem.eql(u8, name, "<input>")) {
         return .flag;
     } else if (std.mem.eql(u8, name, "<unknown>")) {
         return .flag;
     }
-    const kind_def = obj.get("Kind").?.value.Object.get("def").?.value.String;
+    const kind_def = obj.get("Kind").?.Object.get("def").?.String;
     if (std.mem.eql(u8, kind_def, "KIND_FLAG")) {
         return .flag;
     }
-    const key = obj.get("!name").?.value.String;
+    const key = obj.get("!name").?.String;
     std.debug.warn("{} (key {}) has unrecognized superclasses:\n", .{ name, key });
-    for (obj.get("!superclasses").?.value.Array.span()) |superclass_json| {
+    for (obj.get("!superclasses").?.Array.span()) |superclass_json| {
         std.debug.warn(" {}\n", .{superclass_json.String});
     }
     std.process.exit(1);
@@ -560,15 +560,15 @@ fn objectLessThan(context: void, a: *json.ObjectMap, b: *json.ObjectMap) bool {
     }
 
     if (!a_match_with_eql and !b_match_with_eql) {
-        const a_name = a.get("Name").?.value.String;
-        const b_name = b.get("Name").?.value.String;
+        const a_name = a.get("Name").?.String;
+        const b_name = b.get("Name").?.String;
         if (a_name.len != b_name.len) {
             return a_name.len > b_name.len;
         }
     }
 
-    const a_key = a.get("!name").?.value.String;
-    const b_key = b.get("!name").?.value.String;
+    const a_key = a.get("!name").?.String;
+    const b_key = b.get("!name").?.String;
     return std.mem.lessThan(u8, a_key, b_key);
 }
 
BRANCH_TODO
@@ -1,5 +1,3 @@
- * make sure that `zig cc -o hello hello.c -target native-native-musl` and `zig build-exe hello.zig -lc -target native-native-musl` will share the same libc build.
- * zig cc as a preprocessor (-E)
  * tests passing with -Dskip-non-native
  * `-ftime-report`
  *  -fstack-report               print stack size diagnostics\n"
@@ -20,6 +18,7 @@
  * restore error messages for stage2_add_link_lib
  * windows CUSTOMBUILD : error : unable to build compiler_rt: FileNotFound [D:\a\1\s\build\zig_install_lib_files.vcxproj]
  * try building some software with zig cc
+ * implement support for -femit-asm
 
  * implement proper parsing of clang stderr/stdout and exposing compile errors with the Compilation API
  * implement proper parsing of LLD stderr/stdout and exposing compile errors with the Compilation API
@@ -57,4 +56,3 @@
  * make std.Progress support multithreaded
  * update musl.zig static data to use native path separator in static data rather than replacing '/' at runtime
  * linking hello world with LLD, lld is silently calling exit(1) instead of reporting ok=false. when run standalone the error message is: ld.lld: error: section [index 3] has a sh_offset (0x57000) + sh_size (0x68) that is greater than the file size (0x57060)
-