Commit 075f93fa10

Cody Tapscott <topolarity@tapscott.me>
2022-07-29 02:06:57
stage2 LLVM: Pass inline assembly outputs directly when not targeting memory
This change provides a basic implementation of #2349 for stage2. There's still quite a lot of work before this logic is as complete as what's in Clang (https://github.com/llvm/llvm-project/blob/b3645353041818f61e2580635409ddb81ff5a272/clang/lib/CodeGen/CGStmt.cpp#L2304-L2795), particularly considering the diversity of constraints across targets. It's probably not worth doing the complete work until there's a clearer picture for constraints in Zig's future dedicated ASM syntax, but at least this gives us a small improvement for now. As a bonus, this also fixes a bug with how we were handling `_` identifiers.
1 parent 1ab15b6
Changed files (1)
src
codegen
src/codegen/llvm.zig
@@ -5491,22 +5491,26 @@ pub const FuncGen = struct {
         defer arena_allocator.deinit();
         const arena = arena_allocator.allocator();
 
-        const return_count: u8 = for (outputs) |output| {
-            if (output == .none) break 1;
-        } else 0;
-        const llvm_params_len = inputs.len + outputs.len - return_count;
-        const llvm_param_types = try arena.alloc(*const llvm.Type, llvm_params_len);
-        const llvm_param_values = try arena.alloc(*const llvm.Value, llvm_params_len);
-        const llvm_param_attrs = try arena.alloc(bool, llvm_params_len);
+        // The exact number of return / parameter values depends on which output values
+        // are passed by reference as indirect outputs (determined below).
+        const max_return_count = outputs.len;
+        const llvm_ret_types = try arena.alloc(*const llvm.Type, max_return_count);
+        const llvm_ret_indirect = try arena.alloc(bool, max_return_count);
+
+        const max_param_count = inputs.len + outputs.len;
+        const llvm_param_types = try arena.alloc(*const llvm.Type, max_param_count);
+        const llvm_param_values = try arena.alloc(*const llvm.Value, max_param_count);
+        const llvm_param_attrs = try arena.alloc(bool, max_param_count);
         const target = self.dg.module.getTarget();
 
+        var llvm_ret_i: usize = 0;
         var llvm_param_i: usize = 0;
-        var total_i: usize = 0;
+        var total_i: u16 = 0;
 
-        var name_map: std.StringArrayHashMapUnmanaged(void) = .{};
-        try name_map.ensureUnusedCapacity(arena, outputs.len + inputs.len);
+        var name_map: std.StringArrayHashMapUnmanaged(u16) = .{};
+        try name_map.ensureUnusedCapacity(arena, max_param_count);
 
-        for (outputs) |output| {
+        for (outputs) |output, i| {
             const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
             const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
             const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
@@ -5519,15 +5523,30 @@ pub const FuncGen = struct {
                 llvm_constraints.appendAssumeCapacity(',');
             }
             llvm_constraints.appendAssumeCapacity('=');
+
+            // Pass any non-return outputs indirectly, if the constraint accepts a memory location
+            llvm_ret_indirect[i] = (output != .none) and constraintAllowsMemory(constraint);
             if (output != .none) {
                 try llvm_constraints.ensureUnusedCapacity(self.gpa, llvm_constraints.capacity + 1);
-                llvm_constraints.appendAssumeCapacity('*');
-
                 const output_inst = try self.resolveInst(output);
-                llvm_param_values[llvm_param_i] = output_inst;
-                llvm_param_types[llvm_param_i] = output_inst.typeOf();
-                llvm_param_attrs[llvm_param_i] = true;
-                llvm_param_i += 1;
+
+                if (llvm_ret_indirect[i]) {
+                    // Pass the result by reference as an indirect output (e.g. "=*m")
+                    llvm_constraints.appendAssumeCapacity('*');
+
+                    llvm_param_values[llvm_param_i] = output_inst;
+                    llvm_param_types[llvm_param_i] = output_inst.typeOf();
+                    llvm_param_attrs[llvm_param_i] = true;
+                    llvm_param_i += 1;
+                } else {
+                    // Pass the result directly (e.g. "=r")
+                    llvm_ret_types[llvm_ret_i] = output_inst.typeOf().getElementType();
+                    llvm_ret_i += 1;
+                }
+            } else {
+                const ret_ty = self.air.typeOfIndex(inst);
+                llvm_ret_types[llvm_ret_i] = try self.dg.lowerType(ret_ty);
+                llvm_ret_i += 1;
             }
 
             // LLVM uses commas internally to separate different constraints,
@@ -5536,13 +5555,16 @@ pub const FuncGen = struct {
             // to GCC's inline assembly.
             // http://llvm.org/docs/LangRef.html#constraint-codes
             for (constraint[1..]) |byte| {
-                llvm_constraints.appendAssumeCapacity(switch (byte) {
-                    ',' => '|',
-                    else => byte,
-                });
+                switch (byte) {
+                    ',' => llvm_constraints.appendAssumeCapacity('|'),
+                    '*' => {}, // Indirect outputs are handled above
+                    else => llvm_constraints.appendAssumeCapacity(byte),
+                }
             }
 
-            name_map.putAssumeCapacityNoClobber(name, {});
+            if (!std.mem.eql(u8, name, "_")) {
+                name_map.putAssumeCapacityNoClobber(name, total_i);
+            }
             total_i += 1;
         }
 
@@ -5594,7 +5616,7 @@ pub const FuncGen = struct {
             }
 
             if (!std.mem.eql(u8, name, "_")) {
-                name_map.putAssumeCapacityNoClobber(name, {});
+                name_map.putAssumeCapacityNoClobber(name, total_i);
             }
 
             // In the case of indirect inputs, LLVM requires the callsite to have
@@ -5625,6 +5647,11 @@ pub const FuncGen = struct {
             }
         }
 
+        // We have finished scanning through all inputs/outputs, so the number of
+        // parameters and return values is known.
+        const param_count = llvm_param_i;
+        const return_count = llvm_ret_i;
+
         // For some targets, Clang unconditionally adds some clobbers to all inline assembly.
         // While this is probably not strictly necessary, if we don't follow Clang's lead
         // here then we may risk tripping LLVM bugs since anything not used by Clang tends
@@ -5682,7 +5709,7 @@ pub const FuncGen = struct {
                         const name = asm_source[name_start..i];
                         state = .start;
 
-                        const index = name_map.getIndex(name) orelse {
+                        const index = name_map.get(name) orelse {
                             // we should validate the assembly in Sema; by now it is too late
                             return self.todo("unknown input or output name: '{s}'", .{name});
                         };
@@ -5693,12 +5720,20 @@ pub const FuncGen = struct {
             }
         }
 
-        const ret_ty = self.air.typeOfIndex(inst);
-        const ret_llvm_ty = try self.dg.lowerType(ret_ty);
+        const ret_llvm_ty = switch (return_count) {
+            0 => self.context.voidType(),
+            1 => llvm_ret_types[0],
+            else => self.context.structType(
+                llvm_ret_types.ptr,
+                @intCast(c_uint, return_count),
+                .False,
+            ),
+        };
+
         const llvm_fn_ty = llvm.functionType(
             ret_llvm_ty,
             llvm_param_types.ptr,
-            @intCast(c_uint, llvm_param_types.len),
+            @intCast(c_uint, param_count),
             .False,
         );
         const asm_fn = llvm.getInlineAsm(
@@ -5715,18 +5750,40 @@ pub const FuncGen = struct {
         const call = self.builder.buildCall(
             asm_fn,
             llvm_param_values.ptr,
-            @intCast(c_uint, llvm_param_values.len),
+            @intCast(c_uint, param_count),
             .C,
             .Auto,
             "",
         );
-        for (llvm_param_attrs) |need_elem_ty, i| {
+        for (llvm_param_attrs[0..param_count]) |need_elem_ty, i| {
             if (need_elem_ty) {
                 const elem_ty = llvm_param_types[i].getElementType();
                 llvm.setCallElemTypeAttr(call, i, elem_ty);
             }
         }
-        return call;
+
+        var ret_val = call;
+        llvm_ret_i = 0;
+        for (outputs) |output, i| {
+            if (llvm_ret_indirect[i]) continue;
+
+            const output_value = if (return_count > 1) b: {
+                break :b self.builder.buildExtractValue(call, @intCast(c_uint, llvm_ret_i), "");
+            } else call;
+
+            if (output != .none) {
+                const output_ptr = try self.resolveInst(output);
+                const output_ptr_ty = self.air.typeOf(output);
+
+                const store_inst = self.builder.buildStore(output_value, output_ptr);
+                store_inst.setAlignment(output_ptr_ty.ptrAlignment(target));
+            } else {
+                ret_val = output_value;
+            }
+            llvm_ret_i += 1;
+        }
+
+        return ret_val;
     }
 
     fn airIsNonNull(
@@ -9709,10 +9766,30 @@ fn errUnionErrorOffset(payload_ty: Type, target: std.Target) u1 {
     return @boolToInt(Type.anyerror.abiAlignment(target) <= payload_ty.abiAlignment(target));
 }
 
+/// Returns true for asm constraint (e.g. "=*m", "=r") if it accepts a memory location
+///
+/// See also TargetInfo::validateOutputConstraint, AArch64TargetInfo::validateAsmConstraint, etc. in Clang
 fn constraintAllowsMemory(constraint: []const u8) bool {
-    return constraint[0] == 'm';
+    // TODO: This implementation is woefully incomplete.
+    for (constraint) |byte| {
+        switch (byte) {
+            '=', '*', ',', '&' => {},
+            'm', 'o', 'X', 'g' => return true,
+            else => {},
+        }
+    } else return false;
 }
 
+/// Returns true for asm constraint (e.g. "=*m", "=r") if it accepts a register
+///
+/// See also TargetInfo::validateOutputConstraint, AArch64TargetInfo::validateAsmConstraint, etc. in Clang
 fn constraintAllowsRegister(constraint: []const u8) bool {
-    return constraint[0] != 'm';
+    // TODO: This implementation is woefully incomplete.
+    for (constraint) |byte| {
+        switch (byte) {
+            '=', '*', ',', '&' => {},
+            'm', 'o' => {},
+            else => return true,
+        }
+    } else return false;
 }