Commit 5f3b91437f

Robin Voetter <robin@voetter.nl>
2022-08-21 01:32:19
stage2: improve addrspace handling
This commit changes the way Zig is intended to deal with variable declarations for exotic targets. Where previously the idea was to enforce local/global variables to be placed into their respective address spaces, depending on the target, this is now fixed to the generic address space. To facilitate this for targets where local variables _must_ be generated into a specific address space (ex. amdgcn where locals must be generated into the private address space), the variable allocations (alloca) are generated into the right address space and then addrspace-casted back to the generic address space. While this could be less efficient in theory, LLVM will hopefully deal with figuring out the actual correct address space for a pointer for us. HIP seems to do the same thing in this regard. Global variables are handled in a similar way.
1 parent 3392de8
Changed files (4)
src/codegen/llvm/bindings.zig
@@ -171,6 +171,9 @@ pub const Value = opaque {
     pub const constAdd = LLVMConstAdd;
     extern fn LLVMConstAdd(LHSConstant: *Value, RHSConstant: *Value) *Value;
 
+    pub const constAddrSpaceCast = LLVMConstAddrSpaceCast;
+    extern fn LLVMConstAddrSpaceCast(ConstantVal: *Value, ToType: *Type) *Value;
+
     pub const setWeak = LLVMSetWeak;
     extern fn LLVMSetWeak(CmpXchgInst: *Value, IsWeak: Bool) void;
 
@@ -956,6 +959,12 @@ pub const Builder = opaque {
 
     pub const setFastMath = ZigLLVMSetFastMath;
     extern fn ZigLLVMSetFastMath(B: *Builder, on_state: bool) void;
+
+    pub const buildAddrSpaceCast = LLVMBuildAddrSpaceCast;
+    extern fn LLVMBuildAddrSpaceCast(B: *Builder, Val: *Value, DestTy: *Type, Name: [*:0]const u8) *Value;
+
+    pub const buildAllocaInAddressSpace = ZigLLVMBuildAllocaInAddressSpace;
+    extern fn ZigLLVMBuildAllocaInAddressSpace(B: *Builder, Ty: *Type, AddressSpace: c_uint, Name: [*:0]const u8) *Value;
 };
 
 pub const MDString = opaque {
src/codegen/llvm.zig
@@ -929,8 +929,7 @@ pub const Object = struct {
                     if (isByRef(param_ty)) {
                         const alignment = param_ty.abiAlignment(target);
                         const param_llvm_ty = param.typeOf();
-                        const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty);
-                        arg_ptr.setAlignment(alignment);
+                        const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, alignment, target);
                         const store_inst = builder.buildStore(param, arg_ptr);
                         store_inst.setAlignment(alignment);
                         args.appendAssumeCapacity(arg_ptr);
@@ -974,8 +973,7 @@ pub const Object = struct {
                         param_ty.abiAlignment(target),
                         dg.object.target_data.abiAlignmentOfType(int_llvm_ty),
                     );
-                    const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty);
-                    arg_ptr.setAlignment(alignment);
+                    const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, alignment, target);
                     const casted_ptr = builder.buildBitCast(arg_ptr, int_ptr_llvm_ty, "");
                     const store_inst = builder.buildStore(param, casted_ptr);
                     store_inst.setAlignment(alignment);
@@ -1026,8 +1024,7 @@ pub const Object = struct {
                     const param_ty = fn_info.param_types[it.zig_index - 1];
                     const param_llvm_ty = try dg.lowerType(param_ty);
                     const param_alignment = param_ty.abiAlignment(target);
-                    const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty);
-                    arg_ptr.setAlignment(param_alignment);
+                    const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, param_alignment, target);
                     var field_types_buf: [8]*llvm.Type = undefined;
                     const field_types = field_types_buf[0..llvm_ints.len];
                     for (llvm_ints) |int_bits, i| {
@@ -1058,8 +1055,7 @@ pub const Object = struct {
                     const param_ty = fn_info.param_types[it.zig_index - 1];
                     const param_llvm_ty = try dg.lowerType(param_ty);
                     const param_alignment = param_ty.abiAlignment(target);
-                    const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty);
-                    arg_ptr.setAlignment(param_alignment);
+                    const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, param_alignment, target);
                     var field_types_buf: [8]*llvm.Type = undefined;
                     const field_types = field_types_buf[0..llvm_floats.len];
                     for (llvm_floats) |float_bits, i| {
@@ -1103,8 +1099,7 @@ pub const Object = struct {
                     llvm_arg_i += 1;
 
                     const alignment = param_ty.abiAlignment(target);
-                    const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty);
-                    arg_ptr.setAlignment(alignment);
+                    const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, alignment, target);
                     const casted_ptr = builder.buildBitCast(arg_ptr, param.typeOf().pointerType(0), "");
                     _ = builder.buildStore(param, casted_ptr);
 
@@ -2404,21 +2399,27 @@ pub const DeclGen = struct {
                     // mismatch, because we don't have the LLVM type until the *value* is created,
                     // whereas the global needs to be created based on the type alone, because
                     // lowering the value may reference the global as a pointer.
+                    const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target);
+                    const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_addrspace, target);
                     const new_global = dg.object.llvm_module.addGlobalInAddressSpace(
                         llvm_init.typeOf(),
                         "",
-                        dg.llvmAddressSpace(decl.@"addrspace"),
+                        llvm_global_addrspace,
                     );
                     new_global.setLinkage(global.getLinkage());
                     new_global.setUnnamedAddr(global.getUnnamedAddress());
                     new_global.setAlignment(global.getAlignment());
                     if (decl.@"linksection") |section| new_global.setSection(section);
                     new_global.setInitializer(llvm_init);
-                    // replaceAllUsesWith requires the type to be unchanged. So we bitcast
+                    // replaceAllUsesWith requires the type to be unchanged. So we convert
                     // the new global to the old type and use that as the thing to replace
                     // old uses.
-                    const new_global_ptr = new_global.constBitCast(global.typeOf());
-                    global.replaceAllUsesWith(new_global_ptr);
+                    const new_global_ptr = if (llvm_addrspace != llvm_global_addrspace)
+                        new_global.constAddrSpaceCast(llvm_init.typeOf().pointerType(llvm_addrspace))
+                    else
+                        new_global;
+                    const new_global_casted_ptr = new_global_ptr.constBitCast(global.typeOf());
+                    global.replaceAllUsesWith(new_global_casted_ptr);
                     dg.object.decl_map.putAssumeCapacity(decl_index, new_global);
                     new_global.takeName(global);
                     global.deleteGlobal();
@@ -2465,7 +2466,7 @@ pub const DeclGen = struct {
         const fqn = try decl.getFullyQualifiedName(dg.module);
         defer dg.gpa.free(fqn);
 
-        const llvm_addrspace = dg.llvmAddressSpace(decl.@"addrspace");
+        const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target);
         const llvm_fn = dg.llvmModule().addFunctionInAddressSpace(fqn, fn_type, llvm_addrspace);
         gop.value_ptr.* = llvm_fn;
 
@@ -2613,9 +2614,15 @@ pub const DeclGen = struct {
         const fqn = try decl.getFullyQualifiedName(dg.module);
         defer dg.gpa.free(fqn);
 
+        const target = dg.module.getTarget();
+
         const llvm_type = try dg.lowerType(decl.ty);
-        const llvm_addrspace = dg.llvmAddressSpace(decl.@"addrspace");
-        const llvm_global = dg.object.llvm_module.addGlobalInAddressSpace(llvm_type, fqn, llvm_addrspace);
+        const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target);
+        const llvm_global = dg.object.llvm_module.addGlobalInAddressSpace(
+            llvm_type,
+            fqn,
+            toLlvmGlobalAddressSpace(llvm_addrspace, target),
+        );
         gop.value_ptr.* = llvm_global;
 
         // This is needed for declarations created by `@extern`.
@@ -2640,40 +2647,6 @@ pub const DeclGen = struct {
         return llvm_global;
     }
 
-    fn llvmAddressSpace(self: DeclGen, address_space: std.builtin.AddressSpace) c_uint {
-        const target = self.module.getTarget();
-        return switch (target.cpu.arch) {
-            .i386, .x86_64 => switch (address_space) {
-                .generic => llvm.address_space.default,
-                .gs => llvm.address_space.x86.gs,
-                .fs => llvm.address_space.x86.fs,
-                .ss => llvm.address_space.x86.ss,
-                else => unreachable,
-            },
-            .nvptx, .nvptx64 => switch (address_space) {
-                .generic => llvm.address_space.default,
-                .global => llvm.address_space.nvptx.global,
-                .constant => llvm.address_space.nvptx.constant,
-                .param => llvm.address_space.nvptx.param,
-                .shared => llvm.address_space.nvptx.shared,
-                .local => llvm.address_space.nvptx.local,
-                else => unreachable,
-            },
-            .amdgcn => switch (address_space) {
-                .generic => llvm.address_space.flat,
-                .global => llvm.address_space.amdgpu.global,
-                .constant => llvm.address_space.amdgpu.constant,
-                .shared => llvm.address_space.amdgpu.local,
-                .local => llvm.address_space.amdgpu.private,
-                else => unreachable,
-            },
-            else => switch (address_space) {
-                .generic => llvm.address_space.default,
-                else => unreachable,
-            },
-        };
-    }
-
     fn isUnnamedType(dg: *DeclGen, ty: Type, val: *llvm.Value) bool {
         // Once `lowerType` succeeds, successive calls to it with the same Zig type
         // are guaranteed to succeed. So if a call to `lowerType` fails here it means
@@ -2739,7 +2712,7 @@ pub const DeclGen = struct {
                     return dg.context.structType(&fields, fields.len, .False);
                 }
                 const ptr_info = t.ptrInfo().data;
-                const llvm_addrspace = dg.llvmAddressSpace(ptr_info.@"addrspace");
+                const llvm_addrspace = toLlvmAddressSpace(ptr_info.@"addrspace", target);
                 if (ptr_info.host_size != 0) {
                     return dg.context.intType(ptr_info.host_size * 8).pointerType(llvm_addrspace);
                 }
@@ -3268,11 +3241,18 @@ pub const DeclGen = struct {
                     const decl_index = tv.val.castTag(.variable).?.data.owner_decl;
                     const decl = dg.module.declPtr(decl_index);
                     dg.module.markDeclAlive(decl);
-                    const val = try dg.resolveGlobalDecl(decl_index);
                     const llvm_var_type = try dg.lowerType(tv.ty);
-                    const llvm_addrspace = dg.llvmAddressSpace(decl.@"addrspace");
-                    const llvm_type = llvm_var_type.pointerType(llvm_addrspace);
-                    return val.constBitCast(llvm_type);
+                    const llvm_var_addrspace = toLlvmAddressSpace(decl.@"addrspace", target);
+                    const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_var_addrspace, target);
+                    const llvm_var_ptr_type = llvm_var_type.pointerType(llvm_global_addrspace);
+
+                    const val = try dg.resolveGlobalDecl(decl_index);
+                    const val_ptr = val.constBitCast(llvm_var_ptr_type);
+                    if (llvm_global_addrspace != llvm_var_addrspace) {
+                        const llvm_ptr_type = llvm_var_type.pointerType(llvm_var_addrspace);
+                        return val_ptr.constAddrSpaceCast(llvm_ptr_type);
+                    }
+                    return val_ptr;
                 },
                 .slice => {
                     const slice = tv.val.castTag(.slice).?.data;
@@ -4069,11 +4049,20 @@ pub const DeclGen = struct {
 
         self.module.markDeclAlive(decl);
 
-        const llvm_val = if (is_fn_body)
+        const llvm_decl_val = if (is_fn_body)
             try self.resolveLlvmFunction(decl_index)
         else
             try self.resolveGlobalDecl(decl_index);
 
+        const target = self.module.getTarget();
+        const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target);
+        const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_addrspace, target);
+        const llvm_val = if (llvm_addrspace != llvm_global_addrspace) blk: {
+            const llvm_decl_ty = try self.lowerType(decl.ty);
+            const llvm_decl_ptr_ty = llvm_decl_ty.pointerType(llvm_addrspace);
+            break :blk llvm_decl_val.constAddrSpaceCast(llvm_decl_ptr_ty);
+        } else llvm_decl_val;
+
         const llvm_type = try self.lowerType(tv.ty);
         if (tv.ty.zigTypeTag() == .Int) {
             return llvm_val.constPtrToInt(llvm_type);
@@ -4339,7 +4328,9 @@ pub const FuncGen = struct {
         // We have an LLVM value but we need to create a global constant and
         // set the value as its initializer, and then return a pointer to the global.
         const target = self.dg.module.getTarget();
-        const global = self.dg.object.llvm_module.addGlobal(llvm_val.typeOf(), "");
+        const llvm_addrspace = toLlvmAddressSpace(.generic, target);
+        const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_addrspace, target);
+        const global = self.dg.object.llvm_module.addGlobalInAddressSpace(llvm_val.typeOf(), "", llvm_global_addrspace);
         global.setInitializer(llvm_val);
         global.setLinkage(.Private);
         global.setGlobalConstant(.True);
@@ -4349,8 +4340,10 @@ pub const FuncGen = struct {
         // the type of global constants might not match the type it is supposed to
         // be, and so we must bitcast the pointer at the usage sites.
         const wanted_llvm_ty = try self.dg.lowerType(ty);
-        const wanted_llvm_ptr_ty = wanted_llvm_ty.pointerType(0);
-        const casted_ptr = global.constBitCast(wanted_llvm_ptr_ty);
+        const wanted_bitcasted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_global_addrspace);
+        const bitcasted_ptr = global.constBitCast(wanted_bitcasted_llvm_ptr_ty);
+        const wanted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_addrspace);
+        const casted_ptr = bitcasted_ptr.constAddrSpaceCast(wanted_llvm_ptr_ty);
         gop.value_ptr.* = casted_ptr;
         return casted_ptr;
     }
@@ -4606,8 +4599,7 @@ pub const FuncGen = struct {
 
         const ret_ptr = if (!sret) null else blk: {
             const llvm_ret_ty = try self.dg.lowerType(return_type);
-            const ret_ptr = self.buildAlloca(llvm_ret_ty);
-            ret_ptr.setAlignment(return_type.abiAlignment(target));
+            const ret_ptr = self.buildAlloca(llvm_ret_ty, return_type.abiAlignment(target));
             try llvm_args.append(ret_ptr);
             break :blk ret_ptr;
         };
@@ -4654,8 +4646,7 @@ pub const FuncGen = struct {
                 } else {
                     const alignment = param_ty.abiAlignment(target);
                     const param_llvm_ty = llvm_arg.typeOf();
-                    const arg_ptr = self.buildAlloca(param_llvm_ty);
-                    arg_ptr.setAlignment(alignment);
+                    const arg_ptr = self.buildAlloca(param_llvm_ty, alignment);
                     const store_inst = self.builder.buildStore(llvm_arg, arg_ptr);
                     store_inst.setAlignment(alignment);
                     try llvm_args.append(arg_ptr);
@@ -4682,8 +4673,7 @@ pub const FuncGen = struct {
                         param_ty.abiAlignment(target),
                         self.dg.object.target_data.abiAlignmentOfType(int_llvm_ty),
                     );
-                    const int_ptr = self.buildAlloca(int_llvm_ty);
-                    int_ptr.setAlignment(alignment);
+                    const int_ptr = self.buildAlloca(int_llvm_ty, alignment);
                     const param_llvm_ty = try self.dg.lowerType(param_ty);
                     const casted_ptr = self.builder.buildBitCast(int_ptr, param_llvm_ty.pointerType(0), "");
                     const store_inst = self.builder.buildStore(llvm_arg, casted_ptr);
@@ -4709,7 +4699,7 @@ pub const FuncGen = struct {
                 const llvm_arg = try self.resolveInst(arg);
                 const is_by_ref = isByRef(param_ty);
                 const arg_ptr = if (is_by_ref) llvm_arg else p: {
-                    const p = self.buildAlloca(llvm_arg.typeOf());
+                    const p = self.buildAlloca(llvm_arg.typeOf(), null);
                     const store_inst = self.builder.buildStore(llvm_arg, p);
                     store_inst.setAlignment(param_ty.abiAlignment(target));
                     break :p p;
@@ -4738,7 +4728,7 @@ pub const FuncGen = struct {
                 const llvm_arg = try self.resolveInst(arg);
                 const is_by_ref = isByRef(param_ty);
                 const arg_ptr = if (is_by_ref) llvm_arg else p: {
-                    const p = self.buildAlloca(llvm_arg.typeOf());
+                    const p = self.buildAlloca(llvm_arg.typeOf(), null);
                     const store_inst = self.builder.buildStore(llvm_arg, p);
                     store_inst.setAlignment(param_ty.abiAlignment(target));
                     break :p p;
@@ -4775,7 +4765,7 @@ pub const FuncGen = struct {
                 const arg_ty = self.air.typeOf(arg);
                 var llvm_arg = try self.resolveInst(arg);
                 if (!isByRef(arg_ty)) {
-                    const p = self.buildAlloca(llvm_arg.typeOf());
+                    const p = self.buildAlloca(llvm_arg.typeOf(), null);
                     const store_inst = self.builder.buildStore(llvm_arg, p);
                     store_inst.setAlignment(arg_ty.abiAlignment(target));
                     llvm_arg = store_inst;
@@ -4832,9 +4822,8 @@ pub const FuncGen = struct {
             // In this case the function return type is honoring the calling convention by having
             // a different LLVM type than the usual one. We solve this here at the callsite
             // by bitcasting a pointer to our canonical type, then loading it if necessary.
-            const rp = self.buildAlloca(llvm_ret_ty);
             const alignment = return_type.abiAlignment(target);
-            rp.setAlignment(alignment);
+            const rp = self.buildAlloca(llvm_ret_ty, alignment);
             const ptr_abi_ty = abi_ret_ty.pointerType(0);
             const casted_ptr = self.builder.buildBitCast(rp, ptr_abi_ty, "");
             const store_inst = self.builder.buildStore(call, casted_ptr);
@@ -4851,9 +4840,8 @@ pub const FuncGen = struct {
         if (isByRef(return_type)) {
             // our by-ref status disagrees with sret so we must allocate, store,
             // and return the allocation pointer.
-            const rp = self.buildAlloca(llvm_ret_ty);
             const alignment = return_type.abiAlignment(target);
-            rp.setAlignment(alignment);
+            const rp = self.buildAlloca(llvm_ret_ty, alignment);
             const store_inst = self.builder.buildStore(call, rp);
             store_inst.setAlignment(alignment);
             return rp;
@@ -4912,8 +4900,7 @@ pub const FuncGen = struct {
             return null;
         }
 
-        const rp = self.buildAlloca(llvm_ret_ty);
-        rp.setAlignment(alignment);
+        const rp = self.buildAlloca(llvm_ret_ty, alignment);
         const store_inst = self.builder.buildStore(operand, rp);
         store_inst.setAlignment(alignment);
         const casted_ptr = self.builder.buildBitCast(rp, ptr_abi_ty, "");
@@ -6031,8 +6018,7 @@ pub const FuncGen = struct {
                     llvm_param_types[llvm_param_i] = arg_llvm_value.typeOf();
                 } else {
                     const alignment = arg_ty.abiAlignment(target);
-                    const arg_ptr = self.buildAlloca(arg_llvm_value.typeOf());
-                    arg_ptr.setAlignment(alignment);
+                    const arg_ptr = self.buildAlloca(arg_llvm_value.typeOf(), alignment);
                     const store_inst = self.builder.buildStore(arg_llvm_value, arg_ptr);
                     store_inst.setAlignment(alignment);
                     llvm_param_values[llvm_param_i] = arg_ptr;
@@ -6533,8 +6519,7 @@ pub const FuncGen = struct {
         const llvm_optional_ty = try self.dg.lowerType(optional_ty);
         if (isByRef(optional_ty)) {
             const target = self.dg.module.getTarget();
-            const optional_ptr = self.buildAlloca(llvm_optional_ty);
-            optional_ptr.setAlignment(optional_ty.abiAlignment(target));
+            const optional_ptr = self.buildAlloca(llvm_optional_ty, optional_ty.abiAlignment(target));
             const payload_ptr = self.builder.buildStructGEP(llvm_optional_ty, optional_ptr, 0, "");
             var ptr_ty_payload: Type.Payload.ElemType = .{
                 .base = .{ .tag = .single_mut_pointer },
@@ -6567,8 +6552,7 @@ pub const FuncGen = struct {
         const payload_offset = errUnionPayloadOffset(payload_ty, target);
         const error_offset = errUnionErrorOffset(payload_ty, target);
         if (isByRef(err_un_ty)) {
-            const result_ptr = self.buildAlloca(err_un_llvm_ty);
-            result_ptr.setAlignment(err_un_ty.abiAlignment(target));
+            const result_ptr = self.buildAlloca(err_un_llvm_ty, err_un_ty.abiAlignment(target));
             const err_ptr = self.builder.buildStructGEP(err_un_llvm_ty, result_ptr, error_offset, "");
             const store_inst = self.builder.buildStore(ok_err_code, err_ptr);
             store_inst.setAlignment(Type.anyerror.abiAlignment(target));
@@ -6602,8 +6586,7 @@ pub const FuncGen = struct {
         const payload_offset = errUnionPayloadOffset(payload_ty, target);
         const error_offset = errUnionErrorOffset(payload_ty, target);
         if (isByRef(err_un_ty)) {
-            const result_ptr = self.buildAlloca(err_un_llvm_ty);
-            result_ptr.setAlignment(err_un_ty.abiAlignment(target));
+            const result_ptr = self.buildAlloca(err_un_llvm_ty, err_un_ty.abiAlignment(target));
             const err_ptr = self.builder.buildStructGEP(err_un_llvm_ty, result_ptr, error_offset, "");
             const store_inst = self.builder.buildStore(operand, err_ptr);
             store_inst.setAlignment(Type.anyerror.abiAlignment(target));
@@ -7021,9 +7004,8 @@ pub const FuncGen = struct {
 
         if (isByRef(dest_ty)) {
             const target = self.dg.module.getTarget();
-            const alloca_inst = self.buildAlloca(llvm_dest_ty);
             const result_alignment = dest_ty.abiAlignment(target);
-            alloca_inst.setAlignment(result_alignment);
+            const alloca_inst = self.buildAlloca(llvm_dest_ty, result_alignment);
             {
                 const field_ptr = self.builder.buildStructGEP(llvm_dest_ty, alloca_inst, result_index, "");
                 const store_inst = self.builder.buildStore(result, field_ptr);
@@ -7373,9 +7355,8 @@ pub const FuncGen = struct {
 
         if (isByRef(dest_ty)) {
             const target = self.dg.module.getTarget();
-            const alloca_inst = self.buildAlloca(llvm_dest_ty);
             const result_alignment = dest_ty.abiAlignment(target);
-            alloca_inst.setAlignment(result_alignment);
+            const alloca_inst = self.buildAlloca(llvm_dest_ty, result_alignment);
             {
                 const field_ptr = self.builder.buildStructGEP(llvm_dest_ty, alloca_inst, result_index, "");
                 const store_inst = self.builder.buildStore(result, field_ptr);
@@ -7653,7 +7634,7 @@ pub const FuncGen = struct {
             if (!result_is_ref) {
                 return self.dg.todo("implement bitcast vector to non-ref array", .{});
             }
-            const array_ptr = self.buildAlloca(llvm_dest_ty);
+            const array_ptr = self.buildAlloca(llvm_dest_ty, null);
             const bitcast_ok = elem_ty.bitSize(target) == elem_ty.abiSize(target) * 8;
             if (bitcast_ok) {
                 const llvm_vector_ty = try self.dg.lowerType(operand_ty);
@@ -7729,8 +7710,7 @@ pub const FuncGen = struct {
         if (result_is_ref) {
             // Bitcast the result pointer, then store.
             const alignment = @maximum(operand_ty.abiAlignment(target), inst_ty.abiAlignment(target));
-            const result_ptr = self.buildAlloca(llvm_dest_ty);
-            result_ptr.setAlignment(alignment);
+            const result_ptr = self.buildAlloca(llvm_dest_ty, alignment);
             const operand_llvm_ty = try self.dg.lowerType(operand_ty);
             const casted_ptr = self.builder.buildBitCast(result_ptr, operand_llvm_ty.pointerType(0), "");
             const store_inst = self.builder.buildStore(operand, casted_ptr);
@@ -7743,8 +7723,7 @@ pub const FuncGen = struct {
             // but LLVM won't let us bitcast struct values.
             // Therefore, we store operand to bitcasted alloca, then load for result.
             const alignment = @maximum(operand_ty.abiAlignment(target), inst_ty.abiAlignment(target));
-            const result_ptr = self.buildAlloca(llvm_dest_ty);
-            result_ptr.setAlignment(alignment);
+            const result_ptr = self.buildAlloca(llvm_dest_ty, alignment);
             const operand_llvm_ty = try self.dg.lowerType(operand_ty);
             const casted_ptr = self.builder.buildBitCast(result_ptr, operand_llvm_ty.pointerType(0), "");
             const store_inst = self.builder.buildStore(operand, casted_ptr);
@@ -7820,11 +7799,9 @@ pub const FuncGen = struct {
         if (!pointee_type.isFnOrHasRuntimeBitsIgnoreComptime()) return self.dg.lowerPtrToVoid(ptr_ty);
 
         const pointee_llvm_ty = try self.dg.lowerType(pointee_type);
-        const alloca_inst = self.buildAlloca(pointee_llvm_ty);
         const target = self.dg.module.getTarget();
         const alignment = ptr_ty.ptrAlignment(target);
-        alloca_inst.setAlignment(alignment);
-        return alloca_inst;
+        return self.buildAlloca(pointee_llvm_ty, alignment);
     }
 
     fn airRetPtr(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@@ -7835,15 +7812,13 @@ pub const FuncGen = struct {
         if (self.ret_ptr) |ret_ptr| return ret_ptr;
         const ret_llvm_ty = try self.dg.lowerType(ret_ty);
         const target = self.dg.module.getTarget();
-        const alloca_inst = self.buildAlloca(ret_llvm_ty);
-        alloca_inst.setAlignment(ptr_ty.ptrAlignment(target));
-        return alloca_inst;
+        return self.buildAlloca(ret_llvm_ty, ptr_ty.ptrAlignment(target));
     }
 
     /// Use this instead of builder.buildAlloca, because this function makes sure to
     /// put the alloca instruction at the top of the function!
-    fn buildAlloca(self: *FuncGen, llvm_ty: *llvm.Type) *llvm.Value {
-        return buildAllocaInner(self.builder, self.llvm_func, self.di_scope != null, llvm_ty);
+    fn buildAlloca(self: *FuncGen, llvm_ty: *llvm.Type, alignment: ?c_uint) *llvm.Value {
+        return buildAllocaInner(self.builder, self.llvm_func, self.di_scope != null, llvm_ty, alignment, self.dg.module.getTarget());
     }
 
     fn airStore(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@@ -8801,10 +8776,9 @@ pub const FuncGen = struct {
 
                 if (isByRef(result_ty)) {
                     const llvm_u32 = self.context.intType(32);
-                    const alloca_inst = self.buildAlloca(llvm_result_ty);
                     // TODO in debug builds init to undef so that the padding will be 0xaa
                     // even if we fully populate the fields.
-                    alloca_inst.setAlignment(result_ty.abiAlignment(target));
+                    const alloca_inst = self.buildAlloca(llvm_result_ty, result_ty.abiAlignment(target));
 
                     var indices: [2]*llvm.Value = .{ llvm_u32.constNull(), undefined };
                     for (elements) |elem, i| {
@@ -8842,8 +8816,7 @@ pub const FuncGen = struct {
                 assert(isByRef(result_ty));
 
                 const llvm_usize = try self.dg.lowerType(Type.usize);
-                const alloca_inst = self.buildAlloca(llvm_result_ty);
-                alloca_inst.setAlignment(result_ty.abiAlignment(target));
+                const alloca_inst = self.buildAlloca(llvm_result_ty, result_ty.abiAlignment(target));
 
                 const array_info = result_ty.arrayInfo();
                 var elem_ptr_payload: Type.Payload.Pointer = .{
@@ -8918,7 +8891,7 @@ pub const FuncGen = struct {
         // necessarily match the format that we need, depending on which tag is active. We
         // must construct the correct unnamed struct type here and bitcast, in order to
         // then set the fields appropriately.
-        const result_ptr = self.buildAlloca(union_llvm_ty);
+        const result_ptr = self.buildAlloca(union_llvm_ty, null);
         const llvm_payload = try self.resolveInst(extra.init);
         assert(union_obj.haveFieldTypes());
         const field = union_obj.fields.values()[extra.field_index];
@@ -9234,9 +9207,8 @@ pub const FuncGen = struct {
 
         if (isByRef(optional_ty)) {
             const target = self.dg.module.getTarget();
-            const alloca_inst = self.buildAlloca(optional_llvm_ty);
             const payload_alignment = optional_ty.abiAlignment(target);
-            alloca_inst.setAlignment(payload_alignment);
+            const alloca_inst = self.buildAlloca(optional_llvm_ty, payload_alignment);
 
             {
                 const field_ptr = self.builder.buildStructGEP(optional_llvm_ty, alloca_inst, 0, "");
@@ -9360,8 +9332,7 @@ pub const FuncGen = struct {
             if (isByRef(info.pointee_type)) {
                 const result_align = info.pointee_type.abiAlignment(target);
                 const max_align = @maximum(result_align, ptr_alignment);
-                const result_ptr = self.buildAlloca(elem_llvm_ty);
-                result_ptr.setAlignment(max_align);
+                const result_ptr = self.buildAlloca(elem_llvm_ty, max_align);
                 const llvm_ptr_u8 = self.context.intType(8).pointerType(0);
                 const llvm_usize = self.context.intType(Type.usize.intInfo(target).bits);
                 const size_bytes = info.pointee_type.abiSize(target);
@@ -9394,8 +9365,7 @@ pub const FuncGen = struct {
 
         if (isByRef(info.pointee_type)) {
             const result_align = info.pointee_type.abiAlignment(target);
-            const result_ptr = self.buildAlloca(elem_llvm_ty);
-            result_ptr.setAlignment(result_align);
+            const result_ptr = self.buildAlloca(elem_llvm_ty, result_align);
 
             const same_size_int = self.context.intType(elem_bits);
             const truncated_int = self.builder.buildTrunc(shifted_value, same_size_int, "");
@@ -9519,8 +9489,7 @@ pub const FuncGen = struct {
             .x86_64 => {
                 const array_llvm_ty = usize_llvm_ty.arrayType(6);
                 const array_ptr = fg.valgrind_client_request_array orelse a: {
-                    const array_ptr = fg.buildAlloca(array_llvm_ty);
-                    array_ptr.setAlignment(usize_alignment);
+                    const array_ptr = fg.buildAlloca(array_llvm_ty, usize_alignment);
                     fg.valgrind_client_request_array = array_ptr;
                     break :a array_ptr;
                 };
@@ -9822,6 +9791,74 @@ fn toLlvmCallConv(cc: std.builtin.CallingConvention, target: std.Target) llvm.Ca
     };
 }
 
+/// Convert a Zig address space to the corresponding LLVM address space number for `target`.
+fn toLlvmAddressSpace(address_space: std.builtin.AddressSpace, target: std.Target) c_uint {
+    return switch (target.cpu.arch) {
+        .i386, .x86_64 => switch (address_space) {
+            .generic => llvm.address_space.default,
+            .gs => llvm.address_space.x86.gs,
+            .fs => llvm.address_space.x86.fs,
+            .ss => llvm.address_space.x86.ss,
+            else => unreachable, // remaining address spaces are invalid for x86
+        },
+        .nvptx, .nvptx64 => switch (address_space) {
+            .generic => llvm.address_space.default,
+            .global => llvm.address_space.nvptx.global,
+            .constant => llvm.address_space.nvptx.constant,
+            .param => llvm.address_space.nvptx.param,
+            .shared => llvm.address_space.nvptx.shared,
+            .local => llvm.address_space.nvptx.local,
+            else => unreachable, // remaining address spaces are invalid for nvptx
+        },
+        .amdgcn => switch (address_space) {
+            .generic => llvm.address_space.amdgpu.flat, // generic pointers are "flat" on amdgpu
+            .global => llvm.address_space.amdgpu.global,
+            .constant => llvm.address_space.amdgpu.constant,
+            .shared => llvm.address_space.amdgpu.local,
+            .local => llvm.address_space.amdgpu.private,
+            else => unreachable, // remaining address spaces are invalid for amdgcn
+        },
+        else => switch (address_space) {
+            .generic => llvm.address_space.default,
+            else => unreachable, // only the generic address space exists on other targets
+        },
+    };
+}
+
+/// On some targets, local values that are in the generic address space must be generated into a
+/// different address space, and then cast back to the generic address space.
+/// For example, on some GPUs local variable allocations must be placed into a specific address space.
+/// This function returns the address space local values should be generated into.
+fn llvmAllocaAddressSpace(target: std.Target) c_uint {
+    return switch (target.cpu.arch) {
+        // On amdgcn, locals should be generated into the private address space.
+        // To make Zig not impossible to use, these are then converted to addresses in the
+        // generic address space and treated as regular pointers. This is the way that HIP also does it.
+        .amdgcn => llvm.address_space.amdgpu.private,
+        else => llvm.address_space.default,
+    };
+}
+
+/// Some targets require globals in the generic address space to be generated into a different
+/// address space and then cast back; this returns the default address space for globals on `target`.
+fn llvmDefaultGlobalAddressSpace(target: std.Target) c_uint {
+    return switch (target.cpu.arch) {
+        // On amdgcn, globals must be explicitly allocated and uploaded so that the program can access
+        // them.
+        .amdgcn => llvm.address_space.amdgpu.global,
+        else => llvm.address_space.default,
+    };
+}
+
+/// If `llvm_addrspace` is the generic (default) address space, replace it with the address space
+/// that globals should actually be stored in on `target`; explicit address spaces pass through.
+fn toLlvmGlobalAddressSpace(llvm_addrspace: c_uint, target: std.Target) c_uint {
+    return if (llvm_addrspace == llvm.address_space.default)
+        llvmDefaultGlobalAddressSpace(target)
+    else
+        llvm_addrspace;
+}
+
 /// Take into account 0 bit fields and padding. Returns null if an llvm
 /// field could not be found.
 /// This only happens if you want the field index of a zero sized field at
@@ -10523,25 +10560,43 @@ fn buildAllocaInner(
    llvm_func: *llvm.Value,
    di_scope_non_null: bool,
    llvm_ty: *llvm.Type,
+    maybe_alignment: ?c_uint,
+    target: std.Target,
 ) *llvm.Value {
-    const prev_block = builder.getInsertBlock();
-    const prev_debug_location = builder.getCurrentDebugLocation2();
-    defer {
-        builder.positionBuilderAtEnd(prev_block);
-        if (di_scope_non_null) {
-            builder.setCurrentDebugLocation2(prev_debug_location);
+    const address_space = llvmAllocaAddressSpace(target);
+
+    const alloca = blk: {
+        const prev_block = builder.getInsertBlock();
+        const prev_debug_location = builder.getCurrentDebugLocation2();
+        defer {
+            builder.positionBuilderAtEnd(prev_block);
+            if (di_scope_non_null) {
+                builder.setCurrentDebugLocation2(prev_debug_location);
+            }
+        }
+
+        const entry_block = llvm_func.getFirstBasicBlock().?;
+        if (entry_block.getFirstInstruction()) |first_inst| {
+            builder.positionBuilder(entry_block, first_inst);
+        } else {
+            builder.positionBuilderAtEnd(entry_block);
         }
+        builder.clearCurrentDebugLocation();
+
+        break :blk builder.buildAllocaInAddressSpace(llvm_ty, address_space, "");
+    };
+
+    if (maybe_alignment) |alignment| { // null keeps LLVM's default alignment for the alloca
+        alloca.setAlignment(alignment);
     }
 
-    const entry_block = llvm_func.getFirstBasicBlock().?;
-    if (entry_block.getFirstInstruction()) |first_inst| {
-        builder.positionBuilder(entry_block, first_inst);
-    } else {
-        builder.positionBuilderAtEnd(entry_block);
+    // The pointer returned from this function must be in the generic address space;
+    // if the alloca was created in a different one, cast the result back to generic.
+    if (address_space != llvm.address_space.default) {
+        return builder.buildAddrSpaceCast(alloca, llvm_ty.pointerType(llvm.address_space.default), "");
     }
-    builder.clearCurrentDebugLocation();
 
-    return builder.buildAlloca(llvm_ty, "");
+    return alloca;
 }
 
 fn errUnionPayloadOffset(payload_ty: Type, target: std.Target) u1 {
src/zig_llvm.cpp
@@ -512,22 +512,22 @@ LLVMValueRef ZigLLVMBuildUSubSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRe
 
 LLVMValueRef ZigLLVMBuildSMulFixSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *name) {
     llvm::Type* types[1] = {
-        unwrap(LHS)->getType(), 
+        unwrap(LHS)->getType(),
     };
     // pass scale = 0 as third argument
     llvm::Value* values[3] = {unwrap(LHS), unwrap(RHS), unwrap(B)->getInt32(0)};
-    
+
     CallInst *call_inst = unwrap(B)->CreateIntrinsic(Intrinsic::smul_fix_sat, types, values, nullptr, name);
     return wrap(call_inst);
 }
 
 LLVMValueRef ZigLLVMBuildUMulFixSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *name) {
     llvm::Type* types[1] = {
-        unwrap(LHS)->getType(), 
+        unwrap(LHS)->getType(),
     };
     // pass scale = 0 as third argument
     llvm::Value* values[3] = {unwrap(LHS), unwrap(RHS), unwrap(B)->getInt32(0)};
-    
+
     CallInst *call_inst = unwrap(B)->CreateIntrinsic(Intrinsic::umul_fix_sat, types, values, nullptr, name);
     return wrap(call_inst);
 }
@@ -808,7 +808,7 @@ void ZigLLVMSetCurrentDebugLocation2(LLVMBuilderRef builder, unsigned int line,
         unsigned int column, ZigLLVMDIScope *scope, ZigLLVMDILocation *inlined_at)
 {
     DIScope* di_scope = reinterpret_cast<DIScope*>(scope);
-    DebugLoc debug_loc = DILocation::get(di_scope->getContext(), line, column, di_scope, 
+    DebugLoc debug_loc = DILocation::get(di_scope->getContext(), line, column, di_scope,
         reinterpret_cast<DILocation *>(inlined_at), false);
     unwrap(builder)->SetCurrentDebugLocation(debug_loc);
 }
@@ -1177,9 +1177,14 @@ LLVMValueRef ZigLLVMBuildAShrExact(LLVMBuilderRef builder, LLVMValueRef LHS, LLV
     return wrap(unwrap(builder)->CreateAShr(unwrap(LHS), unwrap(RHS), name, true));
 }
 
+LLVMValueRef ZigLLVMBuildAllocaInAddressSpace(LLVMBuilderRef builder, LLVMTypeRef Ty,
+        unsigned AddressSpace, const char *Name) {
+    return wrap(unwrap(builder)->CreateAlloca(unwrap(Ty), AddressSpace, nullptr, Name)); // ArraySize = nullptr: single element
+}
+
 void ZigLLVMSetTailCall(LLVMValueRef Call) {
     unwrap<CallInst>(Call)->setTailCallKind(CallInst::TCK_MustTail);
-} 
+}
 
 void ZigLLVMSetCallSret(LLVMValueRef Call, LLVMTypeRef return_type) {
     CallInst *call_inst = unwrap<CallInst>(Call);
src/zig_llvm.h
@@ -162,6 +162,8 @@ ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildLShrExact(LLVMBuilderRef builder, LLVMValu
 ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildAShrExact(LLVMBuilderRef builder, LLVMValueRef LHS, LLVMValueRef RHS,
         const char *name);
 
+ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildAllocaInAddressSpace(LLVMBuilderRef builder, LLVMTypeRef Ty, unsigned AddressSpace,
+        const char *Name);
 
 ZIG_EXTERN_C struct ZigLLVMDIType *ZigLLVMCreateDebugPointerType(struct ZigLLVMDIBuilder *dibuilder,
         struct ZigLLVMDIType *pointee_type, uint64_t size_in_bits, uint64_t align_in_bits, const char *name);