Commit b67d1810be

Andrew Kelley <andrew@ziglang.org>
2021-09-16 03:55:39
stage2: implement `@atomicRmw` and `@atomicLoad`
* langref: add some more "see also" links for atomics
* Add the following AIR instructions
  - atomic_load
  - atomic_store_unordered
  - atomic_store_monotonic
  - atomic_store_release
  - atomic_store_seq_cst
  - atomic_rmw
* Implement those AIR instructions in LLVM and C backends.
* AstGen: make the `ty` result locations for `@atomicRmw`, `@atomicLoad`,
  and `@atomicStore` be `coerced_ty` to avoid unnecessary ZIR instructions
  when Sema will be doing the coercions redundantly.
* Sema for `@atomicLoad` and `@atomicRmw` is done, however Sema for
  `@atomicStore` is not yet implemented.
  - comptime eval for `@atomicRmw` is not yet implemented.
* Sema: flesh out `coerceInMemoryAllowed` a little bit more. It can now
  handle pointers.
1 parent f83a4b4
doc/langref.html.in
@@ -7216,7 +7216,9 @@ fn func(y: *i32) void {
       {#syntax#}T{#endsyntax#} must be a pointer, a {#syntax#}bool{#endsyntax#}, a float,
       an integer or an enum.
       </p>
+      {#see_also|@atomicStore|@atomicRmw|@fence|@cmpxchgWeak|@cmpxchgStrong#}
       {#header_close#}
+
       {#header_open|@atomicRmw#}
       <pre>{#syntax#}@atomicRmw(comptime T: type, ptr: *T, comptime op: builtin.AtomicRmwOp, operand: T, comptime ordering: builtin.AtomicOrder) T{#endsyntax#}</pre>
       <p>
@@ -7242,7 +7244,9 @@ fn func(y: *i32) void {
         <li>{#syntax#}.Max{#endsyntax#} - stores the operand if it is larger. Supports integers and floats.</li>
         <li>{#syntax#}.Min{#endsyntax#} - stores the operand if it is smaller. Supports integers and floats.</li>
       </ul>
+      {#see_also|@atomicStore|@atomicLoad|@fence|@cmpxchgWeak|@cmpxchgStrong#}
       {#header_close#}
+
       {#header_open|@atomicStore#}
       <pre>{#syntax#}@atomicStore(comptime T: type, ptr: *T, value: T, comptime ordering: builtin.AtomicOrder) void{#endsyntax#}</pre>
       <p>
@@ -7252,6 +7256,7 @@ fn func(y: *i32) void {
       {#syntax#}T{#endsyntax#} must be a pointer, a {#syntax#}bool{#endsyntax#}, a float,
       an integer or an enum.
       </p>
+      {#see_also|@atomicLoad|@atomicRmw|@fence|@cmpxchgWeak|@cmpxchgStrong#}
       {#header_close#}
 
       {#header_open|@bitCast#}
@@ -7540,8 +7545,9 @@ fn cmpxchgStrongButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_v
       an integer or an enum.
       </p>
       <p>{#syntax#}@typeInfo(@TypeOf(ptr)).Pointer.alignment{#endsyntax#} must be {#syntax#}>= @sizeOf(T).{#endsyntax#}</p>
-      {#see_also|Compile Variables|cmpxchgWeak#}
+      {#see_also|@atomicStore|@atomicLoad|@atomicRmw|@fence|@cmpxchgWeak#}
       {#header_close#}
+
       {#header_open|@cmpxchgWeak#}
       <pre>{#syntax#}@cmpxchgWeak(comptime T: type, ptr: *T, expected_value: T, new_value: T, success_order: AtomicOrder, fail_order: AtomicOrder) ?T{#endsyntax#}</pre>
       <p>
@@ -7569,7 +7575,7 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
       an integer or an enum.
       </p>
       <p>{#syntax#}@typeInfo(@TypeOf(ptr)).Pointer.alignment{#endsyntax#} must be {#syntax#}>= @sizeOf(T).{#endsyntax#}</p>
-      {#see_also|Compile Variables|cmpxchgStrong#}
+      {#see_also|@atomicStore|@atomicLoad|@atomicRmw|@fence|@cmpxchgStrong#}
       {#header_close#}
 
       {#header_open|@compileError#}
@@ -7849,7 +7855,7 @@ export fn @"A function name that is a complete sentence."() void {}
       <p>
       {#syntax#}AtomicOrder{#endsyntax#} can be found with {#syntax#}@import("std").builtin.AtomicOrder{#endsyntax#}.
       </p>
-      {#see_also|Compile Variables#}
+      {#see_also|@atomicStore|@atomicLoad|@atomicRmw|@cmpxchgWeak|@cmpxchgStrong#}
       {#header_close#}
 
       {#header_open|@field#}
src/codegen/llvm/bindings.zig
@@ -133,6 +133,9 @@ pub const Value = opaque {
 
     pub const constIntToPtr = LLVMConstIntToPtr;
     extern fn LLVMConstIntToPtr(ConstantVal: *const Value, ToType: *const Type) *const Value;
+
+    pub const setOrdering = LLVMSetOrdering;
+    extern fn LLVMSetOrdering(MemoryAccessInst: *const Value, Ordering: AtomicOrdering) void;
 };
 
 pub const Type = opaque {
@@ -167,6 +170,9 @@ pub const Type = opaque {
         ElementCount: c_uint,
         Packed: Bool,
     ) void;
+
+    pub const getTypeKind = LLVMGetTypeKind;
+    extern fn LLVMGetTypeKind(Ty: *const Type) TypeKind;
 };
 
 pub const Module = opaque {
@@ -477,6 +483,14 @@ pub const Builder = opaque {
         Name: [*:0]const u8,
     ) *const Value;
 
+    pub const buildIntToPtr = LLVMBuildIntToPtr;
+    extern fn LLVMBuildIntToPtr(
+        *const Builder,
+        Val: *const Value,
+        DestTy: *const Type,
+        Name: [*:0]const u8,
+    ) *const Value;
+
     pub const buildStructGEP = LLVMBuildStructGEP;
     extern fn LLVMBuildStructGEP(
         B: *const Builder,
@@ -530,6 +544,16 @@ pub const Builder = opaque {
         singleThread: Bool,
         Name: [*:0]const u8,
     ) *const Value;
+
+    pub const buildAtomicRmw = LLVMBuildAtomicRMW;
+    extern fn LLVMBuildAtomicRMW(
+        B: *const Builder,
+        op: AtomicRMWBinOp,
+        PTR: *const Value,
+        Val: *const Value,
+        ordering: AtomicOrdering,
+        singleThread: Bool,
+    ) *const Value;
 };
 
 pub const IntPredicate = enum(c_uint) {
@@ -901,3 +925,42 @@ pub const AtomicOrdering = enum(c_uint) {
     AcquireRelease = 6,
     SequentiallyConsistent = 7,
 };
+
+pub const AtomicRMWBinOp = enum(c_int) {
+    Xchg,
+    Add,
+    Sub,
+    And,
+    Nand,
+    Or,
+    Xor,
+    Max,
+    Min,
+    UMax,
+    UMin,
+    FAdd,
+    FSub,
+};
+
+pub const TypeKind = enum(c_int) {
+    Void,
+    Half,
+    Float,
+    Double,
+    X86_FP80,
+    FP128,
+    PPC_FP128,
+    Label,
+    Integer,
+    Function,
+    Struct,
+    Array,
+    Pointer,
+    Vector,
+    Metadata,
+    X86_MMX,
+    Token,
+    ScalableVector,
+    BFloat,
+    X86_AMX,
+};
src/codegen/c.zig
@@ -914,6 +914,13 @@ fn genBody(o: *Object, body: []const Air.Inst.Index) error{ AnalysisFail, OutOfM
             .array_to_slice   => try airArrayToSlice(o, inst),
             .cmpxchg_weak     => try airCmpxchg(o, inst, "weak"),
             .cmpxchg_strong   => try airCmpxchg(o, inst, "strong"),
+            .atomic_rmw       => try airAtomicRmw(o, inst),
+            .atomic_load      => try airAtomicLoad(o, inst),
+
+            .atomic_store_unordered => try airAtomicStore(o, inst, toMemoryOrder(.Unordered)),
+            .atomic_store_monotonic => try airAtomicStore(o, inst, toMemoryOrder(.Monotonic)),
+            .atomic_store_release   => try airAtomicStore(o, inst, toMemoryOrder(.Release)),
+            .atomic_store_seq_cst   => try airAtomicStore(o, inst, toMemoryOrder(.SeqCst)),
 
             .struct_field_ptr_index_0 => try airStructFieldPtrIndex(o, inst, 0),
             .struct_field_ptr_index_1 => try airStructFieldPtrIndex(o, inst, 1),
@@ -1917,8 +1924,61 @@ fn airCmpxchg(o: *Object, inst: Air.Inst.Index, flavor: [*:0]const u8) !CValue {
     return local;
 }
 
-fn writeMemoryOrder(w: anytype, order: std.builtin.AtomicOrder) !void {
-    const str = switch (order) {
+fn airAtomicRmw(o: *Object, inst: Air.Inst.Index) !CValue {
+    const pl_op = o.air.instructions.items(.data)[inst].pl_op;
+    const extra = o.air.extraData(Air.AtomicRmw, pl_op.payload).data;
+    const inst_ty = o.air.typeOfIndex(inst);
+    const ptr = try o.resolveInst(pl_op.operand);
+    const operand = try o.resolveInst(extra.operand);
+    const local = try o.allocLocal(inst_ty, .Const);
+    const writer = o.writer();
+
+    try writer.print(" = zig_atomicrmw_{s}(", .{toAtomicRmwSuffix(extra.op())});
+    try o.writeCValue(writer, ptr);
+    try writer.writeAll(", ");
+    try o.writeCValue(writer, operand);
+    try writer.writeAll(", ");
+    try writeMemoryOrder(writer, extra.ordering());
+    try writer.writeAll(");\n");
+
+    return local;
+}
+
+fn airAtomicLoad(o: *Object, inst: Air.Inst.Index) !CValue {
+    const atomic_load = o.air.instructions.items(.data)[inst].atomic_load;
+    const inst_ty = o.air.typeOfIndex(inst);
+    const ptr = try o.resolveInst(atomic_load.ptr);
+    const local = try o.allocLocal(inst_ty, .Const);
+    const writer = o.writer();
+
+    try writer.writeAll(" = zig_atomic_load(");
+    try o.writeCValue(writer, ptr);
+    try writer.writeAll(", ");
+    try writeMemoryOrder(writer, atomic_load.order);
+    try writer.writeAll(");\n");
+
+    return local;
+}
+
+fn airAtomicStore(o: *Object, inst: Air.Inst.Index, order: [*:0]const u8) !CValue {
+    const bin_op = o.air.instructions.items(.data)[inst].bin_op;
+    const ptr = try o.resolveInst(bin_op.lhs);
+    const element = try o.resolveInst(bin_op.rhs);
+    const inst_ty = o.air.typeOfIndex(inst);
+    const local = try o.allocLocal(inst_ty, .Const);
+    const writer = o.writer();
+
+    try writer.writeAll(" = zig_atomic_store(");
+    try o.writeCValue(writer, ptr);
+    try writer.writeAll(", ");
+    try o.writeCValue(writer, element);
+    try writer.print(", {s});\n", .{order});
+
+    return local;
+}
+
+fn toMemoryOrder(order: std.builtin.AtomicOrder) [:0]const u8 {
+    return switch (order) {
         .Unordered => "memory_order_relaxed",
        .Monotonic => "memory_order_relaxed",
         .Acquire => "memory_order_acquire",
@@ -1926,7 +1986,24 @@ fn writeMemoryOrder(w: anytype, order: std.builtin.AtomicOrder) !void {
         .AcqRel => "memory_order_acq_rel",
         .SeqCst => "memory_order_seq_cst",
     };
-    return w.writeAll(str);
+}
+
+fn writeMemoryOrder(w: anytype, order: std.builtin.AtomicOrder) !void {
+    return w.writeAll(toMemoryOrder(order));
+}
+
+fn toAtomicRmwSuffix(order: std.builtin.AtomicRmwOp) []const u8 {
+    return switch (order) {
+        .Xchg => "xchg",
+        .Add => "add",
+        .Sub => "sub",
+        .And => "and",
+        .Nand => "nand",
+        .Or => "or",
+        .Xor => "xor",
+        .Max => "max",
+        .Min => "min",
+    };
 }
 
 fn IndentWriter(comptime UnderlyingWriter: type) type {
src/codegen/llvm.zig
@@ -1060,6 +1060,13 @@ pub const FuncGen = struct {
                 .cmpxchg_weak   => try self.airCmpxchg(inst, true),
                 .cmpxchg_strong => try self.airCmpxchg(inst, false),
                 .fence          => try self.airFence(inst),
+                .atomic_rmw     => try self.airAtomicRmw(inst),
+                .atomic_load    => try self.airAtomicLoad(inst),
+
+                .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
+                .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
+                .atomic_store_release   => try self.airAtomicStore(inst, .Release),
+                .atomic_store_seq_cst   => try self.airAtomicStore(inst, .SequentiallyConsistent),
 
                 .struct_field_ptr => try self.airStructFieldPtr(inst),
                 .struct_field_val => try self.airStructFieldVal(inst),
@@ -1983,20 +1990,19 @@ pub const FuncGen = struct {
     fn airStore(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const dest_ptr = try self.resolveInst(bin_op.lhs);
+        const ptr_ty = self.air.typeOf(bin_op.lhs);
         const src_operand = try self.resolveInst(bin_op.rhs);
-        // TODO set volatile on this store properly
-        _ = self.builder.buildStore(src_operand, dest_ptr);
+        _ = self.store(dest_ptr, ptr_ty, src_operand);
         return null;
     }
 
     fn airLoad(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
-        const is_volatile = self.air.typeOf(ty_op.operand).isVolatilePtr();
-        if (!is_volatile and self.liveness.isUnused(inst))
+        const ptr_ty = self.air.typeOf(ty_op.operand);
+        if (!ptr_ty.isVolatilePtr() and self.liveness.isUnused(inst))
             return null;
         const ptr = try self.resolveInst(ty_op.operand);
-        // TODO set volatile on this load properly
-        return self.builder.buildLoad(ptr, "");
+        return self.load(ptr, ptr_ty);
     }
 
     fn airBreakpoint(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
@@ -2033,14 +2039,12 @@ pub const FuncGen = struct {
                 new_value = self.builder.buildZExt(new_value, abi_ty, "");
             }
         }
-        const success_order = toLlvmAtomicOrdering(extra.successOrder());
-        const failure_order = toLlvmAtomicOrdering(extra.failureOrder());
         const result = self.builder.buildCmpXchg(
             ptr,
             expected_value,
             new_value,
-            success_order,
-            failure_order,
+            toLlvmAtomicOrdering(extra.successOrder()),
+            toLlvmAtomicOrdering(extra.failureOrder()),
             is_weak,
             self.single_threaded,
         );
@@ -2066,6 +2070,109 @@ pub const FuncGen = struct {
         return self.builder.buildInsertValue(partial, non_null_bit, 1, "");
     }
 
+    fn airAtomicRmw(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+        const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+        const extra = self.air.extraData(Air.AtomicRmw, pl_op.payload).data;
+        const ptr = try self.resolveInst(pl_op.operand);
+        const ptr_ty = self.air.typeOf(pl_op.operand);
+        const operand_ty = ptr_ty.elemType();
+        const operand = try self.resolveInst(extra.operand);
+        const is_signed_int = operand_ty.isSignedInt();
+        const is_float = operand_ty.isFloat();
+        const op = toLlvmAtomicRmwBinOp(extra.op(), is_signed_int, is_float);
+        const ordering = toLlvmAtomicOrdering(extra.ordering());
+        const single_threaded = llvm.Bool.fromBool(self.single_threaded);
+        const opt_abi_ty = self.dg.getAtomicAbiType(operand_ty, op == .Xchg);
+        if (opt_abi_ty) |abi_ty| {
+            // operand needs widening and truncating or bitcasting.
+            const casted_ptr = self.builder.buildBitCast(ptr, abi_ty.pointerType(0), "");
+            const casted_operand = if (is_float)
+                self.builder.buildBitCast(operand, abi_ty, "")
+            else if (is_signed_int)
+                self.builder.buildSExt(operand, abi_ty, "")
+            else
+                self.builder.buildZExt(operand, abi_ty, "");
+
+            const uncasted_result = self.builder.buildAtomicRmw(
+                op,
+                casted_ptr,
+                casted_operand,
+                ordering,
+                single_threaded,
+            );
+            const operand_llvm_ty = try self.dg.llvmType(operand_ty);
+            if (is_float) {
+                return self.builder.buildBitCast(uncasted_result, operand_llvm_ty, "");
+            } else {
+                return self.builder.buildTrunc(uncasted_result, operand_llvm_ty, "");
+            }
+        }
+
+        if (operand.typeOf().getTypeKind() != .Pointer) {
+            return self.builder.buildAtomicRmw(op, ptr, operand, ordering, single_threaded);
+        }
+
+        // It's a pointer but we need to treat it as an int.
+        const usize_llvm_ty = try self.dg.llvmType(Type.initTag(.usize));
+        const casted_ptr = self.builder.buildBitCast(ptr, usize_llvm_ty.pointerType(0), "");
+        const casted_operand = self.builder.buildPtrToInt(operand, usize_llvm_ty, "");
+        const uncasted_result = self.builder.buildAtomicRmw(
+            op,
+            casted_ptr,
+            casted_operand,
+            ordering,
+            single_threaded,
+        );
+        const operand_llvm_ty = try self.dg.llvmType(operand_ty);
+        return self.builder.buildIntToPtr(uncasted_result, operand_llvm_ty, "");
+    }
+
+    fn airAtomicLoad(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+        const atomic_load = self.air.instructions.items(.data)[inst].atomic_load;
+        const ptr = try self.resolveInst(atomic_load.ptr);
+        const ptr_ty = self.air.typeOf(atomic_load.ptr);
+        const ordering = toLlvmAtomicOrdering(atomic_load.order);
+        const operand_ty = ptr_ty.elemType();
+        const opt_abi_ty = self.dg.getAtomicAbiType(operand_ty, false);
+
+        if (opt_abi_ty) |abi_ty| {
+            // operand needs widening and truncating
+            const casted_ptr = self.builder.buildBitCast(ptr, abi_ty.pointerType(0), "");
+            const load_inst = self.load(casted_ptr, ptr_ty);
+            load_inst.setOrdering(ordering);
+            return self.builder.buildTrunc(load_inst, try self.dg.llvmType(operand_ty), "");
+        }
+        const load_inst = self.load(ptr, ptr_ty);
+        load_inst.setOrdering(ordering);
+        return load_inst;
+    }
+
+    fn airAtomicStore(
+        self: *FuncGen,
+        inst: Air.Inst.Index,
+        ordering: llvm.AtomicOrdering,
+    ) !?*const llvm.Value {
+        const bin_op = self.air.instructions.items(.data)[inst].bin_op;
+        var ptr = try self.resolveInst(bin_op.lhs);
+        const ptr_ty = self.air.typeOf(bin_op.lhs);
+        var element = try self.resolveInst(bin_op.rhs);
+        const operand_ty = ptr_ty.elemType();
+        const opt_abi_ty = self.dg.getAtomicAbiType(operand_ty, false);
+
+        if (opt_abi_ty) |abi_ty| {
+            // operand needs widening
+            ptr = self.builder.buildBitCast(ptr, abi_ty.pointerType(0), "");
+            if (operand_ty.isSignedInt()) {
+                element = self.builder.buildSExt(element, abi_ty, "");
+            } else {
+                element = self.builder.buildZExt(element, abi_ty, "");
+            }
+        }
+        const store_inst = self.store(ptr, ptr_ty, element);
+        store_inst.setOrdering(ordering);
+        return null;
+    }
+
     fn getIntrinsic(self: *FuncGen, name: []const u8) *const llvm.Value {
         const id = llvm.lookupIntrinsicID(name.ptr, name.len);
         assert(id != 0);
@@ -2074,6 +2181,21 @@ pub const FuncGen = struct {
         //       `getIntrinsicDeclaration`
         return self.llvmModule().getIntrinsicDeclaration(id, null, 0);
     }
+
+    fn load(self: *FuncGen, ptr: *const llvm.Value, ptr_ty: Type) *const llvm.Value {
+        _ = ptr_ty; // TODO set volatile and alignment on this load properly
+        return self.builder.buildLoad(ptr, "");
+    }
+
+    fn store(
+        self: *FuncGen,
+        ptr: *const llvm.Value,
+        ptr_ty: Type,
+        elem: *const llvm.Value,
+    ) *const llvm.Value {
+        _ = ptr_ty; // TODO set volatile and alignment on this store properly
+        return self.builder.buildStore(elem, ptr);
+    }
 };
 
 fn initializeLLVMTarget(arch: std.Target.Cpu.Arch) void {
@@ -2227,3 +2349,21 @@ fn toLlvmAtomicOrdering(atomic_order: std.builtin.AtomicOrder) llvm.AtomicOrderi
         .SeqCst => .SequentiallyConsistent,
     };
 }
+
+fn toLlvmAtomicRmwBinOp(
+    op: std.builtin.AtomicRmwOp,
+    is_signed: bool,
+    is_float: bool,
+) llvm.AtomicRMWBinOp {
+    return switch (op) {
+        .Xchg => .Xchg,
+        .Add => if (is_float) llvm.AtomicRMWBinOp.FAdd else return .Add,
+        .Sub => if (is_float) llvm.AtomicRMWBinOp.FSub else return .Sub,
+        .And => .And,
+        .Nand => .Nand,
+        .Or => .Or,
+        .Xor => .Xor,
+        .Max => if (is_signed) llvm.AtomicRMWBinOp.Max else return .UMax,
+        .Min => if (is_signed) llvm.AtomicRMWBinOp.Min else return .UMin,
+    };
+}
src/link/C/zig.h
@@ -62,16 +62,61 @@
 
 #if __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)
 #include <stdatomic.h>
-#define zig_cmpxchg_strong(obj, expected, desired, succ, fail) atomic_compare_exchange_strong_explicit(obj, expected, desired, succ, fail)
-#define zig_cmpxchg_weak(obj, expected, desired, succ, fail) atomic_compare_exchange_weak_explicit(obj, expected, desired, succ, fail)
+#define zig_cmpxchg_strong(obj, expected, desired, succ, fail) atomic_compare_exchange_strong_explicit(obj, &(expected), desired, succ, fail)
+#define zig_cmpxchg_weak(obj, expected, desired, succ, fail) atomic_compare_exchange_weak_explicit(obj, &(expected), desired, succ, fail)
+#define zig_atomicrmw_xchg(obj, arg, order) atomic_exchange_explicit  (obj, arg, order)
+#define zig_atomicrmw_add(obj, arg, order) atomic_fetch_add_explicit(obj, arg, order)
+#define zig_atomicrmw_sub(obj, arg, order) atomic_fetch_sub_explicit(obj, arg, order)
+#define zig_atomicrmw_or(obj, arg, order) atomic_fetch_or_explicit(obj, arg, order)
+#define zig_atomicrmw_xor(obj, arg, order) atomic_fetch_xor_explicit(obj, arg, order)
+#define zig_atomicrmw_and(obj, arg, order) atomic_fetch_and_explicit(obj, arg, order)
+#define zig_atomicrmw_nand(obj, arg, order) atomic_fetch_nand_explicit(obj, arg, order)
+#define zig_atomicrmw_min(obj, arg, order) atomic_fetch_min_explicit(obj, arg, order)
+#define zig_atomicrmw_max(obj, arg, order) atomic_fetch_max_explicit(obj, arg, order)
+#define zig_atomic_store(obj, arg, order) atomic_store_explicit(obj, arg, order)
+#define zig_atomic_load(obj, order) atomic_load_explicit(obj, order)
 #define zig_fence(order) atomic_thread_fence(order)
 #elif __GNUC__
-#define zig_cmpxchg_strong(obj, expected, desired, succ, fail) __sync_val_compare_and_swap(obj, expected, desired)
-#define zig_cmpxchg_weak(obj, expected, desired, succ, fail) __sync_val_compare_and_swap(obj, expected, desired)
-#define zig_fence(order) __sync_synchronize(order)
+#define memory_order_relaxed __ATOMIC_RELAXED
+#define memory_order_consume __ATOMIC_CONSUME
+#define memory_order_acquire __ATOMIC_ACQUIRE
+#define memory_order_release __ATOMIC_RELEASE
+#define memory_order_acq_rel __ATOMIC_ACQ_REL
+#define memory_order_seq_cst __ATOMIC_SEQ_CST
+#define zig_cmpxchg_strong(obj, expected, desired, succ, fail) __atomic_compare_exchange_n(obj, &(expected), desired, false, succ, fail)
+#define zig_cmpxchg_weak(obj, expected, desired, succ, fail) __atomic_compare_exchange_n(obj, &(expected), desired, true, succ, fail)
+#define zig_atomicrmw_xchg(obj, arg, order) __atomic_exchange_n(obj, arg, order)
+#define zig_atomicrmw_add(obj, arg, order) __atomic_fetch_add(obj, arg, order)
+#define zig_atomicrmw_sub(obj, arg, order) __atomic_fetch_sub(obj, arg, order)
+#define zig_atomicrmw_or(obj, arg, order) __atomic_fetch_or(obj, arg, order)
+#define zig_atomicrmw_xor(obj, arg, order) __atomic_fetch_xor(obj, arg, order)
+#define zig_atomicrmw_and(obj, arg, order) __atomic_fetch_and(obj, arg, order)
+#define zig_atomicrmw_nand(obj, arg, order) __atomic_fetch_nand(obj, arg, order)
+#define zig_atomicrmw_min(obj, arg, order) __atomic_fetch_min(obj, arg, order)
+#define zig_atomicrmw_max(obj, arg, order) __atomic_fetch_max(obj, arg, order)
+#define zig_atomic_store(obj, arg, order) __atomic_store_n(obj, arg, order)
+#define zig_atomic_load(obj, order) __atomic_load_n(obj, order)
+#define zig_fence(order) __atomic_thread_fence(order)
 #else
+#define memory_order_relaxed 0
+#define memory_order_consume 1
+#define memory_order_acquire 2
+#define memory_order_release 3
+#define memory_order_acq_rel 4
+#define memory_order_seq_cst 5
 #define zig_cmpxchg_strong(obj, expected, desired, succ, fail) zig_unimplemented()
-#define zig_cmpxchg_weak(obj, expected, desired, succ, fail) zig_unimplemented()
+#define zig_cmpxchg_weak(obj, expected, desired, succ, fail) zig_unimplemented()
+#define zig_atomicrmw_xchg(obj, arg, order) zig_unimplemented()
+#define zig_atomicrmw_add(obj, arg, order) zig_unimplemented()
+#define zig_atomicrmw_sub(obj, arg, order) zig_unimplemented()
+#define zig_atomicrmw_or(obj, arg, order) zig_unimplemented()
+#define zig_atomicrmw_xor(obj, arg, order) zig_unimplemented()
+#define zig_atomicrmw_and(obj, arg, order) zig_unimplemented()
+#define zig_atomicrmw_nand(obj, arg, order) zig_unimplemented()
+#define zig_atomicrmw_min(obj, arg, order) zig_unimplemented()
+#define zig_atomicrmw_max(obj, arg, order) zig_unimplemented()
+#define zig_atomic_store(obj, arg, order) zig_unimplemented()
+#define zig_atomic_load(obj, order) zig_unimplemented()
 #define zig_fence(order) zig_unimplemented()
 #endif
 
src/Air.zig
@@ -127,14 +127,11 @@ pub const Inst = struct {
         /// Lowers to a hardware trap instruction, or the next best thing.
         /// Result type is always void.
         breakpoint,
-        /// Lowers to a memory fence instruction.
-        /// Result type is always void.
-        /// Uses the `fence` field.
-        fence,
         /// Function call.
         /// Result type is the return type of the function being called.
         /// Uses the `pl_op` field with the `Call` payload. operand is the callee.
         call,
+
         /// `<`. Result type is always bool.
         /// Uses the `bin_op` field.
         cmp_lt,
@@ -153,6 +150,7 @@ pub const Inst = struct {
         /// `!=`. Result type is always bool.
         /// Uses the `bin_op` field.
         cmp_neq,
+
         /// Conditional branch.
         /// Result type is always noreturn; no instructions in a block follow this one.
         /// Uses the `pl_op` field. Operand is the condition. Payload is `CondBr`.
@@ -313,10 +311,33 @@ pub const Inst = struct {
         /// Given a pointer to an array, return a slice.
         /// Uses the `ty_op` field.
         array_to_slice,
+
         /// Uses the `ty_pl` field with payload `Cmpxchg`.
         cmpxchg_weak,
         /// Uses the `ty_pl` field with payload `Cmpxchg`.
         cmpxchg_strong,
+        /// Lowers to a memory fence instruction.
+        /// Result type is always void.
+        /// Uses the `fence` field.
+        fence,
+        /// Atomically load from a pointer.
+        /// Result type is the element type of the pointer.
+        /// Uses the `atomic_load` field.
+        atomic_load,
+        /// Atomically store through a pointer.
+        /// Result type is always `void`.
+        /// Uses the `bin_op` field. LHS is pointer, RHS is element.
+        atomic_store_unordered,
+        /// Same as `atomic_store_unordered` but with `AtomicOrder.Monotonic`.
+        atomic_store_monotonic,
+        /// Same as `atomic_store_unordered` but with `AtomicOrder.Release`.
+        atomic_store_release,
+        /// Same as `atomic_store_unordered` but with `AtomicOrder.SeqCst`.
+        atomic_store_seq_cst,
+        /// Atomically read-modify-write via a pointer.
+        /// Result type is the element type of the pointer.
+        /// Uses the `pl_op` field with payload `AtomicRmw`. Operand is `ptr`.
+        atomic_rmw,
 
         pub fn fromCmpOp(op: std.math.CompareOperator) Tag {
             return switch (op) {
@@ -385,6 +406,10 @@ pub const Inst = struct {
             column: u32,
         },
         fence: std.builtin.AtomicOrder,
+        atomic_load: struct {
+            ptr: Ref,
+            order: std.builtin.AtomicOrder,
+        },
 
         // Make sure we don't accidentally add a field to make this union
         // bigger than expected. Note that in Debug builds, Zig is allowed
@@ -469,6 +494,21 @@ pub const Cmpxchg = struct {
     }
 };
 
+pub const AtomicRmw = struct {
+    operand: Inst.Ref,
+    /// 0b00000000000000000000000000000XXX - ordering
+    /// 0b0000000000000000000000000XXXX000 - op
+    flags: u32,
+
+    pub fn ordering(self: AtomicRmw) std.builtin.AtomicOrder {
+        return @intToEnum(std.builtin.AtomicOrder, @truncate(u3, self.flags));
+    }
+
+    pub fn op(self: AtomicRmw) std.builtin.AtomicRmwOp {
+        return @intToEnum(std.builtin.AtomicRmwOp, @truncate(u4, self.flags >> 3));
+    }
+};
+
 pub fn getMainBody(air: Air) []const Air.Inst.Index {
     const body_index = air.extra[@enumToInt(ExtraIndex.main_block)];
     const extra = air.extraData(Block, body_index);
@@ -572,6 +612,10 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
         .dbg_stmt,
         .store,
         .fence,
+        .atomic_store_unordered,
+        .atomic_store_monotonic,
+        .atomic_store_release,
+        .atomic_store_seq_cst,
         => return Type.initTag(.void),
 
         .ptrtoint,
@@ -594,6 +638,14 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
             const inner_ptr_ty = outer_ptr_ty.elemType();
             return inner_ptr_ty.elemType();
         },
+        .atomic_load => {
+            const ptr_ty = air.typeOf(datas[inst].atomic_load.ptr);
+            return ptr_ty.elemType();
+        },
+        .atomic_rmw => {
+            const ptr_ty = air.typeOf(datas[inst].pl_op.operand);
+            return ptr_ty.elemType();
+        },
     }
 }
 
src/AstGen.zig
@@ -7316,6 +7316,7 @@ fn builtinCall(
         
         .atomic_load => {
             const int_type = try typeExpr(gz, scope, params[0]);
+            // TODO allow this pointer type to be volatile
             const ptr_type = try gz.add(.{ .tag = .ptr_type_simple, .data = .{
                 .ptr_type_simple = .{
                     .is_allowzero = false,
@@ -7325,16 +7326,17 @@ fn builtinCall(
                     .elem_type = int_type,
                 },
             } });
-            const ptr = try expr(gz, scope, .{ .ty = ptr_type }, params[1]);
-            const ordering = try expr(gz, scope, .{ .ty = .atomic_order_type }, params[2]);
             const result = try gz.addPlNode(.atomic_load, node, Zir.Inst.Bin{
-                .lhs = ptr,
-                .rhs = ordering,
+                // zig fmt: off
+                .lhs = try expr(gz, scope, .{ .coerced_ty = ptr_type },           params[1]),
+                .rhs = try expr(gz, scope, .{ .coerced_ty = .atomic_order_type }, params[2]),
+                // zig fmt: on
             });
             return rvalue(gz, rl, result, node);
         },
         .atomic_rmw => {
             const int_type = try typeExpr(gz, scope, params[0]);
+            // TODO allow this pointer type to be volatile
             const ptr_type = try gz.add(.{ .tag = .ptr_type_simple, .data = .{
                 .ptr_type_simple = .{
                     .is_allowzero = false,
@@ -7344,20 +7346,19 @@ fn builtinCall(
                     .elem_type = int_type,
                 },
             } });
-            const ptr = try expr(gz, scope, .{ .ty = ptr_type }, params[1]);
-            const operation = try expr(gz, scope, .{ .ty = .atomic_rmw_op_type }, params[2]);
-            const operand = try expr(gz, scope, .{ .ty = int_type }, params[3]);
-            const ordering = try expr(gz, scope, .{ .ty = .atomic_order_type }, params[4]);
             const result = try gz.addPlNode(.atomic_rmw, node, Zir.Inst.AtomicRmw{
-                .ptr = ptr,
-                .operation = operation,
-                .operand = operand,
-                .ordering = ordering,
+                // zig fmt: off
+                .ptr       = try expr(gz, scope, .{ .coerced_ty = ptr_type },            params[1]),
+                .operation = try expr(gz, scope, .{ .coerced_ty = .atomic_rmw_op_type }, params[2]),
+                .operand   = try expr(gz, scope, .{ .coerced_ty = int_type },            params[3]),
+                .ordering  = try expr(gz, scope, .{ .coerced_ty = .atomic_order_type },  params[4]),
+                // zig fmt: on
             });
             return rvalue(gz, rl, result, node);
         },
         .atomic_store => {
             const int_type = try typeExpr(gz, scope, params[0]);
+            // TODO allow this pointer type to be volatile
             const ptr_type = try gz.add(.{ .tag = .ptr_type_simple, .data = .{
                 .ptr_type_simple = .{
                     .is_allowzero = false,
@@ -7367,13 +7368,12 @@ fn builtinCall(
                     .elem_type = int_type,
                 },
             } });
-            const ptr = try expr(gz, scope, .{ .ty = ptr_type }, params[1]);
-            const operand = try expr(gz, scope, .{ .ty = int_type }, params[2]);
-            const ordering = try expr(gz, scope, .{ .ty = .atomic_order_type }, params[3]);
             const result = try gz.addPlNode(.atomic_store, node, Zir.Inst.AtomicStore{
-                .ptr = ptr,
-                .operand = operand,
-                .ordering = ordering,
+                // zig fmt: off
+                .ptr      = try expr(gz, scope, .{ .coerced_ty = ptr_type },           params[1]),
+                .operand  = try expr(gz, scope, .{ .coerced_ty = int_type },           params[2]),
+                .ordering = try expr(gz, scope, .{ .coerced_ty = .atomic_order_type }, params[3]),
+                // zig fmt: on
             });
             return rvalue(gz, rl, result, node);
         },
@@ -7456,12 +7456,11 @@ fn builtinCall(
         },
         .Vector => {
             const result = try gz.addPlNode(.vector_type, node, Zir.Inst.Bin{
-                .lhs = try comptimeExpr(gz, scope, .{.ty = .u32_type}, params[0]),
+                .lhs = try comptimeExpr(gz, scope, .{ .ty = .u32_type }, params[0]),
                 .rhs = try typeExpr(gz, scope, params[1]),
             });
             return rvalue(gz, rl, result, node);
         },
-
     }
     // zig fmt: on
 }
src/codegen.zig
@@ -860,6 +860,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     .array_to_slice  => try self.airArrayToSlice(inst),
                     .cmpxchg_strong  => try self.airCmpxchg(inst),
                     .cmpxchg_weak    => try self.airCmpxchg(inst),
+                    .atomic_rmw      => try self.airAtomicRmw(inst),
+                    .atomic_load     => try self.airAtomicLoad(inst),
+
+                    .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
+                    .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
+                    .atomic_store_release   => try self.airAtomicStore(inst, .Release),
+                    .atomic_store_seq_cst   => try self.airAtomicStore(inst, .SeqCst),
 
                     .struct_field_ptr_index_0 => try self.airStructFieldPtrIndex(inst, 0),
                     .struct_field_ptr_index_1 => try self.airStructFieldPtrIndex(inst, 1),
@@ -4773,6 +4780,22 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value });
         }
 
+        fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
+            _ = inst;
+            return self.fail("TODO implement airAtomicRmw for {}", .{self.target.cpu.arch});
+        }
+
+        fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void {
+            _ = inst;
+            return self.fail("TODO implement airAtomicLoad for {}", .{self.target.cpu.arch});
+        }
+
+        fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void {
+            _ = inst;
+            _ = order;
+            return self.fail("TODO implement airAtomicStore for {}", .{self.target.cpu.arch});
+        }
+
         fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
             // First section of indexes correspond to a set number of constant values.
             const ref_int = @enumToInt(inst);
src/Liveness.zig
@@ -252,6 +252,10 @@ fn analyzeInst(
         .ptr_ptr_elem_val,
         .shl,
         .shr,
+        .atomic_store_unordered,
+        .atomic_store_monotonic,
+        .atomic_store_release,
+        .atomic_store_seq_cst,
         => {
             const o = inst_datas[inst].bin_op;
             return trackOperands(a, new_set, inst, main_tomb, .{ o.lhs, o.rhs, .none });
@@ -345,6 +349,15 @@ fn analyzeInst(
             const extra = a.air.extraData(Air.Cmpxchg, inst_datas[inst].ty_pl.payload).data;
             return trackOperands(a, new_set, inst, main_tomb, .{ extra.ptr, extra.expected_value, extra.new_value });
         },
+        .atomic_load => {
+            const ptr = inst_datas[inst].atomic_load.ptr;
+            return trackOperands(a, new_set, inst, main_tomb, .{ ptr, .none, .none });
+        },
+        .atomic_rmw => {
+            const pl_op = inst_datas[inst].pl_op;
+            const extra = a.air.extraData(Air.AtomicRmw, pl_op.payload).data;
+            return trackOperands(a, new_set, inst, main_tomb, .{ pl_op.operand, extra.operand, .none });
+        },
         .br => {
             const br = inst_datas[inst].br;
             return trackOperands(a, new_set, inst, main_tomb, .{ br.operand, .none, .none });
src/print_air.zig
@@ -193,6 +193,12 @@ const Writer = struct {
             .switch_br => try w.writeSwitchBr(s, inst),
             .cmpxchg_weak, .cmpxchg_strong => try w.writeCmpxchg(s, inst),
             .fence => try w.writeFence(s, inst),
+            .atomic_load => try w.writeAtomicLoad(s, inst),
+            .atomic_store_unordered => try w.writeAtomicStore(s, inst, .Unordered),
+            .atomic_store_monotonic => try w.writeAtomicStore(s, inst, .Monotonic),
+            .atomic_store_release => try w.writeAtomicStore(s, inst, .Release),
+            .atomic_store_seq_cst => try w.writeAtomicStore(s, inst, .SeqCst),
+            .atomic_rmw => try w.writeAtomicRmw(s, inst),
         }
     }
 
@@ -283,6 +289,36 @@ const Writer = struct {
         try s.print("{s}", .{@tagName(atomic_order)});
     }
 
+    fn writeAtomicLoad(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
+        const atomic_load = w.air.instructions.items(.data)[inst].atomic_load;
+
+        try w.writeOperand(s, inst, 0, atomic_load.ptr);
+        try s.print(", {s}", .{@tagName(atomic_load.order)});
+    }
+
+    fn writeAtomicStore(
+        w: *Writer,
+        s: anytype,
+        inst: Air.Inst.Index,
+        order: std.builtin.AtomicOrder,
+    ) @TypeOf(s).Error!void {
+        const bin_op = w.air.instructions.items(.data)[inst].bin_op;
+        try w.writeOperand(s, inst, 0, bin_op.lhs);
+        try s.writeAll(", ");
+        try w.writeOperand(s, inst, 1, bin_op.rhs);
+        try s.print(", {s}", .{@tagName(order)});
+    }
+
+    fn writeAtomicRmw(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
+        const pl_op = w.air.instructions.items(.data)[inst].pl_op;
+        const extra = w.air.extraData(Air.AtomicRmw, pl_op.payload).data;
+
+        try w.writeOperand(s, inst, 0, pl_op.operand);
+        try s.writeAll(", ");
+        try w.writeOperand(s, inst, 1, extra.operand);
+        try s.print(", {s}, {s}", .{ @tagName(extra.op()), @tagName(extra.ordering()) });
+    }
+
     fn writeConstant(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
         const ty_pl = w.air.instructions.items(.data)[inst].ty_pl;
         const val = w.air.values[ty_pl.payload];
src/Sema.zig
@@ -7549,6 +7549,19 @@ fn resolveAtomicOrder(
     return val.toEnum(std.builtin.AtomicOrder);
 }
 
+fn resolveAtomicRmwOp(
+    sema: *Sema,
+    block: *Scope.Block,
+    src: LazySrcLoc,
+    zir_ref: Zir.Inst.Ref,
+) CompileError!std.builtin.AtomicRmwOp {
+    const atomic_rmw_op_ty = try sema.getBuiltinType(block, src, "AtomicRmwOp");
+    const air_ref = sema.resolveInst(zir_ref);
+    const coerced = try sema.coerce(block, atomic_rmw_op_ty, air_ref, src);
+    const val = try sema.resolveConstValue(block, src, coerced);
+    return val.toEnum(std.builtin.AtomicRmwOp);
+}
+
 fn zirCmpxchg(
     sema: *Sema,
     block: *Scope.Block,
@@ -7664,14 +7677,108 @@ fn zirSelect(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileErro
 
 fn zirAtomicLoad(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
     const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
-    const src = inst_data.src();
-    return sema.mod.fail(&block.base, src, "TODO: Sema.zirAtomicLoad", .{});
+    const extra = sema.code.extraData(Zir.Inst.Bin, inst_data.payload_index).data;
+    // zig fmt: off
+    const elem_ty_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node };
+    const ptr_src    : LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node };
+    const order_src  : LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node };
+    // zig fmt: on
+    const ptr = sema.resolveInst(extra.lhs);
+    const elem_ty = sema.typeOf(ptr).elemType();
+    try sema.checkAtomicOperandType(block, elem_ty_src, elem_ty);
+    const order = try sema.resolveAtomicOrder(block, order_src, extra.rhs);
+
+    switch (order) {
+        .Release, .AcqRel => {
+            return sema.mod.fail(
+                &block.base,
+                order_src,
+                "@atomicLoad atomic ordering must not be Release or AcqRel",
+                .{},
+            );
+        },
+        else => {},
+    }
+
+    if (try sema.resolveDefinedValue(block, ptr_src, ptr)) |ptr_val| {
+        if (try ptr_val.pointerDeref(sema.arena)) |elem_val| {
+            return sema.addConstant(elem_ty, elem_val);
+        }
+    }
+
+    try sema.requireRuntimeBlock(block, ptr_src);
+    return block.addInst(.{
+        .tag = .atomic_load,
+        .data = .{ .atomic_load = .{
+            .ptr = ptr,
+            .order = order,
+        } },
+    });
 }
 
 fn zirAtomicRmw(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
+    const mod = sema.mod;
     const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
+    const extra = sema.code.extraData(Zir.Inst.AtomicRmw, inst_data.payload_index).data;
     const src = inst_data.src();
-    return sema.mod.fail(&block.base, src, "TODO: Sema.zirAtomicRmw", .{});
+    // zig fmt: off
+    const operand_ty_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node };
+    const ptr_src       : LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node };
+    const op_src        : LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node };
+    const operand_src   : LazySrcLoc = .{ .node_offset_builtin_call_arg3 = inst_data.src_node };
+    const order_src     : LazySrcLoc = .{ .node_offset_builtin_call_arg4 = inst_data.src_node };
+    // zig fmt: on
+    const ptr = sema.resolveInst(extra.ptr);
+    const operand_ty = sema.typeOf(ptr).elemType();
+    try sema.checkAtomicOperandType(block, operand_ty_src, operand_ty);
+    const op = try sema.resolveAtomicRmwOp(block, op_src, extra.operation);
+
+    switch (operand_ty.zigTypeTag()) {
+        .Enum => if (op != .Xchg) {
+            return mod.fail(&block.base, op_src, "@atomicRmw with enum only allowed with .Xchg", .{});
+        },
+        .Bool => if (op != .Xchg) {
+            return mod.fail(&block.base, op_src, "@atomicRmw with bool only allowed with .Xchg", .{});
+        },
+        .Float => switch (op) {
+            .Xchg, .Add, .Sub => {},
+            else => return mod.fail(&block.base, op_src, "@atomicRmw with float only allowed with .Xchg, .Add, and .Sub", .{}),
+        },
+        else => {},
+    }
+    const operand = try sema.coerce(block, operand_ty, sema.resolveInst(extra.operand), operand_src);
+    const order = try sema.resolveAtomicOrder(block, order_src, extra.ordering);
+
+    if (order == .Unordered) {
+        return mod.fail(&block.base, order_src, "@atomicRmw atomic ordering must not be Unordered", .{});
+    }
+
+    // special case zero bit types
+    if (try sema.typeHasOnePossibleValue(block, operand_ty_src, operand_ty)) |val| {
+        return sema.addConstant(operand_ty, val);
+    }
+
+    const runtime_src = if (try sema.resolveDefinedValue(block, ptr_src, ptr)) |ptr_val| rs: {
+        if (try sema.resolveMaybeUndefVal(block, operand_src, operand)) |operand_val| {
+            _ = ptr_val;
+            _ = operand_val;
+            return mod.fail(&block.base, src, "TODO implement Sema for @atomicRmw at comptime", .{});
+        } else break :rs operand_src;
+    } else ptr_src;
+
+    const flags: u32 = @as(u32, @enumToInt(order)) | (@as(u32, @enumToInt(op)) << 3);
+
+    try sema.requireRuntimeBlock(block, runtime_src);
+    return block.addInst(.{
+        .tag = .atomic_rmw,
+        .data = .{ .pl_op = .{
+            .operand = ptr,
+            .payload = try sema.addExtra(Air.AtomicRmw{
+                .operand = operand,
+                .flags = flags,
+            }),
+        } },
+    });
 }
 
 fn zirAtomicStore(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -8848,7 +8955,7 @@ fn coerce(
     if (dest_type.eql(inst_ty))
         return inst;
 
-    const in_memory_result = coerceInMemoryAllowed(dest_type, inst_ty);
+    const in_memory_result = coerceInMemoryAllowed(dest_type, inst_ty, false);
     if (in_memory_result == .ok) {
         return sema.bitcast(block, dest_type, inst, inst_src);
     }
@@ -8890,11 +8997,12 @@ fn coerce(
                 const array_type = inst_ty.elemType();
                 if (array_type.zigTypeTag() != .Array) break :src_array_ptr;
                 const array_elem_type = array_type.elemType();
-                if (inst_ty.isConstPtr() and !dest_type.isConstPtr()) break :src_array_ptr;
+                const dest_is_mut = !dest_type.isConstPtr();
+                if (inst_ty.isConstPtr() and dest_is_mut) break :src_array_ptr;
                 if (inst_ty.isVolatilePtr() and !dest_type.isVolatilePtr()) break :src_array_ptr;
 
                 const dst_elem_type = dest_type.elemType();
-                switch (coerceInMemoryAllowed(dst_elem_type, array_elem_type)) {
+                switch (coerceInMemoryAllowed(dst_elem_type, array_elem_type, dest_is_mut)) {
                     .ok => {},
                     .no_match => break :src_array_ptr,
                 }
@@ -9001,10 +9109,80 @@ const InMemoryCoercionResult = enum {
     no_match,
 };
 
-fn coerceInMemoryAllowed(dest_type: Type, src_type: Type) InMemoryCoercionResult {
+/// If pointers have the same representation in runtime memory, a bitcast AIR instruction
+/// may be used for the coercion.
+/// * `const` attribute can be gained
+/// * `volatile` attribute can be gained
+/// * `allowzero` attribute can be gained (whether from explicit attribute, C pointer, or optional pointer) but only if !dest_is_mut
+/// * alignment can be decreased
+/// * bit offset attributes must match exactly
+/// * `*`/`[*]` must match exactly, but `[*c]` matches either one
+/// * sentinel-terminated pointers can coerce into `[*]`
+/// TODO improve this function to report recursive compile errors like it does in stage1.
+/// look at the function types_match_const_cast_only
+fn coerceInMemoryAllowed(dest_type: Type, src_type: Type, dest_is_mut: bool) InMemoryCoercionResult {
     if (dest_type.eql(src_type))
         return .ok;
 
+    if (dest_type.zigTypeTag() == .Pointer and
+        src_type.zigTypeTag() == .Pointer)
+    {
+        const dest_info = dest_type.ptrInfo().data;
+        const src_info = src_type.ptrInfo().data;
+
+        const child = coerceInMemoryAllowed(dest_info.pointee_type, src_info.pointee_type, dest_info.mutable);
+        if (child == .no_match) {
+            return child;
+        }
+
+        const ok_sent = dest_info.sentinel == null or src_info.size == .C or
+            (src_info.sentinel != null and
+            dest_info.sentinel.?.eql(src_info.sentinel.?, dest_info.pointee_type));
+        if (!ok_sent) {
+            return .no_match;
+        }
+
+        const ok_ptr_size = src_info.size == dest_info.size or
+            src_info.size == .C or dest_info.size == .C;
+        if (!ok_ptr_size) {
+            return .no_match;
+        }
+
+        const ok_cv_qualifiers =
+            (src_info.mutable or !dest_info.mutable) and
+            (!src_info.@"volatile" or dest_info.@"volatile");
+
+        if (!ok_cv_qualifiers) {
+            return .no_match;
+        }
+
+        const ok_allows_zero = (dest_info.@"allowzero" and
+            (src_info.@"allowzero" or !dest_is_mut)) or
+            (!dest_info.@"allowzero" and !src_info.@"allowzero");
+        if (!ok_allows_zero) {
+            return .no_match;
+        }
+
+        if (dest_type.hasCodeGenBits() != src_type.hasCodeGenBits()) {
+            return .no_match;
+        }
+
+        if (src_info.host_size != dest_info.host_size or
+            src_info.bit_offset != dest_info.bit_offset)
+        {
+            return .no_match;
+        }
+
+        assert(src_info.@"align" != 0);
+        assert(dest_info.@"align" != 0);
+
+        if (dest_info.@"align" > src_info.@"align") {
+            return .no_match;
+        }
+
+        return .ok;
+    }
+
     // TODO: implement more of this function
 
     return .no_match;
test/behavior/atomics.zig
@@ -30,3 +30,26 @@ test "fence" {
     @fence(.SeqCst);
     x = 5678;
 }
+
+test "atomicrmw and atomicload" {
+    var data: u8 = 200;
+    try testAtomicRmw(&data);
+    try expect(data == 42);
+    try testAtomicLoad(&data);
+}
+
+fn testAtomicRmw(ptr: *u8) !void {
+    const prev_value = @atomicRmw(u8, ptr, .Xchg, 42, .SeqCst);
+    try expect(prev_value == 200);
+    comptime {
+        var x: i32 = 1234;
+        const y: i32 = 12345;
+        try expect(@atomicLoad(i32, &x, .SeqCst) == 1234);
+        try expect(@atomicLoad(i32, &y, .SeqCst) == 12345);
+    }
+}
+
+fn testAtomicLoad(ptr: *u8) !void {
+    const x = @atomicLoad(u8, ptr, .SeqCst);
+    try expect(x == 42);
+}
test/behavior/atomics_stage1.zig
@@ -3,29 +3,6 @@ const expect = std.testing.expect;
 const expectEqual = std.testing.expectEqual;
 const builtin = @import("builtin");
 
-test "atomicrmw and atomicload" {
-    var data: u8 = 200;
-    try testAtomicRmw(&data);
-    try expect(data == 42);
-    try testAtomicLoad(&data);
-}
-
-fn testAtomicRmw(ptr: *u8) !void {
-    const prev_value = @atomicRmw(u8, ptr, .Xchg, 42, .SeqCst);
-    try expect(prev_value == 200);
-    comptime {
-        var x: i32 = 1234;
-        const y: i32 = 12345;
-        try expect(@atomicLoad(i32, &x, .SeqCst) == 1234);
-        try expect(@atomicLoad(i32, &y, .SeqCst) == 12345);
-    }
-}
-
-fn testAtomicLoad(ptr: *u8) !void {
-    const x = @atomicLoad(u8, ptr, .SeqCst);
-    try expect(x == 42);
-}
-
 test "cmpxchg with ptr" {
     var data1: i32 = 1234;
     var data2: i32 = 5678;