Commit 15c316b0d8

Andrew Kelley <andrew@ziglang.org>
2019-03-21 00:00:23
add docs for assembly and fix global assembly parsing
Previously, global assembly was parsed expecting it to have the template syntax. However, global assembly has no inputs, outputs, or clobbers, and thus does not have template syntax. This is now fixed. This commit also adds a compile error for using volatile on global assembly, since it is meaningless. closes #1515
1 parent 3c7555c
doc/docgen.zig
@@ -274,7 +274,7 @@ const Code = struct {
     is_inline: bool,
     mode: builtin.Mode,
     link_objects: []const []const u8,
-    target_windows: bool,
+    target_str: ?[]const u8,
     link_libc: bool,
 
     const Id = union(enum) {
@@ -491,7 +491,7 @@ fn genToc(allocator: *mem.Allocator, tokenizer: *Tokenizer) !Toc {
                     var mode = builtin.Mode.Debug;
                     var link_objects = std.ArrayList([]const u8).init(allocator);
                     defer link_objects.deinit();
-                    var target_windows = false;
+                    var target_str: ?[]const u8 = null;
                     var link_libc = false;
 
                     const source_token = while (true) {
@@ -506,7 +506,9 @@ fn genToc(allocator: *mem.Allocator, tokenizer: *Tokenizer) !Toc {
                             const obj_tok = try eatToken(tokenizer, Token.Id.TagContent);
                             try link_objects.append(tokenizer.buffer[obj_tok.start..obj_tok.end]);
                         } else if (mem.eql(u8, end_tag_name, "target_windows")) {
-                            target_windows = true;
+                            target_str = "x86_64-windows";
+                        } else if (mem.eql(u8, end_tag_name, "target_linux_x86_64")) {
+                            target_str = "x86_64-linux";
                         } else if (mem.eql(u8, end_tag_name, "link_libc")) {
                             link_libc = true;
                         } else if (mem.eql(u8, end_tag_name, "code_end")) {
@@ -526,7 +528,7 @@ fn genToc(allocator: *mem.Allocator, tokenizer: *Tokenizer) !Toc {
                             .is_inline = is_inline,
                             .mode = mode,
                             .link_objects = link_objects.toOwnedSlice(),
-                            .target_windows = target_windows,
+                            .target_str = target_str,
                             .link_libc = link_libc,
                         },
                     });
@@ -998,7 +1000,7 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
                 try io.writeFile(tmp_source_file_name, trimmed_raw_source);
 
                 switch (code.id) {
-                    Code.Id.Exe => |expected_outcome| {
+                    Code.Id.Exe => |expected_outcome| code_block: {
                         const name_plus_bin_ext = try std.fmt.allocPrint(allocator, "{}{}", code.name, exe_ext);
                         const tmp_bin_file_name = try os.path.join(
                             allocator,
@@ -1046,8 +1048,20 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
                             try build_args.append("c");
                             try out.print(" --library c");
                         }
+                        if (code.target_str) |triple| {
+                            try build_args.appendSlice([][]const u8{ "-target", triple });
+                        }
                         _ = exec(allocator, &env_map, build_args.toSliceConst()) catch return parseError(tokenizer, code.source_token, "example failed to compile");
 
+                        if (code.target_str) |triple| {
+                            if (mem.startsWith(u8, triple, "x86_64-linux") and
+                                (builtin.os != builtin.Os.linux or builtin.arch != builtin.Arch.x86_64))
+                            {
+                                // skip execution
+                                break :code_block;
+                            }
+                        }
+
                         const run_args = [][]const u8{tmp_bin_file_name};
 
                         const result = if (expected_outcome == ExpectedOutcome.Fail) blk: {
@@ -1105,8 +1119,8 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
                                 try out.print(" --release-small");
                             },
                         }
-                        if (code.target_windows) {
-                            try test_args.appendSlice([][]const u8{ "-target", "x86_64-windows" });
+                        if (code.target_str) |triple| {
+                            try test_args.appendSlice([][]const u8{ "-target", triple });
                         }
                         const result = exec(allocator, &env_map, test_args.toSliceConst()) catch return parseError(tokenizer, code.source_token, "test failed");
                         const escaped_stderr = try escapeHtml(allocator, result.stderr);
doc/langref.html.in
@@ -5396,11 +5396,196 @@ pub fn main() void {
       {#see_also|inline while|inline for#}
       {#header_close#}
       {#header_open|Assembly#}
-      <p>TODO: example of inline assembly</p>
-      <p>TODO: example of module level assembly</p>
-      <p>TODO: example of using inline assembly return value</p>
-      <p>TODO: example of using inline assembly assigning values to variables</p>
+      <p>
+      For some use cases, it may be necessary to directly control the machine code generated
+      by Zig programs, rather than relying on Zig's code generation. For these cases, one
+      can use inline assembly. Here is an example of implementing Hello, World on x86_64 Linux
+      using inline assembly:
+      </p>
+      {#code_begin|exe#}
+      {#target_linux_x86_64#}
+pub fn main() noreturn {
+    const msg = "hello world\n";
+    _ = syscall3(SYS_write, STDOUT_FILENO, @ptrToInt(&msg), msg.len);
+    _ = syscall1(SYS_exit, 0);
+    unreachable;
+}
+
+pub const SYS_write = 1;
+pub const SYS_exit = 60;
+
+pub const STDOUT_FILENO = 1;
+
+pub fn syscall1(number: usize, arg1: usize) usize {
+    return asm volatile ("syscall"
+        : [ret] "={rax}" (-> usize)
+        : [number] "{rax}" (number),
+          [arg1] "{rdi}" (arg1)
+        : "rcx", "r11"
+    );
+}
+
+pub fn syscall3(number: usize, arg1: usize, arg2: usize, arg3: usize) usize {
+    return asm volatile ("syscall"
+        : [ret] "={rax}" (-> usize)
+        : [number] "{rax}" (number),
+          [arg1] "{rdi}" (arg1),
+          [arg2] "{rsi}" (arg2),
+          [arg3] "{rdx}" (arg3)
+        : "rcx", "r11"
+    );
+}
+      {#code_end#}
+      <p>
+      Dissecting the syntax:
+      </p>
+      <pre>{#syntax#}// Inline assembly is an expression which returns a value.
+// the `asm` keyword begins the expression.
+_ = asm
+// `volatile` is an optional modifier that tells Zig this
+// inline assembly expression has side-effects. Without
+// `volatile`, Zig is allowed to delete the inline assembly
+// code if the result is unused.
+volatile (
+// Next is a comptime string which is the assembly code.
+// Inside this string one may use `%[ret]`, `%[number]`,
+// or `%[arg1]` where a register is expected, to specify
+// the register that Zig uses for the argument or return value,
+// if the register constraint strings are used. However, the
+// code below does not use this. A literal `%` can be
+// obtained by escaping it with a double percent: `%%`.
+// Often multiline string syntax comes in handy here.
+    \\syscall
+// Next is the output. It is possible that in the future Zig will
+// support multiple outputs, depending on how
+// https://github.com/ziglang/zig/issues/215 is resolved.
+// It is allowed for there to be no outputs, in which case
+// this colon would be directly followed by the colon for the inputs.
+    :
+// This specifies the name to be used in `%[ret]` syntax in
+// the above assembly string. This example does not use it,
+// but the syntax is mandatory.
+    [ret]
+// Next is the output constraint string. This feature is still
+// considered unstable in Zig, and so LLVM/GCC documentation
+// must be used to understand the semantics.
+// http://releases.llvm.org/8.0.0/docs/LangRef.html#inline-asm-constraint-string
+// https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html
+// In this example, the constraint string means "the result value of
+// this inline assembly instruction is whatever is in $rax".
+    "={rax}"
+// Next is either a value binding, or `->` and then a type. The
+// type is the result type of the inline assembly expression.
+// If it is a value binding, then `%[ret]` syntax would be used
+// to refer to the register bound to the value.
+    (-> usize)
+// Next is the list of inputs.
+// The constraint for these inputs means, "when the assembly code is
+// executed, $rax shall have the value of `number` and $rdi shall have
+// the value of `arg1`". Any number of input parameters is allowed,
+// including none.
+    : [number] "{rax}" (number),
+        [arg1] "{rdi}" (arg1)
+// Next is the list of clobbers. These declare a set of registers whose
+// values will not be preserved by the execution of this assembly code. 
+// These do not include output or input registers. The special clobber
+// value of "memory" means that the assembly writes to arbitrary undeclared
+// memory locations - not only the memory pointed to by a declared indirect
+// output. In this example we list $rcx and $r11 because it is known the
+// kernel syscall does not preserve these registers.
+    : "rcx", "r11"
+);{#endsyntax#}</pre>
+      <p>
+      For i386 and x86_64 targets, the syntax is AT&amp;T syntax, rather than the more
+      popular Intel syntax. This is due to technical constraints; assembly parsing is
+      provided by LLVM and its support for Intel syntax is buggy and not well tested.
+      </p>
+      <p>
+      Some day Zig may have its own assembler. This would allow it to integrate more seamlessly
+      into the language, as well as be compatible with the popular NASM syntax. This documentation
+      section will be updated before 1.0.0 is released, with a conclusive statement about the status
+      of AT&amp;T vs Intel/NASM syntax.
+      </p>
+      {#header_open|Output Constraints#}
+      <p>
+      Output constraints are still considered to be unstable in Zig, and so
+      <a href="http://releases.llvm.org/8.0.0/docs/LangRef.html#inline-asm-constraint-string">LLVM documentation</a>
+      and
+      <a href="https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html">GCC documentation</a>
+      must be used to understand the semantics.
+      </p>
+      <p>
+      Note that some breaking changes to output constraints are planned with
+      <a href="https://github.com/ziglang/zig/issues/215">issue #215</a>.
+      </p>
+      {#header_close#}
+
+      {#header_open|Input Constraints#}
+      <p>
+      Input constraints are still considered to be unstable in Zig, and so
+      <a href="http://releases.llvm.org/8.0.0/docs/LangRef.html#inline-asm-constraint-string">LLVM documentation</a>
+      and
+      <a href="https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html">GCC documentation</a>
+      must be used to understand the semantics.
+      </p>
+      <p>
+      Note that some breaking changes to input constraints are planned with
+      <a href="https://github.com/ziglang/zig/issues/215">issue #215</a>.
+      </p>
+      {#header_close#}
+
+      {#header_open|Clobbers#}
+      <p>
+      Clobbers are the set of registers whose values will not be preserved by the execution of
+      the assembly code. These do not include output or input registers. The special clobber
+      value of {#syntax#}"memory"{#endsyntax#} means that the assembly causes writes to
+      arbitrary undeclared memory locations - not only the memory pointed to by a declared
+      indirect output. 
+      </p>
+      <p>
+      Failure to declare the full set of clobbers for a given inline assembly
+      expression is unchecked {#link|Undefined Behavior#}.
+      </p>
+      {#header_close#}
+
+      {#header_open|Global Assembly#}
+      <p>
+      When an assembly expression occurs in a top-level {#link|comptime#} block, this is
+      <strong>global assembly</strong>.
+      </p>
+      <p>
+      This kind of assembly has different rules than inline assembly. First, {#syntax#}volatile{#endsyntax#}
+      is not valid because all global assembly is unconditionally included.
+      Second, there are no inputs, outputs, or clobbers. All global assembly is concatenated
+      verbatim into one long string and assembled together. There are no template substitution rules regarding
+      <code>%</code> as there are in inline assembly expressions.
+      </p>
+      {#code_begin|test|global-asm#}
+      {#target_linux_x86_64#}
+const std = @import("std");
+const assert = std.debug.assert;
+
+comptime {
+    asm (
+        \\.global my_func;
+        \\.type my_func, @function;
+        \\my_func:
+        \\  lea (%rdi,%rsi,1),%eax
+        \\  retq
+    );
+}
+
+extern fn my_func(a: i32, b: i32) i32;
+
+test "global assembly" {
+    assert(my_func(12, 34) == 46);
+}
+      {#code_end#}
+      {#header_close#}
       {#header_close#}
+
       {#header_open|Atomics#}
       <p>TODO: @fence()</p>
       <p>TODO: @atomic rmw</p>
src/all_types.hpp
@@ -800,9 +800,8 @@ struct AsmToken {
 };
 
 struct AstNodeAsmExpr {
-    bool is_volatile;
-    Buf *asm_template;
-    ZigList<AsmToken> token_list;
+    Token *volatile_token;
+    Token *asm_template;
     ZigList<AsmOutput*> output_list;
     ZigList<AsmInput*> input_list;
     ZigList<Buf*> clobber_list;
@@ -2169,6 +2168,7 @@ enum IrInstructionId {
     IrInstructionIdArrayType,
     IrInstructionIdPromiseType,
     IrInstructionIdSliceType,
+    IrInstructionIdGlobalAsm,
     IrInstructionIdAsm,
     IrInstructionIdSizeOf,
     IrInstructionIdTestNonNull,
@@ -2677,10 +2677,18 @@ struct IrInstructionSliceType {
     bool allow_zero;
 };
 
+struct IrInstructionGlobalAsm {
+    IrInstruction base;
+
+    Buf *asm_code;
+};
+
 struct IrInstructionAsm {
     IrInstruction base;
 
-    // Most information on inline assembly comes from the source node.
+    Buf *asm_template;
+    AsmToken *token_list;
+    size_t token_list_len;
     IrInstruction **input_list;
     IrInstruction **output_types;
     ZigVar **output_vars;
src/ast_render.cpp
@@ -862,8 +862,8 @@ static void render_node_extra(AstRender *ar, AstNode *node, bool grouped) {
         case NodeTypeAsmExpr:
             {
                 AstNodeAsmExpr *asm_expr = &node->data.asm_expr;
-                const char *volatile_str = asm_expr->is_volatile ? " volatile" : "";
-                fprintf(ar->f, "asm%s (\"%s\"\n", volatile_str, buf_ptr(asm_expr->asm_template));
+                const char *volatile_str = (asm_expr->volatile_token != nullptr) ? " volatile" : "";
+                fprintf(ar->f, "asm%s (\"%s\"\n", volatile_str, buf_ptr(&asm_expr->asm_template->data.str_lit.str));
                 print_indent(ar);
                 fprintf(ar->f, ": ");
                 for (size_t i = 0; i < asm_expr->output_list.length; i += 1) {
src/codegen.cpp
@@ -3793,8 +3793,8 @@ static LLVMValueRef ir_render_union_field_ptr(CodeGen *g, IrExecutable *executab
     return bitcasted_union_field_ptr;
 }
 
-static size_t find_asm_index(CodeGen *g, AstNode *node, AsmToken *tok) {
-    const char *ptr = buf_ptr(node->data.asm_expr.asm_template) + tok->start + 2;
+static size_t find_asm_index(CodeGen *g, AstNode *node, AsmToken *tok, Buf *src_template) {
+    const char *ptr = buf_ptr(src_template) + tok->start + 2;
     size_t len = tok->end - tok->start - 2;
     size_t result = 0;
     for (size_t i = 0; i < node->data.asm_expr.output_list.length; i += 1, result += 1) {
@@ -3817,13 +3817,13 @@ static LLVMValueRef ir_render_asm(CodeGen *g, IrExecutable *executable, IrInstru
     assert(asm_node->type == NodeTypeAsmExpr);
     AstNodeAsmExpr *asm_expr = &asm_node->data.asm_expr;
 
-    Buf *src_template = asm_expr->asm_template;
+    Buf *src_template = instruction->asm_template;
 
     Buf llvm_template = BUF_INIT;
     buf_resize(&llvm_template, 0);
 
-    for (size_t token_i = 0; token_i < asm_expr->token_list.length; token_i += 1) {
-        AsmToken *asm_token = &asm_expr->token_list.at(token_i);
+    for (size_t token_i = 0; token_i < instruction->token_list_len; token_i += 1) {
+        AsmToken *asm_token = &instruction->token_list[token_i];
         switch (asm_token->id) {
             case AsmTokenIdTemplate:
                 for (size_t offset = asm_token->start; offset < asm_token->end; offset += 1) {
@@ -3840,7 +3840,7 @@ static LLVMValueRef ir_render_asm(CodeGen *g, IrExecutable *executable, IrInstru
                 break;
             case AsmTokenIdVar:
                 {
-                    size_t index = find_asm_index(g, asm_node, asm_token);
+                    size_t index = find_asm_index(g, asm_node, asm_token, src_template);
                     assert(index < SIZE_MAX);
                     buf_appendf(&llvm_template, "$%" ZIG_PRI_usize "", index);
                     break;
@@ -3937,7 +3937,7 @@ static LLVMValueRef ir_render_asm(CodeGen *g, IrExecutable *executable, IrInstru
     }
     LLVMTypeRef function_type = LLVMFunctionType(ret_type, param_types, (unsigned)input_and_output_count, false);
 
-    bool is_volatile = asm_expr->is_volatile || (asm_expr->output_list.length == 0);
+    bool is_volatile = instruction->has_side_effects || (asm_expr->output_list.length == 0);
     LLVMValueRef asm_fn = LLVMGetInlineAsm(function_type, buf_ptr(&llvm_template), buf_len(&llvm_template),
             buf_ptr(&constraint_buf), buf_len(&constraint_buf), is_volatile, false, LLVMInlineAsmDialectATT);
 
@@ -5480,6 +5480,7 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
         case IrInstructionIdCmpxchgSrc:
         case IrInstructionIdLoadPtr:
         case IrInstructionIdBitCast:
+        case IrInstructionIdGlobalAsm:
             zig_unreachable();
 
         case IrInstructionIdDeclVarGen:
src/ir.cpp
@@ -513,6 +513,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionSliceType *) {
     return IrInstructionIdSliceType;
 }
 
+static constexpr IrInstructionId ir_instruction_id(IrInstructionGlobalAsm *) {
+    return IrInstructionIdGlobalAsm;
+}
+
 static constexpr IrInstructionId ir_instruction_id(IrInstructionAsm *) {
     return IrInstructionIdAsm;
 }
@@ -1628,10 +1632,21 @@ static IrInstruction *ir_build_slice_type(IrBuilder *irb, Scope *scope, AstNode
     return &instruction->base;
 }
 
-static IrInstruction *ir_build_asm(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction **input_list,
-        IrInstruction **output_types, ZigVar **output_vars, size_t return_count, bool has_side_effects)
+static IrInstruction *ir_build_global_asm(IrBuilder *irb, Scope *scope, AstNode *source_node, Buf *asm_code) {
+    IrInstructionGlobalAsm *instruction = ir_build_instruction<IrInstructionGlobalAsm>(irb, scope, source_node);
+    instruction->asm_code = asm_code;
+    return &instruction->base;
+}
+
+static IrInstruction *ir_build_asm(IrBuilder *irb, Scope *scope, AstNode *source_node,
+        Buf *asm_template, AsmToken *token_list, size_t token_list_len,
+        IrInstruction **input_list, IrInstruction **output_types, ZigVar **output_vars, size_t return_count,
+        bool has_side_effects)
 {
     IrInstructionAsm *instruction = ir_build_instruction<IrInstructionAsm>(irb, scope, source_node);
+    instruction->asm_template = asm_template;
+    instruction->token_list = token_list;
+    instruction->token_list_len = token_list_len;
     instruction->input_list = input_list;
     instruction->output_types = output_types;
     instruction->output_vars = output_vars;
@@ -5861,21 +5876,142 @@ static IrInstruction *ir_gen_undefined_literal(IrBuilder *irb, Scope *scope, Ast
     return ir_build_const_undefined(irb, scope, node);
 }
 
+static Error parse_asm_template(IrBuilder *irb, AstNode *source_node, Buf *asm_template,
+        ZigList<AsmToken> *tok_list)
+{
+    // TODO Connect the errors in this function back up to the actual source location
+    // rather than just the token. https://github.com/ziglang/zig/issues/2080
+    enum State {
+        StateStart,
+        StatePercent,
+        StateTemplate,
+        StateVar,
+    };
+
+    assert(tok_list->length == 0);
+
+    AsmToken *cur_tok = nullptr;
+
+    enum State state = StateStart;
+
+    for (size_t i = 0; i < buf_len(asm_template); i += 1) {
+        uint8_t c = *((uint8_t*)buf_ptr(asm_template) + i);
+        switch (state) {
+            case StateStart:
+                if (c == '%') {
+                    tok_list->add_one();
+                    cur_tok = &tok_list->last();
+                    cur_tok->id = AsmTokenIdPercent;
+                    cur_tok->start = i;
+                    state = StatePercent;
+                } else {
+                    tok_list->add_one();
+                    cur_tok = &tok_list->last();
+                    cur_tok->id = AsmTokenIdTemplate;
+                    cur_tok->start = i;
+                    state = StateTemplate;
+                }
+                break;
+            case StatePercent:
+                if (c == '%') {
+                    cur_tok->end = i;
+                    state = StateStart;
+                } else if (c == '[') {
+                    cur_tok->id = AsmTokenIdVar;
+                    state = StateVar;
+                } else if (c == '=') {
+                    cur_tok->id = AsmTokenIdUniqueId;
+                    cur_tok->end = i;
+                    state = StateStart;
+                } else {
+                    add_node_error(irb->codegen, source_node,
+                        buf_create_from_str("expected a '%' or '['"));
+                    return ErrorSemanticAnalyzeFail;
+                }
+                break;
+            case StateTemplate:
+                if (c == '%') {
+                    cur_tok->end = i;
+                    i -= 1;
+                    cur_tok = nullptr;
+                    state = StateStart;
+                }
+                break;
+            case StateVar:
+                if (c == ']') {
+                    cur_tok->end = i;
+                    state = StateStart;
+                } else if ((c >= 'a' && c <= 'z') ||
+                        (c >= '0' && c <= '9') ||
+                        (c == '_'))
+                {
+                    // do nothing
+                } else {
+                    add_node_error(irb->codegen, source_node,
+                        buf_sprintf("invalid substitution character: '%c'", c));
+                    return ErrorSemanticAnalyzeFail;
+                }
+                break;
+        }
+    }
+
+    switch (state) {
+        case StateStart:
+            break;
+        case StatePercent:
+        case StateVar:
+            add_node_error(irb->codegen, source_node, buf_sprintf("unexpected end of assembly template"));
+            return ErrorSemanticAnalyzeFail;
+        case StateTemplate:
+            cur_tok->end = buf_len(asm_template);
+            break;
+    }
+    return ErrorNone;
+}
+
 static IrInstruction *ir_gen_asm_expr(IrBuilder *irb, Scope *scope, AstNode *node) {
+    Error err;
     assert(node->type == NodeTypeAsmExpr);
+    AstNodeAsmExpr *asm_expr = &node->data.asm_expr;
+    bool is_volatile = asm_expr->volatile_token != nullptr;
+    bool in_fn_scope = (scope_fn_entry(scope) != nullptr);
+
+    Buf *template_buf = &asm_expr->asm_template->data.str_lit.str;
 
-    IrInstruction **input_list = allocate<IrInstruction *>(node->data.asm_expr.input_list.length);
-    IrInstruction **output_types = allocate<IrInstruction *>(node->data.asm_expr.output_list.length);
-    ZigVar **output_vars = allocate<ZigVar *>(node->data.asm_expr.output_list.length);
+    if (!in_fn_scope) {
+        if (is_volatile) {
+            add_token_error(irb->codegen, node->owner, asm_expr->volatile_token,
+                    buf_sprintf("volatile is meaningless on global assembly"));
+            return irb->codegen->invalid_instruction;
+        }
+
+        if (asm_expr->output_list.length != 0 || asm_expr->input_list.length != 0 ||
+            asm_expr->clobber_list.length != 0)
+        {
+            add_node_error(irb->codegen, node,
+                buf_sprintf("global assembly cannot have inputs, outputs, or clobbers"));
+            return irb->codegen->invalid_instruction;
+        }
+
+        return ir_build_global_asm(irb, scope, node, template_buf);
+    }
+
+    ZigList<AsmToken> tok_list = {};
+    if ((err = parse_asm_template(irb, node, template_buf, &tok_list))) {
+        return irb->codegen->invalid_instruction;
+    }
+
+    IrInstruction **input_list = allocate<IrInstruction *>(asm_expr->input_list.length);
+    IrInstruction **output_types = allocate<IrInstruction *>(asm_expr->output_list.length);
+    ZigVar **output_vars = allocate<ZigVar *>(asm_expr->output_list.length);
     size_t return_count = 0;
-    bool is_volatile = node->data.asm_expr.is_volatile;
-    if (!is_volatile && node->data.asm_expr.output_list.length == 0) {
+    if (!is_volatile && asm_expr->output_list.length == 0) {
         add_node_error(irb->codegen, node,
                 buf_sprintf("assembly expression with no output must be marked volatile"));
         return irb->codegen->invalid_instruction;
     }
-    for (size_t i = 0; i < node->data.asm_expr.output_list.length; i += 1) {
-        AsmOutput *asm_output = node->data.asm_expr.output_list.at(i);
+    for (size_t i = 0; i < asm_expr->output_list.length; i += 1) {
+        AsmOutput *asm_output = asm_expr->output_list.at(i);
         if (asm_output->return_type) {
             return_count += 1;
 
@@ -5911,8 +6047,8 @@ static IrInstruction *ir_gen_asm_expr(IrBuilder *irb, Scope *scope, AstNode *nod
             return irb->codegen->invalid_instruction;
         }
     }
-    for (size_t i = 0; i < node->data.asm_expr.input_list.length; i += 1) {
-        AsmInput *asm_input = node->data.asm_expr.input_list.at(i);
+    for (size_t i = 0; i < asm_expr->input_list.length; i += 1) {
+        AsmInput *asm_input = asm_expr->input_list.at(i);
         IrInstruction *input_value = ir_gen_node(irb, asm_input->expr, scope);
         if (input_value == irb->codegen->invalid_instruction)
             return irb->codegen->invalid_instruction;
@@ -5920,7 +6056,8 @@ static IrInstruction *ir_gen_asm_expr(IrBuilder *irb, Scope *scope, AstNode *nod
         input_list[i] = input_value;
     }
 
-    return ir_build_asm(irb, scope, node, input_list, output_types, output_vars, return_count, is_volatile);
+    return ir_build_asm(irb, scope, node, template_buf, tok_list.items, tok_list.length,
+            input_list, output_types, output_vars, return_count, is_volatile);
 }
 
 static IrInstruction *ir_gen_if_optional_expr(IrBuilder *irb, Scope *scope, AstNode *node) {
@@ -16309,27 +16446,18 @@ static IrInstruction *ir_analyze_instruction_slice_type(IrAnalyze *ira,
     zig_unreachable();
 }
 
+static IrInstruction *ir_analyze_instruction_global_asm(IrAnalyze *ira, IrInstructionGlobalAsm *instruction) {
+    buf_append_char(&ira->codegen->global_asm, '\n');
+    buf_append_buf(&ira->codegen->global_asm, instruction->asm_code);
+
+    return ir_const_void(ira, &instruction->base);
+}
+
 static IrInstruction *ir_analyze_instruction_asm(IrAnalyze *ira, IrInstructionAsm *asm_instruction) {
     assert(asm_instruction->base.source_node->type == NodeTypeAsmExpr);
 
     AstNodeAsmExpr *asm_expr = &asm_instruction->base.source_node->data.asm_expr;
 
-    bool global_scope = (scope_fn_entry(asm_instruction->base.scope) == nullptr);
-    if (global_scope) {
-        if (asm_expr->output_list.length != 0 || asm_expr->input_list.length != 0 ||
-            asm_expr->clobber_list.length != 0)
-        {
-            ir_add_error(ira, &asm_instruction->base,
-                buf_sprintf("global assembly cannot have inputs, outputs, or clobbers"));
-            return ira->codegen->invalid_instruction;
-        }
-
-        buf_append_char(&ira->codegen->global_asm, '\n');
-        buf_append_buf(&ira->codegen->global_asm, asm_expr->asm_template);
-
-        return ir_const_void(ira, &asm_instruction->base);
-    }
-
     if (!ir_emit_global_runtime_side_effect(ira, &asm_instruction->base))
         return ira->codegen->invalid_instruction;
 
@@ -16367,6 +16495,7 @@ static IrInstruction *ir_analyze_instruction_asm(IrAnalyze *ira, IrInstructionAs
 
     IrInstruction *result = ir_build_asm(&ira->new_irb,
         asm_instruction->base.scope, asm_instruction->base.source_node,
+        asm_instruction->asm_template, asm_instruction->token_list, asm_instruction->token_list_len,
         input_list, output_types, asm_instruction->output_vars, asm_instruction->return_count,
         asm_instruction->has_side_effects);
     result->value.type = return_type;
@@ -22584,6 +22713,8 @@ static IrInstruction *ir_analyze_instruction_nocast(IrAnalyze *ira, IrInstructio
             return ir_analyze_instruction_set_float_mode(ira, (IrInstructionSetFloatMode *)instruction);
         case IrInstructionIdSliceType:
             return ir_analyze_instruction_slice_type(ira, (IrInstructionSliceType *)instruction);
+        case IrInstructionIdGlobalAsm:
+            return ir_analyze_instruction_global_asm(ira, (IrInstructionGlobalAsm *)instruction);
         case IrInstructionIdAsm:
             return ir_analyze_instruction_asm(ira, (IrInstructionAsm *)instruction);
         case IrInstructionIdArrayType:
@@ -22938,6 +23069,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdCmpxchgSrc:
         case IrInstructionIdAssertZero:
         case IrInstructionIdResizeSlice:
+        case IrInstructionIdGlobalAsm:
             return true;
 
         case IrInstructionIdPhi:
src/ir_print.cpp
@@ -436,11 +436,15 @@ static void ir_print_slice_type(IrPrint *irp, IrInstructionSliceType *instructio
     ir_print_other_instruction(irp, instruction->child_type);
 }
 
+static void ir_print_global_asm(IrPrint *irp, IrInstructionGlobalAsm *instruction) {
+    fprintf(irp->f, "asm(\"%s\")", buf_ptr(instruction->asm_code));
+}
+
 static void ir_print_asm(IrPrint *irp, IrInstructionAsm *instruction) {
     assert(instruction->base.source_node->type == NodeTypeAsmExpr);
     AstNodeAsmExpr *asm_expr = &instruction->base.source_node->data.asm_expr;
     const char *volatile_kw = instruction->has_side_effects ? " volatile" : "";
-    fprintf(irp->f, "asm%s (\"%s\") : ", volatile_kw, buf_ptr(asm_expr->asm_template));
+    fprintf(irp->f, "asm%s (\"%s\") : ", volatile_kw, buf_ptr(instruction->asm_template));
 
     for (size_t i = 0; i < asm_expr->output_list.length; i += 1) {
         AsmOutput *asm_output = asm_expr->output_list.at(i);
@@ -1519,6 +1523,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
         case IrInstructionIdSliceType:
             ir_print_slice_type(irp, (IrInstructionSliceType *)instruction);
             break;
+        case IrInstructionIdGlobalAsm:
+            ir_print_global_asm(irp, (IrInstructionGlobalAsm *)instruction);
+            break;
         case IrInstructionIdAsm:
             ir_print_asm(irp, (IrInstructionAsm *)instruction);
             break;
src/parser.cpp
@@ -85,7 +85,7 @@ static AstNode *ast_parse_asm_output(ParseContext *pc);
 static AsmOutput *ast_parse_asm_output_item(ParseContext *pc);
 static AstNode *ast_parse_asm_input(ParseContext *pc);
 static AsmInput *ast_parse_asm_input_item(ParseContext *pc);
-static AstNode *ast_parse_asm_cloppers(ParseContext *pc);
+static AstNode *ast_parse_asm_clobbers(ParseContext *pc);
 static Token *ast_parse_break_label(ParseContext *pc);
 static Token *ast_parse_block_label(ParseContext *pc);
 static AstNode *ast_parse_field_init(ParseContext *pc);
@@ -140,24 +140,6 @@ static void ast_error(ParseContext *pc, Token *token, const char *format, ...) {
     exit(EXIT_FAILURE);
 }
 
-ATTRIBUTE_PRINTF(4, 5)
-ATTRIBUTE_NORETURN
-static void ast_asm_error(ParseContext *pc, AstNode *node, size_t offset, const char *format, ...) {
-    assert(node->type == NodeTypeAsmExpr);
-    va_list ap;
-    va_start(ap, format);
-    Buf *msg = buf_vprintf(format, ap);
-    va_end(ap);
-
-    ErrorMsg *err = err_msg_create_with_line(pc->owner->data.structure.root_struct->path,
-            node->line, node->column,
-            pc->owner->data.structure.root_struct->source_code,
-            pc->owner->data.structure.root_struct->line_offsets, msg);
-
-    print_err_msg(err, pc->err_color);
-    exit(EXIT_FAILURE);
-}
-
 static Buf ast_token_str(Buf *input, Token *token) {
     Buf str = BUF_INIT;
     buf_init_from_mem(&str, buf_ptr(input) + token->start_pos, token->end_pos - token->start_pos);
@@ -486,93 +468,6 @@ AstNode *ast_parse_bin_op_simple(ParseContext *pc) {
     return res;
 }
 
-static void ast_parse_asm_template(ParseContext *pc, AstNode *node) {
-    Buf *asm_template = node->data.asm_expr.asm_template;
-
-    enum State {
-        StateStart,
-        StatePercent,
-        StateTemplate,
-        StateVar,
-    };
-
-    ZigList<AsmToken> *tok_list = &node->data.asm_expr.token_list;
-    assert(tok_list->length == 0);
-
-    AsmToken *cur_tok = nullptr;
-
-    enum State state = StateStart;
-
-    for (size_t i = 0; i < buf_len(asm_template); i += 1) {
-        uint8_t c = *((uint8_t*)buf_ptr(asm_template) + i);
-        switch (state) {
-            case StateStart:
-                if (c == '%') {
-                    tok_list->add_one();
-                    cur_tok = &tok_list->last();
-                    cur_tok->id = AsmTokenIdPercent;
-                    cur_tok->start = i;
-                    state = StatePercent;
-                } else {
-                    tok_list->add_one();
-                    cur_tok = &tok_list->last();
-                    cur_tok->id = AsmTokenIdTemplate;
-                    cur_tok->start = i;
-                    state = StateTemplate;
-                }
-                break;
-            case StatePercent:
-                if (c == '%') {
-                    cur_tok->end = i;
-                    state = StateStart;
-                } else if (c == '[') {
-                    cur_tok->id = AsmTokenIdVar;
-                    state = StateVar;
-                } else if (c == '=') {
-                    cur_tok->id = AsmTokenIdUniqueId;
-                    cur_tok->end = i;
-                    state = StateStart;
-                } else {
-                    ast_asm_error(pc, node, i, "expected a '%%' or '['");
-                }
-                break;
-            case StateTemplate:
-                if (c == '%') {
-                    cur_tok->end = i;
-                    i -= 1;
-                    cur_tok = nullptr;
-                    state = StateStart;
-                }
-                break;
-            case StateVar:
-                if (c == ']') {
-                    cur_tok->end = i;
-                    state = StateStart;
-                } else if ((c >= 'a' && c <= 'z') ||
-                        (c >= '0' && c <= '9') ||
-                        (c == '_'))
-                {
-                    // do nothing
-                } else {
-                    ast_asm_error(pc, node, i, "invalid substitution character: '%c'", c);
-                }
-                break;
-        }
-    }
-
-    switch (state) {
-        case StateStart:
-            break;
-        case StatePercent:
-        case StateVar:
-            ast_asm_error(pc, node, buf_len(asm_template), "unexpected end of assembly template");
-            break;
-        case StateTemplate:
-            cur_tok->end = buf_len(asm_template);
-            break;
-    }
-}
-
 AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens, ZigType *owner, ErrColor err_color) {
     ParseContext pc = {};
     pc.err_color = err_color;
@@ -1931,9 +1826,8 @@ static AstNode *ast_parse_asm_expr(ParseContext *pc) {
 
     res->line = asm_token->start_line;
     res->column = asm_token->start_column;
-    res->data.asm_expr.is_volatile = volatile_token != nullptr;
-    res->data.asm_expr.asm_template = token_buf(asm_template);
-    ast_parse_asm_template(pc, res);
+    res->data.asm_expr.volatile_token = volatile_token;
+    res->data.asm_expr.asm_template = asm_template;
     return res;
 }
 
@@ -1985,7 +1879,7 @@ static AstNode *ast_parse_asm_input(ParseContext *pc) {
         return nullptr;
 
     ZigList<AsmInput *> input_list = ast_parse_list(pc, TokenIdComma, ast_parse_asm_input_item);
-    AstNode *res = ast_parse_asm_cloppers(pc);
+    AstNode *res = ast_parse_asm_clobbers(pc);
     if (res == nullptr)
         res = ast_create_node_no_line_info(pc, NodeTypeAsmExpr);
 
@@ -2013,7 +1907,7 @@ static AsmInput *ast_parse_asm_input_item(ParseContext *pc) {
 }
 
 // AsmClobbers <- COLON StringList
-static AstNode *ast_parse_asm_cloppers(ParseContext *pc) {
+static AstNode *ast_parse_asm_clobbers(ParseContext *pc) {
     if (eat_token_if(pc, TokenIdColon) == nullptr)
         return nullptr;
 
test/stage1/behavior/asm.zig
@@ -3,7 +3,7 @@ const expect = @import("std").testing.expect;
 
 comptime {
     if (config.arch == config.Arch.x86_64 and config.os == config.Os.linux) {
-        asm volatile (
+        asm (
             \\.globl this_is_my_alias;
             \\.type this_is_my_alias, @function;
             \\.set this_is_my_alias, derp;
test/compile_errors.zig
@@ -2,6 +2,15 @@ const tests = @import("tests.zig");
 const builtin = @import("builtin");
 
 pub fn addCases(cases: *tests.CompileErrorContext) void {
+    cases.add(
+        "volatile on global assembly",
+        \\comptime {
+        \\    asm volatile ("");
+        \\}
+    ,
+        "tmp.zig:2:9: error: volatile is meaningless on global assembly",
+    );
+
     cases.add(
         "invalid multiple dereferences",
         \\export fn a() void {