Commit d0b11af2bd

Andrew Kelley <superjoe30@gmail.com>
2016-08-02 08:11:31
new multiline string syntax
This patch also moves a bunch of the parser code into the tokenizer. Closes #162.
1 parent 0450b73
doc/vim/syntax/zig.vim
@@ -1,11 +1,12 @@
 " Vim syntax file
 " Language: Zig
 " Maintainer: Andrew Kelley
-" Latest Revision: 28 July 2016
+" Latest Revision: 03 August 2016
 
 if exists("b:current_syntax")
   finish
 endif
+let b:current_syntax = "zig"
 
 syn keyword zigStorage const var extern export pub noalias inline noinline
 syn keyword zigStructure struct enum union
@@ -24,33 +25,30 @@ syn keyword zigBoolean true false
 syn match zigOperator display "\%(+%\?\|-%\?\|/\|*%\?\|=\|\^\|&\|?\||\|!\|>\|<\|%\|<<%\?\|>>\|&&\|||\)=\?"
 syn match zigArrowCharacter display "->"
 
-syn match zigDecNumber display "\<[0-9][0-9_]*\%([iu]\%(size\|8\|16\|32\|64\)\)\="
-syn match zigHexNumber display "\<0x[a-fA-F0-9_]\+\%([iu]\%(size\|8\|16\|32\|64\)\)\="
-syn match zigOctNumber display "\<0o[0-7_]\+\%([iu]\%(size\|8\|16\|32\|64\)\)\="
-syn match zigBinNumber display "\<0b[01_]\+\%([iu]\%(size\|8\|16\|32\|64\)\)\="
+syn match zigDecNumber display "\<[0-9]*\%(.[0-9]\+\)\=\%([eE][+-]\?[0-9]\+\)\="
+syn match zigHexNumber display "\<0x[a-fA-F0-9]\+\%(.[a-fA-F0-9]\+\%([pP][+-]\?[0-9]\+\)\?\)\="
+syn match zigOctNumber display "\<0o[0-7]\+"
+syn match zigBinNumber display "\<0b[01]\+\%(.[01]\+\%([eE][+-]\?[0-9]\+\)\?\)\="
 
 
 syn match zigCharacterInvalid display contained /b\?'\zs[\n\r\t']\ze'/
 syn match zigCharacterInvalidUnicode display contained /b'\zs[^[:cntrl:][:graph:][:alnum:][:space:]]\ze'/
 syn match zigCharacter /b'\([^\\]\|\\\(.\|x\x\{2}\)\)'/ contains=zigEscape,zigEscapeError,zigCharacterInvalid,zigCharacterInvalidUnicode
-syn match zigCharacter /'\([^\\]\|\\\(.\|x\x\{2}\|u\x\{4}\|U\x\{8}\|u{\x\{1,6}}\)\)'/ contains=zigEscape,zigEscapeUnicode,zigEscapeError,zigCharacterInvalid
-
-syn match zigShebang /\%^#![^[].*/
+syn match zigCharacter /'\([^\\]\|\\\(.\|x\x\{2}\|u\x\{4}\|U\x\{6}\)\)'/ contains=zigEscape,zigEscapeUnicode,zigEscapeError,zigCharacterInvalid
 
 syn region zigCommentLine start="//" end="$" contains=zigTodo,@Spell
 syn region zigCommentLineDoc start="//\%(//\@!\|!\)" end="$" contains=zigTodo,@Spell
 
+" TODO match only the first '\\' within the zigMultilineString as zigMultilineStringPrefix
+syn match zigMultilineStringPrefix display contained /c\?\\\\/
+syn region zigMultilineString start="c\?\\\\" end="$" contains=zigMultilineStringPrefix
+
 syn keyword zigTodo contained TODO XXX
 
 syn match     zigEscapeError   display contained /\\./
-syn match     zigEscape        display contained /\\\([nrt0\\'"]\|x\x\{2}\)/
-syn match     zigEscapeUnicode display contained /\\\(u\x\{4}\|U\x\{8}\)/
-syn match     zigEscapeUnicode display contained /\\u{\x\{1,6}}/
-syn match     zigStringContinuation display contained /\\\n\s*/
-syn region    zigString      start=+c\?"+ skip=+\\\\\|\\"+ end=+"+ oneline contains=zigEscape,zigEscapeUnicode,zigEscapeError,zigStringContinuation,@Spell
-syn region    zigString      start='r"\z([^)]*\)(' end=')\z1"' contains=@Spell
-
-let b:current_syntax = "zig"
+syn match     zigEscape        display contained /\\\([nrt\\'"]\|x\x\{2}\)/
+syn match     zigEscapeUnicode display contained /\\\(u\x\{4}\|U\x\{6}\)/
+syn region    zigString      start=+c\?"+ skip=+\\\\\|\\"+ end=+"+ oneline contains=zigEscape,zigEscapeUnicode,zigEscapeError,@Spell
 
 hi def link zigDecNumber zigNumber
 hi def link zigHexNumber zigNumber
@@ -59,12 +57,12 @@ hi def link zigBinNumber zigNumber
 
 hi def link zigKeyword Keyword
 hi def link zigType Type
-hi def link zigShebang Comment
 hi def link zigCommentLine Comment
 hi def link zigCommentLineDoc SpecialComment
 hi def link zigTodo Todo
-hi def link zigStringContinuation Special
 hi def link zigString String
+hi def link zigMultilineString String
+hi def link zigMultilineStringPrefix Comment
 hi def link zigCharacterInvalid Error
 hi def link zigCharacterInvalidUnicode zigCharacterInvalid
 hi def link zigCharacter Character
doc/langref.md
@@ -7,27 +7,27 @@ Root = many(TopLevelDecl) "EOF"
 
 TopLevelDecl = many(Directive) option(VisibleMod) (FnDef | ExternDecl | ContainerDecl | GlobalVarDecl | ErrorValueDecl | TypeDecl | UseDecl)
 
-TypeDecl = "type" "Symbol" "=" TypeExpr ";"
+TypeDecl = "type" Symbol "=" TypeExpr ";"
 
-ErrorValueDecl = "error" "Symbol" ";"
+ErrorValueDecl = "error" Symbol ";"
 
 GlobalVarDecl = VariableDeclaration ";"
 
-VariableDeclaration = ("var" | "const") "Symbol" option(":" TypeExpr) "=" Expression
+VariableDeclaration = ("var" | "const") Symbol option(":" TypeExpr) "=" Expression
 
-ContainerDecl = ("struct" | "enum" | "union") "Symbol" option(ParamDeclList) "{" many(StructMember) "}"
+ContainerDecl = ("struct" | "enum" | "union") Symbol option(ParamDeclList) "{" many(StructMember) "}"
 
 StructMember = many(Directive) option(VisibleMod) (StructField | FnDef | GlobalVarDecl | ContainerDecl)
 
-StructField = "Symbol" option(":" Expression) ",")
+StructField = Symbol option(":" Expression) ","
 
 UseDecl = "use" Expression ";"
 
 ExternDecl = "extern" (FnProto | VariableDeclaration) ";"
 
-FnProto = "fn" option("Symbol") ParamDeclList option("->" TypeExpr)
+FnProto = "fn" option(Symbol) ParamDeclList option("->" TypeExpr)
 
-Directive = "#" "Symbol" "(" Expression ")"
+Directive = "#" Symbol "(" Expression ")"
 
 VisibleMod = "pub" | "export"
 
@@ -35,13 +35,13 @@ FnDef = option("inline" | "extern") FnProto Block
 
 ParamDeclList = "(" list(ParamDecl, ",") ")"
 
-ParamDecl = option("noalias" | "inline") option("Symbol" ":") TypeExpr | "..."
+ParamDecl = option("noalias" | "inline") option(Symbol ":") TypeExpr | "..."
 
 Block = "{" list(option(Statement), ";") "}"
 
 Statement = Label | VariableDeclaration ";" | Defer ";" | NonBlockExpression ";" | BlockExpression
 
-Label = "Symbol" ":"
+Label = Symbol ":"
 
 Expression = BlockExpression | NonBlockExpression
 
@@ -49,23 +49,23 @@ TypeExpr = PrefixOpExpression
 
 NonBlockExpression = ReturnExpression | AssignmentExpression
 
-AsmExpression = "asm" option("volatile") "(" "String" option(AsmOutput) ")"
+AsmExpression = "asm" option("volatile") "(" String option(AsmOutput) ")"
 
 AsmOutput = ":" list(AsmOutputItem, ",") option(AsmInput)
 
 AsmInput = ":" list(AsmInputItem, ",") option(AsmClobbers)
 
-AsmOutputItem = "[" "Symbol" "]" "String" "(" ("Symbol" | "->" TypeExpr) ")"
+AsmOutputItem = "[" Symbol "]" String "(" (Symbol | "->" TypeExpr) ")"
 
-AsmInputItem = "[" "Symbol" "]" "String" "(" Expression ")"
+AsmInputItem = "[" Symbol "]" String "(" Expression ")"
 
-AsmClobbers= ":" list("String", ",")
+AsmClobbers= ":" list(String, ",")
 
 UnwrapExpression = BoolOrExpression (UnwrapMaybe | UnwrapError) | BoolOrExpression
 
 UnwrapMaybe = "??" Expression
 
-UnwrapError = "%%" option("|" "Symbol" "|") Expression
+UnwrapError = "%%" option("|" Symbol "|") Expression
 
 AssignmentExpression = UnwrapExpression AssignmentOperator UnwrapExpression | UnwrapExpression
 
@@ -75,13 +75,13 @@ BlockExpression = IfExpression | Block | WhileExpression | ForExpression | Switc
 
 SwitchExpression = "switch" "(" Expression ")" "{" many(SwitchProng) "}"
 
-SwitchProng = (list(SwitchItem, ",") | "else") "=>" option("|" "Symbol" "|") Expression ","
+SwitchProng = (list(SwitchItem, ",") | "else") "=>" option("|" Symbol "|") Expression ","
 
 SwitchItem = Expression | (Expression "..." Expression)
 
 WhileExpression = "while" "(" Expression option(";" Expression) ")" Expression
 
-ForExpression = "for" "(" Expression ")" option("|" option("*") "Symbol" option("," "Symbol") "|") Expression
+ForExpression = "for" "(" Expression ")" option("|" option("*") Symbol option("," Symbol) "|") Expression
 
 BoolOrExpression = BoolAndExpression "||" BoolOrExpression | BoolAndExpression
 
@@ -93,7 +93,7 @@ IfExpression = IfVarExpression | IfBoolExpression
 
 IfBoolExpression = "if" "(" Expression ")" Expression option(Else)
 
-IfVarExpression = "if" "(" ("const" | "var") option("*") "Symbol" option(":" TypeExpr) "?=" Expression ")" Expression Option(Else)
+IfVarExpression = "if" "(" ("const" | "var") option("*") Symbol option(":" TypeExpr) "?=" Expression ")" Expression option(Else)
 
 Else = "else" Expression
 
@@ -127,7 +127,7 @@ PrefixOpExpression = PrefixOp PrefixOpExpression | SuffixOpExpression
 
 SuffixOpExpression = PrimaryExpression option(FnCallExpression | ArrayAccessExpression | FieldAccessExpression | SliceExpression)
 
-FieldAccessExpression = "." "Symbol"
+FieldAccessExpression = "." Symbol
 
 FnCallExpression = "(" list(Expression, ",") ")"
 
@@ -139,15 +139,15 @@ ContainerInitExpression = "{" ContainerInitBody "}"
 
 ContainerInitBody = list(StructLiteralField, ",") | list(Expression, ",")
 
-StructLiteralField = "." "Symbol" "=" Expression
+StructLiteralField = "." Symbol "=" Expression
 
 PrefixOp = "!" | "-" | "~" | "*" | ("&" option("const")) | "?" | "%" | "%%" | "??" | "-%"
 
-PrimaryExpression = "Number" | "String" | "CharLiteral" | KeywordLiteral | GroupedExpression | GotoExpression | BlockExpression | "Symbol" | ("@" "Symbol" FnCallExpression) | ArrayType | (option("extern") FnProto) | AsmExpression | ("error" "." "Symbol")
+PrimaryExpression = Number | String | CharLiteral | KeywordLiteral | GroupedExpression | GotoExpression | BlockExpression | Symbol | ("@" Symbol FnCallExpression) | ArrayType | (option("extern") FnProto) | AsmExpression | ("error" "." Symbol)
 
 ArrayType = "[" option(Expression) "]" option("const") TypeExpr
 
-GotoExpression = "goto" "Symbol"
+GotoExpression = "goto" Symbol
 
 GroupedExpression = "(" Expression ")"
 
@@ -265,14 +265,13 @@ from codegen.
 ### Literals
 
 #### Character and String Literals
+
 ```
 Literal            Example       Characters   Escapes         Null Term  Type
 
 Byte               'H'           All ASCII    Byte            No         u8
 UTF-8 Bytes        "hello"       All Unicode  Byte & Unicode  No         [5]u8
 UTF-8 C string     c"hello"      All Unicode  Byte & Unicode  Yes        &const u8
-UTF-8 Raw String   r"X(hello)X"  All Unicode  None            No         [5]u8
-UTF-8 Raw C String rc"X(hello)X" All Unicode  None            Yes        &const u8
 ```
 
 ### Escapes
@@ -291,26 +290,56 @@ UTF-8 Raw C String rc"X(hello)X" All Unicode  None            Yes        &const
 
 Note that the maximum valid Unicode point is 0x10ffff.
 
-##### Raw Strings
+##### Multiline String Literals
 
-Raw string literals have no escapes and can span across multiple lines. To
-start a raw string, use 'r"' or 'rc"' followed by unique bytes followed by '('.
-To end a raw string, use ')' followed by the same unique bytes, followed by '"'.
+Multiline string literals have no escapes and can span across multiple lines.
+To start a multiline string literal, use the `\\` token. Just like a comment,
+the string literal goes until the end of the line. The end of the line is not
+included in the string literal.
 
+However, if the next line begins with `\\` then a newline is appended and
+the string literal continues.
 
-#### Numeric Literals
+Example:
 
+```zig
+const hello_world_in_c =
+    \\#include <stdio.h>
+    \\
+    \\int main(int argc, char **argv) {
+    \\    printf("hello world\n");
+    \\    return 0;
+    \\}
+;
 ```
-Number literals     Example      Exponentiation
-
-Decimal integer     98222        N/A
-Hex integer         0xff         N/A
-Octal integer       0o77         N/A
-Binary integer      0b11110000   N/A
-Floating-point      123.0E+77    Optional
-Hex floating point  TODO         TODO
+
+For a multiline C string literal, prepend `c` to each `\\`. Example:
+
+```zig
+const c_string_literal =
+    c\\#include <stdio.h>
+    c\\
+    c\\int main(int argc, char **argv) {
+    c\\    printf("hello world\n");
+    c\\    return 0;
+    c\\}
+;
 ```
 
+In this example the variable `c_string_literal` has type `&const u8` and
+has a terminating null byte.
+
+#### Number Literals
+
+ Number literals    | Example     | Exponentiation
+--------------------|-------------|--------------
+ Decimal integer    | 98222       | N/A
+ Hex integer        | 0xff        | N/A
+ Octal integer      | 0o77        | N/A
+ Binary integer     | 0b11110000  | N/A
+ Floating point     | 123.0E+77   | Optional
+ Hex floating point | 0x103.70p-5 | Optional
+
 ### Identifiers
 
 TODO
src/all_types.hpp
@@ -194,7 +194,7 @@ struct AstNodeRoot {
 
 struct AstNodeFnProto {
     TopLevelDecl top_level_decl;
-    Buf name;
+    Buf *name;
     ZigList<AstNode *> params;
     AstNode *return_type;
     bool is_var_args;
@@ -229,7 +229,7 @@ struct AstNodeFnDecl {
 };
 
 struct AstNodeParamDecl {
-    Buf name;
+    Buf *name;
     AstNode *type;
     bool is_noalias;
     bool is_inline;
@@ -279,7 +279,7 @@ struct AstNodeDefer {
 
 struct AstNodeVariableDeclaration {
     TopLevelDecl top_level_decl;
-    Buf symbol;
+    Buf *symbol;
     bool is_const;
     bool is_extern;
     // one or both of type and expr will be non null
@@ -293,7 +293,7 @@ struct AstNodeVariableDeclaration {
 
 struct AstNodeTypeDecl {
     TopLevelDecl top_level_decl;
-    Buf symbol;
+    Buf *symbol;
     AstNode *child_type;
 
     // populated by semantic analyzer
@@ -305,7 +305,7 @@ struct AstNodeTypeDecl {
 
 struct AstNodeErrorValueDecl {
     TopLevelDecl top_level_decl;
-    Buf name;
+    Buf *name;
 
     // populated by semantic analyzer
     ErrorTableEntry *err;
@@ -434,7 +434,7 @@ struct AstNodeSliceExpr {
 
 struct AstNodeFieldAccessExpr {
     AstNode *struct_expr;
-    Buf field_name;
+    Buf *field_name;
 
     // populated by semantic analyzer
     TypeStructField *type_struct_field;
@@ -448,7 +448,7 @@ struct AstNodeFieldAccessExpr {
 };
 
 struct AstNodeDirective {
-    Buf name;
+    Buf *name;
     AstNode *expr;
 };
 
@@ -555,7 +555,7 @@ struct AstNodeSwitchRange {
 };
 
 struct AstNodeLabel {
-    Buf name;
+    Buf *name;
 
     // populated by semantic analyzer
     Expr resolved_expr;
@@ -563,7 +563,7 @@ struct AstNodeLabel {
 };
 
 struct AstNodeGoto {
-    Buf name;
+    Buf *name;
 
     // populated by semantic analyzer
     Expr resolved_expr;
@@ -571,9 +571,9 @@ struct AstNodeGoto {
 };
 
 struct AsmOutput {
-    Buf asm_symbolic_name;
-    Buf constraint;
-    Buf variable_name;
+    Buf *asm_symbolic_name;
+    Buf *constraint;
+    Buf *variable_name;
     AstNode *return_type; // null unless "=r" and return
 
     // populated by semantic analyzer
@@ -581,8 +581,8 @@ struct AsmOutput {
 };
 
 struct AsmInput {
-    Buf asm_symbolic_name;
-    Buf constraint;
+    Buf *asm_symbolic_name;
+    Buf *constraint;
     AstNode *expr;
 };
 
@@ -593,8 +593,7 @@ struct SrcPos {
 
 struct AstNodeAsmExpr {
     bool is_volatile;
-    Buf asm_template;
-    ZigList<SrcPos> offset_map;
+    Buf *asm_template;
     ZigList<AsmToken> token_list;
     ZigList<AsmOutput*> output_list;
     ZigList<AsmInput*> input_list;
@@ -613,7 +612,7 @@ enum ContainerKind {
 
 struct AstNodeStructDecl {
     TopLevelDecl top_level_decl;
-    Buf name;
+    Buf *name;
     ContainerKind kind;
     ZigList<AstNode *> generic_params;
     bool generic_params_is_var_args; // always an error but it can happen from parsing
@@ -629,12 +628,12 @@ struct AstNodeStructDecl {
 
 struct AstNodeStructField {
     TopLevelDecl top_level_decl;
-    Buf name;
+    Buf *name;
     AstNode *type;
 };
 
 struct AstNodeStringLiteral {
-    Buf buf;
+    Buf *buf;
     bool c;
 
     // populated by semantic analyzer:
@@ -648,29 +647,19 @@ struct AstNodeCharLiteral {
     Expr resolved_expr;
 };
 
-enum NumLit {
-    NumLitFloat,
-    NumLitUInt,
-};
-
 struct AstNodeNumberLiteral {
-    NumLit kind;
+    BigNum *bignum;
 
     // overflow is true if when parsing the number, we discovered it would not
     // fit without losing data in a uint64_t or double
     bool overflow;
 
-    union {
-        uint64_t x_uint;
-        double x_float;
-    } data;
-
     // populated by semantic analyzer
     Expr resolved_expr;
 };
 
 struct AstNodeStructValueField {
-    Buf name;
+    Buf *name;
     AstNode *expr;
 
     // populated by semantic analyzer
@@ -706,7 +695,7 @@ struct AstNodeUndefinedLiteral {
 };
 
 struct AstNodeSymbolExpr {
-    Buf symbol;
+    Buf *symbol;
 
     // populated by semantic analyzer
     Expr resolved_expr;
src/analyze.cpp
@@ -1053,7 +1053,7 @@ static void resolve_function_proto(CodeGen *g, AstNode *node, FnTableEntry *fn_t
     if (fn_proto->top_level_decl.directives) {
         for (int i = 0; i < fn_proto->top_level_decl.directives->length; i += 1) {
             AstNode *directive_node = fn_proto->top_level_decl.directives->at(i);
-            Buf *name = &directive_node->data.directive.name;
+            Buf *name = directive_node->data.directive.name;
 
             if (buf_eql_str(name, "attribute")) {
                 if (fn_table_entry->fn_def_node) {
@@ -1251,7 +1251,7 @@ static void resolve_enum_type(CodeGen *g, ImportTableEntry *import, TypeTableEnt
     for (uint32_t i = 0; i < field_count; i += 1) {
         AstNode *field_node = decl_node->data.struct_decl.fields.at(i);
         TypeEnumField *type_enum_field = &enum_type->data.enumeration.fields[i];
-        type_enum_field->name = &field_node->data.struct_field.name;
+        type_enum_field->name = field_node->data.struct_field.name;
         TypeTableEntry *field_type = analyze_type_expr(g, import, context,
                 field_node->data.struct_field.type);
         type_enum_field->type_entry = field_type;
@@ -1365,7 +1365,7 @@ static void resolve_enum_type(CodeGen *g, ImportTableEntry *import, TypeTableEnt
             uint64_t debug_align_in_bits = 8*LLVMABISizeOfType(g->target_data_ref, enum_type->type_ref);
             LLVMZigDIType *replacement_di_type = LLVMZigCreateDebugStructType(g->dbuilder,
                     LLVMZigFileToScope(import->di_file),
-                    buf_ptr(&decl_node->data.struct_decl.name),
+                    buf_ptr(decl_node->data.struct_decl.name),
                     import->di_file, decl_node->line + 1,
                     debug_size_in_bits,
                     debug_align_in_bits,
@@ -1381,7 +1381,7 @@ static void resolve_enum_type(CodeGen *g, ImportTableEntry *import, TypeTableEnt
             uint64_t tag_debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, tag_type_entry->type_ref);
             uint64_t tag_debug_align_in_bits = 8*LLVMABISizeOfType(g->target_data_ref, tag_type_entry->type_ref);
             LLVMZigDIType *tag_di_type = LLVMZigCreateDebugEnumerationType(g->dbuilder,
-                    LLVMZigFileToScope(import->di_file), buf_ptr(&decl_node->data.struct_decl.name),
+                    LLVMZigFileToScope(import->di_file), buf_ptr(decl_node->data.struct_decl.name),
                     import->di_file, decl_node->line + 1,
                     tag_debug_size_in_bits,
                     tag_debug_align_in_bits,
@@ -1441,7 +1441,7 @@ static void resolve_struct_type(CodeGen *g, ImportTableEntry *import, TypeTableE
     for (int i = 0; i < field_count; i += 1) {
         AstNode *field_node = decl_node->data.struct_decl.fields.at(i);
         TypeStructField *type_struct_field = &struct_type->data.structure.fields[i];
-        type_struct_field->name = &field_node->data.struct_field.name;
+        type_struct_field->name = field_node->data.struct_field.name;
         TypeTableEntry *field_type = analyze_type_expr(g, import, context,
                 field_node->data.struct_field.type);
         type_struct_field->type_entry = field_type;
@@ -1514,7 +1514,7 @@ static void resolve_struct_type(CodeGen *g, ImportTableEntry *import, TypeTableE
     uint64_t debug_align_in_bits = 8*LLVMABISizeOfType(g->target_data_ref, struct_type->type_ref);
     LLVMZigDIType *replacement_di_type = LLVMZigCreateDebugStructType(g->dbuilder,
             LLVMZigFileToScope(import->di_file),
-            buf_ptr(&decl_node->data.struct_decl.name),
+            buf_ptr(decl_node->data.struct_decl.name),
             import->di_file, decl_node->line + 1,
             debug_size_in_bits,
             debug_align_in_bits,
@@ -1570,7 +1570,7 @@ static void preview_fn_proto_instance(CodeGen *g, ImportTableEntry *import, AstN
     assert(!is_generic_instance || !is_generic_fn);
 
     AstNode *parent_decl = proto_node->data.fn_proto.top_level_decl.parent_decl;
-    Buf *proto_name = &proto_node->data.fn_proto.name;
+    Buf *proto_name = proto_node->data.fn_proto.name;
 
     AstNode *fn_def_node = proto_node->data.fn_proto.fn_def_node;
     bool is_extern = proto_node->data.fn_proto.is_extern;
@@ -1645,7 +1645,7 @@ static void scan_struct_decl(CodeGen *g, ImportTableEntry *import, BlockContext
         return;
     }
 
-    Buf *name = &node->data.struct_decl.name;
+    Buf *name = node->data.struct_decl.name;
     TypeTableEntry *container_type = get_partial_container_type(g, import, context,
             node->data.struct_decl.kind, node, buf_ptr(name));
     node->data.struct_decl.type_entry = container_type;
@@ -1692,7 +1692,7 @@ static void preview_error_value_decl(CodeGen *g, AstNode *node) {
     ErrorTableEntry *err = allocate<ErrorTableEntry>(1);
 
     err->decl_node = node;
-    buf_init_from_buf(&err->name, &node->data.error_value_decl.name);
+    buf_init_from_buf(&err->name, node->data.error_value_decl.name);
 
     auto existing_entry = g->error_table.maybe_get(&err->name);
     if (existing_entry) {
@@ -1749,7 +1749,7 @@ static void resolve_top_level_decl(CodeGen *g, AstNode *node, bool pointer_only)
         case NodeTypeTypeDecl:
             {
                 AstNode *type_node = node->data.type_decl.child_type;
-                Buf *decl_name = &node->data.type_decl.symbol;
+                Buf *decl_name = node->data.type_decl.symbol;
 
                 TypeTableEntry *entry;
                 if (node->data.type_decl.override_type) {
@@ -2479,12 +2479,12 @@ static TypeTableEntry *analyze_container_init_expr(CodeGen *g, ImportTableEntry
             val_field_node->block_context = context;
 
             TypeStructField *type_field = find_struct_type_field(container_type,
-                    &val_field_node->data.struct_val_field.name);
+                    val_field_node->data.struct_val_field.name);
 
             if (!type_field) {
                 add_node_error(g, val_field_node,
                     buf_sprintf("no member named '%s' in '%s'",
-                        buf_ptr(&val_field_node->data.struct_val_field.name), buf_ptr(&container_type->name)));
+                        buf_ptr(val_field_node->data.struct_val_field.name), buf_ptr(&container_type->name)));
                 continue;
             }
 
@@ -2604,7 +2604,7 @@ static TypeTableEntry *analyze_field_access_expr(CodeGen *g, ImportTableEntry *i
 
     AstNode **struct_expr_node = &node->data.field_access_expr.struct_expr;
     TypeTableEntry *struct_type = analyze_expression(g, import, context, nullptr, *struct_expr_node);
-    Buf *field_name = &node->data.field_access_expr.field_name;
+    Buf *field_name = node->data.field_access_expr.field_name;
 
     bool wrapped_in_fn_call = node->data.field_access_expr.is_fn_call;
 
@@ -2965,28 +2965,33 @@ static TypeTableEntry *resolve_expr_const_val_as_string_lit(CodeGen *g, AstNode
     return get_array_type(g, g->builtin_types.entry_u8, buf_len(str));
 }
 
-
-static TypeTableEntry *resolve_expr_const_val_as_unsigned_num_lit(CodeGen *g, AstNode *node,
-        TypeTableEntry *expected_type, uint64_t x, bool depends_on_compile_var)
+static TypeTableEntry *resolve_expr_const_val_as_bignum(CodeGen *g, AstNode *node,
+        TypeTableEntry *expected_type, BigNum *bignum, bool depends_on_compile_var)
 {
     Expr *expr = get_resolved_expr(node);
     expr->const_val.ok = true;
     expr->const_val.depends_on_compile_var = depends_on_compile_var;
 
-    bignum_init_unsigned(&expr->const_val.data.x_bignum, x);
-
-    return g->builtin_types.entry_num_lit_int;
+    bignum_init_bignum(&expr->const_val.data.x_bignum, bignum);
+    if (bignum->kind == BigNumKindInt) {
+        return g->builtin_types.entry_num_lit_int;
+    } else if (bignum->kind == BigNumKindFloat) {
+        return g->builtin_types.entry_num_lit_float;
+    } else {
+        zig_unreachable();
+    }
 }
 
-static TypeTableEntry *resolve_expr_const_val_as_float_num_lit(CodeGen *g, AstNode *node,
-        TypeTableEntry *expected_type, double x)
+static TypeTableEntry *resolve_expr_const_val_as_unsigned_num_lit(CodeGen *g, AstNode *node,
+        TypeTableEntry *expected_type, uint64_t x, bool depends_on_compile_var)
 {
     Expr *expr = get_resolved_expr(node);
     expr->const_val.ok = true;
+    expr->const_val.depends_on_compile_var = depends_on_compile_var;
 
-    bignum_init_float(&expr->const_val.data.x_bignum, x);
+    bignum_init_unsigned(&expr->const_val.data.x_bignum, x);
 
-    return g->builtin_types.entry_num_lit_float;
+    return g->builtin_types.entry_num_lit_int;
 }
 
 static TypeTableEntry *analyze_error_literal_expr(CodeGen *g, ImportTableEntry *import,
@@ -3073,7 +3078,7 @@ static TypeTableEntry *analyze_symbol_expr(CodeGen *g, ImportTableEntry *import,
         return resolve_expr_const_val_as_type(g, node, node->data.symbol_expr.override_type_entry, false);
     }
 
-    Buf *variable_name = &node->data.symbol_expr.symbol;
+    Buf *variable_name = node->data.symbol_expr.symbol;
 
     auto primitive_table_entry = g->primitive_type_table.maybe_get(variable_name);
     if (primitive_table_entry) {
@@ -3177,7 +3182,7 @@ static TypeTableEntry *analyze_lvalue(CodeGen *g, ImportTableEntry *import, Bloc
             return g->builtin_types.entry_invalid;
         }
         if (purpose != LValPurposeAddressOf) {
-            Buf *name = &lhs_node->data.symbol_expr.symbol;
+            Buf *name = lhs_node->data.symbol_expr.symbol;
             VariableTableEntry *var = find_variable(g, block_context, name);
             if (var) {
                 if (var->is_const) {
@@ -3742,7 +3747,7 @@ static TypeTableEntry *analyze_unwrap_error_expr(CodeGen *g, ImportTableEntry *i
         if (var_node) {
             child_context = new_block_context(node, parent_context);
             var_node->block_context = child_context;
-            Buf *var_name = &var_node->data.symbol_expr.symbol;
+            Buf *var_name = var_node->data.symbol_expr.symbol;
             node->data.unwrap_err_expr.var = add_local_var(g, var_node, import, child_context, var_name,
                     g->builtin_types.entry_pure_error, true, nullptr);
         } else {
@@ -3827,7 +3832,7 @@ static VariableTableEntry *analyze_variable_declaration_raw(CodeGen *g, ImportTa
     assert(type != nullptr); // should have been caught by the parser
 
     VariableTableEntry *var = add_local_var(g, source_node, import, context,
-            &variable_declaration->symbol, type, is_const,
+            variable_declaration->symbol, type, is_const,
             expr_is_maybe ? nullptr : variable_declaration->expr);
 
     variable_declaration->variable = var;
@@ -3886,15 +3891,7 @@ static TypeTableEntry *analyze_number_literal_expr(CodeGen *g, ImportTableEntry
         return g->builtin_types.entry_invalid;
     }
 
-    if (node->data.number_literal.kind == NumLitUInt) {
-        return resolve_expr_const_val_as_unsigned_num_lit(g, node,
-                expected_type, node->data.number_literal.data.x_uint, false);
-    } else if (node->data.number_literal.kind == NumLitFloat) {
-        return resolve_expr_const_val_as_float_num_lit(g, node,
-                expected_type, node->data.number_literal.data.x_float);
-    } else {
-        zig_unreachable();
-    }
+    return resolve_expr_const_val_as_bignum(g, node, expected_type, node->data.number_literal.bignum, false);
 }
 
 static TypeTableEntry *analyze_array_type(CodeGen *g, ImportTableEntry *import, BlockContext *context,
@@ -4034,13 +4031,13 @@ static TypeTableEntry *analyze_for_expr(CodeGen *g, ImportTableEntry *import, Bl
 
     AstNode *elem_var_node = node->data.for_expr.elem_node;
     elem_var_node->block_context = child_context;
-    Buf *elem_var_name = &elem_var_node->data.symbol_expr.symbol;
+    Buf *elem_var_name = elem_var_node->data.symbol_expr.symbol;
     node->data.for_expr.elem_var = add_local_var(g, elem_var_node, import, child_context, elem_var_name,
             var_type, true, nullptr);
 
     AstNode *index_var_node = node->data.for_expr.index_node;
     if (index_var_node) {
-        Buf *index_var_name = &index_var_node->data.symbol_expr.symbol;
+        Buf *index_var_name = index_var_node->data.symbol_expr.symbol;
         index_var_node->block_context = child_context;
         node->data.for_expr.index_var = add_local_var(g, index_var_node, import, child_context, index_var_name,
                 g->builtin_types.entry_usize, true, nullptr);
@@ -4952,7 +4949,7 @@ static TypeTableEntry *analyze_builtin_fn_call_expr(CodeGen *g, ImportTableEntry
     assert(node->type == NodeTypeFnCallExpr);
 
     AstNode *fn_ref_expr = node->data.fn_call_expr.fn_ref_expr;
-    Buf *name = &fn_ref_expr->data.symbol_expr.symbol;
+    Buf *name = fn_ref_expr->data.symbol_expr.symbol;
 
     auto entry = g->builtin_fn_table.maybe_get(name);
 
@@ -5476,7 +5473,7 @@ static TypeTableEntry *analyze_fn_call_with_inline_args(CodeGen *g, ImportTableE
         ConstExprValue *const_val = &get_resolved_expr(*param_node)->const_val;
         if (const_val->ok) {
             VariableTableEntry *var = add_local_var(g, generic_param_decl_node, decl_node->owner, child_context,
-                    &generic_param_decl_node->data.param_decl.name, param_type, true, *param_node);
+                    generic_param_decl_node->data.param_decl.name, param_type, true, *param_node);
             // This generic function instance could be called with anything, so when this variable is read it
             // needs to know that it depends on compile time variable data.
             var->force_depends_on_compile_var = true;
@@ -5570,7 +5567,7 @@ static TypeTableEntry *analyze_generic_fn_call(CodeGen *g, ImportTableEntry *imp
         ConstExprValue *const_val = &get_resolved_expr(*param_node)->const_val;
         if (const_val->ok) {
             VariableTableEntry *var = add_local_var(g, generic_param_decl_node, decl_node->owner, child_context,
-                    &generic_param_decl_node->data.param_decl.name, param_type, true, *param_node);
+                    generic_param_decl_node->data.param_decl.name, param_type, true, *param_node);
             var->force_depends_on_compile_var = true;
         } else {
             add_node_error(g, *param_node, buf_sprintf("unable to evaluate constant expression"));
@@ -5964,7 +5961,7 @@ static TypeTableEntry *analyze_switch_expr(CodeGen *g, ImportTableEntry *import,
 
                 if (expr_type->id == TypeTableEntryIdEnum) {
                     if (item_node->type == NodeTypeSymbol) {
-                        Buf *field_name = &item_node->data.symbol_expr.symbol;
+                        Buf *field_name = item_node->data.symbol_expr.symbol;
                         TypeEnumField *type_enum_field = get_enum_field(expr_type, field_name);
                         if (type_enum_field) {
                             item_node->data.symbol_expr.enum_field = type_enum_field;
@@ -6000,7 +5997,7 @@ static TypeTableEntry *analyze_switch_expr(CodeGen *g, ImportTableEntry *import,
                     }
                 } else if (expr_type->id == TypeTableEntryIdErrorUnion) {
                     if (item_node->type == NodeTypeSymbol) {
-                        Buf *err_name = &item_node->data.symbol_expr.symbol;
+                        Buf *err_name = item_node->data.symbol_expr.symbol;
                         bool is_ok_case = buf_eql_str(err_name, "Ok");
                         auto err_table_entry = is_ok_case ? nullptr: g->error_table.maybe_get(err_name);
                         if (is_ok_case || err_table_entry) {
@@ -6072,7 +6069,7 @@ static TypeTableEntry *analyze_switch_expr(CodeGen *g, ImportTableEntry *import,
         AstNode *var_node = prong_node->data.switch_prong.var_symbol;
         if (var_node) {
             assert(var_node->type == NodeTypeSymbol);
-            Buf *var_name = &var_node->data.symbol_expr.symbol;
+            Buf *var_name = var_node->data.symbol_expr.symbol;
             var_node->block_context = child_context;
             prong_node->data.switch_prong.var = add_local_var(g, var_node, import,
                     child_context, var_name, var_type, true, nullptr);
@@ -6228,9 +6225,9 @@ static TypeTableEntry *analyze_string_literal_expr(CodeGen *g, ImportTableEntry
         TypeTableEntry *expected_type, AstNode *node)
 {
     if (node->data.string_literal.c) {
-        return resolve_expr_const_val_as_c_string_lit(g, node, &node->data.string_literal.buf);
+        return resolve_expr_const_val_as_c_string_lit(g, node, node->data.string_literal.buf);
     } else {
-        return resolve_expr_const_val_as_string_lit(g, node, &node->data.string_literal.buf);
+        return resolve_expr_const_val_as_string_lit(g, node, node->data.string_literal.buf);
     }
 }
 
@@ -6255,7 +6252,7 @@ static TypeTableEntry *analyze_block_expr(CodeGen *g, ImportTableEntry *import,
             child->data.label.label_entry = label;
             fn_table_entry->all_labels.append(label);
 
-            child_context->label_table.put(&child->data.label.name, label);
+            child_context->label_table.put(child->data.label.name, label);
 
             return_type = g->builtin_types.entry_void;
             continue;
@@ -6316,7 +6313,7 @@ static TypeTableEntry *analyze_asm_expr(CodeGen *g, ImportTableEntry *import, Bl
                 break;
             }
         } else {
-            Buf *variable_name = &asm_output->variable_name;
+            Buf *variable_name = asm_output->variable_name;
             VariableTableEntry *var = find_variable(g, context, variable_name);
             if (var) {
                 asm_output->variable = var;
@@ -6351,7 +6348,7 @@ static TypeTableEntry *analyze_goto_pass1(CodeGen *g, ImportTableEntry *import,
 
 static void analyze_goto_pass2(CodeGen *g, ImportTableEntry *import, AstNode *node) {
     assert(node->type == NodeTypeGoto);
-    Buf *label_name = &node->data.goto_expr.name;
+    Buf *label_name = node->data.goto_expr.name;
     BlockContext *context = node->block_context;
     assert(context);
     LabelTableEntry *label = find_label(g, context, label_name);
@@ -6549,11 +6546,11 @@ static void analyze_fn_body(CodeGen *g, FnTableEntry *fn_table_entry) {
                 buf_sprintf("byvalue struct parameters not yet supported on extern functions"));
         }
 
-        if (buf_len(&param_decl->name) == 0) {
+        if (buf_len(param_decl->name) == 0) {
             add_node_error(g, param_decl_node, buf_sprintf("missing parameter name"));
         }
 
-        VariableTableEntry *var = add_local_var(g, param_decl_node, import, context, &param_decl->name,
+        VariableTableEntry *var = add_local_var(g, param_decl_node, import, context, param_decl->name,
                 type, true, nullptr);
         var->src_arg_index = i;
         param_decl_node->data.param_decl.variable = var;
@@ -6583,7 +6580,7 @@ static void analyze_fn_body(CodeGen *g, FnTableEntry *fn_table_entry) {
         if (!label->used) {
             add_node_error(g, label->decl_node,
                     buf_sprintf("label '%s' defined but not used",
-                        buf_ptr(&label->decl_node->data.label.name)));
+                        buf_ptr(label->decl_node->data.label.name)));
         }
     }
 
@@ -6640,7 +6637,7 @@ static void scan_decls(CodeGen *g, ImportTableEntry *import, BlockContext *conte
             break;
         case NodeTypeContainerDecl:
             {
-                Buf *name = &node->data.struct_decl.name;
+                Buf *name = node->data.struct_decl.name;
                 add_top_level_decl(g, import, context, node, name);
                 if (node->data.struct_decl.generic_params.length == 0) {
                     scan_struct_decl(g, import, context, node);
@@ -6653,20 +6650,20 @@ static void scan_decls(CodeGen *g, ImportTableEntry *import, BlockContext *conte
             break;
         case NodeTypeVariableDeclaration:
             {
-                Buf *name = &node->data.variable_declaration.symbol;
+                Buf *name = node->data.variable_declaration.symbol;
                 add_top_level_decl(g, import, context, node, name);
                 break;
             }
         case NodeTypeTypeDecl:
             {
-                Buf *name = &node->data.type_decl.symbol;
+                Buf *name = node->data.type_decl.symbol;
                 add_top_level_decl(g, import, context, node, name);
                 break;
             }
         case NodeTypeFnProto:
             {
                 // if the name is missing, we immediately announce an error
-                Buf *fn_name = &node->data.fn_proto.name;
+                Buf *fn_name = node->data.fn_proto.name;
                 if (buf_len(fn_name) == 0) {
                     node->data.fn_proto.skip = true;
                     add_node_error(g, node, buf_sprintf("missing function name"));
@@ -6851,6 +6848,9 @@ ImportTableEntry *add_source_file(CodeGen *g, PackageTableEntry *package,
     assert(import_entry->root);
     if (g->verbose) {
         ast_print(stderr, import_entry->root, 0);
+        //fprintf(stderr, "\nReformatted Source:\n");
+        //fprintf(stderr, "---------------------\n");
+        //ast_render(stderr, import_entry->root, 4);
     }
 
     import_entry->di_file = LLVMZigCreateFile(g->dbuilder, buf_ptr(src_basename), buf_ptr(src_dirname));
@@ -6868,7 +6868,7 @@ ImportTableEntry *add_source_file(CodeGen *g, PackageTableEntry *package,
         if (top_level_decl->type == NodeTypeFnDef) {
             AstNode *proto_node = top_level_decl->data.fn_def.fn_proto;
             assert(proto_node->type == NodeTypeFnProto);
-            Buf *proto_name = &proto_node->data.fn_proto.name;
+            Buf *proto_name = proto_node->data.fn_proto.name;
 
             bool is_private = (proto_node->data.fn_proto.top_level_decl.visib_mod == VisibModPrivate);
 
@@ -7064,7 +7064,7 @@ bool is_node_void_expr(AstNode *node) {
     {
         AstNode *type_node = node->data.container_init_expr.type;
         if (type_node->type == NodeTypeSymbol &&
-            buf_eql_str(&type_node->data.symbol_expr.symbol, "void"))
+            buf_eql_str(type_node->data.symbol_expr.symbol, "void"))
         {
             return true;
         }
src/ast_render.cpp
@@ -78,6 +78,24 @@ static const char *visib_mod_string(VisibMod mod) {
     zig_unreachable();
 }
 
+static const char *return_string(ReturnKind kind) { // maps a ReturnKind to the keyword text printed for a return expression
+    switch (kind) {
+        case ReturnKindUnconditional: return "return";
+        case ReturnKindError: return "%return";
+        case ReturnKindMaybe: return "?return";
+    }
+    zig_unreachable(); // only reached when kind matches none of the cases above
+}
+
+static const char *defer_string(ReturnKind kind) { // maps a ReturnKind to the keyword text printed for a defer statement
+    switch (kind) {
+        case ReturnKindUnconditional: return "defer";
+        case ReturnKindError: return "%defer";
+        case ReturnKindMaybe: return "?defer";
+    }
+    zig_unreachable(); // only reached when kind matches none of the cases above
+}
+
 static const char *extern_string(bool is_extern) {
     return is_extern ? "extern " : "";
 }
@@ -243,7 +261,7 @@ static bool is_node_void(AstNode *node) {
     if (node->type == NodeTypeSymbol) {
         if (node->data.symbol_expr.override_type_entry) {
             return node->data.symbol_expr.override_type_entry->id == TypeTableEntryIdVoid;
-        } else if (buf_eql_str(&node->data.symbol_expr.symbol, "void")) {
+        } else if (buf_eql_str(node->data.symbol_expr.symbol, "void")) {
             return true;
         }
     }
@@ -260,7 +278,12 @@ static bool is_digit(uint8_t c) {
 }
 
 static bool is_printable(uint8_t c) {
-    return is_alpha_under(c) || is_digit(c) || c == ' ';
+    static const uint8_t printables[] = // the set of bytes this function reports as printable
+        " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.~`!@#$%^&*()_-+=\\{}[];'\"?/<>,";
+    for (size_t i = 0; i < array_length(printables) - 1; i += 1) { // - 1 skips the literal's trailing NUL so '\0' is not reported printable
+        if (c == printables[i]) return true;
+    }
+    return false; // c is not in the set
 }
 
 static void string_literal_escape(Buf *source, Buf *dest) {
@@ -353,18 +376,18 @@ static void render_node(AstRender *ar, AstNode *node) {
                 const char *extern_str = extern_string(node->data.fn_proto.is_extern);
                 const char *inline_str = inline_string(node->data.fn_proto.is_inline);
                 fprintf(ar->f, "%s%s%sfn ", pub_str, inline_str, extern_str);
-                print_symbol(ar, &node->data.fn_proto.name);
+                print_symbol(ar, node->data.fn_proto.name);
                 fprintf(ar->f, "(");
                 int arg_count = node->data.fn_proto.params.length;
                 bool is_var_args = node->data.fn_proto.is_var_args;
                 for (int arg_i = 0; arg_i < arg_count; arg_i += 1) {
                     AstNode *param_decl = node->data.fn_proto.params.at(arg_i);
                     assert(param_decl->type == NodeTypeParamDecl);
-                    if (buf_len(&param_decl->data.param_decl.name) > 0) {
+                    if (buf_len(param_decl->data.param_decl.name) > 0) {
                         const char *noalias_str = param_decl->data.param_decl.is_noalias ? "noalias " : "";
                         const char *inline_str = param_decl->data.param_decl.is_inline ? "inline  " : "";
                         fprintf(ar->f, "%s%s", noalias_str, inline_str);
-                        print_symbol(ar, &param_decl->data.param_decl.name);
+                        print_symbol(ar, param_decl->data.param_decl.name);
                         fprintf(ar->f, ": ");
                     }
                     render_node(ar, param_decl->data.param_decl.type);
@@ -417,21 +440,31 @@ static void render_node(AstRender *ar, AstNode *node) {
             fprintf(ar->f, "}");
             break;
         case NodeTypeDirective:
-            fprintf(ar->f, "#%s(",  buf_ptr(&node->data.directive.name));
+            fprintf(ar->f, "#%s(",  buf_ptr(node->data.directive.name));
             render_node(ar, node->data.directive.expr);
             fprintf(ar->f, ")\n");
             break;
         case NodeTypeReturnExpr:
-            zig_panic("TODO");
+            {
+                const char *return_str = return_string(node->data.return_expr.kind);
+                fprintf(ar->f, "%s ", return_str);
+                render_node(ar, node->data.return_expr.expr);
+                break;
+            }
         case NodeTypeDefer:
-            zig_panic("TODO");
+            {
+                const char *defer_str = defer_string(node->data.defer.kind); // "defer", "%defer" or "?defer"
+                fprintf(ar->f, "%s ", defer_str);
+                render_node(ar, node->data.defer.expr); // read the expression from the defer member, the same one kind came from
+                break;
+            }
         case NodeTypeVariableDeclaration:
             {
                 const char *pub_str = visib_mod_string(node->data.variable_declaration.top_level_decl.visib_mod);
                 const char *extern_str = extern_string(node->data.variable_declaration.is_extern);
                 const char *const_or_var = const_or_var_string(node->data.variable_declaration.is_const);
                 fprintf(ar->f, "%s%s%s ", pub_str, extern_str, const_or_var);
-                print_symbol(ar, &node->data.variable_declaration.symbol);
+                print_symbol(ar, node->data.variable_declaration.symbol);
 
                 if (node->data.variable_declaration.type) {
                     fprintf(ar->f, ": ");
@@ -446,7 +479,7 @@ static void render_node(AstRender *ar, AstNode *node) {
         case NodeTypeTypeDecl:
             {
                 const char *pub_str = visib_mod_string(node->data.type_decl.top_level_decl.visib_mod);
-                const char *var_name = buf_ptr(&node->data.type_decl.symbol);
+                const char *var_name = buf_ptr(node->data.type_decl.symbol);
                 fprintf(ar->f, "%stype %s = ", pub_str, var_name);
                 render_node(ar, node->data.type_decl.child_type);
                 break;
@@ -463,12 +496,15 @@ static void render_node(AstRender *ar, AstNode *node) {
         case NodeTypeUnwrapErrorExpr:
             zig_panic("TODO");
         case NodeTypeNumberLiteral:
-            switch (node->data.number_literal.kind) {
-                case NumLitUInt:
-                    fprintf(ar->f, "%" PRIu64, node->data.number_literal.data.x_uint);
+            switch (node->data.number_literal.bignum->kind) {
+                case BigNumKindInt:
+                    {
+                        const char *negative_str = node->data.number_literal.bignum->is_negative ? "-" : "";
+                        fprintf(ar->f, "%s%llu", negative_str, node->data.number_literal.bignum->data.x_uint);
+                    }
                     break;
-                case NumLitFloat:
-                    fprintf(ar->f, "%f", node->data.number_literal.data.x_float);
+                case BigNumKindFloat:
+                    fprintf(ar->f, "%f", node->data.number_literal.bignum->data.x_float);
                     break;
             }
             break;
@@ -478,7 +514,7 @@ static void render_node(AstRender *ar, AstNode *node) {
                     fprintf(ar->f, "c");
                 }
                 Buf tmp_buf = BUF_INIT;
-                string_literal_escape(&node->data.string_literal.buf, &tmp_buf);
+                string_literal_escape(node->data.string_literal.buf, &tmp_buf);
                 fprintf(ar->f, "\"%s\"", buf_ptr(&tmp_buf));
             }
             break;
@@ -498,7 +534,7 @@ static void render_node(AstRender *ar, AstNode *node) {
                 if (override_type) {
                     fprintf(ar->f, "%s", buf_ptr(&override_type->name));
                 } else {
-                    fprintf(ar->f, "%s", buf_ptr(&node->data.symbol_expr.symbol));
+                    print_symbol(ar, node->data.symbol_expr.symbol);
                 }
             }
             break;
@@ -513,10 +549,14 @@ static void render_node(AstRender *ar, AstNode *node) {
         case NodeTypeFnCallExpr:
             if (node->data.fn_call_expr.is_builtin) {
                 fprintf(ar->f, "@");
+            } else {
+                fprintf(ar->f, "(");
             }
-            fprintf(ar->f, "(");
             render_node(ar, node->data.fn_call_expr.fn_ref_expr);
-            fprintf(ar->f, ")(");
+            if (!node->data.fn_call_expr.is_builtin) {
+                fprintf(ar->f, ")");
+            }
+            fprintf(ar->f, "(");
             for (int i = 0; i < node->data.fn_call_expr.params.length; i += 1) {
                 AstNode *param = node->data.fn_call_expr.params.at(i);
                 if (i != 0) {
@@ -537,7 +577,7 @@ static void render_node(AstRender *ar, AstNode *node) {
         case NodeTypeFieldAccessExpr:
             {
                 AstNode *lhs = node->data.field_access_expr.struct_expr;
-                Buf *rhs = &node->data.field_access_expr.field_name;
+                Buf *rhs = node->data.field_access_expr.field_name;
                 render_node(ar, lhs);
                 fprintf(ar->f, ".");
                 print_symbol(ar, rhs);
@@ -577,7 +617,7 @@ static void render_node(AstRender *ar, AstNode *node) {
             zig_panic("TODO");
         case NodeTypeContainerDecl:
             {
-                const char *struct_name = buf_ptr(&node->data.struct_decl.name);
+                const char *struct_name = buf_ptr(node->data.struct_decl.name);
                 const char *pub_str = visib_mod_string(node->data.struct_decl.top_level_decl.visib_mod);
                 const char *container_str = container_string(node->data.struct_decl.kind);
                 fprintf(ar->f, "%s%s %s {\n", pub_str, container_str, struct_name);
@@ -586,7 +626,7 @@ static void render_node(AstRender *ar, AstNode *node) {
                     AstNode *field_node = node->data.struct_decl.fields.at(field_i);
                     assert(field_node->type == NodeTypeStructField);
                     print_indent(ar);
-                    print_symbol(ar, &field_node->data.struct_field.name);
+                    print_symbol(ar, field_node->data.struct_field.name);
                     if (!is_node_void(field_node->data.struct_field.type)) {
                         fprintf(ar->f, ": ");
                         render_node(ar, field_node->data.struct_field.type);
src/bignum.cpp
@@ -6,6 +6,7 @@
  */
 
 #include "bignum.hpp"
+#include "buffer.hpp"
 
 #include <assert.h>
 #include <math.h>
@@ -41,6 +42,10 @@ void bignum_init_signed(BigNum *dest, int64_t x) {
     }
 }
 
+void bignum_init_bignum(BigNum *dest, BigNum *src) { // initializes dest as a byte-for-byte copy of src
+    memcpy(dest, src, sizeof(BigNum));
+}
+
 bool bignum_fits_in_bits(BigNum *bn, int bit_count, bool is_signed) {
     assert(bn->kind == BigNumKindInt);
 
@@ -343,3 +348,15 @@ bool bignum_cmp_gte(BigNum *op1, BigNum *op2) {
         return true;
     }
 }
+
+bool bignum_increment_by_scalar(BigNum *bignum, uint64_t scalar) { // adds scalar to bignum's integer value in place
+    assert(bignum->kind == BigNumKindInt); // integer bignums only
+    assert(!bignum->is_negative); // non-negative values only
+    return __builtin_uaddll_overflow(bignum->data.x_uint, scalar, &bignum->data.x_uint); // builtin returns true when the addition overflowed
+}
+
+bool bignum_multiply_by_scalar(BigNum *bignum, uint64_t scalar) { // multiplies bignum's integer value by scalar in place
+    assert(bignum->kind == BigNumKindInt); // integer bignums only
+    assert(!bignum->is_negative); // non-negative values only
+    return __builtin_umulll_overflow(bignum->data.x_uint, scalar, &bignum->data.x_uint); // builtin returns true when the multiplication overflowed
+}
src/bignum.hpp
@@ -5,7 +5,8 @@
  * See http://opensource.org/licenses/MIT
  */
 
-#include "buffer.hpp"
+#ifndef ZIG_BIGNUM_HPP
+#define ZIG_BIGNUM_HPP
 
 #include <stdint.h>
 
@@ -26,6 +27,7 @@ struct BigNum {
 void bignum_init_float(BigNum *dest, double x);
 void bignum_init_unsigned(BigNum *dest, uint64_t x);
 void bignum_init_signed(BigNum *dest, int64_t x);
+void bignum_init_bignum(BigNum *dest, BigNum *src);
 
 bool bignum_fits_in_bits(BigNum *bn, int bit_count, bool is_signed);
 uint64_t bignum_to_twos_complement(BigNum *bn);
@@ -57,4 +59,11 @@ bool bignum_cmp_gt(BigNum *op1, BigNum *op2);
 bool bignum_cmp_lte(BigNum *op1, BigNum *op2);
 bool bignum_cmp_gte(BigNum *op1, BigNum *op2);
 
+// helper functions: update bignum in place and return true if the result overflowed
+bool bignum_increment_by_scalar(BigNum *bignum, uint64_t scalar);
+bool bignum_multiply_by_scalar(BigNum *bignum, uint64_t scalar);
+
+struct Buf;
 Buf *bignum_to_buf(BigNum *bn);
+
+#endif
src/codegen.cpp
@@ -1431,7 +1431,7 @@ static LLVMValueRef gen_field_access_expr(CodeGen *g, AstNode *node, bool is_lva
     TypeTableEntry *struct_type = get_expr_type(struct_expr);
 
     if (struct_type->id == TypeTableEntryIdArray) {
-        Buf *name = &node->data.field_access_expr.field_name;
+        Buf *name = node->data.field_access_expr.field_name;
         assert(buf_eql_str(name, "len"));
         return LLVMConstInt(g->builtin_types.entry_usize->type_ref,
                 struct_type->data.array.len, false);
@@ -2726,18 +2726,18 @@ static LLVMValueRef gen_block(CodeGen *g, AstNode *block_node, TypeTableEntry *i
 }
 
 static int find_asm_index(CodeGen *g, AstNode *node, AsmToken *tok) {
-    const char *ptr = buf_ptr(&node->data.asm_expr.asm_template) + tok->start + 2;
+    const char *ptr = buf_ptr(node->data.asm_expr.asm_template) + tok->start + 2;
     int len = tok->end - tok->start - 2;
     int result = 0;
     for (int i = 0; i < node->data.asm_expr.output_list.length; i += 1, result += 1) {
         AsmOutput *asm_output = node->data.asm_expr.output_list.at(i);
-        if (buf_eql_mem(&asm_output->asm_symbolic_name, ptr, len)) {
+        if (buf_eql_mem(asm_output->asm_symbolic_name, ptr, len)) {
             return result;
         }
     }
     for (int i = 0; i < node->data.asm_expr.input_list.length; i += 1, result += 1) {
         AsmInput *asm_input = node->data.asm_expr.input_list.at(i);
-        if (buf_eql_mem(&asm_input->asm_symbolic_name, ptr, len)) {
+        if (buf_eql_mem(asm_input->asm_symbolic_name, ptr, len)) {
             return result;
         }
     }
@@ -2749,7 +2749,7 @@ static LLVMValueRef gen_asm_expr(CodeGen *g, AstNode *node) {
 
     AstNodeAsmExpr *asm_expr = &node->data.asm_expr;
 
-    Buf *src_template = &asm_expr->asm_template;
+    Buf *src_template = asm_expr->asm_template;
 
     Buf llvm_template = BUF_INIT;
     buf_resize(&llvm_template, 0);
@@ -2796,11 +2796,11 @@ static LLVMValueRef gen_asm_expr(CodeGen *g, AstNode *node) {
     for (int i = 0; i < asm_expr->output_list.length; i += 1, total_index += 1) {
         AsmOutput *asm_output = asm_expr->output_list.at(i);
         bool is_return = (asm_output->return_type != nullptr);
-        assert(*buf_ptr(&asm_output->constraint) == '=');
+        assert(*buf_ptr(asm_output->constraint) == '=');
         if (is_return) {
-            buf_appendf(&constraint_buf, "=%s", buf_ptr(&asm_output->constraint) + 1);
+            buf_appendf(&constraint_buf, "=%s", buf_ptr(asm_output->constraint) + 1);
         } else {
-            buf_appendf(&constraint_buf, "=*%s", buf_ptr(&asm_output->constraint) + 1);
+            buf_appendf(&constraint_buf, "=*%s", buf_ptr(asm_output->constraint) + 1);
         }
         if (total_index + 1 < total_constraint_count) {
             buf_append_char(&constraint_buf, ',');
@@ -2816,7 +2816,7 @@ static LLVMValueRef gen_asm_expr(CodeGen *g, AstNode *node) {
     }
     for (int i = 0; i < asm_expr->input_list.length; i += 1, total_index += 1, param_index += 1) {
         AsmInput *asm_input = asm_expr->input_list.at(i);
-        buf_append_buf(&constraint_buf, &asm_input->constraint);
+        buf_append_buf(&constraint_buf, asm_input->constraint);
         if (total_index + 1 < total_constraint_count) {
             buf_append_char(&constraint_buf, ',');
         }
@@ -2885,7 +2885,7 @@ static LLVMValueRef gen_container_init_expr(CodeGen *g, AstNode *node) {
             if (type_struct_field->type_entry->id == TypeTableEntryIdVoid) {
                 continue;
             }
-            assert(buf_eql_buf(type_struct_field->name, &field_node->data.struct_val_field.name));
+            assert(buf_eql_buf(type_struct_field->name, field_node->data.struct_val_field.name));
 
             set_debug_source_node(g, field_node);
             LLVMValueRef field_ptr = LLVMBuildStructGEP(g->builder, tmp_struct_ptr, type_struct_field->gen_index, "");
@@ -3853,7 +3853,7 @@ static void generate_error_name_table(CodeGen *g) {
     for (int i = 1; i < g->error_decls.length; i += 1) {
         AstNode *error_decl_node = g->error_decls.at(i);
         assert(error_decl_node->type == NodeTypeErrorValueDecl);
-        Buf *name = &error_decl_node->data.error_value_decl.name;
+        Buf *name = error_decl_node->data.error_value_decl.name;
 
         LLVMValueRef str_init = LLVMConstString(buf_ptr(name), buf_len(name), true);
         LLVMValueRef str_global = LLVMAddGlobal(g->module, LLVMTypeOf(str_init), "");
@@ -3882,7 +3882,7 @@ static void build_label_blocks(CodeGen *g, FnTableEntry *fn) {
     LLVMBasicBlockRef entry_block = LLVMAppendBasicBlock(fn->fn_value, "entry");
     for (int i = 0; i < fn->all_labels.length; i += 1) {
         LabelTableEntry *label = fn->all_labels.at(i);
-        Buf *name = &label->decl_node->data.label.name;
+        Buf *name = label->decl_node->data.label.name;
         label->basic_block = LLVMAppendBasicBlock(fn->fn_value, buf_ptr(name));
     }
     LLVMPositionBuilderAtEnd(g->builder, entry_block);
@@ -4951,7 +4951,7 @@ void codegen_generate_h_file(CodeGen *g) {
         buf_appendf(&h_buf, "%s %s %s(",
                 buf_ptr(export_macro),
                 buf_ptr(&return_type_c),
-                buf_ptr(&fn_proto->name));
+                buf_ptr(fn_proto->name));
 
         Buf param_type_c = BUF_INIT;
         if (fn_proto->params.length) {
@@ -4961,7 +4961,7 @@ void codegen_generate_h_file(CodeGen *g) {
                 to_c_type(g, param_type, &param_type_c);
                 buf_appendf(&h_buf, "%s %s",
                         buf_ptr(&param_type_c),
-                        buf_ptr(&param_decl_node->data.param_decl.name));
+                        buf_ptr(param_decl_node->data.param_decl.name));
                 if (param_i < fn_proto->params.length - 1)
                     buf_appendf(&h_buf, ", ");
             }
src/eval.cpp
@@ -427,7 +427,7 @@ static EvalVar *find_var(EvalFn *ef, Buf *name) {
 static bool eval_symbol_expr(EvalFn *ef, AstNode *node, ConstExprValue *out_val) {
     assert(node->type == NodeTypeSymbol);
 
-    Buf *name = &node->data.symbol_expr.symbol;
+    Buf *name = node->data.symbol_expr.symbol;
     EvalVar *var = find_var(ef, name);
     assert(var);
 
@@ -924,7 +924,7 @@ static bool eval_field_access_expr(EvalFn *ef, AstNode *node, ConstExprValue *ou
     TypeTableEntry *struct_type = get_resolved_expr(struct_expr)->type_entry;
 
     if (struct_type->id == TypeTableEntryIdArray) {
-        Buf *name = &node->data.field_access_expr.field_name;
+        Buf *name = node->data.field_access_expr.field_name;
         assert(buf_eql_str(name, "len"));
         zig_panic("TODO");
     } else if (struct_type->id == TypeTableEntryIdStruct || (struct_type->id == TypeTableEntryIdPointer &&
@@ -971,7 +971,7 @@ static bool eval_for_expr(EvalFn *ef, AstNode *node, ConstExprValue *out_val) {
     if (eval_expr(ef, array_node, &array_val)) return true;
 
     assert(elem_node->type == NodeTypeSymbol);
-    Buf *elem_var_name = &elem_node->data.symbol_expr.symbol;
+    Buf *elem_var_name = elem_node->data.symbol_expr.symbol;
 
     if (node->data.for_expr.elem_is_ptr) {
         zig_panic("TODO");
@@ -980,7 +980,7 @@ static bool eval_for_expr(EvalFn *ef, AstNode *node, ConstExprValue *out_val) {
     Buf *index_var_name = nullptr;
     if (index_node) {
         assert(index_node->type == NodeTypeSymbol);
-        index_var_name = &index_node->data.symbol_expr.symbol;
+        index_var_name = index_node->data.symbol_expr.symbol;
     }
 
     uint64_t it_index = 0;
@@ -1164,7 +1164,7 @@ static bool eval_var_decl_expr(EvalFn *ef, AstNode *node, ConstExprValue *out_va
 
     my_scope->vars.add_one();
     EvalVar *var = &my_scope->vars.last();
-    var->name = &node->data.variable_declaration.symbol;
+    var->name = node->data.variable_declaration.symbol;
 
     if (eval_expr(ef, node->data.variable_declaration.expr, &var->value)) return true;
 
@@ -1178,13 +1178,7 @@ static bool eval_number_literal_expr(EvalFn *ef, AstNode *node, ConstExprValue *
     assert(!node->data.number_literal.overflow);
 
     out_val->ok = true;
-    if (node->data.number_literal.kind == NumLitUInt) {
-        bignum_init_unsigned(&out_val->data.x_bignum, node->data.number_literal.data.x_uint);
-    } else if (node->data.number_literal.kind == NumLitFloat) {
-        bignum_init_float(&out_val->data.x_bignum, node->data.number_literal.data.x_float);
-    } else {
-        zig_unreachable();
-    }
+    bignum_init_bignum(&out_val->data.x_bignum, node->data.number_literal.bignum);
 
     return false;
 }
@@ -1339,7 +1333,7 @@ static bool eval_fn_args(EvalFnRoot *efr, FnTableEntry *fn, ConstExprValue *args
 
         root_scope->vars.add_one();
         EvalVar *eval_var = &root_scope->vars.last();
-        eval_var->name = &decl_param_node->data.param_decl.name;
+        eval_var->name = decl_param_node->data.param_decl.name;
         eval_var->value = *src_const_val;
     }
 
src/parseh.cpp
@@ -104,14 +104,14 @@ static AstNode *create_node(Context *c, NodeType type) {
 
 static AstNode *create_symbol_node(Context *c, const char *type_name) {
     AstNode *node = create_node(c, NodeTypeSymbol);
-    buf_init_from_str(&node->data.symbol_expr.symbol, type_name);
+    node->data.symbol_expr.symbol = buf_create_from_str(type_name); // symbol is a Buf pointer now; presumably a newly created Buf holding type_name (confirm in buffer.hpp)
     return node;
 }
 
 static AstNode *create_field_access_node(Context *c, const char *lhs, const char *rhs) {
     AstNode *node = create_node(c, NodeTypeFieldAccessExpr);
     node->data.field_access_expr.struct_expr = create_symbol_node(c, lhs);
-    buf_init_from_str(&node->data.field_access_expr.field_name, rhs);
+    node->data.field_access_expr.field_name = buf_create_from_str(rhs); // field_name is a Buf pointer now; presumably a newly created Buf holding rhs (confirm in buffer.hpp)
     normalize_parent_ptrs(node);
     return node;
 }
@@ -120,7 +120,7 @@ static AstNode *create_typed_var_decl_node(Context *c, bool is_const, const char
         AstNode *type_node, AstNode *init_node)
 {
     AstNode *node = create_node(c, NodeTypeVariableDeclaration);
-    buf_init_from_str(&node->data.variable_declaration.symbol, var_name);
+    node->data.variable_declaration.symbol = buf_create_from_str(var_name);
     node->data.variable_declaration.is_const = is_const;
     node->data.variable_declaration.top_level_decl.visib_mod = c->visib_mod;
     node->data.variable_declaration.expr = init_node;
@@ -146,7 +146,7 @@ static AstNode *create_prefix_node(Context *c, PrefixOp op, AstNode *child_node)
 static AstNode *create_struct_field_node(Context *c, const char *name, AstNode *type_node) {
     assert(type_node);
     AstNode *node = create_node(c, NodeTypeStructField);
-    buf_init_from_str(&node->data.struct_field.name, name);
+    node->data.struct_field.name = buf_create_from_str(name);
     node->data.struct_field.top_level_decl.visib_mod = VisibModPub;
     node->data.struct_field.type = type_node;
 
@@ -157,7 +157,7 @@ static AstNode *create_struct_field_node(Context *c, const char *name, AstNode *
 static AstNode *create_param_decl_node(Context *c, const char *name, AstNode *type_node, bool is_noalias) {
     assert(type_node);
     AstNode *node = create_node(c, NodeTypeParamDecl);
-    buf_init_from_str(&node->data.param_decl.name, name);
+    node->data.param_decl.name = buf_create_from_str(name);
     node->data.param_decl.type = type_node;
     node->data.param_decl.is_noalias = is_noalias;
 
@@ -171,17 +171,18 @@ static AstNode *create_char_lit_node(Context *c, uint8_t value) {
     return node;
 }
 
+// accepts ownership of buf
 static AstNode *create_str_lit_node(Context *c, Buf *buf) {
     AstNode *node = create_node(c, NodeTypeStringLiteral);
-    buf_init_from_buf(&node->data.string_literal.buf, buf);
+    node->data.string_literal.buf = buf; // stores the caller's pointer directly instead of copying, hence the ownership transfer
     node->data.string_literal.c = true;
     return node;
 }
 
 static AstNode *create_num_lit_float(Context *c, double x) {
     AstNode *node = create_node(c, NodeTypeNumberLiteral);
-    node->data.number_literal.kind = NumLitFloat;
-    node->data.number_literal.data.x_float = x;
+    node->data.number_literal.bignum = allocate_nonzero<BigNum>(1); // room for one BigNum; the name suggests it is not zeroed, so the init call below must set every field (TODO confirm)
+    bignum_init_float(node->data.number_literal.bignum, x);
     return node;
 }
 
@@ -193,8 +194,8 @@ static AstNode *create_num_lit_float_negative(Context *c, double x, bool negativ
 
 static AstNode *create_num_lit_unsigned(Context *c, uint64_t x) {
     AstNode *node = create_node(c, NodeTypeNumberLiteral);
-    node->data.number_literal.kind = NumLitUInt;
-    node->data.number_literal.data.x_uint = x;
+    node->data.number_literal.bignum = allocate_nonzero<BigNum>(1); // room for one BigNum; the name suggests it is not zeroed, so the init call below must set every field (TODO confirm)
+    bignum_init_unsigned(node->data.number_literal.bignum, x);
     return node;
 }
 
@@ -221,7 +222,7 @@ static AstNode *create_num_lit_signed(Context *c, int64_t x) {
 
 static AstNode *create_type_decl_node(Context *c, const char *name, AstNode *child_type_node) {
     AstNode *node = create_node(c, NodeTypeTypeDecl);
-    buf_init_from_str(&node->data.type_decl.symbol, name);
+    node->data.type_decl.symbol = buf_create_from_str(name);
     node->data.type_decl.top_level_decl.visib_mod = c->visib_mod;
     node->data.type_decl.child_type = child_type_node;
 
@@ -240,7 +241,7 @@ static AstNode *create_fn_proto_node(Context *c, Buf *name, TypeTableEntry *fn_t
     AstNode *node = create_node(c, NodeTypeFnProto);
     node->data.fn_proto.is_inline = true;
     node->data.fn_proto.top_level_decl.visib_mod = c->visib_mod;
-    buf_init_from_buf(&node->data.fn_proto.name, name);
+    node->data.fn_proto.name = name;
     node->data.fn_proto.return_type = make_type_node(c, fn_type->data.fn.fn_type_id.return_type);
 
     for (int i = 0; i < fn_type->data.fn.fn_type_id.param_count; i += 1) {
@@ -273,7 +274,7 @@ static AstNode *create_inline_fn_node(Context *c, Buf *fn_name, Buf *var_name, T
     fn_call_node->data.fn_call_expr.fn_ref_expr = unwrap_node;
     for (int i = 0; i < fn_type->data.fn.fn_type_id.param_count; i += 1) {
         AstNode *decl_node = node->data.fn_def.fn_proto->data.fn_proto.params.at(i);
-        Buf *param_name = &decl_node->data.param_decl.name;
+        Buf *param_name = decl_node->data.param_decl.name;
         fn_call_node->data.fn_call_expr.params.append(create_symbol_node(c, buf_ptr(param_name)));
     }
 
@@ -686,10 +687,9 @@ static TypeTableEntry *resolve_qual_type(Context *c, QualType qt, const Decl *de
 }
 
 static void visit_fn_decl(Context *c, const FunctionDecl *fn_decl) {
-    Buf fn_name = BUF_INIT;
-    buf_init_from_str(&fn_name, decl_name(fn_decl));
+    Buf *fn_name = buf_create_from_str(decl_name(fn_decl));
 
-    if (c->fn_table.maybe_get(&fn_name)) {
+    if (c->fn_table.maybe_get(fn_name)) {
         // we already saw this function
         return;
     }
@@ -697,14 +697,14 @@ static void visit_fn_decl(Context *c, const FunctionDecl *fn_decl) {
     TypeTableEntry *fn_type = resolve_qual_type(c, fn_decl->getType(), fn_decl);
 
     if (fn_type->id == TypeTableEntryIdInvalid) {
-        emit_warning(c, fn_decl, "ignoring function '%s' - unable to resolve type", buf_ptr(&fn_name));
+        emit_warning(c, fn_decl, "ignoring function '%s' - unable to resolve type", buf_ptr(fn_name));
         return;
     }
     assert(fn_type->id == TypeTableEntryIdFn);
 
 
     AstNode *node = create_node(c, NodeTypeFnProto);
-    buf_init_from_buf(&node->data.fn_proto.name, &fn_name);
+    node->data.fn_proto.name = fn_name;
 
     node->data.fn_proto.is_extern = fn_type->data.fn.fn_type_id.is_extern;
     node->data.fn_proto.top_level_decl.visib_mod = c->visib_mod;
@@ -731,7 +731,7 @@ static void visit_fn_decl(Context *c, const FunctionDecl *fn_decl) {
 
     normalize_parent_ptrs(node);
 
-    c->fn_table.put(buf_create_from_buf(&fn_name), true);
+    c->fn_table.put(buf_create_from_buf(fn_name), true);
     c->root->data.root.top_level_decls.append(node);
 }
 
@@ -937,7 +937,7 @@ static void visit_enum_decl(Context *c, const EnumDecl *enum_decl) {
         if (enum_type->data.enumeration.complete) {
             // now create top level decl for the type
             AstNode *enum_node = create_node(c, NodeTypeContainerDecl);
-            buf_init_from_buf(&enum_node->data.struct_decl.name, &enum_type->name);
+            enum_node->data.struct_decl.name = &enum_type->name;
             enum_node->data.struct_decl.kind = ContainerKindEnum;
             enum_node->data.struct_decl.top_level_decl.visib_mod = VisibModExport;
             enum_node->data.struct_decl.type_entry = enum_type;
@@ -1114,7 +1114,7 @@ static void visit_record_decl(Context *c, const RecordDecl *record_decl) {
     if (struct_type->data.structure.complete) {
         // now create a top level decl node for the type
         AstNode *struct_node = create_node(c, NodeTypeContainerDecl);
-        buf_init_from_buf(&struct_node->data.struct_decl.name, &struct_type->name);
+        struct_node->data.struct_decl.name = &struct_type->name;
         struct_node->data.struct_decl.kind = ContainerKindStruct;
         struct_node->data.struct_decl.top_level_decl.visib_mod = VisibModExport;
         struct_node->data.struct_decl.type_entry = struct_type;
@@ -1284,7 +1284,7 @@ static void render_aliases(Context *c) {
     for (int i = 0; i < c->aliases.length; i += 1) {
         AstNode *alias_node = c->aliases.at(i);
         assert(alias_node->type == NodeTypeVariableDeclaration);
-        Buf *name = &alias_node->data.variable_declaration.symbol;
+        Buf *name = alias_node->data.variable_declaration.symbol;
         if (name_exists(c, name)) {
             continue;
         }
@@ -1327,7 +1327,7 @@ static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *ch
             case CTokIdStrLit:
                 if (is_last && is_first) {
                     AstNode *var_node = create_var_decl_node(c, buf_ptr(name),
-                            create_str_lit_node(c, &tok->data.str_lit));
+                            create_str_lit_node(c, buf_create_from_buf(&tok->data.str_lit)));
                     c->macro_table.put(name, var_node);
                 }
                 return;
src/parser.cpp
@@ -21,6 +21,9 @@ struct ParseContext {
     ImportTableEntry *owner;
     ErrColor err_color;
     uint32_t *next_node_index;
+    // These buffers are used frequently so we preallocate them once here.
+    Buf *void_buf;
+    Buf *empty_buf;
 };
 
 __attribute__ ((format (printf, 4, 5)))
@@ -29,7 +32,9 @@ static void ast_asm_error(ParseContext *pc, AstNode *node, int offset, const cha
     assert(node->type == NodeTypeAsmExpr);
 
 
-    SrcPos pos = node->data.asm_expr.offset_map.at(offset);
+    // TODO calculate or otherwise keep track of originating line/column number for strings
+    //SrcPos pos = node->data.asm_expr.offset_map.at(offset);
+    SrcPos pos = { node->line, node->column };
 
     va_list ap;
     va_start(ap, format);
@@ -83,12 +88,12 @@ static AstNode *ast_create_node(ParseContext *pc, NodeType type, Token *first_to
 
 static AstNode *ast_create_void_type_node(ParseContext *pc, Token *token) {
     AstNode *node = ast_create_node(pc, NodeTypeSymbol, token);
-    buf_init_from_str(&node->data.symbol_expr.symbol, "void");
+    node->data.symbol_expr.symbol = pc->void_buf;
     return node;
 }
 
 static void parse_asm_template(ParseContext *pc, AstNode *node) {
-    Buf *asm_template = &node->data.asm_expr.asm_template;
+    Buf *asm_template = node->data.asm_expr.asm_template;
 
     enum State {
         StateStart,
@@ -170,514 +175,29 @@ static void parse_asm_template(ParseContext *pc, AstNode *node) {
     }
 }
 
-static uint8_t parse_char_literal(ParseContext *pc, Token *token) {
-    // skip the single quotes at beginning and end
-    // convert escape sequences
-    bool escape = false;
-    int return_count = 0;
-    uint8_t return_value;
-    for (int i = token->start_pos + 1; i < token->end_pos - 1; i += 1) {
-        uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
-        if (escape) {
-            switch (c) {
-                case '\\':
-                    return_value = '\\';
-                    return_count += 1;
-                    break;
-                case 'r':
-                    return_value = '\r';
-                    return_count += 1;
-                    break;
-                case 'n':
-                    return_value = '\n';
-                    return_count += 1;
-                    break;
-                case 't':
-                    return_value = '\t';
-                    return_count += 1;
-                    break;
-                case '\'':
-                    return_value = '\'';
-                    return_count += 1;
-                    break;
-                default:
-                    ast_error(pc, token, "invalid escape character");
-            }
-            escape = false;
-        } else if (c == '\\') {
-            escape = true;
-        } else {
-            return_value = c;
-            return_count += 1;
-        }
-    }
-    if (return_count == 0) {
-        ast_error(pc, token, "character literal too short");
-    } else if (return_count > 1) {
-        ast_error(pc, token, "character literal too long");
-    }
-    return return_value;
-}
-
-static uint32_t get_hex_digit(uint8_t c) {
-    switch (c) {
-        case '0': return 0;
-        case '1': return 1;
-        case '2': return 2;
-        case '3': return 3;
-        case '4': return 4;
-        case '5': return 5;
-        case '6': return 6;
-        case '7': return 7;
-        case '8': return 8;
-        case '9': return 9;
-
-        case 'a':
-        case 'A':
-            return 10;
-        case 'b':
-        case 'B':
-            return 11;
-        case 'c':
-        case 'C':
-            return 12;
-        case 'd':
-        case 'D':
-            return 13;
-        case 'e':
-        case 'E':
-            return 14;
-        case 'f':
-        case 'F':
-            return 15;
-        default:
-            return UINT32_MAX;
-    }
-}
-
-static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool *out_c_str,
-        ZigList<SrcPos> *offset_map)
-{
-    if (token->raw_string_start > 0) {
-        uint8_t c1 = *((uint8_t*)buf_ptr(pc->buf) + token->start_pos);
-        uint8_t c2 = *((uint8_t*)buf_ptr(pc->buf) + token->start_pos + 1);
-        assert(c1 == 'r');
-        if (out_c_str) {
-            *out_c_str = (c2 == 'c');
-        }
-        const char *str = buf_ptr(pc->buf) + token->raw_string_start;
-        buf_init_from_mem(buf, str, token->raw_string_end - token->raw_string_start);
-        if (offset_map) {
-            SrcPos pos = {token->start_line, token->start_column};
-            for (int i = token->start_pos; i < token->raw_string_start; i += 1) {
-                uint8_t c = buf_ptr(pc->buf)[i];
-                if (c == '\n') {
-                    pos.line += 1;
-                    pos.column = 0;
-                } else {
-                    pos.column += 1;
-                }
-            }
-            for (int i = token->raw_string_start; i < token->raw_string_end; i += 1) {
-                offset_map->append(pos);
-
-                uint8_t c = buf_ptr(pc->buf)[i];
-                if (c == '\n') {
-                    pos.line += 1;
-                    pos.column = 0;
-                } else {
-                    pos.column += 1;
-                }
-            }
-        }
-        return;
-    }
-
-    // skip the double quotes at beginning and end
-    // convert escape sequences
-    // detect c string literal
-
-    enum State {
-        StatePre,
-        StateSkipQuot,
-        StateStart,
-        StateEscape,
-        StateHex1,
-        StateHex2,
-        StateUnicode,
-    };
-
-    buf_resize(buf, 0);
-
-    int unicode_index;
-    int unicode_end;
-
-    State state = StatePre;
-    SrcPos pos = {token->start_line, token->start_column};
-    uint32_t hex_value = 0;
-    for (int i = token->start_pos; i < token->end_pos - 1; i += 1) {
-        uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
-
-        switch (state) {
-            case StatePre:
-                switch (c) {
-                    case '@':
-                        state = StateSkipQuot;
-                        break;
-                    case 'c':
-                        if (out_c_str) {
-                            *out_c_str = true;
-                        } else {
-                            ast_error(pc, token, "C string literal not allowed here");
-                        }
-                        state = StateSkipQuot;
-                        break;
-                    case '"':
-                        state = StateStart;
-                        break;
-                    default:
-                        ast_error(pc, token, "invalid string character");
-                }
-                break;
-            case StateSkipQuot:
-                state = StateStart;
-                break;
-            case StateStart:
-                if (c == '\\') {
-                    state = StateEscape;
-                } else {
-                    buf_append_char(buf, c);
-                    if (offset_map) offset_map->append(pos);
-                }
-                break;
-            case StateEscape:
-                switch (c) {
-                    case '\\':
-                        buf_append_char(buf, '\\');
-                        if (offset_map) offset_map->append(pos);
-                        state = StateStart;
-                        break;
-                    case 'r':
-                        buf_append_char(buf, '\r');
-                        if (offset_map) offset_map->append(pos);
-                        state = StateStart;
-                        break;
-                    case 'n':
-                        buf_append_char(buf, '\n');
-                        if (offset_map) offset_map->append(pos);
-                        state = StateStart;
-                        break;
-                    case 't':
-                        buf_append_char(buf, '\t');
-                        if (offset_map) offset_map->append(pos);
-                        state = StateStart;
-                        break;
-                    case '"':
-                        buf_append_char(buf, '"');
-                        if (offset_map) offset_map->append(pos);
-                        state = StateStart;
-                        break;
-                    case '\'':
-                        buf_append_char(buf, '\'');
-                        if (offset_map) offset_map->append(pos);
-                        state = StateStart;
-                        break;
-                    case 'x':
-                        state = StateHex1;
-                        break;
-                    case 'u':
-                        state = StateUnicode;
-                        unicode_index = 0;
-                        unicode_end = 4;
-                        hex_value = 0;
-                        break;
-                    case 'U':
-                        state = StateUnicode;
-                        unicode_index = 0;
-                        unicode_end = 6;
-                        hex_value = 0;
-                        break;
-                    default:
-                        ast_error(pc, token, "invalid escape character");
-                }
-                break;
-            case StateHex1:
-                {
-                    uint32_t hex_digit = get_hex_digit(c);
-                    if (hex_digit == UINT32_MAX) {
-                        ast_error(pc, token, "invalid hex digit: '%c'", c);
-                    }
-                    hex_value = hex_digit * 16;
-                    state = StateHex2;
-                    break;
-                }
-            case StateHex2:
-                {
-                    uint32_t hex_digit = get_hex_digit(c);
-                    if (hex_digit == UINT32_MAX) {
-                        ast_error(pc, token, "invalid hex digit: '%c'", c);
-                    }
-                    hex_value += hex_digit;
-                    assert(hex_value >= 0 && hex_value <= 255);
-                    buf_append_char(buf, hex_value);
-                    state = StateStart;
-                    break;
-                }
-            case StateUnicode:
-                {
-                    uint32_t hex_digit = get_hex_digit(c);
-                    if (hex_digit == UINT32_MAX) {
-                        ast_error(pc, token, "invalid hex digit: '%c'", c);
-                    }
-                    hex_value *= 16;
-                    hex_value += hex_digit;
-                    unicode_index += 1;
-                    if (unicode_index >= unicode_end) {
-                        if (hex_value <= 0x7f) {
-                            // 00000000 00000000 00000000 0xxxxxxx
-                            buf_append_char(buf, hex_value);
-                        } else if (hex_value <= 0x7ff) {
-                            // 00000000 00000000 00000xxx xx000000
-                            buf_append_char(buf, (unsigned char)(0xc0 | (hex_value >> 6)));
-                            // 00000000 00000000 00000000 00xxxxxx
-                            buf_append_char(buf, (unsigned char)(0x80 | (hex_value & 0x3f)));
-                        } else if (hex_value <= 0xffff) {
-                            // 00000000 00000000 xxxx0000 00000000
-                            buf_append_char(buf, (unsigned char)(0xe0 | (hex_value >> 12)));
-                            // 00000000 00000000 0000xxxx xx000000
-                            buf_append_char(buf, (unsigned char)(0x80 | ((hex_value >> 6) & 0x3f)));
-                            // 00000000 00000000 00000000 00xxxxxx
-                            buf_append_char(buf, (unsigned char)(0x80 | (hex_value & 0x3f)));
-                        } else if (hex_value <= 0x10ffff) {
-                            // 00000000 000xxx00 00000000 00000000
-                            buf_append_char(buf, (unsigned char)(0xf0 | (hex_value >> 18)));
-                            // 00000000 000000xx xxxx0000 00000000
-                            buf_append_char(buf, (unsigned char)(0x80 | ((hex_value >> 12) & 0x3f)));
-                            // 00000000 00000000 0000xxxx xx000000
-                            buf_append_char(buf, (unsigned char)(0x80 | ((hex_value >> 6) & 0x3f)));
-                            // 00000000 00000000 00000000 00xxxxxx
-                            buf_append_char(buf, (unsigned char)(0x80 | (hex_value & 0x3f)));
-                        } else {
-                            ast_error(pc, token, "unicode value out of range: %x", hex_value);
-                        }
-                        state = StateStart;
-                    }
-                    break;
-                }
-        }
-        if (c == '\n') {
-            pos.line += 1;
-            pos.column = 0;
-        } else {
-            pos.column += 1;
-        }
-    }
-    assert(state == StateStart);
-    if (offset_map) offset_map->append(pos);
+static Buf *token_buf(Token *token) {
+    assert(token->id == TokenIdStringLiteral || token->id == TokenIdSymbol);
+    return &token->data.str_lit.str;
 }
 
-static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
-    uint8_t *first_char = (uint8_t *)buf_ptr(pc->buf) + token->start_pos;
-    bool at_sign = *first_char == '@';
-    if (at_sign) {
-        parse_string_literal(pc, token, buf, nullptr, nullptr);
-    } else {
-        buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos);
-    }
+static BigNum *token_bignum(Token *token) {
+    assert(token->id == TokenIdNumberLiteral);
+    return &token->data.num_lit.bignum;
 }
 
-
-static unsigned long long parse_int_digits(ParseContext *pc, int digits_start, int digits_end, int radix,
-    int skip_index, bool *overflow)
-{
-    unsigned long long x = 0;
-
-    for (int i = digits_start; i < digits_end; i++) {
-        if (i == skip_index)
-            continue;
-        uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
-        unsigned long long digit = get_digit_value(c);
-
-        // x *= radix;
-        if (__builtin_umulll_overflow(x, radix, &x)) {
-            *overflow = true;
-            return 0;
-        }
-
-        // x += digit
-        if (__builtin_uaddll_overflow(x, digit, &x)) {
-            *overflow = true;
-            return 0;
-        }
-    }
-    return x;
+static uint8_t token_char_lit(Token *token) {
+    assert(token->id == TokenIdCharLiteral);
+    return token->data.char_lit.c;
 }
 
-static void parse_number_literal(ParseContext *pc, Token *token, AstNodeNumberLiteral *num_lit) {
-    assert(token->id == TokenIdNumberLiteral);
-
-    int whole_number_start = token->start_pos;
-    if (token->radix != 10) {
-        // skip the "0x"
-        whole_number_start += 2;
-    }
-
-    int whole_number_end = token->decimal_point_pos;
-    if (whole_number_end <= whole_number_start) {
-        // TODO: error for empty whole number part
-        num_lit->overflow = true;
-        return;
-    }
-
-    if (token->decimal_point_pos == token->end_pos) {
-        // integer
-        unsigned long long whole_number = parse_int_digits(pc, whole_number_start, whole_number_end,
-            token->radix, -1, &num_lit->overflow);
-        if (num_lit->overflow) return;
-
-        num_lit->data.x_uint = whole_number;
-        num_lit->kind = NumLitUInt;
+static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
+    if (token->id == TokenIdSymbol) {
+        buf_init_from_buf(buf, token_buf(token));
     } else {
-        // float
-
-        if (token->radix == 10) {
-            // use a third-party base-10 float parser
-            char *str_begin = buf_ptr(pc->buf) + whole_number_start;
-            char *str_end;
-            errno = 0;
-            double x = strtod(str_begin, &str_end);
-            if (errno) {
-                // TODO: forward error to user
-                num_lit->overflow = true;
-                return;
-            }
-            assert(str_end == buf_ptr(pc->buf) + token->end_pos);
-            num_lit->data.x_float = x;
-            num_lit->kind = NumLitFloat;
-            return;
-        }
-
-        if (token->decimal_point_pos < token->exponent_marker_pos) {
-            // fraction
-            int fraction_start = token->decimal_point_pos + 1;
-            int fraction_end = token->exponent_marker_pos;
-            if (fraction_end <= fraction_start) {
-                // TODO: error for empty fraction part
-                num_lit->overflow = true;
-                return;
-            }
-        }
-
-        // trim leading and trailing zeros in the significand digit sequence
-        int significand_start = whole_number_start;
-        for (; significand_start < token->exponent_marker_pos; significand_start++) {
-            if (significand_start == token->decimal_point_pos)
-                continue;
-            uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + significand_start);
-            if (c != '0')
-                break;
-        }
-        int significand_end = token->exponent_marker_pos;
-        for (; significand_end - 1 > significand_start; significand_end--) {
-            if (significand_end - 1 <= token->decimal_point_pos) {
-                significand_end = token->decimal_point_pos;
-                break;
-            }
-            uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + significand_end - 1);
-            if (c != '0')
-                break;
-        }
-
-        unsigned long long significand_as_int = parse_int_digits(pc, significand_start, significand_end,
-            token->radix, token->decimal_point_pos, &num_lit->overflow);
-        if (num_lit->overflow) return;
-
-        int exponent_in_bin_or_dec = 0;
-        if (significand_end > token->decimal_point_pos) {
-            exponent_in_bin_or_dec = token->decimal_point_pos + 1 - significand_end;
-            if (token->radix == 2) {
-                // already good
-            } else if (token->radix == 8) {
-                exponent_in_bin_or_dec *= 3;
-            } else if (token->radix == 10) {
-                // already good
-            } else if (token->radix == 16) {
-                exponent_in_bin_or_dec *= 4;
-            } else zig_unreachable();
-        }
-
-        if (token->exponent_marker_pos < token->end_pos) {
-            // exponent
-            int exponent_start = token->exponent_marker_pos + 1;
-            int exponent_end = token->end_pos;
-            if (exponent_end <= exponent_start) {
-                // TODO: error for empty exponent part
-                num_lit->overflow = true;
-                return;
-            }
-            bool is_exponent_negative = false;
-            uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + exponent_start);
-            if (c == '+') {
-                exponent_start += 1;
-            } else if (c == '-') {
-                exponent_start += 1;
-                is_exponent_negative = true;
-            }
-
-            if (exponent_end <= exponent_start) {
-                // TODO: error for empty exponent part
-                num_lit->overflow = true;
-                return;
-            }
-
-            unsigned long long specified_exponent = parse_int_digits(pc, exponent_start, exponent_end,
-                10, -1, &num_lit->overflow);
-            // TODO: this check is a little silly
-            if (specified_exponent >= LLONG_MAX) {
-                num_lit->overflow = true;
-                return;
-            }
-
-            if (is_exponent_negative) {
-                exponent_in_bin_or_dec -= specified_exponent;
-            } else {
-                exponent_in_bin_or_dec += specified_exponent;
-            }
-        }
-
-        uint64_t significand_bits;
-        uint64_t exponent_bits;
-        if (significand_as_int != 0) {
-            // normalize the significand
-            if (token->radix == 10) {
-                zig_panic("TODO: decimal floats");
-            } else {
-                int significand_magnitude_in_bin = __builtin_clzll(1) - __builtin_clzll(significand_as_int);
-                exponent_in_bin_or_dec += significand_magnitude_in_bin;
-                if (!(-1023 <= exponent_in_bin_or_dec && exponent_in_bin_or_dec < 1023)) {
-                    num_lit->overflow = true;
-                    return;
-                }
-
-                // this should chop off exactly one 1 bit from the top.
-                significand_bits = ((uint64_t)significand_as_int << (52 - significand_magnitude_in_bin)) & 0xfffffffffffffULL;
-                exponent_bits = exponent_in_bin_or_dec + 1023;
-            }
-        } else {
-            // 0 is all 0's
-            significand_bits = 0;
-            exponent_bits = 0;
-        }
-
-        uint64_t double_bits = (exponent_bits << 52) | significand_bits;
-        double x = *(double *)&double_bits;
-
-        num_lit->data.x_float = x;
-        num_lit->kind = NumLitFloat;
+        buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos);
     }
 }
 
-
 __attribute__ ((noreturn))
 static void ast_invalid_token_error(ParseContext *pc, Token *token) {
     Buf token_value = BUF_INIT;
@@ -723,7 +243,7 @@ static AstNode *ast_parse_directive(ParseContext *pc, int *token_index) {
 
     Token *name_symbol = ast_eat_token(pc, token_index, TokenIdSymbol);
 
-    ast_buf_from_token(pc, name_symbol, &node->data.directive.name);
+    node->data.directive.name = token_buf(name_symbol);
 
     node->data.directive.expr = ast_parse_grouped_expr(pc, token_index, true);
 
@@ -769,12 +289,12 @@ static AstNode *ast_parse_param_decl(ParseContext *pc, int *token_index) {
         token = &pc->tokens->at(*token_index);
     }
 
-    buf_resize(&node->data.param_decl.name, 0);
+    node->data.param_decl.name = pc->empty_buf;
 
     if (token->id == TokenIdSymbol) {
         Token *next_token = &pc->tokens->at(*token_index + 1);
         if (next_token->id == TokenIdColon) {
-            ast_buf_from_token(pc, token, &node->data.param_decl.name);
+            node->data.param_decl.name = token_buf(token);
             *token_index += 2;
         }
     }
@@ -915,8 +435,8 @@ static void ast_parse_asm_input_item(ParseContext *pc, int *token_index, AstNode
     ast_eat_token(pc, token_index, TokenIdRParen);
 
     AsmInput *asm_input = allocate<AsmInput>(1);
-    ast_buf_from_token(pc, alias, &asm_input->asm_symbolic_name);
-    parse_string_literal(pc, constraint, &asm_input->constraint, nullptr, nullptr);
+    asm_input->asm_symbolic_name = token_buf(alias);
+    asm_input->constraint = token_buf(constraint);
     asm_input->expr = expr_node;
     node->data.asm_expr.input_list.append(asm_input);
 }
@@ -938,7 +458,7 @@ static void ast_parse_asm_output_item(ParseContext *pc, int *token_index, AstNod
     Token *token = &pc->tokens->at(*token_index);
     *token_index += 1;
     if (token->id == TokenIdSymbol) {
-        ast_buf_from_token(pc, token, &asm_output->variable_name);
+        asm_output->variable_name = token_buf(token);
     } else if (token->id == TokenIdArrow) {
         asm_output->return_type = ast_parse_prefix_op_expr(pc, token_index, true);
     } else {
@@ -947,8 +467,8 @@ static void ast_parse_asm_output_item(ParseContext *pc, int *token_index, AstNod
 
     ast_eat_token(pc, token_index, TokenIdRParen);
 
-    ast_buf_from_token(pc, alias, &asm_output->asm_symbolic_name);
-    parse_string_literal(pc, constraint, &asm_output->constraint, nullptr, nullptr);
+    asm_output->asm_symbolic_name = token_buf(alias);
+    asm_output->constraint = token_buf(constraint);
     node->data.asm_expr.output_list.append(asm_output);
 }
 
@@ -968,8 +488,7 @@ static void ast_parse_asm_clobbers(ParseContext *pc, int *token_index, AstNode *
         ast_expect_token(pc, string_tok, TokenIdStringLiteral);
         *token_index += 1;
 
-        Buf *clobber_buf = buf_alloc();
-        parse_string_literal(pc, string_tok, clobber_buf, nullptr, nullptr);
+        Buf *clobber_buf = token_buf(string_tok);
         node->data.asm_expr.clobber_list.append(clobber_buf);
 
         Token *comma = &pc->tokens->at(*token_index);
@@ -1072,19 +591,14 @@ static AstNode *ast_parse_asm_expr(ParseContext *pc, int *token_index, bool mand
     ast_expect_token(pc, lparen_tok, TokenIdLParen);
     *token_index += 1;
 
-    Token *template_tok = &pc->tokens->at(*token_index);
-    ast_expect_token(pc, template_tok, TokenIdStringLiteral);
-    *token_index += 1;
+    Token *template_tok = ast_eat_token(pc, token_index, TokenIdStringLiteral);
 
-    parse_string_literal(pc, template_tok, &node->data.asm_expr.asm_template, nullptr,
-            &node->data.asm_expr.offset_map);
+    node->data.asm_expr.asm_template = token_buf(template_tok);
     parse_asm_template(pc, node);
 
     ast_parse_asm_output(pc, token_index, node);
 
-    Token *rparen_tok = &pc->tokens->at(*token_index);
-    ast_expect_token(pc, rparen_tok, TokenIdRParen);
-    *token_index += 1;
+    ast_eat_token(pc, token_index, TokenIdRParen);
 
     normalize_parent_ptrs(node);
     return node;
@@ -1099,17 +613,19 @@ static AstNode *ast_parse_primary_expr(ParseContext *pc, int *token_index, bool
 
     if (token->id == TokenIdNumberLiteral) {
         AstNode *node = ast_create_node(pc, NodeTypeNumberLiteral, token);
-        parse_number_literal(pc, token, &node->data.number_literal);
+        node->data.number_literal.bignum = token_bignum(token);
+        node->data.number_literal.overflow = token->data.num_lit.overflow;
         *token_index += 1;
         return node;
     } else if (token->id == TokenIdStringLiteral) {
         AstNode *node = ast_create_node(pc, NodeTypeStringLiteral, token);
-        parse_string_literal(pc, token, &node->data.string_literal.buf, &node->data.string_literal.c, nullptr);
+        node->data.string_literal.buf = token_buf(token);
+        node->data.string_literal.c = token->data.str_lit.is_c_str;
         *token_index += 1;
         return node;
     } else if (token->id == TokenIdCharLiteral) {
         AstNode *node = ast_create_node(pc, NodeTypeCharLiteral, token);
-        node->data.char_literal.value = parse_char_literal(pc, token);
+        node->data.char_literal.value = token_char_lit(token);
         *token_index += 1;
         return node;
     } else if (token->id == TokenIdKeywordTrue) {
@@ -1155,7 +671,7 @@ static AstNode *ast_parse_primary_expr(ParseContext *pc, int *token_index, bool
         *token_index += 1;
         Token *name_tok = ast_eat_token(pc, token_index, TokenIdSymbol);
         AstNode *name_node = ast_create_node(pc, NodeTypeSymbol, name_tok);
-        ast_buf_from_token(pc, name_tok, &name_node->data.symbol_expr.symbol);
+        name_node->data.symbol_expr.symbol = token_buf(name_tok);
 
         AstNode *node = ast_create_node(pc, NodeTypeFnCallExpr, token);
         node->data.fn_call_expr.fn_ref_expr = name_node;
@@ -1168,7 +684,7 @@ static AstNode *ast_parse_primary_expr(ParseContext *pc, int *token_index, bool
     } else if (token->id == TokenIdSymbol) {
         *token_index += 1;
         AstNode *node = ast_create_node(pc, NodeTypeSymbol, token);
-        ast_buf_from_token(pc, token, &node->data.symbol_expr.symbol);
+        node->data.symbol_expr.symbol = token_buf(token);
         return node;
     } else if (token->id == TokenIdKeywordGoto) {
         AstNode *node = ast_create_node(pc, NodeTypeGoto, token);
@@ -1178,7 +694,7 @@ static AstNode *ast_parse_primary_expr(ParseContext *pc, int *token_index, bool
         *token_index += 1;
         ast_expect_token(pc, dest_symbol, TokenIdSymbol);
 
-        ast_buf_from_token(pc, dest_symbol, &node->data.goto_expr.name);
+        node->data.goto_expr.name = token_buf(dest_symbol);
         return node;
     }
 
@@ -1243,7 +759,7 @@ static AstNode *ast_parse_curly_suffix_expr(ParseContext *pc, int *token_index,
 
                         AstNode *field_node = ast_create_node(pc, NodeTypeStructValueField, token);
 
-                        ast_buf_from_token(pc, field_name_tok, &field_node->data.struct_val_field.name);
+                        field_node->data.struct_val_field.name = token_buf(field_name_tok);
                         field_node->data.struct_val_field.expr = ast_parse_expression(pc, token_index, true);
 
                         normalize_parent_ptrs(field_node);
@@ -1370,7 +886,7 @@ static AstNode *ast_parse_suffix_op_expr(ParseContext *pc, int *token_index, boo
 
             AstNode *node = ast_create_node(pc, NodeTypeFieldAccessExpr, first_token);
             node->data.field_access_expr.struct_expr = primary_expr;
-            ast_buf_from_token(pc, name_token, &node->data.field_access_expr.field_name);
+            node->data.field_access_expr.field_name = token_buf(name_token);
 
             normalize_parent_ptrs(node);
             primary_expr = node;
@@ -1819,10 +1335,10 @@ static AstNode *ast_parse_if_expr(ParseContext *pc, int *token_index, bool manda
             *token_index += 1;
             node->data.if_var_expr.var_is_ptr = true;
             Token *name_token = ast_eat_token(pc, token_index, TokenIdSymbol);
-            ast_buf_from_token(pc, name_token, &node->data.if_var_expr.var_decl.symbol);
+            node->data.if_var_expr.var_decl.symbol = token_buf(name_token);
         } else if (star_or_symbol->id == TokenIdSymbol) {
             *token_index += 1;
-            ast_buf_from_token(pc, star_or_symbol, &node->data.if_var_expr.var_decl.symbol);
+            node->data.if_var_expr.var_decl.symbol = token_buf(star_or_symbol);
         } else {
             ast_invalid_token_error(pc, star_or_symbol);
         }
@@ -1974,7 +1490,7 @@ static AstNode *ast_parse_variable_declaration_expr(ParseContext *pc, int *token
     node->data.variable_declaration.top_level_decl.directives = directives;
 
     Token *name_token = ast_eat_token(pc, token_index, TokenIdSymbol);
-    ast_buf_from_token(pc, name_token, &node->data.variable_declaration.symbol);
+    node->data.variable_declaration.symbol = token_buf(name_token);
 
     Token *eq_or_colon = &pc->tokens->at(*token_index);
     *token_index += 1;
@@ -2067,7 +1583,7 @@ static AstNode *ast_parse_while_expr(ParseContext *pc, int *token_index, bool ma
 static AstNode *ast_parse_symbol(ParseContext *pc, int *token_index) {
     Token *token = ast_eat_token(pc, token_index, TokenIdSymbol);
     AstNode *node = ast_create_node(pc, NodeTypeSymbol, token);
-    ast_buf_from_token(pc, token, &node->data.symbol_expr.symbol);
+    node->data.symbol_expr.symbol = token_buf(token);
     return node;
 }
 
@@ -2405,7 +1921,7 @@ static AstNode *ast_parse_label(ParseContext *pc, int *token_index, bool mandato
     *token_index += 2;
 
     AstNode *node = ast_create_node(pc, NodeTypeLabel, symbol_token);
-    ast_buf_from_token(pc, symbol_token, &node->data.label.name);
+    node->data.label.name = token_buf(symbol_token);
     return node;
 }
 
@@ -2413,7 +1929,7 @@ static AstNode *ast_create_void_expr(ParseContext *pc, Token *token) {
     AstNode *node = ast_create_node(pc, NodeTypeContainerInitExpr, token);
     node->data.container_init_expr.type = ast_create_node(pc, NodeTypeSymbol, token);
     node->data.container_init_expr.kind = ContainerInitKindArray;
-    buf_init_from_str(&node->data.container_init_expr.type->data.symbol_expr.symbol, "void");
+    node->data.container_init_expr.type->data.symbol_expr.symbol = pc->void_buf;
     normalize_parent_ptrs(node);
     return node;
 }
@@ -2508,9 +2024,9 @@ static AstNode *ast_parse_fn_proto(ParseContext *pc, int *token_index, bool mand
     Token *fn_name = &pc->tokens->at(*token_index);
     if (fn_name->id == TokenIdSymbol) {
         *token_index += 1;
-        ast_buf_from_token(pc, fn_name, &node->data.fn_proto.name);
+        node->data.fn_proto.name = token_buf(fn_name);
     } else {
-        buf_resize(&node->data.fn_proto.name, 0);
+        node->data.fn_proto.name = pc->empty_buf;
     }
 
     ast_parse_param_decl_list(pc, token_index, &node->data.fn_proto.params, &node->data.fn_proto.is_var_args);
@@ -2663,7 +2179,7 @@ static AstNode *ast_parse_container_decl(ParseContext *pc, int *token_index,
 
     AstNode *node = ast_create_node(pc, NodeTypeContainerDecl, first_token);
     node->data.struct_decl.kind = kind;
-    ast_buf_from_token(pc, struct_name, &node->data.struct_decl.name);
+    node->data.struct_decl.name = token_buf(struct_name);
     node->data.struct_decl.top_level_decl.visib_mod = visib_mod;
     node->data.struct_decl.top_level_decl.directives = directives;
 
@@ -2729,8 +2245,7 @@ static AstNode *ast_parse_container_decl(ParseContext *pc, int *token_index,
 
             field_node->data.struct_field.top_level_decl.visib_mod = visib_mod;
             field_node->data.struct_field.top_level_decl.directives = directive_list;
-
-            ast_buf_from_token(pc, token, &field_node->data.struct_field.name);
+            field_node->data.struct_field.name = token_buf(token);
 
             Token *expr_or_comma = &pc->tokens->at(*token_index);
             if (expr_or_comma->id == TokenIdComma) {
@@ -2772,7 +2287,7 @@ static AstNode *ast_parse_error_value_decl(ParseContext *pc, int *token_index,
     AstNode *node = ast_create_node(pc, NodeTypeErrorValueDecl, first_token);
     node->data.error_value_decl.top_level_decl.visib_mod = visib_mod;
     node->data.error_value_decl.top_level_decl.directives = directives;
-    ast_buf_from_token(pc, name_tok, &node->data.error_value_decl.name);
+    node->data.error_value_decl.name = token_buf(name_tok);
 
     normalize_parent_ptrs(node);
     return node;
@@ -2795,7 +2310,7 @@ static AstNode *ast_parse_type_decl(ParseContext *pc, int *token_index,
     ast_eat_token(pc, token_index, TokenIdEq);
 
     AstNode *node = ast_create_node(pc, NodeTypeTypeDecl, first_token);
-    ast_buf_from_token(pc, name_tok, &node->data.type_decl.symbol);
+    node->data.type_decl.symbol = token_buf(name_tok);
     node->data.type_decl.child_type = ast_parse_prefix_op_expr(pc, token_index, true);
 
     ast_eat_token(pc, token_index, TokenIdSemicolon);
@@ -2901,6 +2416,8 @@ AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens, ImportTableEntry *owner,
         ErrColor err_color, uint32_t *next_node_index)
 {
     ParseContext pc = {0};
+    pc.void_buf = buf_create_from_str("void");
+    pc.empty_buf = buf_create_from_str("");
     pc.err_color = err_color;
     pc.owner = owner;
     pc.buf = buf;
src/tokenizer.cpp
@@ -11,6 +11,9 @@
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <errno.h>
 
 #define WHITESPACE \
          ' ': \
@@ -30,7 +33,7 @@
          '0': \
     case DIGIT_NON_ZERO
 
-#define ALPHA_EXCEPT_CR \
+#define ALPHA_EXCEPT_C \
          'a': \
     case 'b': \
   /*case 'c':*/ \
@@ -48,7 +51,7 @@
     case 'o': \
     case 'p': \
     case 'q': \
-  /*case 'r':*/ \
+    case 'r': \
     case 's': \
     case 't': \
     case 'u': \
@@ -85,77 +88,93 @@
     case 'Z'
 
 #define ALPHA \
-    ALPHA_EXCEPT_CR: \
-    case 'c': \
-    case 'r'
-
-#define SYMBOL_CHAR \
-    SYMBOL_CHAR_EXCEPT_C: \
+    ALPHA_EXCEPT_C: \
     case 'c'
 
-#define SYMBOL_CHAR_EXCEPT_C \
-    ALPHA_EXCEPT_CR: \
-    case 'r': \
+#define SYMBOL_CHAR \
+    ALPHA_EXCEPT_C: \
     case DIGIT: \
-    case '_'
+    case '_': \
+    case 'c'
 
 #define SYMBOL_START \
     ALPHA: \
     case '_'
 
-#define HEX_DIGIT \
-         'a': \
-    case 'b': \
-    case 'c': \
-    case 'd': \
-    case 'e': \
-    case 'f': \
-    case 'A': \
-    case 'B': \
-    case 'C': \
-    case 'D': \
-    case 'E': \
-    case 'F': \
-    case DIGIT
+// One entry of the keyword table: the keyword's spelling paired with the
+// token id assigned to a symbol whose text matches it.
+struct ZigKeyword {
+    const char *text;   // keyword spelling compared against token text
+    TokenId token_id;   // token id that replaces TokenIdSymbol on a match
+};
 
-const char * zig_keywords[] = {
-    "true", "false", "null", "fn", "return", "var", "const", "extern",
-    "pub", "export", "use", "if", "else", "goto", "asm",
-    "volatile", "struct", "enum", "while", "for", "continue", "break",
-    "null", "noalias", "switch", "undefined", "error", "type", "inline",
-    "defer", "union",
+// Table of every reserved word and the token id it maps to.
+// Entries are listed alphabetically by spelling; is_zig_keyword() scans the
+// table linearly, so the ordering is for readability only.
+static const struct ZigKeyword zig_keywords[] = {
+    {"asm", TokenIdKeywordAsm},
+    {"break", TokenIdKeywordBreak},
+    {"const", TokenIdKeywordConst},
+    {"continue", TokenIdKeywordContinue},
+    {"defer", TokenIdKeywordDefer},
+    {"else", TokenIdKeywordElse},
+    {"enum", TokenIdKeywordEnum},
+    {"error", TokenIdKeywordError},
+    {"export", TokenIdKeywordExport},
+    {"extern", TokenIdKeywordExtern},
+    {"false", TokenIdKeywordFalse},
+    {"fn", TokenIdKeywordFn},
+    {"for", TokenIdKeywordFor},
+    {"goto", TokenIdKeywordGoto},
+    {"if", TokenIdKeywordIf},
+    {"inline", TokenIdKeywordInline},
+    {"noalias", TokenIdKeywordNoAlias},
+    {"null", TokenIdKeywordNull},
+    {"pub", TokenIdKeywordPub},
+    {"return", TokenIdKeywordReturn},
+    {"struct", TokenIdKeywordStruct},
+    {"switch", TokenIdKeywordSwitch},
+    {"true", TokenIdKeywordTrue},
+    {"type", TokenIdKeywordType},
+    {"undefined", TokenIdKeywordUndefined},
+    {"union", TokenIdKeywordUnion},
+    {"use", TokenIdKeywordUse},
+    {"var", TokenIdKeywordVar},
+    {"volatile", TokenIdKeywordVolatile},
+    {"while", TokenIdKeywordWhile},
 };
 
 bool is_zig_keyword(Buf *buf) {
     for (int i = 0; i < array_length(zig_keywords); i += 1) {
-        if (buf_eql_str(buf, zig_keywords[i])) {
+        if (buf_eql_str(buf, zig_keywords[i].text)) {
             return true;
         }
     }
     return false;
 }
 
+// Reports whether `c` matches one of the SYMBOL_CHAR case labels.
+static bool is_symbol_char(uint8_t c) {
+    bool matches = false;
+    switch (c) {
+        case SYMBOL_CHAR:
+            matches = true;
+            break;
+        default:
+            break;
+    }
+    return matches;
+}
+
 enum TokenizeState {
     TokenizeStateStart,
     TokenizeStateSymbol,
-    TokenizeStateSymbolFirst,
-    TokenizeStateSymbolFirstRaw,
-    TokenizeStateFirstR,
+    TokenizeStateSymbolFirstC,
     TokenizeStateZero, // "0", which might lead to "0x"
     TokenizeStateNumber, // "123", "0x123"
+    TokenizeStateNumberDot,
     TokenizeStateFloatFraction, // "123.456", "0x123.456"
     TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p"
     TokenizeStateFloatExponentNumber, // "123.456e-", "123.456e5", "123.456e5e-5"
     TokenizeStateString,
     TokenizeStateStringEscape,
-    TokenizeStateRawString,
-    TokenizeStateRawStringContents,
-    TokenizeStateRawStringMaybeEnd,
     TokenizeStateCharLiteral,
     TokenizeStateCharLiteralEnd,
     TokenizeStateSawStar,
     TokenizeStateSawStarPercent,
     TokenizeStateSawSlash,
+    TokenizeStateSawBackslash,
     TokenizeStateSawPercent,
     TokenizeStateSawPlus,
     TokenizeStateSawPlusPercent,
@@ -167,6 +186,9 @@ enum TokenizeState {
     TokenizeStateSawPipe,
     TokenizeStateSawPipePipe,
     TokenizeStateLineComment,
+    TokenizeStateLineString,
+    TokenizeStateLineStringEnd,
+    TokenizeStateLineStringContinue,
     TokenizeStateSawEq,
     TokenizeStateSawBang,
     TokenizeStateSawLessThan,
@@ -178,7 +200,7 @@ enum TokenizeState {
     TokenizeStateSawDotDot,
     TokenizeStateSawQuestionMark,
     TokenizeStateSawAtSign,
-    TokenizeStateHex,
+    TokenizeStateCharCode,
     TokenizeStateError,
 };
 
@@ -192,10 +214,16 @@ struct Tokenize {
     int column;
     Token *cur_tok;
     Tokenization *out;
-    int raw_string_id_start;
-    int raw_string_id_end;
-    int raw_string_id_cmp_pos;
-    int hex_chars_left;
+    uint32_t radix;
+    int32_t exp_add_amt;
+    bool is_exp_negative;
+    bool is_num_lit_float;
+    size_t char_code_index;
+    size_t char_code_end;
+    bool unicode;
+    uint32_t char_code;
+    int exponent_in_bin_or_dec;
+    BigNum specified_exponent;
 };
 
 __attribute__ ((format (printf, 2, 3)))
@@ -216,19 +244,28 @@ static void tokenize_error(Tokenize *t, const char *format, ...) {
     va_end(ap);
 }
 
+// Stamps `id` onto `token` and resets the payload fields that belong to that
+// kind of token:
+//   - number literals start with the overflow flag cleared;
+//   - string literals and symbols start with a zeroed, empty text buffer and
+//     the C-string flag cleared.
+// Every other id only has the id field written. `t` is not consulted.
+static void set_token_id(Tokenize *t, Token *token, TokenId id) {
+    token->id = id;
+
+    switch (id) {
+        case TokenIdNumberLiteral:
+            token->data.num_lit.overflow = false;
+            break;
+        case TokenIdStringLiteral:
+        case TokenIdSymbol:
+            // Zero the Buf first so buf_resize starts from a clean struct.
+            memset(&token->data.str_lit.str, 0, sizeof(Buf));
+            buf_resize(&token->data.str_lit.str, 0);
+            token->data.str_lit.is_c_str = false;
+            break;
+        default:
+            break;
+    }
+}
+
 static void begin_token(Tokenize *t, TokenId id) {
     assert(!t->cur_tok);
     t->tokens->add_one();
     Token *token = &t->tokens->last();
     token->start_line = t->line;
     token->start_column = t->column;
-    token->id = id;
     token->start_pos = t->pos;
-    token->radix = 0;
-    token->decimal_point_pos = 0;
-    token->exponent_marker_pos = 0;
-    token->raw_string_start = 0;
-    token->raw_string_end = 0;
+
+    set_token_id(t, token, id);
+
     t->cur_tok = token;
 }
 
@@ -237,83 +274,82 @@ static void cancel_token(Tokenize *t) {
     t->cur_tok = nullptr;
 }
 
+// Finalizes the floating point value of the current number-literal token.
+//
+// Radix 10: the token's source text is handed to strtod(); a non-zero errno
+// marks the token as overflowed.
+//
+// Any other radix: an IEEE 754 binary64 value is assembled by hand from
+//   - the integer significand read from the token's bignum (data.x_uint),
+//   - t->exponent_in_bin_or_dec, and
+//   - the exponent written in the literal (t->specified_exponent, negated
+//     when t->is_exp_negative is set).
+// If the resulting exponent does not fit a normal double, the token's
+// overflow flag is set and no float value is stored.
+static void end_float_token(Tokenize *t) {
+    t->cur_tok->data.num_lit.bignum.kind = BigNumKindFloat;
+
+    if (t->radix == 10) {
+        char *str_begin = buf_ptr(t->buf) + t->cur_tok->start_pos;
+        char *str_end;
+        errno = 0;
+        t->cur_tok->data.num_lit.bignum.data.x_float = strtod(str_begin, &str_end);
+        if (errno) {
+            t->cur_tok->data.num_lit.overflow = true;
+            return;
+        }
+        // strtod must have consumed exactly the token's text.
+        assert(str_end == buf_ptr(t->buf) + t->cur_tok->end_pos);
+        return;
+    }
+
+    if (t->specified_exponent.data.x_uint >= INT_MAX) {
+        t->cur_tok->data.num_lit.overflow = true;
+        return;
+    }
+
+    int64_t specified_exponent = t->specified_exponent.data.x_uint;
+    if (t->is_exp_negative) {
+        specified_exponent = -specified_exponent;
+    }
+    t->exponent_in_bin_or_dec += specified_exponent;
+
+    uint64_t significand = t->cur_tok->data.num_lit.bignum.data.x_uint;
+    // A zero significand encodes as all-zero bits, which is +0.0.
+    uint64_t significand_bits = 0;
+    uint64_t exponent_bits = 0;
+    if (significand != 0) {
+        // Index (0..63) of the highest set bit of the significand.
+        int significand_magnitude_in_bin = __builtin_clzll(1) - __builtin_clzll(significand);
+        t->exponent_in_bin_or_dec += significand_magnitude_in_bin;
+
+        // Normal binary64 values have unbiased exponents in [-1022, 1023].
+        // Biased exponent 0 is reserved for zero/subnormals and 2047 for
+        // inf/NaN, neither of which is produced here.
+        if (!(-1022 <= t->exponent_in_bin_or_dec && t->exponent_in_bin_or_dec <= 1023)) {
+            t->cur_tok->data.num_lit.overflow = true;
+            // Return now: the bit fields below were never computed.
+            return;
+        }
+
+        // Move the leading 1 to bit 52, then mask it off (it is implicit in
+        // the encoding). Significands wider than 53 bits are shifted right,
+        // truncating the low bits; a negative left shift would be undefined.
+        uint64_t aligned;
+        if (significand_magnitude_in_bin <= 52) {
+            aligned = significand << (52 - significand_magnitude_in_bin);
+        } else {
+            aligned = significand >> (significand_magnitude_in_bin - 52);
+        }
+        significand_bits = aligned & 0xfffffffffffffULL;
+        exponent_bits = t->exponent_in_bin_or_dec + 1023;
+    }
+
+    uint64_t double_bits = (exponent_bits << 52) | significand_bits;
+    memcpy(&t->cur_tok->data.num_lit.bignum.data.x_float, &double_bits, sizeof(double));
+}
+
 static void end_token(Tokenize *t) {
     assert(t->cur_tok);
     t->cur_tok->end_pos = t->pos + 1;
 
-    // normalize number literal parsing stuff
     if (t->cur_tok->id == TokenIdNumberLiteral) {
-        if (t->cur_tok->exponent_marker_pos == 0) {
-            t->cur_tok->exponent_marker_pos = t->cur_tok->end_pos;
+        if (t->cur_tok->data.num_lit.overflow) {
+            return;
         }
-        if (t->cur_tok->decimal_point_pos == 0) {
-            t->cur_tok->decimal_point_pos = t->cur_tok->exponent_marker_pos;
+        if (t->is_num_lit_float) {
+            end_float_token(t);
         }
-    }
-
-    char *token_mem = buf_ptr(t->buf) + t->cur_tok->start_pos;
-    int token_len = t->cur_tok->end_pos - t->cur_tok->start_pos;
+    } else if (t->cur_tok->id == TokenIdSymbol) {
+        char *token_mem = buf_ptr(t->buf) + t->cur_tok->start_pos;
+        int token_len = t->cur_tok->end_pos - t->cur_tok->start_pos;
 
-    if (mem_eql_str(token_mem, token_len, "fn")) {
-        t->cur_tok->id = TokenIdKeywordFn;
-    } else if (mem_eql_str(token_mem, token_len, "return")) {
-        t->cur_tok->id = TokenIdKeywordReturn;
-    } else if (mem_eql_str(token_mem, token_len, "var")) {
-        t->cur_tok->id = TokenIdKeywordVar;
-    } else if (mem_eql_str(token_mem, token_len, "const")) {
-        t->cur_tok->id = TokenIdKeywordConst;
-    } else if (mem_eql_str(token_mem, token_len, "extern")) {
-        t->cur_tok->id = TokenIdKeywordExtern;
-    } else if (mem_eql_str(token_mem, token_len, "pub")) {
-        t->cur_tok->id = TokenIdKeywordPub;
-    } else if (mem_eql_str(token_mem, token_len, "export")) {
-        t->cur_tok->id = TokenIdKeywordExport;
-    } else if (mem_eql_str(token_mem, token_len, "use")) {
-        t->cur_tok->id = TokenIdKeywordUse;
-    } else if (mem_eql_str(token_mem, token_len, "true")) {
-        t->cur_tok->id = TokenIdKeywordTrue;
-    } else if (mem_eql_str(token_mem, token_len, "false")) {
-        t->cur_tok->id = TokenIdKeywordFalse;
-    } else if (mem_eql_str(token_mem, token_len, "if")) {
-        t->cur_tok->id = TokenIdKeywordIf;
-    } else if (mem_eql_str(token_mem, token_len, "else")) {
-        t->cur_tok->id = TokenIdKeywordElse;
-    } else if (mem_eql_str(token_mem, token_len, "goto")) {
-        t->cur_tok->id = TokenIdKeywordGoto;
-    } else if (mem_eql_str(token_mem, token_len, "volatile")) {
-        t->cur_tok->id = TokenIdKeywordVolatile;
-    } else if (mem_eql_str(token_mem, token_len, "asm")) {
-        t->cur_tok->id = TokenIdKeywordAsm;
-    } else if (mem_eql_str(token_mem, token_len, "struct")) {
-        t->cur_tok->id = TokenIdKeywordStruct;
-    } else if (mem_eql_str(token_mem, token_len, "enum")) {
-        t->cur_tok->id = TokenIdKeywordEnum;
-    } else if (mem_eql_str(token_mem, token_len, "union")) {
-        t->cur_tok->id = TokenIdKeywordUnion;
-    } else if (mem_eql_str(token_mem, token_len, "for")) {
-        t->cur_tok->id = TokenIdKeywordFor;
-    } else if (mem_eql_str(token_mem, token_len, "while")) {
-        t->cur_tok->id = TokenIdKeywordWhile;
-    } else if (mem_eql_str(token_mem, token_len, "continue")) {
-        t->cur_tok->id = TokenIdKeywordContinue;
-    } else if (mem_eql_str(token_mem, token_len, "break")) {
-        t->cur_tok->id = TokenIdKeywordBreak;
-    } else if (mem_eql_str(token_mem, token_len, "null")) {
-        t->cur_tok->id = TokenIdKeywordNull;
-    } else if (mem_eql_str(token_mem, token_len, "noalias")) {
-        t->cur_tok->id = TokenIdKeywordNoAlias;
-    } else if (mem_eql_str(token_mem, token_len, "switch")) {
-        t->cur_tok->id = TokenIdKeywordSwitch;
-    } else if (mem_eql_str(token_mem, token_len, "undefined")) {
-        t->cur_tok->id = TokenIdKeywordUndefined;
-    } else if (mem_eql_str(token_mem, token_len, "error")) {
-        t->cur_tok->id = TokenIdKeywordError;
-    } else if (mem_eql_str(token_mem, token_len, "type")) {
-        t->cur_tok->id = TokenIdKeywordType;
-    } else if (mem_eql_str(token_mem, token_len, "inline")) {
-        t->cur_tok->id = TokenIdKeywordInline;
-    } else if (mem_eql_str(token_mem, token_len, "defer")) {
-        t->cur_tok->id = TokenIdKeywordDefer;
+        for (size_t i = 0; i < array_length(zig_keywords); i += 1) {
+            if (mem_eql_str(token_mem, token_len, zig_keywords[i].text)) {
+                t->cur_tok->id = zig_keywords[i].token_id;
+                break;
+            }
+        }
     }
 
     t->cur_tok = nullptr;
@@ -327,7 +363,7 @@ static bool is_exponent_signifier(uint8_t c, int radix) {
     }
 }
 
-int get_digit_value(uint8_t c) {
+static uint32_t get_digit_value(uint8_t c) {
     if ('0' <= c && c <= '9') {
         return c - '0';
     }
@@ -337,7 +373,19 @@ int get_digit_value(uint8_t c) {
     if ('a' <= c && c <= 'z') {
         return c - 'a' + 10;
     }
-    return -1;
+    return UINT32_MAX;
+}
+
+// Delivers the character `c` to the token currently being built and moves the
+// tokenizer to the follow-up state:
+//   - char literal: `c` becomes the literal's value; next state expects the
+//     end of the char literal;
+//   - string literal or symbol: `c` is appended to the token's text; next
+//     state continues the string.
+// Any other token kind is a programming error.
+void handle_string_escape(Tokenize *t, uint8_t c) {
+    switch (t->cur_tok->id) {
+        case TokenIdCharLiteral:
+            t->cur_tok->data.char_lit.c = c;
+            t->state = TokenizeStateCharLiteralEnd;
+            return;
+        case TokenIdStringLiteral:
+        case TokenIdSymbol:
+            buf_append_char(&t->cur_tok->data.str_lit.str, c);
+            t->state = TokenizeStateString;
+            return;
+        default:
+            zig_unreachable();
+    }
 }
 
 void tokenize(Buf *buf, Tokenization *out) {
@@ -359,27 +407,35 @@ void tokenize(Buf *buf, Tokenization *out) {
                     case WHITESPACE:
                         break;
                     case 'c':
-                        t.state = TokenizeStateSymbolFirst;
+                        t.state = TokenizeStateSymbolFirstC;
                         begin_token(&t, TokenIdSymbol);
+                        buf_append_char(&t.cur_tok->data.str_lit.str, c);
                         break;
-                    case 'r':
-                        t.state = TokenizeStateFirstR;
-                        begin_token(&t, TokenIdSymbol);
-                        break;
-                    case ALPHA_EXCEPT_CR:
+                    case ALPHA_EXCEPT_C:
                     case '_':
                         t.state = TokenizeStateSymbol;
                         begin_token(&t, TokenIdSymbol);
+                        buf_append_char(&t.cur_tok->data.str_lit.str, c);
                         break;
                     case '0':
                         t.state = TokenizeStateZero;
                         begin_token(&t, TokenIdNumberLiteral);
-                        t.cur_tok->radix = 10;
+                        t.radix = 10;
+                        t.exp_add_amt = 1;
+                        t.exponent_in_bin_or_dec = 0;
+                        t.is_num_lit_float = false;
+                        bignum_init_unsigned(&t.cur_tok->data.num_lit.bignum, 0);
+                        bignum_init_unsigned(&t.specified_exponent, 0);
                         break;
                     case DIGIT_NON_ZERO:
                         t.state = TokenizeStateNumber;
                         begin_token(&t, TokenIdNumberLiteral);
-                        t.cur_tok->radix = 10;
+                        t.radix = 10;
+                        t.exp_add_amt = 1;
+                        t.exponent_in_bin_or_dec = 0;
+                        t.is_num_lit_float = false;
+                        bignum_init_unsigned(&t.cur_tok->data.num_lit.bignum, get_digit_value(c));
+                        bignum_init_unsigned(&t.specified_exponent, 0);
                         break;
                     case '"':
                         begin_token(&t, TokenIdStringLiteral);
@@ -437,6 +493,10 @@ void tokenize(Buf *buf, Tokenization *out) {
                         begin_token(&t, TokenIdSlash);
                         t.state = TokenizeStateSawSlash;
                         break;
+                    case '\\':
+                        begin_token(&t, TokenIdStringLiteral);
+                        t.state = TokenizeStateSawBackslash;
+                        break;
                     case '%':
                         begin_token(&t, TokenIdPercent);
                         t.state = TokenizeStateSawPercent;
@@ -500,12 +560,12 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawQuestionMark:
                 switch (c) {
                     case '?':
-                        t.cur_tok->id = TokenIdDoubleQuestion;
+                        set_token_id(&t, t.cur_tok, TokenIdDoubleQuestion);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '=':
-                        t.cur_tok->id = TokenIdMaybeAssign;
+                        set_token_id(&t, t.cur_tok, TokenIdMaybeAssign);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -520,7 +580,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                 switch (c) {
                     case '.':
                         t.state = TokenizeStateSawDotDot;
-                        t.cur_tok->id = TokenIdEllipsis;
+                        set_token_id(&t, t.cur_tok, TokenIdEllipsis);
                         break;
                     default:
                         t.pos -= 1;
@@ -542,12 +602,12 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawGreaterThan:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdCmpGreaterOrEq;
+                        set_token_id(&t, t.cur_tok, TokenIdCmpGreaterOrEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '>':
-                        t.cur_tok->id = TokenIdBitShiftRight;
+                        set_token_id(&t, t.cur_tok, TokenIdBitShiftRight);
                         t.state = TokenizeStateSawGreaterThanGreaterThan;
                         break;
                     default:
@@ -560,7 +620,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawGreaterThanGreaterThan:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdBitShiftRightEq;
+                        set_token_id(&t, t.cur_tok, TokenIdBitShiftRightEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -574,12 +634,12 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawLessThan:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdCmpLessOrEq;
+                        set_token_id(&t, t.cur_tok, TokenIdCmpLessOrEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '<':
-                        t.cur_tok->id = TokenIdBitShiftLeft;
+                        set_token_id(&t, t.cur_tok, TokenIdBitShiftLeft);
                         t.state = TokenizeStateSawLessThanLessThan;
                         break;
                     default:
@@ -592,12 +652,12 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawLessThanLessThan:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdBitShiftLeftEq;
+                        set_token_id(&t, t.cur_tok, TokenIdBitShiftLeftEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '%':
-                        t.cur_tok->id = TokenIdBitShiftLeftPercent;
+                        set_token_id(&t, t.cur_tok, TokenIdBitShiftLeftPercent);
                         t.state = TokenizeStateSawShiftLeftPercent;
                         break;
                     default:
@@ -610,7 +670,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawShiftLeftPercent:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdBitShiftLeftPercentEq;
+                        set_token_id(&t, t.cur_tok, TokenIdBitShiftLeftPercentEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -624,7 +684,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawBang:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdCmpNotEq;
+                        set_token_id(&t, t.cur_tok, TokenIdCmpNotEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -638,12 +698,12 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawEq:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdCmpEq;
+                        set_token_id(&t, t.cur_tok, TokenIdCmpEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '>':
-                        t.cur_tok->id = TokenIdFatArrow;
+                        set_token_id(&t, t.cur_tok, TokenIdFatArrow);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -657,17 +717,17 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawStar:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdTimesEq;
+                        set_token_id(&t, t.cur_tok, TokenIdTimesEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '*':
-                        t.cur_tok->id = TokenIdStarStar;
+                        set_token_id(&t, t.cur_tok, TokenIdStarStar);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '%':
-                        t.cur_tok->id = TokenIdTimesPercent;
+                        set_token_id(&t, t.cur_tok, TokenIdTimesPercent);
                         t.state = TokenizeStateSawStarPercent;
                         break;
                     default:
@@ -680,7 +740,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawStarPercent:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdTimesPercentEq;
+                        set_token_id(&t, t.cur_tok, TokenIdTimesPercentEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -694,17 +754,17 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawPercent:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdModEq;
+                        set_token_id(&t, t.cur_tok, TokenIdModEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '.':
-                        t.cur_tok->id = TokenIdPercentDot;
+                        set_token_id(&t, t.cur_tok, TokenIdPercentDot);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '%':
-                        t.cur_tok->id = TokenIdPercentPercent;
+                        set_token_id(&t, t.cur_tok, TokenIdPercentPercent);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -718,17 +778,17 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawPlus:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdPlusEq;
+                        set_token_id(&t, t.cur_tok, TokenIdPlusEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '+':
-                        t.cur_tok->id = TokenIdPlusPlus;
+                        set_token_id(&t, t.cur_tok, TokenIdPlusPlus);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '%':
-                        t.cur_tok->id = TokenIdPlusPercent;
+                        set_token_id(&t, t.cur_tok, TokenIdPlusPercent);
                         t.state = TokenizeStateSawPlusPercent;
                         break;
                     default:
@@ -741,7 +801,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawPlusPercent:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdPlusPercentEq;
+                        set_token_id(&t, t.cur_tok, TokenIdPlusPercentEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -755,11 +815,11 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawAmpersand:
                 switch (c) {
                     case '&':
-                        t.cur_tok->id = TokenIdBoolAnd;
+                        set_token_id(&t, t.cur_tok, TokenIdBoolAnd);
                         t.state = TokenizeStateSawAmpersandAmpersand;
                         break;
                     case '=':
-                        t.cur_tok->id = TokenIdBitAndEq;
+                        set_token_id(&t, t.cur_tok, TokenIdBitAndEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -773,7 +833,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawAmpersandAmpersand:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdBoolAndEq;
+                        set_token_id(&t, t.cur_tok, TokenIdBoolAndEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -787,7 +847,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawCaret:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdBitXorEq;
+                        set_token_id(&t, t.cur_tok, TokenIdBitXorEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -801,11 +861,11 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawPipe:
                 switch (c) {
                     case '|':
-                        t.cur_tok->id = TokenIdBoolOr;
+                        set_token_id(&t, t.cur_tok, TokenIdBoolOr);
                         t.state = TokenizeStateSawPipePipe;
                         break;
                     case '=':
-                        t.cur_tok->id = TokenIdBitOrEq;
+                        set_token_id(&t, t.cur_tok, TokenIdBitOrEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -819,7 +879,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawPipePipe:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdBoolOrEq;
+                        set_token_id(&t, t.cur_tok, TokenIdBoolOrEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -837,7 +897,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         t.state = TokenizeStateLineComment;
                         break;
                     case '=':
-                        t.cur_tok->id = TokenIdDivEq;
+                        set_token_id(&t, t.cur_tok, TokenIdDivEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -848,24 +908,32 @@ void tokenize(Buf *buf, Tokenization *out) {
                         continue;
                 }
                 break;
-            case TokenizeStateLineComment:
+            case TokenizeStateSawBackslash:
+                switch (c) {
+                    case '\\':
+                        t.state = TokenizeStateLineString;
+                        break;
+                    default:
+                        tokenize_error(&t, "invalid character: '%c'", c);
+                        break;
+                }
+                break;
+            case TokenizeStateLineString:
                 switch (c) {
                     case '\n':
-                        t.state = TokenizeStateStart;
+                        t.state = TokenizeStateLineStringEnd;
                         break;
                     default:
-                        // do nothing
+                        buf_append_char(&t.cur_tok->data.str_lit.str, c);
                         break;
                 }
                 break;
-            case TokenizeStateSymbolFirst:
+            case TokenizeStateLineStringEnd:
                 switch (c) {
-                    case '"':
-                        t.cur_tok->id = TokenIdStringLiteral;
-                        t.state = TokenizeStateString;
+                    case WHITESPACE:
                         break;
-                    case SYMBOL_CHAR:
-                        t.state = TokenizeStateSymbol;
+                    case '\\':
+                        t.state = TokenizeStateLineStringContinue;
                         break;
                     default:
                         t.pos -= 1;
@@ -874,29 +942,38 @@ void tokenize(Buf *buf, Tokenization *out) {
                         continue;
                 }
                 break;
-            case TokenizeStateSymbolFirstRaw:
+            case TokenizeStateLineStringContinue:
                 switch (c) {
-                    case '"':
-                        t.cur_tok->id = TokenIdStringLiteral;
-                        t.state = TokenizeStateRawString;
-                        t.raw_string_id_start = t.pos + 1;
-                        break;
-                    case SYMBOL_CHAR:
-                        t.state = TokenizeStateSymbol;
+                    case '\\':
+                        t.state = TokenizeStateLineString;
+                        buf_append_char(&t.cur_tok->data.str_lit.str, '\n');
                         break;
                     default:
-                        t.pos -= 1;
-                        end_token(&t);
+                        tokenize_error(&t, "invalid character: '%c'", c);
+                        break;
+                }
+                break;
+            case TokenizeStateLineComment:
+                switch (c) {
+                    case '\n':
                         t.state = TokenizeStateStart;
-                        continue;
+                        break;
+                    default:
+                        // do nothing
+                        break;
                 }
                 break;
-            case TokenizeStateSawAtSign:
+            case TokenizeStateSymbolFirstC:
                 switch (c) {
                     case '"':
-                        t.cur_tok->id = TokenIdSymbol;
+                        set_token_id(&t, t.cur_tok, TokenIdStringLiteral);
+                        t.cur_tok->data.str_lit.is_c_str = true;
                         t.state = TokenizeStateString;
                         break;
+                    case SYMBOL_CHAR:
+                        t.state = TokenizeStateSymbol;
+                        buf_append_char(&t.cur_tok->data.str_lit.str, c);
+                        break;
                     default:
                         t.pos -= 1;
                         end_token(&t);
@@ -904,18 +981,11 @@ void tokenize(Buf *buf, Tokenization *out) {
                         continue;
                 }
                 break;
-            case TokenizeStateFirstR:
+            case TokenizeStateSawAtSign:
                 switch (c) {
                     case '"':
-                        t.cur_tok->id = TokenIdStringLiteral;
-                        t.state = TokenizeStateRawString;
-                        t.raw_string_id_start = t.pos + 1;
-                        break;
-                    case 'c':
-                        t.state = TokenizeStateSymbolFirstRaw;
-                        break;
-                    case SYMBOL_CHAR_EXCEPT_C:
-                        t.state = TokenizeStateSymbol;
+                        set_token_id(&t, t.cur_tok, TokenIdSymbol);
+                        t.state = TokenizeStateString;
                         break;
                     default:
                         t.pos -= 1;
@@ -927,6 +997,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSymbol:
                 switch (c) {
                     case SYMBOL_CHAR:
+                        buf_append_char(&t.cur_tok->data.str_lit.str, c);
                         break;
                     default:
                         t.pos -= 1;
@@ -942,108 +1013,124 @@ void tokenize(Buf *buf, Tokenization *out) {
                         t.state = TokenizeStateStart;
                         break;
                     case '\n':
-                        tokenize_error(&t, "use raw string for multiline string literal");
+                        tokenize_error(&t, "newline not allowed in string literal");
                         break;
                     case '\\':
                         t.state = TokenizeStateStringEscape;
                         break;
                     default:
+                        buf_append_char(&t.cur_tok->data.str_lit.str, c);
                         break;
                 }
                 break;
             case TokenizeStateStringEscape:
                 switch (c) {
                     case 'x':
-                        t.state = TokenizeStateHex;
-                        t.hex_chars_left = 2;
+                        t.state = TokenizeStateCharCode;
+                        t.radix = 16;
+                        t.char_code = 0;
+                        t.char_code_index = 0;
+                        t.char_code_end = 2;
+                        t.unicode = false;
                         break;
                     case 'u':
-                        t.state = TokenizeStateHex;
-                        t.hex_chars_left = 4;
+                        t.state = TokenizeStateCharCode;
+                        t.radix = 16;
+                        t.char_code = 0;
+                        t.char_code_index = 0;
+                        t.char_code_end = 4;
+                        t.unicode = true;
                         break;
                     case 'U':
-                        t.state = TokenizeStateHex;
-                        t.hex_chars_left = 6;
+                        t.state = TokenizeStateCharCode;
+                        t.radix = 16;
+                        t.char_code = 0;
+                        t.char_code_index = 0;
+                        t.char_code_end = 6;
+                        t.unicode = true;
                         break;
                     case 'n':
+                        handle_string_escape(&t, '\n');
+                        break;
                     case 'r':
+                        handle_string_escape(&t, '\r');
+                        break;
                     case '\\':
+                        handle_string_escape(&t, '\\');
+                        break;
                     case 't':
+                        handle_string_escape(&t, '\t');
+                        break;
                     case '\'':
+                        handle_string_escape(&t, '\'');
+                        break;
                     case '"':
-                        if (t.cur_tok->id == TokenIdCharLiteral) {
-                            t.state = TokenizeStateCharLiteralEnd;
-                        } else if (t.cur_tok->id == TokenIdStringLiteral) {
-                            t.state = TokenizeStateString;
-                        } else {
-                            zig_unreachable();
-                        }
+                        handle_string_escape(&t, '\"');
                         break;
                     default:
                         tokenize_error(&t, "invalid character: '%c'", c);
                 }
                 break;
-            case TokenizeStateHex:
-                switch (c) {
-                    case HEX_DIGIT:
-                        t.hex_chars_left -= 1;
-                        if (t.hex_chars_left == 0) {
-                            if (t.cur_tok->id == TokenIdCharLiteral) {
-                                t.state = TokenizeStateCharLiteralEnd;
-                            } else if (t.cur_tok->id == TokenIdStringLiteral) {
-                                t.state = TokenizeStateString;
-                            } else if (t.cur_tok->id == TokenIdSymbol) {
-                                t.state = TokenizeStateString;
+            case TokenizeStateCharCode:
+                {
+                    uint32_t digit_value = get_digit_value(c);
+                    if (digit_value >= t.radix) {
+                        tokenize_error(&t, "invalid digit: '%c'", c);
+                    }
+                    t.char_code *= t.radix;
+                    t.char_code += digit_value;
+                    t.char_code_index += 1;
+
+                    if (t.char_code_index >= t.char_code_end) {
+                        if (t.unicode) {
+                            if (t.char_code <= 0x7f) {
+                                // 00000000 00000000 00000000 0xxxxxxx
+                                handle_string_escape(&t, t.char_code);
+                            } else if (t.cur_tok->id == TokenIdCharLiteral) {
+                                tokenize_error(&t, "unicode value too large for character literal: %x", t.char_code);
+                            } else if (t.char_code <= 0x7ff) {
+                                // 00000000 00000000 00000xxx xx000000
+                                handle_string_escape(&t, 0xc0 | (t.char_code >> 6));
+                                // 00000000 00000000 00000000 00xxxxxx
+                                handle_string_escape(&t, 0x80 | (t.char_code & 0x3f));
+                            } else if (t.char_code <= 0xffff) {
+                                // 00000000 00000000 xxxx0000 00000000
+                                handle_string_escape(&t, 0xe0 | (t.char_code >> 12));
+                                // 00000000 00000000 0000xxxx xx000000
+                                handle_string_escape(&t, 0x80 | ((t.char_code >> 6) & 0x3f));
+                                // 00000000 00000000 00000000 00xxxxxx
+                                handle_string_escape(&t, 0x80 | (t.char_code & 0x3f));
+                            } else if (t.char_code <= 0x10ffff) {
+                                // 00000000 000xxx00 00000000 00000000
+                                handle_string_escape(&t, 0xf0 | (t.char_code >> 18));
+                                // 00000000 000000xx xxxx0000 00000000
+                                handle_string_escape(&t, 0x80 | ((t.char_code >> 12) & 0x3f));
+                                // 00000000 00000000 0000xxxx xx000000
+                                handle_string_escape(&t, 0x80 | ((t.char_code >> 6) & 0x3f));
+                                // 00000000 00000000 00000000 00xxxxxx
+                                handle_string_escape(&t, 0x80 | (t.char_code & 0x3f));
                             } else {
-                                zig_unreachable();
+                                tokenize_error(&t, "unicode value out of range: %x", t.char_code);
                             }
+                        } else {
+                            if (t.cur_tok->id == TokenIdCharLiteral && t.char_code >= sizeof(uint8_t)) {
+                                tokenize_error(&t, "value too large for character literal: '%x'",
+                                        t.char_code);
+                            }
+                            handle_string_escape(&t, t.char_code);
                         }
-                        break;
-                    default:
-                        tokenize_error(&t, "invalid character: '%c'", c);
-                }
-                break;
-            case TokenizeStateRawString:
-                if (c == '(') {
-                    t.raw_string_id_end = t.pos;
-                    t.cur_tok->raw_string_start = t.pos + 1;
-                    t.state = TokenizeStateRawStringContents;
-                }
-                break;
-            case TokenizeStateRawStringContents:
-                if (c == ')') {
-                    t.state = TokenizeStateRawStringMaybeEnd;
-                    t.raw_string_id_cmp_pos = t.raw_string_id_start;
-                    t.cur_tok->raw_string_end = t.pos;
-                }
-                break;
-            case TokenizeStateRawStringMaybeEnd:
-                if (t.raw_string_id_cmp_pos >= t.raw_string_id_end &&
-                    c == '"')
-                {
-                    end_token(&t);
-                    t.state = TokenizeStateStart;
-                } else if (c != buf_ptr(t.buf)[t.raw_string_id_cmp_pos]) {
-                    if (c == ')') {
-                        t.raw_string_id_cmp_pos = t.raw_string_id_start;
-                        t.cur_tok->raw_string_end = t.pos;
-                    } else {
-                        t.state = TokenizeStateRawStringContents;
                     }
-                } else {
-                    t.raw_string_id_cmp_pos += 1;
                 }
                 break;
             case TokenizeStateCharLiteral:
                 switch (c) {
                     case '\'':
-                        end_token(&t);
-                        t.state = TokenizeStateStart;
-                        break;
+                        tokenize_error(&t, "expected character");
                     case '\\':
                         t.state = TokenizeStateStringEscape;
                         break;
                     default:
+                        t.cur_tok->data.char_lit.c = c;
                         t.state = TokenizeStateCharLiteralEnd;
                         break;
                 }
@@ -1061,15 +1148,17 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateZero:
                 switch (c) {
                     case 'b':
-                        t.cur_tok->radix = 2;
+                        t.radix = 2;
                         t.state = TokenizeStateNumber;
                         break;
                     case 'o':
-                        t.cur_tok->radix = 8;
+                        t.radix = 8;
+                        t.exp_add_amt = 3;
                         t.state = TokenizeStateNumber;
                         break;
                     case 'x':
-                        t.cur_tok->radix = 16;
+                        t.radix = 16;
+                        t.exp_add_amt = 4;
                         t.state = TokenizeStateNumber;
                         break;
                     default:
@@ -1082,113 +1171,127 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateNumber:
                 {
                     if (c == '.') {
-                        if (t.pos + 1 < buf_len(t.buf)) {
-                            uint8_t next_c = buf_ptr(t.buf)[t.pos + 1];
-                            if (next_c == '.') {
-                                t.pos -= 1;
-                                end_token(&t);
-                                t.state = TokenizeStateStart;
-                                continue;
-                            }
-                        }
-                        t.cur_tok->decimal_point_pos = t.pos;
-                        t.state = TokenizeStateFloatFraction;
+                        t.state = TokenizeStateNumberDot;
                         break;
                     }
-                    if (is_exponent_signifier(c, t.cur_tok->radix)) {
-                        t.cur_tok->exponent_marker_pos = t.pos;
+                    if (is_exponent_signifier(c, t.radix)) {
                         t.state = TokenizeStateFloatExponentUnsigned;
+                        t.is_num_lit_float = true;
                         break;
                     }
-                    if (c == '_') {
-                        tokenize_error(&t, "invalid character: '%c'", c);
-                        break;
-                    }
-                    int digit_value = get_digit_value(c);
-                    if (digit_value >= 0) {
-                        if (digit_value >= t.cur_tok->radix) {
+                    uint32_t digit_value = get_digit_value(c);
+                    if (digit_value >= t.radix) {
+                        if (is_symbol_char(c)) {
                             tokenize_error(&t, "invalid character: '%c'", c);
-                            break;
                         }
-                        // normal digit
-                    } else {
                         // not my char
                         t.pos -= 1;
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         continue;
                     }
+                    t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+                        bignum_multiply_by_scalar(&t.cur_tok->data.num_lit.bignum, t.radix);
+                    t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+                        bignum_increment_by_scalar(&t.cur_tok->data.num_lit.bignum, digit_value);
                     break;
                 }
+            case TokenizeStateNumberDot:
+                if (c == '.') {
+                    t.pos -= 2;
+                    end_token(&t);
+                    t.state = TokenizeStateStart;
+                    continue;
+                }
+                t.pos -= 1;
+                t.state = TokenizeStateFloatFraction;
+                t.is_num_lit_float = true;
+                continue;
             case TokenizeStateFloatFraction:
                 {
-                    if (is_exponent_signifier(c, t.cur_tok->radix)) {
-                        t.cur_tok->exponent_marker_pos = t.pos;
+                    if (is_exponent_signifier(c, t.radix)) {
                         t.state = TokenizeStateFloatExponentUnsigned;
                         break;
                     }
-                    if (c == '_') {
-                        tokenize_error(&t, "invalid character: '%c'", c);
-                        break;
-                    }
-                    int digit_value = get_digit_value(c);
-                    if (digit_value >= 0) {
-                        if (digit_value >= t.cur_tok->radix) {
+                    uint32_t digit_value = get_digit_value(c);
+                    if (digit_value >= t.radix) {
+                        if (is_symbol_char(c)) {
                             tokenize_error(&t, "invalid character: '%c'", c);
-                            break;
                         }
-                        // normal digit
-                    } else {
                         // not my char
                         t.pos -= 1;
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         continue;
                     }
+                    t.exponent_in_bin_or_dec -= t.exp_add_amt;
+                    if (t.radix == 10) {
+                        // For now we use strtod to parse decimal floats, so we just have to get to the
+                        // end of the token.
+                        break;
+                    }
+                    t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+                        bignum_multiply_by_scalar(&t.cur_tok->data.num_lit.bignum, t.radix);
+                    t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+                        bignum_increment_by_scalar(&t.cur_tok->data.num_lit.bignum, digit_value);
                     break;
                 }
             case TokenizeStateFloatExponentUnsigned:
                 switch (c) {
                     case '+':
+                        t.is_exp_negative = false;
+                        t.state = TokenizeStateFloatExponentNumber;
+                        break;
                     case '-':
+                        t.is_exp_negative = true;
                         t.state = TokenizeStateFloatExponentNumber;
                         break;
                     default:
                         // reinterpret as normal exponent number
                         t.pos -= 1;
+                        t.is_exp_negative = false;
                         t.state = TokenizeStateFloatExponentNumber;
                         continue;
                 }
                 break;
             case TokenizeStateFloatExponentNumber:
-                switch (c) {
-                    case DIGIT:
-                        break;
-                    case ALPHA:
-                    case '_':
-                        tokenize_error(&t, "invalid character: '%c'", c);
-                        break;
-                    default:
+                {
+                    uint32_t digit_value = get_digit_value(c);
+                    if (digit_value >= t.radix) {
+                        if (is_symbol_char(c)) {
+                            tokenize_error(&t, "invalid character: '%c'", c);
+                        }
+                        // not my char
                         t.pos -= 1;
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         continue;
+                    }
+                    if (t.radix == 10) {
+                        // For now we use strtod to parse decimal floats, so we just have to get to the
+                        // end of the token.
+                        break;
+                    }
+                    t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+                        bignum_multiply_by_scalar(&t.specified_exponent, 10);
+                    t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+                        bignum_increment_by_scalar(&t.specified_exponent, digit_value);
                 }
                 break;
             case TokenizeStateSawDash:
                 switch (c) {
                     case '>':
-                        t.cur_tok->id = TokenIdArrow;
+                        set_token_id(&t, t.cur_tok, TokenIdArrow);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '=':
-                        t.cur_tok->id = TokenIdMinusEq;
+                        set_token_id(&t, t.cur_tok, TokenIdMinusEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
                     case '%':
-                        t.cur_tok->id = TokenIdMinusPercent;
+                        set_token_id(&t, t.cur_tok, TokenIdMinusPercent);
                         t.state = TokenizeStateSawMinusPercent;
                         break;
                     default:
@@ -1201,7 +1304,7 @@ void tokenize(Buf *buf, Tokenization *out) {
             case TokenizeStateSawMinusPercent:
                 switch (c) {
                     case '=':
-                        t.cur_tok->id = TokenIdMinusPercentEq;
+                        set_token_id(&t, t.cur_tok, TokenIdMinusPercentEq);
                         end_token(&t);
                         t.state = TokenizeStateStart;
                         break;
@@ -1226,11 +1329,14 @@ void tokenize(Buf *buf, Tokenization *out) {
         case TokenizeStateStart:
         case TokenizeStateError:
             break;
+        case TokenizeStateNumberDot:
+            tokenize_error(&t, "unterminated number literal");
+            break;
         case TokenizeStateString:
             tokenize_error(&t, "unterminated string");
             break;
         case TokenizeStateStringEscape:
-        case TokenizeStateHex:
+        case TokenizeStateCharCode:
             if (t.cur_tok->id == TokenIdStringLiteral) {
                 tokenize_error(&t, "unterminated string");
             } else if (t.cur_tok->id == TokenIdCharLiteral) {
@@ -1239,19 +1345,12 @@ void tokenize(Buf *buf, Tokenization *out) {
                 zig_unreachable();
             }
             break;
-        case TokenizeStateRawString:
-        case TokenizeStateRawStringContents:
-        case TokenizeStateRawStringMaybeEnd:
-            tokenize_error(&t, "unterminated raw string");
-            break;
         case TokenizeStateCharLiteral:
         case TokenizeStateCharLiteralEnd:
             tokenize_error(&t, "unterminated character literal");
             break;
         case TokenizeStateSymbol:
-        case TokenizeStateSymbolFirst:
-        case TokenizeStateSymbolFirstRaw:
-        case TokenizeStateFirstR:
+        case TokenizeStateSymbolFirstC:
         case TokenizeStateZero:
         case TokenizeStateNumber:
         case TokenizeStateFloatFraction:
@@ -1280,9 +1379,13 @@ void tokenize(Buf *buf, Tokenization *out) {
         case TokenizeStateSawPlusPercent:
         case TokenizeStateSawMinusPercent:
         case TokenizeStateSawShiftLeftPercent:
+        case TokenizeStateLineString:
+        case TokenizeStateLineStringEnd:
             end_token(&t);
             break;
         case TokenizeStateSawDotDot:
+        case TokenizeStateSawBackslash:
+        case TokenizeStateLineStringContinue:
             tokenize_error(&t, "unexpected EOF");
             break;
         case TokenizeStateLineComment:
src/tokenizer.hpp
@@ -9,6 +9,7 @@
 #define ZIG_TOKENIZER_HPP
 
 #include "buffer.hpp"
+#include "bignum.hpp"
 
 enum TokenId {
     TokenIdEof,
@@ -111,6 +112,22 @@ enum TokenId {
     TokenIdPercentDot,
 };
 
+struct TokenNumLit { // payload carried by a number-literal token (the num_lit member of Token::data)
+    BigNum bignum; // value built by the tokenizer; each digit it folds in does bignum = bignum * radix + digit
+    // overflow is true if when parsing the number, we discovered it would not
+    // fit without losing data in a uint64_t or double
+    bool overflow; // sticky: the tokenizer ORs each bignum step's result into it, so it is never cleared again
+};
+
+struct TokenStrLit { // payload for string-literal and symbol tokens (the str_lit member of Token::data)
+    Buf str; // token text, appended one byte at a time by the tokenizer while it scans strings, line strings and symbols
+    bool is_c_str; // set to true by the tokenizer in TokenizeStateSymbolFirstC when a '"' follows -- presumably the c"..." form; confirm against where that state is entered
+};
+
+struct TokenCharLit { // payload for a character-literal token (the char_lit member of Token::data)
+    uint8_t c; // the literal's single byte; an unescaped character is stored here directly by the tokenizer
+};
+
 struct Token {
     TokenId id;
     int start_pos;
@@ -118,14 +135,16 @@ struct Token {
     int start_line;
     int start_column;
 
-    // for id == TokenIdNumberLiteral
-    int radix; // if != 10, then skip the first 2 characters
-    int decimal_point_pos; // either exponent_marker_pos or the position of the '.'
-    int exponent_marker_pos; // either end_pos or the position of the 'e'/'p'
+    union {
+        // TokenIdNumberLiteral
+        TokenNumLit num_lit;
 
-    // for id == TokenIdStringLiteral
-    int raw_string_start;
-    int raw_string_end;
+        // TokenIdStringLiteral or TokenIdSymbol
+        TokenStrLit str_lit;
+
+        // TokenIdCharLiteral
+        TokenCharLit char_lit;
+    } data;
 };
 
 struct Tokenization {
@@ -142,8 +161,6 @@ void tokenize(Buf *buf, Tokenization *out_tokenization);
 
 void print_tokens(Buf *buf, ZigList<Token> *tokens);
 
-int get_digit_value(uint8_t c);
-
 const char * token_name(TokenId id);
 
 bool valid_symbol_starter(uint8_t c);
test/run_tests.cpp
@@ -1173,7 +1173,7 @@ fn f() {
     add_compile_fail_case("normal string with newline", R"SOURCE(
 const foo = "a
 b";
-    )SOURCE", 1, ".tmp_source.zig:2:13: error: use raw string for multiline string literal");
+    )SOURCE", 1, ".tmp_source.zig:2:13: error: newline not allowed in string literal");
 
     add_compile_fail_case("invalid comparison for function pointers", R"SOURCE(
 fn foo() {}
@@ -1760,7 +1760,7 @@ struct type {
     )SOURCE", 3,
             R"(pub const FOO = c"aoeu\x13 derp")",
             R"(pub const FOO2 = c"aoeu\x134 derp")",
-            R"(pub const FOO_CHAR = '\x3f')");
+            R"(pub const FOO_CHAR = '?')");
 }
 
 static void run_self_hosted_test(bool is_release_mode) {
test/self_hosted.zig
@@ -684,17 +684,13 @@ fn count_trailing_zeroes() {
 
 #attribute("test")
 fn multiline_string() {
-    const s1 = r"AOEU(
-one
-two)
-three)AOEU";
-    const s2 = "\none\ntwo)\nthree";
-    const s3 = r"(
-one
-two)
-three)";
+    const s1 = // line-string form: each following line contributes the text after its leading double backslash
+        \\one
+        \\two)
+        \\three
+    ;
+    const s2 = "one\ntwo)\nthree"; // expected value: the lines joined by a newline, with no trailing newline
     assert(str.eql(s1, s2));
-    assert(str.eql(s3, s2));
 }