Commit d0b11af2bd
Changed files (15)
doc/vim/syntax/zig.vim
@@ -1,11 +1,12 @@
" Vim syntax file
" Language: Zig
" Maintainer: Andrew Kelley
-" Latest Revision: 28 July 2016
+" Latest Revision: 03 August 2016
if exists("b:current_syntax")
finish
endif
+let b:current_syntax = "zig"
syn keyword zigStorage const var extern export pub noalias inline noinline
syn keyword zigStructure struct enum union
@@ -24,33 +25,30 @@ syn keyword zigBoolean true false
syn match zigOperator display "\%(+%\?\|-%\?\|/\|*%\?\|=\|\^\|&\|?\||\|!\|>\|<\|%\|<<%\?\|>>\|&&\|||\)=\?"
syn match zigArrowCharacter display "->"
-syn match zigDecNumber display "\<[0-9][0-9_]*\%([iu]\%(size\|8\|16\|32\|64\)\)\="
-syn match zigHexNumber display "\<0x[a-fA-F0-9_]\+\%([iu]\%(size\|8\|16\|32\|64\)\)\="
-syn match zigOctNumber display "\<0o[0-7_]\+\%([iu]\%(size\|8\|16\|32\|64\)\)\="
-syn match zigBinNumber display "\<0b[01_]\+\%([iu]\%(size\|8\|16\|32\|64\)\)\="
+syn match zigDecNumber display "\<[0-9]*\%(.[0-9]\+\)\=\%([eE][+-]\?[0-9]\+\)\="
+syn match zigHexNumber display "\<0x[a-fA-F0-9]\+\%(.[a-fA-F0-9]\+\%([pP][+-]\?[0-9]\+\)\?\)\="
+syn match zigOctNumber display "\<0o[0-7]\+"
+syn match zigBinNumber display "\<0b[01]\+\%(.[01]\+\%([eE][+-]\?[0-9]\+\)\?\)\="
syn match zigCharacterInvalid display contained /b\?'\zs[\n\r\t']\ze'/
syn match zigCharacterInvalidUnicode display contained /b'\zs[^[:cntrl:][:graph:][:alnum:][:space:]]\ze'/
syn match zigCharacter /b'\([^\\]\|\\\(.\|x\x\{2}\)\)'/ contains=zigEscape,zigEscapeError,zigCharacterInvalid,zigCharacterInvalidUnicode
-syn match zigCharacter /'\([^\\]\|\\\(.\|x\x\{2}\|u\x\{4}\|U\x\{8}\|u{\x\{1,6}}\)\)'/ contains=zigEscape,zigEscapeUnicode,zigEscapeError,zigCharacterInvalid
-
-syn match zigShebang /\%^#![^[].*/
+syn match zigCharacter /'\([^\\]\|\\\(.\|x\x\{2}\|u\x\{4}\|U\x\{6}\)\)'/ contains=zigEscape,zigEscapeUnicode,zigEscapeError,zigCharacterInvalid
syn region zigCommentLine start="//" end="$" contains=zigTodo,@Spell
syn region zigCommentLineDoc start="//\%(//\@!\|!\)" end="$" contains=zigTodo,@Spell
+" TODO match only the first '\\' within the zigMultilineString as zigMultilineStringPrefix
+syn match zigMultilineStringPrefix display contained /c\?\\\\/
+syn region zigMultilineString start="c\?\\\\" end="$" contains=zigMultilineStringPrefix
+
syn keyword zigTodo contained TODO XXX
syn match zigEscapeError display contained /\\./
-syn match zigEscape display contained /\\\([nrt0\\'"]\|x\x\{2}\)/
-syn match zigEscapeUnicode display contained /\\\(u\x\{4}\|U\x\{8}\)/
-syn match zigEscapeUnicode display contained /\\u{\x\{1,6}}/
-syn match zigStringContinuation display contained /\\\n\s*/
-syn region zigString start=+c\?"+ skip=+\\\\\|\\"+ end=+"+ oneline contains=zigEscape,zigEscapeUnicode,zigEscapeError,zigStringContinuation,@Spell
-syn region zigString start='r"\z([^)]*\)(' end=')\z1"' contains=@Spell
-
-let b:current_syntax = "zig"
+syn match zigEscape display contained /\\\([nrt\\'"]\|x\x\{2}\)/
+syn match zigEscapeUnicode display contained /\\\(u\x\{4}\|U\x\{6}\)/
+syn region zigString start=+c\?"+ skip=+\\\\\|\\"+ end=+"+ oneline contains=zigEscape,zigEscapeUnicode,zigEscapeError,@Spell
hi def link zigDecNumber zigNumber
hi def link zigHexNumber zigNumber
@@ -59,12 +57,12 @@ hi def link zigBinNumber zigNumber
hi def link zigKeyword Keyword
hi def link zigType Type
-hi def link zigShebang Comment
hi def link zigCommentLine Comment
hi def link zigCommentLineDoc SpecialComment
hi def link zigTodo Todo
-hi def link zigStringContinuation Special
hi def link zigString String
+hi def link zigMultilineString String
+hi def link zigMultilineStringPrefix Comment
hi def link zigCharacterInvalid Error
hi def link zigCharacterInvalidUnicode zigCharacterInvalid
hi def link zigCharacter Character
doc/langref.md
@@ -7,27 +7,27 @@ Root = many(TopLevelDecl) "EOF"
TopLevelDecl = many(Directive) option(VisibleMod) (FnDef | ExternDecl | ContainerDecl | GlobalVarDecl | ErrorValueDecl | TypeDecl | UseDecl)
-TypeDecl = "type" "Symbol" "=" TypeExpr ";"
+TypeDecl = "type" Symbol "=" TypeExpr ";"
-ErrorValueDecl = "error" "Symbol" ";"
+ErrorValueDecl = "error" Symbol ";"
GlobalVarDecl = VariableDeclaration ";"
-VariableDeclaration = ("var" | "const") "Symbol" option(":" TypeExpr) "=" Expression
+VariableDeclaration = ("var" | "const") Symbol option(":" TypeExpr) "=" Expression
-ContainerDecl = ("struct" | "enum" | "union") "Symbol" option(ParamDeclList) "{" many(StructMember) "}"
+ContainerDecl = ("struct" | "enum" | "union") Symbol option(ParamDeclList) "{" many(StructMember) "}"
StructMember = many(Directive) option(VisibleMod) (StructField | FnDef | GlobalVarDecl | ContainerDecl)
-StructField = "Symbol" option(":" Expression) ",")
+StructField = Symbol option(":" Expression) ","
UseDecl = "use" Expression ";"
ExternDecl = "extern" (FnProto | VariableDeclaration) ";"
-FnProto = "fn" option("Symbol") ParamDeclList option("->" TypeExpr)
+FnProto = "fn" option(Symbol) ParamDeclList option("->" TypeExpr)
-Directive = "#" "Symbol" "(" Expression ")"
+Directive = "#" Symbol "(" Expression ")"
VisibleMod = "pub" | "export"
@@ -35,13 +35,13 @@ FnDef = option("inline" | "extern") FnProto Block
ParamDeclList = "(" list(ParamDecl, ",") ")"
-ParamDecl = option("noalias" | "inline") option("Symbol" ":") TypeExpr | "..."
+ParamDecl = option("noalias" | "inline") option(Symbol ":") TypeExpr | "..."
Block = "{" list(option(Statement), ";") "}"
Statement = Label | VariableDeclaration ";" | Defer ";" | NonBlockExpression ";" | BlockExpression
-Label = "Symbol" ":"
+Label = Symbol ":"
Expression = BlockExpression | NonBlockExpression
@@ -49,23 +49,23 @@ TypeExpr = PrefixOpExpression
NonBlockExpression = ReturnExpression | AssignmentExpression
-AsmExpression = "asm" option("volatile") "(" "String" option(AsmOutput) ")"
+AsmExpression = "asm" option("volatile") "(" String option(AsmOutput) ")"
AsmOutput = ":" list(AsmOutputItem, ",") option(AsmInput)
AsmInput = ":" list(AsmInputItem, ",") option(AsmClobbers)
-AsmOutputItem = "[" "Symbol" "]" "String" "(" ("Symbol" | "->" TypeExpr) ")"
+AsmOutputItem = "[" Symbol "]" String "(" (Symbol | "->" TypeExpr) ")"
-AsmInputItem = "[" "Symbol" "]" "String" "(" Expression ")"
+AsmInputItem = "[" Symbol "]" String "(" Expression ")"
-AsmClobbers= ":" list("String", ",")
+AsmClobbers = ":" list(String, ",")
UnwrapExpression = BoolOrExpression (UnwrapMaybe | UnwrapError) | BoolOrExpression
UnwrapMaybe = "??" Expression
-UnwrapError = "%%" option("|" "Symbol" "|") Expression
+UnwrapError = "%%" option("|" Symbol "|") Expression
AssignmentExpression = UnwrapExpression AssignmentOperator UnwrapExpression | UnwrapExpression
@@ -75,13 +75,13 @@ BlockExpression = IfExpression | Block | WhileExpression | ForExpression | Switc
SwitchExpression = "switch" "(" Expression ")" "{" many(SwitchProng) "}"
-SwitchProng = (list(SwitchItem, ",") | "else") "=>" option("|" "Symbol" "|") Expression ","
+SwitchProng = (list(SwitchItem, ",") | "else") "=>" option("|" Symbol "|") Expression ","
SwitchItem = Expression | (Expression "..." Expression)
WhileExpression = "while" "(" Expression option(";" Expression) ")" Expression
-ForExpression = "for" "(" Expression ")" option("|" option("*") "Symbol" option("," "Symbol") "|") Expression
+ForExpression = "for" "(" Expression ")" option("|" option("*") Symbol option("," Symbol) "|") Expression
BoolOrExpression = BoolAndExpression "||" BoolOrExpression | BoolAndExpression
@@ -93,7 +93,7 @@ IfExpression = IfVarExpression | IfBoolExpression
IfBoolExpression = "if" "(" Expression ")" Expression option(Else)
-IfVarExpression = "if" "(" ("const" | "var") option("*") "Symbol" option(":" TypeExpr) "?=" Expression ")" Expression Option(Else)
+IfVarExpression = "if" "(" ("const" | "var") option("*") Symbol option(":" TypeExpr) "?=" Expression ")" Expression option(Else)
Else = "else" Expression
@@ -127,7 +127,7 @@ PrefixOpExpression = PrefixOp PrefixOpExpression | SuffixOpExpression
SuffixOpExpression = PrimaryExpression option(FnCallExpression | ArrayAccessExpression | FieldAccessExpression | SliceExpression)
-FieldAccessExpression = "." "Symbol"
+FieldAccessExpression = "." Symbol
FnCallExpression = "(" list(Expression, ",") ")"
@@ -139,15 +139,15 @@ ContainerInitExpression = "{" ContainerInitBody "}"
ContainerInitBody = list(StructLiteralField, ",") | list(Expression, ",")
-StructLiteralField = "." "Symbol" "=" Expression
+StructLiteralField = "." Symbol "=" Expression
PrefixOp = "!" | "-" | "~" | "*" | ("&" option("const")) | "?" | "%" | "%%" | "??" | "-%"
-PrimaryExpression = "Number" | "String" | "CharLiteral" | KeywordLiteral | GroupedExpression | GotoExpression | BlockExpression | "Symbol" | ("@" "Symbol" FnCallExpression) | ArrayType | (option("extern") FnProto) | AsmExpression | ("error" "." "Symbol")
+PrimaryExpression = Number | String | CharLiteral | KeywordLiteral | GroupedExpression | GotoExpression | BlockExpression | Symbol | ("@" Symbol FnCallExpression) | ArrayType | (option("extern") FnProto) | AsmExpression | ("error" "." Symbol)
ArrayType = "[" option(Expression) "]" option("const") TypeExpr
-GotoExpression = "goto" "Symbol"
+GotoExpression = "goto" Symbol
GroupedExpression = "(" Expression ")"
@@ -265,14 +265,13 @@ from codegen.
### Literals
#### Character and String Literals
+
```
Literal Example Characters Escapes Null Term Type
Byte 'H' All ASCII Byte No u8
UTF-8 Bytes "hello" All Unicode Byte & Unicode No [5]u8
UTF-8 C string c"hello" All Unicode Byte & Unicode Yes &const u8
-UTF-8 Raw String r"X(hello)X" All Unicode None No [5]u8
-UTF-8 Raw C String rc"X(hello)X" All Unicode None Yes &const u8
```
### Escapes
@@ -291,26 +290,56 @@ UTF-8 Raw C String rc"X(hello)X" All Unicode None Yes &const
Note that the maximum valid Unicode point is 0x10ffff.
-##### Raw Strings
+##### Multiline String Literals
-Raw string literals have no escapes and can span across multiple lines. To
-start a raw string, use 'r"' or 'rc"' followed by unique bytes followed by '('.
-To end a raw string, use ')' followed by the same unique bytes, followed by '"'.
+Multiline string literals have no escapes and can span across multiple lines.
+To start a multiline string literal, use the `\\` token. Just like a comment,
+the string literal goes until the end of the line. The end of the line is not
+included in the string literal.
+However, if the next line begins with `\\` then a newline is appended and
+the string literal continues.
-#### Numeric Literals
+Example:
+```zig
+const hello_world_in_c =
+ \\#include <stdio.h>
+ \\
+ \\int main(int argc, char **argv) {
+ \\ printf("hello world\n");
+ \\ return 0;
+ \\}
+;
```
-Number literals Example Exponentiation
-
-Decimal integer 98222 N/A
-Hex integer 0xff N/A
-Octal integer 0o77 N/A
-Binary integer 0b11110000 N/A
-Floating-point 123.0E+77 Optional
-Hex floating point TODO TODO
+
+For a multiline C string literal, prepend `c` to each `\\`. Example:
+
+```zig
+const c_string_literal =
+ c\\#include <stdio.h>
+ c\\
+ c\\int main(int argc, char **argv) {
+ c\\ printf("hello world\n");
+ c\\ return 0;
+ c\\}
+;
```
+In this example the variable `c_string_literal` has type `&const u8` and
+has a terminating null byte.
+
+#### Number Literals
+
+ Number literals | Example | Exponentiation
+--------------------|-------------|--------------
+ Decimal integer | 98222 | N/A
+ Hex integer | 0xff | N/A
+ Octal integer | 0o77 | N/A
+ Binary integer | 0b11110000 | N/A
+ Floating point | 123.0E+77 | Optional
+ Hex floating point | 0x103.70p-5 | Optional
+
### Identifiers
TODO
src/all_types.hpp
@@ -194,7 +194,7 @@ struct AstNodeRoot {
struct AstNodeFnProto {
TopLevelDecl top_level_decl;
- Buf name;
+ Buf *name;
ZigList<AstNode *> params;
AstNode *return_type;
bool is_var_args;
@@ -229,7 +229,7 @@ struct AstNodeFnDecl {
};
struct AstNodeParamDecl {
- Buf name;
+ Buf *name;
AstNode *type;
bool is_noalias;
bool is_inline;
@@ -279,7 +279,7 @@ struct AstNodeDefer {
struct AstNodeVariableDeclaration {
TopLevelDecl top_level_decl;
- Buf symbol;
+ Buf *symbol;
bool is_const;
bool is_extern;
// one or both of type and expr will be non null
@@ -293,7 +293,7 @@ struct AstNodeVariableDeclaration {
struct AstNodeTypeDecl {
TopLevelDecl top_level_decl;
- Buf symbol;
+ Buf *symbol;
AstNode *child_type;
// populated by semantic analyzer
@@ -305,7 +305,7 @@ struct AstNodeTypeDecl {
struct AstNodeErrorValueDecl {
TopLevelDecl top_level_decl;
- Buf name;
+ Buf *name;
// populated by semantic analyzer
ErrorTableEntry *err;
@@ -434,7 +434,7 @@ struct AstNodeSliceExpr {
struct AstNodeFieldAccessExpr {
AstNode *struct_expr;
- Buf field_name;
+ Buf *field_name;
// populated by semantic analyzer
TypeStructField *type_struct_field;
@@ -448,7 +448,7 @@ struct AstNodeFieldAccessExpr {
};
struct AstNodeDirective {
- Buf name;
+ Buf *name;
AstNode *expr;
};
@@ -555,7 +555,7 @@ struct AstNodeSwitchRange {
};
struct AstNodeLabel {
- Buf name;
+ Buf *name;
// populated by semantic analyzer
Expr resolved_expr;
@@ -563,7 +563,7 @@ struct AstNodeLabel {
};
struct AstNodeGoto {
- Buf name;
+ Buf *name;
// populated by semantic analyzer
Expr resolved_expr;
@@ -571,9 +571,9 @@ struct AstNodeGoto {
};
struct AsmOutput {
- Buf asm_symbolic_name;
- Buf constraint;
- Buf variable_name;
+ Buf *asm_symbolic_name;
+ Buf *constraint;
+ Buf *variable_name;
AstNode *return_type; // null unless "=r" and return
// populated by semantic analyzer
@@ -581,8 +581,8 @@ struct AsmOutput {
};
struct AsmInput {
- Buf asm_symbolic_name;
- Buf constraint;
+ Buf *asm_symbolic_name;
+ Buf *constraint;
AstNode *expr;
};
@@ -593,8 +593,7 @@ struct SrcPos {
struct AstNodeAsmExpr {
bool is_volatile;
- Buf asm_template;
- ZigList<SrcPos> offset_map;
+ Buf *asm_template;
ZigList<AsmToken> token_list;
ZigList<AsmOutput*> output_list;
ZigList<AsmInput*> input_list;
@@ -613,7 +612,7 @@ enum ContainerKind {
struct AstNodeStructDecl {
TopLevelDecl top_level_decl;
- Buf name;
+ Buf *name;
ContainerKind kind;
ZigList<AstNode *> generic_params;
bool generic_params_is_var_args; // always an error but it can happen from parsing
@@ -629,12 +628,12 @@ struct AstNodeStructDecl {
struct AstNodeStructField {
TopLevelDecl top_level_decl;
- Buf name;
+ Buf *name;
AstNode *type;
};
struct AstNodeStringLiteral {
- Buf buf;
+ Buf *buf;
bool c;
// populated by semantic analyzer:
@@ -648,29 +647,19 @@ struct AstNodeCharLiteral {
Expr resolved_expr;
};
-enum NumLit {
- NumLitFloat,
- NumLitUInt,
-};
-
struct AstNodeNumberLiteral {
- NumLit kind;
+ BigNum *bignum;
// overflow is true if when parsing the number, we discovered it would not
// fit without losing data in a uint64_t or double
bool overflow;
- union {
- uint64_t x_uint;
- double x_float;
- } data;
-
// populated by semantic analyzer
Expr resolved_expr;
};
struct AstNodeStructValueField {
- Buf name;
+ Buf *name;
AstNode *expr;
// populated by semantic analyzer
@@ -706,7 +695,7 @@ struct AstNodeUndefinedLiteral {
};
struct AstNodeSymbolExpr {
- Buf symbol;
+ Buf *symbol;
// populated by semantic analyzer
Expr resolved_expr;
src/analyze.cpp
@@ -1053,7 +1053,7 @@ static void resolve_function_proto(CodeGen *g, AstNode *node, FnTableEntry *fn_t
if (fn_proto->top_level_decl.directives) {
for (int i = 0; i < fn_proto->top_level_decl.directives->length; i += 1) {
AstNode *directive_node = fn_proto->top_level_decl.directives->at(i);
- Buf *name = &directive_node->data.directive.name;
+ Buf *name = directive_node->data.directive.name;
if (buf_eql_str(name, "attribute")) {
if (fn_table_entry->fn_def_node) {
@@ -1251,7 +1251,7 @@ static void resolve_enum_type(CodeGen *g, ImportTableEntry *import, TypeTableEnt
for (uint32_t i = 0; i < field_count; i += 1) {
AstNode *field_node = decl_node->data.struct_decl.fields.at(i);
TypeEnumField *type_enum_field = &enum_type->data.enumeration.fields[i];
- type_enum_field->name = &field_node->data.struct_field.name;
+ type_enum_field->name = field_node->data.struct_field.name;
TypeTableEntry *field_type = analyze_type_expr(g, import, context,
field_node->data.struct_field.type);
type_enum_field->type_entry = field_type;
@@ -1365,7 +1365,7 @@ static void resolve_enum_type(CodeGen *g, ImportTableEntry *import, TypeTableEnt
uint64_t debug_align_in_bits = 8*LLVMABISizeOfType(g->target_data_ref, enum_type->type_ref);
LLVMZigDIType *replacement_di_type = LLVMZigCreateDebugStructType(g->dbuilder,
LLVMZigFileToScope(import->di_file),
- buf_ptr(&decl_node->data.struct_decl.name),
+ buf_ptr(decl_node->data.struct_decl.name),
import->di_file, decl_node->line + 1,
debug_size_in_bits,
debug_align_in_bits,
@@ -1381,7 +1381,7 @@ static void resolve_enum_type(CodeGen *g, ImportTableEntry *import, TypeTableEnt
uint64_t tag_debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, tag_type_entry->type_ref);
uint64_t tag_debug_align_in_bits = 8*LLVMABISizeOfType(g->target_data_ref, tag_type_entry->type_ref);
LLVMZigDIType *tag_di_type = LLVMZigCreateDebugEnumerationType(g->dbuilder,
- LLVMZigFileToScope(import->di_file), buf_ptr(&decl_node->data.struct_decl.name),
+ LLVMZigFileToScope(import->di_file), buf_ptr(decl_node->data.struct_decl.name),
import->di_file, decl_node->line + 1,
tag_debug_size_in_bits,
tag_debug_align_in_bits,
@@ -1441,7 +1441,7 @@ static void resolve_struct_type(CodeGen *g, ImportTableEntry *import, TypeTableE
for (int i = 0; i < field_count; i += 1) {
AstNode *field_node = decl_node->data.struct_decl.fields.at(i);
TypeStructField *type_struct_field = &struct_type->data.structure.fields[i];
- type_struct_field->name = &field_node->data.struct_field.name;
+ type_struct_field->name = field_node->data.struct_field.name;
TypeTableEntry *field_type = analyze_type_expr(g, import, context,
field_node->data.struct_field.type);
type_struct_field->type_entry = field_type;
@@ -1514,7 +1514,7 @@ static void resolve_struct_type(CodeGen *g, ImportTableEntry *import, TypeTableE
uint64_t debug_align_in_bits = 8*LLVMABISizeOfType(g->target_data_ref, struct_type->type_ref);
LLVMZigDIType *replacement_di_type = LLVMZigCreateDebugStructType(g->dbuilder,
LLVMZigFileToScope(import->di_file),
- buf_ptr(&decl_node->data.struct_decl.name),
+ buf_ptr(decl_node->data.struct_decl.name),
import->di_file, decl_node->line + 1,
debug_size_in_bits,
debug_align_in_bits,
@@ -1570,7 +1570,7 @@ static void preview_fn_proto_instance(CodeGen *g, ImportTableEntry *import, AstN
assert(!is_generic_instance || !is_generic_fn);
AstNode *parent_decl = proto_node->data.fn_proto.top_level_decl.parent_decl;
- Buf *proto_name = &proto_node->data.fn_proto.name;
+ Buf *proto_name = proto_node->data.fn_proto.name;
AstNode *fn_def_node = proto_node->data.fn_proto.fn_def_node;
bool is_extern = proto_node->data.fn_proto.is_extern;
@@ -1645,7 +1645,7 @@ static void scan_struct_decl(CodeGen *g, ImportTableEntry *import, BlockContext
return;
}
- Buf *name = &node->data.struct_decl.name;
+ Buf *name = node->data.struct_decl.name;
TypeTableEntry *container_type = get_partial_container_type(g, import, context,
node->data.struct_decl.kind, node, buf_ptr(name));
node->data.struct_decl.type_entry = container_type;
@@ -1692,7 +1692,7 @@ static void preview_error_value_decl(CodeGen *g, AstNode *node) {
ErrorTableEntry *err = allocate<ErrorTableEntry>(1);
err->decl_node = node;
- buf_init_from_buf(&err->name, &node->data.error_value_decl.name);
+ buf_init_from_buf(&err->name, node->data.error_value_decl.name);
auto existing_entry = g->error_table.maybe_get(&err->name);
if (existing_entry) {
@@ -1749,7 +1749,7 @@ static void resolve_top_level_decl(CodeGen *g, AstNode *node, bool pointer_only)
case NodeTypeTypeDecl:
{
AstNode *type_node = node->data.type_decl.child_type;
- Buf *decl_name = &node->data.type_decl.symbol;
+ Buf *decl_name = node->data.type_decl.symbol;
TypeTableEntry *entry;
if (node->data.type_decl.override_type) {
@@ -2479,12 +2479,12 @@ static TypeTableEntry *analyze_container_init_expr(CodeGen *g, ImportTableEntry
val_field_node->block_context = context;
TypeStructField *type_field = find_struct_type_field(container_type,
- &val_field_node->data.struct_val_field.name);
+ val_field_node->data.struct_val_field.name);
if (!type_field) {
add_node_error(g, val_field_node,
buf_sprintf("no member named '%s' in '%s'",
- buf_ptr(&val_field_node->data.struct_val_field.name), buf_ptr(&container_type->name)));
+ buf_ptr(val_field_node->data.struct_val_field.name), buf_ptr(&container_type->name)));
continue;
}
@@ -2604,7 +2604,7 @@ static TypeTableEntry *analyze_field_access_expr(CodeGen *g, ImportTableEntry *i
AstNode **struct_expr_node = &node->data.field_access_expr.struct_expr;
TypeTableEntry *struct_type = analyze_expression(g, import, context, nullptr, *struct_expr_node);
- Buf *field_name = &node->data.field_access_expr.field_name;
+ Buf *field_name = node->data.field_access_expr.field_name;
bool wrapped_in_fn_call = node->data.field_access_expr.is_fn_call;
@@ -2965,28 +2965,33 @@ static TypeTableEntry *resolve_expr_const_val_as_string_lit(CodeGen *g, AstNode
return get_array_type(g, g->builtin_types.entry_u8, buf_len(str));
}
-
-static TypeTableEntry *resolve_expr_const_val_as_unsigned_num_lit(CodeGen *g, AstNode *node,
- TypeTableEntry *expected_type, uint64_t x, bool depends_on_compile_var)
+static TypeTableEntry *resolve_expr_const_val_as_bignum(CodeGen *g, AstNode *node,
+ TypeTableEntry *expected_type, BigNum *bignum, bool depends_on_compile_var)
{
Expr *expr = get_resolved_expr(node);
expr->const_val.ok = true;
expr->const_val.depends_on_compile_var = depends_on_compile_var;
- bignum_init_unsigned(&expr->const_val.data.x_bignum, x);
-
- return g->builtin_types.entry_num_lit_int;
+ bignum_init_bignum(&expr->const_val.data.x_bignum, bignum);
+ if (bignum->kind == BigNumKindInt) {
+ return g->builtin_types.entry_num_lit_int;
+ } else if (bignum->kind == BigNumKindFloat) {
+ return g->builtin_types.entry_num_lit_float;
+ } else {
+ zig_unreachable();
+ }
}
-static TypeTableEntry *resolve_expr_const_val_as_float_num_lit(CodeGen *g, AstNode *node,
- TypeTableEntry *expected_type, double x)
+static TypeTableEntry *resolve_expr_const_val_as_unsigned_num_lit(CodeGen *g, AstNode *node,
+ TypeTableEntry *expected_type, uint64_t x, bool depends_on_compile_var)
{
Expr *expr = get_resolved_expr(node);
expr->const_val.ok = true;
+ expr->const_val.depends_on_compile_var = depends_on_compile_var;
- bignum_init_float(&expr->const_val.data.x_bignum, x);
+ bignum_init_unsigned(&expr->const_val.data.x_bignum, x);
- return g->builtin_types.entry_num_lit_float;
+ return g->builtin_types.entry_num_lit_int;
}
static TypeTableEntry *analyze_error_literal_expr(CodeGen *g, ImportTableEntry *import,
@@ -3073,7 +3078,7 @@ static TypeTableEntry *analyze_symbol_expr(CodeGen *g, ImportTableEntry *import,
return resolve_expr_const_val_as_type(g, node, node->data.symbol_expr.override_type_entry, false);
}
- Buf *variable_name = &node->data.symbol_expr.symbol;
+ Buf *variable_name = node->data.symbol_expr.symbol;
auto primitive_table_entry = g->primitive_type_table.maybe_get(variable_name);
if (primitive_table_entry) {
@@ -3177,7 +3182,7 @@ static TypeTableEntry *analyze_lvalue(CodeGen *g, ImportTableEntry *import, Bloc
return g->builtin_types.entry_invalid;
}
if (purpose != LValPurposeAddressOf) {
- Buf *name = &lhs_node->data.symbol_expr.symbol;
+ Buf *name = lhs_node->data.symbol_expr.symbol;
VariableTableEntry *var = find_variable(g, block_context, name);
if (var) {
if (var->is_const) {
@@ -3742,7 +3747,7 @@ static TypeTableEntry *analyze_unwrap_error_expr(CodeGen *g, ImportTableEntry *i
if (var_node) {
child_context = new_block_context(node, parent_context);
var_node->block_context = child_context;
- Buf *var_name = &var_node->data.symbol_expr.symbol;
+ Buf *var_name = var_node->data.symbol_expr.symbol;
node->data.unwrap_err_expr.var = add_local_var(g, var_node, import, child_context, var_name,
g->builtin_types.entry_pure_error, true, nullptr);
} else {
@@ -3827,7 +3832,7 @@ static VariableTableEntry *analyze_variable_declaration_raw(CodeGen *g, ImportTa
assert(type != nullptr); // should have been caught by the parser
VariableTableEntry *var = add_local_var(g, source_node, import, context,
- &variable_declaration->symbol, type, is_const,
+ variable_declaration->symbol, type, is_const,
expr_is_maybe ? nullptr : variable_declaration->expr);
variable_declaration->variable = var;
@@ -3886,15 +3891,7 @@ static TypeTableEntry *analyze_number_literal_expr(CodeGen *g, ImportTableEntry
return g->builtin_types.entry_invalid;
}
- if (node->data.number_literal.kind == NumLitUInt) {
- return resolve_expr_const_val_as_unsigned_num_lit(g, node,
- expected_type, node->data.number_literal.data.x_uint, false);
- } else if (node->data.number_literal.kind == NumLitFloat) {
- return resolve_expr_const_val_as_float_num_lit(g, node,
- expected_type, node->data.number_literal.data.x_float);
- } else {
- zig_unreachable();
- }
+ return resolve_expr_const_val_as_bignum(g, node, expected_type, node->data.number_literal.bignum, false);
}
static TypeTableEntry *analyze_array_type(CodeGen *g, ImportTableEntry *import, BlockContext *context,
@@ -4034,13 +4031,13 @@ static TypeTableEntry *analyze_for_expr(CodeGen *g, ImportTableEntry *import, Bl
AstNode *elem_var_node = node->data.for_expr.elem_node;
elem_var_node->block_context = child_context;
- Buf *elem_var_name = &elem_var_node->data.symbol_expr.symbol;
+ Buf *elem_var_name = elem_var_node->data.symbol_expr.symbol;
node->data.for_expr.elem_var = add_local_var(g, elem_var_node, import, child_context, elem_var_name,
var_type, true, nullptr);
AstNode *index_var_node = node->data.for_expr.index_node;
if (index_var_node) {
- Buf *index_var_name = &index_var_node->data.symbol_expr.symbol;
+ Buf *index_var_name = index_var_node->data.symbol_expr.symbol;
index_var_node->block_context = child_context;
node->data.for_expr.index_var = add_local_var(g, index_var_node, import, child_context, index_var_name,
g->builtin_types.entry_usize, true, nullptr);
@@ -4952,7 +4949,7 @@ static TypeTableEntry *analyze_builtin_fn_call_expr(CodeGen *g, ImportTableEntry
assert(node->type == NodeTypeFnCallExpr);
AstNode *fn_ref_expr = node->data.fn_call_expr.fn_ref_expr;
- Buf *name = &fn_ref_expr->data.symbol_expr.symbol;
+ Buf *name = fn_ref_expr->data.symbol_expr.symbol;
auto entry = g->builtin_fn_table.maybe_get(name);
@@ -5476,7 +5473,7 @@ static TypeTableEntry *analyze_fn_call_with_inline_args(CodeGen *g, ImportTableE
ConstExprValue *const_val = &get_resolved_expr(*param_node)->const_val;
if (const_val->ok) {
VariableTableEntry *var = add_local_var(g, generic_param_decl_node, decl_node->owner, child_context,
- &generic_param_decl_node->data.param_decl.name, param_type, true, *param_node);
+ generic_param_decl_node->data.param_decl.name, param_type, true, *param_node);
// This generic function instance could be called with anything, so when this variable is read it
// needs to know that it depends on compile time variable data.
var->force_depends_on_compile_var = true;
@@ -5570,7 +5567,7 @@ static TypeTableEntry *analyze_generic_fn_call(CodeGen *g, ImportTableEntry *imp
ConstExprValue *const_val = &get_resolved_expr(*param_node)->const_val;
if (const_val->ok) {
VariableTableEntry *var = add_local_var(g, generic_param_decl_node, decl_node->owner, child_context,
- &generic_param_decl_node->data.param_decl.name, param_type, true, *param_node);
+ generic_param_decl_node->data.param_decl.name, param_type, true, *param_node);
var->force_depends_on_compile_var = true;
} else {
add_node_error(g, *param_node, buf_sprintf("unable to evaluate constant expression"));
@@ -5964,7 +5961,7 @@ static TypeTableEntry *analyze_switch_expr(CodeGen *g, ImportTableEntry *import,
if (expr_type->id == TypeTableEntryIdEnum) {
if (item_node->type == NodeTypeSymbol) {
- Buf *field_name = &item_node->data.symbol_expr.symbol;
+ Buf *field_name = item_node->data.symbol_expr.symbol;
TypeEnumField *type_enum_field = get_enum_field(expr_type, field_name);
if (type_enum_field) {
item_node->data.symbol_expr.enum_field = type_enum_field;
@@ -6000,7 +5997,7 @@ static TypeTableEntry *analyze_switch_expr(CodeGen *g, ImportTableEntry *import,
}
} else if (expr_type->id == TypeTableEntryIdErrorUnion) {
if (item_node->type == NodeTypeSymbol) {
- Buf *err_name = &item_node->data.symbol_expr.symbol;
+ Buf *err_name = item_node->data.symbol_expr.symbol;
bool is_ok_case = buf_eql_str(err_name, "Ok");
auto err_table_entry = is_ok_case ? nullptr: g->error_table.maybe_get(err_name);
if (is_ok_case || err_table_entry) {
@@ -6072,7 +6069,7 @@ static TypeTableEntry *analyze_switch_expr(CodeGen *g, ImportTableEntry *import,
AstNode *var_node = prong_node->data.switch_prong.var_symbol;
if (var_node) {
assert(var_node->type == NodeTypeSymbol);
- Buf *var_name = &var_node->data.symbol_expr.symbol;
+ Buf *var_name = var_node->data.symbol_expr.symbol;
var_node->block_context = child_context;
prong_node->data.switch_prong.var = add_local_var(g, var_node, import,
child_context, var_name, var_type, true, nullptr);
@@ -6228,9 +6225,9 @@ static TypeTableEntry *analyze_string_literal_expr(CodeGen *g, ImportTableEntry
TypeTableEntry *expected_type, AstNode *node)
{
if (node->data.string_literal.c) {
- return resolve_expr_const_val_as_c_string_lit(g, node, &node->data.string_literal.buf);
+ return resolve_expr_const_val_as_c_string_lit(g, node, node->data.string_literal.buf);
} else {
- return resolve_expr_const_val_as_string_lit(g, node, &node->data.string_literal.buf);
+ return resolve_expr_const_val_as_string_lit(g, node, node->data.string_literal.buf);
}
}
@@ -6255,7 +6252,7 @@ static TypeTableEntry *analyze_block_expr(CodeGen *g, ImportTableEntry *import,
child->data.label.label_entry = label;
fn_table_entry->all_labels.append(label);
- child_context->label_table.put(&child->data.label.name, label);
+ child_context->label_table.put(child->data.label.name, label);
return_type = g->builtin_types.entry_void;
continue;
@@ -6316,7 +6313,7 @@ static TypeTableEntry *analyze_asm_expr(CodeGen *g, ImportTableEntry *import, Bl
break;
}
} else {
- Buf *variable_name = &asm_output->variable_name;
+ Buf *variable_name = asm_output->variable_name;
VariableTableEntry *var = find_variable(g, context, variable_name);
if (var) {
asm_output->variable = var;
@@ -6351,7 +6348,7 @@ static TypeTableEntry *analyze_goto_pass1(CodeGen *g, ImportTableEntry *import,
static void analyze_goto_pass2(CodeGen *g, ImportTableEntry *import, AstNode *node) {
assert(node->type == NodeTypeGoto);
- Buf *label_name = &node->data.goto_expr.name;
+ Buf *label_name = node->data.goto_expr.name;
BlockContext *context = node->block_context;
assert(context);
LabelTableEntry *label = find_label(g, context, label_name);
@@ -6549,11 +6546,11 @@ static void analyze_fn_body(CodeGen *g, FnTableEntry *fn_table_entry) {
buf_sprintf("byvalue struct parameters not yet supported on extern functions"));
}
- if (buf_len(¶m_decl->name) == 0) {
+ if (buf_len(param_decl->name) == 0) {
add_node_error(g, param_decl_node, buf_sprintf("missing parameter name"));
}
- VariableTableEntry *var = add_local_var(g, param_decl_node, import, context, ¶m_decl->name,
+ VariableTableEntry *var = add_local_var(g, param_decl_node, import, context, param_decl->name,
type, true, nullptr);
var->src_arg_index = i;
param_decl_node->data.param_decl.variable = var;
@@ -6583,7 +6580,7 @@ static void analyze_fn_body(CodeGen *g, FnTableEntry *fn_table_entry) {
if (!label->used) {
add_node_error(g, label->decl_node,
buf_sprintf("label '%s' defined but not used",
- buf_ptr(&label->decl_node->data.label.name)));
+ buf_ptr(label->decl_node->data.label.name)));
}
}
@@ -6640,7 +6637,7 @@ static void scan_decls(CodeGen *g, ImportTableEntry *import, BlockContext *conte
break;
case NodeTypeContainerDecl:
{
- Buf *name = &node->data.struct_decl.name;
+ Buf *name = node->data.struct_decl.name;
add_top_level_decl(g, import, context, node, name);
if (node->data.struct_decl.generic_params.length == 0) {
scan_struct_decl(g, import, context, node);
@@ -6653,20 +6650,20 @@ static void scan_decls(CodeGen *g, ImportTableEntry *import, BlockContext *conte
break;
case NodeTypeVariableDeclaration:
{
- Buf *name = &node->data.variable_declaration.symbol;
+ Buf *name = node->data.variable_declaration.symbol;
add_top_level_decl(g, import, context, node, name);
break;
}
case NodeTypeTypeDecl:
{
- Buf *name = &node->data.type_decl.symbol;
+ Buf *name = node->data.type_decl.symbol;
add_top_level_decl(g, import, context, node, name);
break;
}
case NodeTypeFnProto:
{
// if the name is missing, we immediately announce an error
- Buf *fn_name = &node->data.fn_proto.name;
+ Buf *fn_name = node->data.fn_proto.name;
if (buf_len(fn_name) == 0) {
node->data.fn_proto.skip = true;
add_node_error(g, node, buf_sprintf("missing function name"));
@@ -6851,6 +6848,9 @@ ImportTableEntry *add_source_file(CodeGen *g, PackageTableEntry *package,
assert(import_entry->root);
if (g->verbose) {
ast_print(stderr, import_entry->root, 0);
+ //fprintf(stderr, "\nReformatted Source:\n");
+ //fprintf(stderr, "---------------------\n");
+ //ast_render(stderr, import_entry->root, 4);
}
import_entry->di_file = LLVMZigCreateFile(g->dbuilder, buf_ptr(src_basename), buf_ptr(src_dirname));
@@ -6868,7 +6868,7 @@ ImportTableEntry *add_source_file(CodeGen *g, PackageTableEntry *package,
if (top_level_decl->type == NodeTypeFnDef) {
AstNode *proto_node = top_level_decl->data.fn_def.fn_proto;
assert(proto_node->type == NodeTypeFnProto);
- Buf *proto_name = &proto_node->data.fn_proto.name;
+ Buf *proto_name = proto_node->data.fn_proto.name;
bool is_private = (proto_node->data.fn_proto.top_level_decl.visib_mod == VisibModPrivate);
@@ -7064,7 +7064,7 @@ bool is_node_void_expr(AstNode *node) {
{
AstNode *type_node = node->data.container_init_expr.type;
if (type_node->type == NodeTypeSymbol &&
- buf_eql_str(&type_node->data.symbol_expr.symbol, "void"))
+ buf_eql_str(type_node->data.symbol_expr.symbol, "void"))
{
return true;
}
src/ast_render.cpp
@@ -78,6 +78,24 @@ static const char *visib_mod_string(VisibMod mod) {
zig_unreachable();
}
+// Maps a ReturnKind to the keyword text printed for a return expression:
+// "return" for the unconditional kind, "%return" for the error kind and
+// "?return" for the maybe kind.
+static const char *return_string(ReturnKind kind) {
+ switch (kind) {
+ case ReturnKindUnconditional: return "return";
+ case ReturnKindError: return "%return";
+ case ReturnKindMaybe: return "?return";
+ }
+ // Only reached when kind matches none of the cases above.
+ zig_unreachable();
+}
+
+// Maps a ReturnKind to the keyword text printed for a defer statement:
+// "defer" for the unconditional kind, "%defer" for the error kind and
+// "?defer" for the maybe kind.
+static const char *defer_string(ReturnKind kind) {
+ switch (kind) {
+ case ReturnKindUnconditional: return "defer";
+ case ReturnKindError: return "%defer";
+ case ReturnKindMaybe: return "?defer";
+ }
+ // Only reached when kind matches none of the cases above.
+ zig_unreachable();
+}
+
+// Returns "extern " (note the trailing space) when is_extern is true and an
+// empty string otherwise.
static const char *extern_string(bool is_extern) {
return is_extern ? "extern " : "";
}
@@ -243,7 +261,7 @@ static bool is_node_void(AstNode *node) {
if (node->type == NodeTypeSymbol) {
if (node->data.symbol_expr.override_type_entry) {
return node->data.symbol_expr.override_type_entry->id == TypeTableEntryIdVoid;
- } else if (buf_eql_str(&node->data.symbol_expr.symbol, "void")) {
+ } else if (buf_eql_str(node->data.symbol_expr.symbol, "void")) {
return true;
}
}
@@ -260,7 +278,12 @@ static bool is_digit(uint8_t c) {
}
+// Returns true when c is one of the characters listed in `printables`
+// (space, ASCII letters, digits and the punctuation spelled out below);
+// returns false for every other byte value, including 0.
static bool is_printable(uint8_t c) {
- return is_alpha_under(c) || is_digit(c) || c == ' ';
+ static const uint8_t printables[] =
+ " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.~`!@#$%^&*()_-+=\\{}[];'\"?/<>,";
+ // The array is initialized from a string literal, so its length includes the
+ // terminating NUL. Stop one element short; otherwise c == 0 would compare
+ // equal to the terminator and be reported as printable.
+ for (size_t i = 0; i < array_length(printables) - 1; i += 1) {
+ if (c == printables[i]) return true;
+ }
+ return false;
}
static void string_literal_escape(Buf *source, Buf *dest) {
@@ -353,18 +376,18 @@ static void render_node(AstRender *ar, AstNode *node) {
const char *extern_str = extern_string(node->data.fn_proto.is_extern);
const char *inline_str = inline_string(node->data.fn_proto.is_inline);
fprintf(ar->f, "%s%s%sfn ", pub_str, inline_str, extern_str);
- print_symbol(ar, &node->data.fn_proto.name);
+ print_symbol(ar, node->data.fn_proto.name);
fprintf(ar->f, "(");
int arg_count = node->data.fn_proto.params.length;
bool is_var_args = node->data.fn_proto.is_var_args;
for (int arg_i = 0; arg_i < arg_count; arg_i += 1) {
AstNode *param_decl = node->data.fn_proto.params.at(arg_i);
assert(param_decl->type == NodeTypeParamDecl);
- if (buf_len(¶m_decl->data.param_decl.name) > 0) {
+ if (buf_len(param_decl->data.param_decl.name) > 0) {
const char *noalias_str = param_decl->data.param_decl.is_noalias ? "noalias " : "";
const char *inline_str = param_decl->data.param_decl.is_inline ? "inline " : "";
fprintf(ar->f, "%s%s", noalias_str, inline_str);
- print_symbol(ar, ¶m_decl->data.param_decl.name);
+ print_symbol(ar, param_decl->data.param_decl.name);
fprintf(ar->f, ": ");
}
render_node(ar, param_decl->data.param_decl.type);
@@ -417,21 +440,31 @@ static void render_node(AstRender *ar, AstNode *node) {
fprintf(ar->f, "}");
break;
case NodeTypeDirective:
- fprintf(ar->f, "#%s(", buf_ptr(&node->data.directive.name));
+ fprintf(ar->f, "#%s(", buf_ptr(node->data.directive.name));
render_node(ar, node->data.directive.expr);
fprintf(ar->f, ")\n");
break;
case NodeTypeReturnExpr:
- zig_panic("TODO");
+ {
+ const char *return_str = return_string(node->data.return_expr.kind);
+ fprintf(ar->f, "%s ", return_str);
+ render_node(ar, node->data.return_expr.expr);
+ break;
+ }
case NodeTypeDefer:
- zig_panic("TODO");
+ {
+ // Print the defer keyword variant followed by a space, then the operand.
+ const char *defer_str = defer_string(node->data.defer.kind);
+ fprintf(ar->f, "%s ", defer_str);
+ // Read the operand through the defer member, matching the kind lookup
+ // above, rather than through return_expr, which belongs to a different
+ // node type.
+ render_node(ar, node->data.defer.expr);
+ break;
+ }
case NodeTypeVariableDeclaration:
{
const char *pub_str = visib_mod_string(node->data.variable_declaration.top_level_decl.visib_mod);
const char *extern_str = extern_string(node->data.variable_declaration.is_extern);
const char *const_or_var = const_or_var_string(node->data.variable_declaration.is_const);
fprintf(ar->f, "%s%s%s ", pub_str, extern_str, const_or_var);
- print_symbol(ar, &node->data.variable_declaration.symbol);
+ print_symbol(ar, node->data.variable_declaration.symbol);
if (node->data.variable_declaration.type) {
fprintf(ar->f, ": ");
@@ -446,7 +479,7 @@ static void render_node(AstRender *ar, AstNode *node) {
case NodeTypeTypeDecl:
{
const char *pub_str = visib_mod_string(node->data.type_decl.top_level_decl.visib_mod);
- const char *var_name = buf_ptr(&node->data.type_decl.symbol);
+ const char *var_name = buf_ptr(node->data.type_decl.symbol);
fprintf(ar->f, "%stype %s = ", pub_str, var_name);
render_node(ar, node->data.type_decl.child_type);
break;
@@ -463,12 +496,15 @@ static void render_node(AstRender *ar, AstNode *node) {
case NodeTypeUnwrapErrorExpr:
zig_panic("TODO");
case NodeTypeNumberLiteral:
- switch (node->data.number_literal.kind) {
- case NumLitUInt:
- fprintf(ar->f, "%" PRIu64, node->data.number_literal.data.x_uint);
+ switch (node->data.number_literal.bignum->kind) {
+ case BigNumKindInt:
+ {
+ const char *negative_str = node->data.number_literal.bignum->is_negative ? "-" : "";
+ fprintf(ar->f, "%s%llu", negative_str, node->data.number_literal.bignum->data.x_uint);
+ }
break;
- case NumLitFloat:
- fprintf(ar->f, "%f", node->data.number_literal.data.x_float);
+ case BigNumKindFloat:
+ fprintf(ar->f, "%f", node->data.number_literal.bignum->data.x_float);
break;
}
break;
@@ -478,7 +514,7 @@ static void render_node(AstRender *ar, AstNode *node) {
fprintf(ar->f, "c");
}
Buf tmp_buf = BUF_INIT;
- string_literal_escape(&node->data.string_literal.buf, &tmp_buf);
+ string_literal_escape(node->data.string_literal.buf, &tmp_buf);
fprintf(ar->f, "\"%s\"", buf_ptr(&tmp_buf));
}
break;
@@ -498,7 +534,7 @@ static void render_node(AstRender *ar, AstNode *node) {
if (override_type) {
fprintf(ar->f, "%s", buf_ptr(&override_type->name));
} else {
- fprintf(ar->f, "%s", buf_ptr(&node->data.symbol_expr.symbol));
+ print_symbol(ar, node->data.symbol_expr.symbol);
}
}
break;
@@ -513,10 +549,14 @@ static void render_node(AstRender *ar, AstNode *node) {
case NodeTypeFnCallExpr:
if (node->data.fn_call_expr.is_builtin) {
fprintf(ar->f, "@");
+ } else {
+ fprintf(ar->f, "(");
}
- fprintf(ar->f, "(");
render_node(ar, node->data.fn_call_expr.fn_ref_expr);
- fprintf(ar->f, ")(");
+ if (!node->data.fn_call_expr.is_builtin) {
+ fprintf(ar->f, ")");
+ }
+ fprintf(ar->f, "(");
for (int i = 0; i < node->data.fn_call_expr.params.length; i += 1) {
AstNode *param = node->data.fn_call_expr.params.at(i);
if (i != 0) {
@@ -537,7 +577,7 @@ static void render_node(AstRender *ar, AstNode *node) {
case NodeTypeFieldAccessExpr:
{
AstNode *lhs = node->data.field_access_expr.struct_expr;
- Buf *rhs = &node->data.field_access_expr.field_name;
+ Buf *rhs = node->data.field_access_expr.field_name;
render_node(ar, lhs);
fprintf(ar->f, ".");
print_symbol(ar, rhs);
@@ -577,7 +617,7 @@ static void render_node(AstRender *ar, AstNode *node) {
zig_panic("TODO");
case NodeTypeContainerDecl:
{
- const char *struct_name = buf_ptr(&node->data.struct_decl.name);
+ const char *struct_name = buf_ptr(node->data.struct_decl.name);
const char *pub_str = visib_mod_string(node->data.struct_decl.top_level_decl.visib_mod);
const char *container_str = container_string(node->data.struct_decl.kind);
fprintf(ar->f, "%s%s %s {\n", pub_str, container_str, struct_name);
@@ -586,7 +626,7 @@ static void render_node(AstRender *ar, AstNode *node) {
AstNode *field_node = node->data.struct_decl.fields.at(field_i);
assert(field_node->type == NodeTypeStructField);
print_indent(ar);
- print_symbol(ar, &field_node->data.struct_field.name);
+ print_symbol(ar, field_node->data.struct_field.name);
if (!is_node_void(field_node->data.struct_field.type)) {
fprintf(ar->f, ": ");
render_node(ar, field_node->data.struct_field.type);
src/bignum.cpp
@@ -6,6 +6,7 @@
*/
#include "bignum.hpp"
+#include "buffer.hpp"
#include <assert.h>
#include <math.h>
@@ -41,6 +42,10 @@ void bignum_init_signed(BigNum *dest, int64_t x) {
}
}
+// Initializes dest as a copy of src by copying sizeof(BigNum) bytes from src
+// into dest.
+void bignum_init_bignum(BigNum *dest, BigNum *src) {
+ memcpy(dest, src, sizeof(BigNum));
+}
+
bool bignum_fits_in_bits(BigNum *bn, int bit_count, bool is_signed) {
assert(bn->kind == BigNumKindInt);
@@ -343,3 +348,15 @@ bool bignum_cmp_gte(BigNum *op1, BigNum *op2) {
return true;
}
}
+
+// Adds scalar to bignum->data.x_uint in place.
+// Asserts that bignum is of integer kind and is not negative.
+// Returns the result of __builtin_uaddll_overflow, i.e. true when the
+// addition overflowed and false otherwise.
+bool bignum_increment_by_scalar(BigNum *bignum, uint64_t scalar) {
+ assert(bignum->kind == BigNumKindInt);
+ assert(!bignum->is_negative);
+ return __builtin_uaddll_overflow(bignum->data.x_uint, scalar, &bignum->data.x_uint);
+}
+
+// Multiplies bignum->data.x_uint by scalar in place.
+// Asserts that bignum is of integer kind and is not negative.
+// Returns the result of __builtin_umulll_overflow, i.e. true when the
+// multiplication overflowed and false otherwise.
+bool bignum_multiply_by_scalar(BigNum *bignum, uint64_t scalar) {
+ assert(bignum->kind == BigNumKindInt);
+ assert(!bignum->is_negative);
+ return __builtin_umulll_overflow(bignum->data.x_uint, scalar, &bignum->data.x_uint);
+}
src/bignum.hpp
@@ -5,7 +5,8 @@
* See http://opensource.org/licenses/MIT
*/
-#include "buffer.hpp"
+#ifndef ZIG_BIGNUM_HPP
+#define ZIG_BIGNUM_HPP
#include <stdint.h>
@@ -26,6 +27,7 @@ struct BigNum {
void bignum_init_float(BigNum *dest, double x);
void bignum_init_unsigned(BigNum *dest, uint64_t x);
void bignum_init_signed(BigNum *dest, int64_t x);
+void bignum_init_bignum(BigNum *dest, BigNum *src);
bool bignum_fits_in_bits(BigNum *bn, int bit_count, bool is_signed);
uint64_t bignum_to_twos_complement(BigNum *bn);
@@ -57,4 +59,11 @@ bool bignum_cmp_gt(BigNum *op1, BigNum *op2);
bool bignum_cmp_lte(BigNum *op1, BigNum *op2);
bool bignum_cmp_gte(BigNum *op1, BigNum *op2);
+// helper functions
+bool bignum_increment_by_scalar(BigNum *bignum, uint64_t scalar);
+bool bignum_multiply_by_scalar(BigNum *bignum, uint64_t scalar);
+
+struct Buf;
Buf *bignum_to_buf(BigNum *bn);
+
+#endif
src/codegen.cpp
@@ -1431,7 +1431,7 @@ static LLVMValueRef gen_field_access_expr(CodeGen *g, AstNode *node, bool is_lva
TypeTableEntry *struct_type = get_expr_type(struct_expr);
if (struct_type->id == TypeTableEntryIdArray) {
- Buf *name = &node->data.field_access_expr.field_name;
+ Buf *name = node->data.field_access_expr.field_name;
assert(buf_eql_str(name, "len"));
return LLVMConstInt(g->builtin_types.entry_usize->type_ref,
struct_type->data.array.len, false);
@@ -2726,18 +2726,18 @@ static LLVMValueRef gen_block(CodeGen *g, AstNode *block_node, TypeTableEntry *i
}
static int find_asm_index(CodeGen *g, AstNode *node, AsmToken *tok) {
- const char *ptr = buf_ptr(&node->data.asm_expr.asm_template) + tok->start + 2;
+ const char *ptr = buf_ptr(node->data.asm_expr.asm_template) + tok->start + 2;
int len = tok->end - tok->start - 2;
int result = 0;
for (int i = 0; i < node->data.asm_expr.output_list.length; i += 1, result += 1) {
AsmOutput *asm_output = node->data.asm_expr.output_list.at(i);
- if (buf_eql_mem(&asm_output->asm_symbolic_name, ptr, len)) {
+ if (buf_eql_mem(asm_output->asm_symbolic_name, ptr, len)) {
return result;
}
}
for (int i = 0; i < node->data.asm_expr.input_list.length; i += 1, result += 1) {
AsmInput *asm_input = node->data.asm_expr.input_list.at(i);
- if (buf_eql_mem(&asm_input->asm_symbolic_name, ptr, len)) {
+ if (buf_eql_mem(asm_input->asm_symbolic_name, ptr, len)) {
return result;
}
}
@@ -2749,7 +2749,7 @@ static LLVMValueRef gen_asm_expr(CodeGen *g, AstNode *node) {
AstNodeAsmExpr *asm_expr = &node->data.asm_expr;
- Buf *src_template = &asm_expr->asm_template;
+ Buf *src_template = asm_expr->asm_template;
Buf llvm_template = BUF_INIT;
buf_resize(&llvm_template, 0);
@@ -2796,11 +2796,11 @@ static LLVMValueRef gen_asm_expr(CodeGen *g, AstNode *node) {
for (int i = 0; i < asm_expr->output_list.length; i += 1, total_index += 1) {
AsmOutput *asm_output = asm_expr->output_list.at(i);
bool is_return = (asm_output->return_type != nullptr);
- assert(*buf_ptr(&asm_output->constraint) == '=');
+ assert(*buf_ptr(asm_output->constraint) == '=');
if (is_return) {
- buf_appendf(&constraint_buf, "=%s", buf_ptr(&asm_output->constraint) + 1);
+ buf_appendf(&constraint_buf, "=%s", buf_ptr(asm_output->constraint) + 1);
} else {
- buf_appendf(&constraint_buf, "=*%s", buf_ptr(&asm_output->constraint) + 1);
+ buf_appendf(&constraint_buf, "=*%s", buf_ptr(asm_output->constraint) + 1);
}
if (total_index + 1 < total_constraint_count) {
buf_append_char(&constraint_buf, ',');
@@ -2816,7 +2816,7 @@ static LLVMValueRef gen_asm_expr(CodeGen *g, AstNode *node) {
}
for (int i = 0; i < asm_expr->input_list.length; i += 1, total_index += 1, param_index += 1) {
AsmInput *asm_input = asm_expr->input_list.at(i);
- buf_append_buf(&constraint_buf, &asm_input->constraint);
+ buf_append_buf(&constraint_buf, asm_input->constraint);
if (total_index + 1 < total_constraint_count) {
buf_append_char(&constraint_buf, ',');
}
@@ -2885,7 +2885,7 @@ static LLVMValueRef gen_container_init_expr(CodeGen *g, AstNode *node) {
if (type_struct_field->type_entry->id == TypeTableEntryIdVoid) {
continue;
}
- assert(buf_eql_buf(type_struct_field->name, &field_node->data.struct_val_field.name));
+ assert(buf_eql_buf(type_struct_field->name, field_node->data.struct_val_field.name));
set_debug_source_node(g, field_node);
LLVMValueRef field_ptr = LLVMBuildStructGEP(g->builder, tmp_struct_ptr, type_struct_field->gen_index, "");
@@ -3853,7 +3853,7 @@ static void generate_error_name_table(CodeGen *g) {
for (int i = 1; i < g->error_decls.length; i += 1) {
AstNode *error_decl_node = g->error_decls.at(i);
assert(error_decl_node->type == NodeTypeErrorValueDecl);
- Buf *name = &error_decl_node->data.error_value_decl.name;
+ Buf *name = error_decl_node->data.error_value_decl.name;
LLVMValueRef str_init = LLVMConstString(buf_ptr(name), buf_len(name), true);
LLVMValueRef str_global = LLVMAddGlobal(g->module, LLVMTypeOf(str_init), "");
@@ -3882,7 +3882,7 @@ static void build_label_blocks(CodeGen *g, FnTableEntry *fn) {
LLVMBasicBlockRef entry_block = LLVMAppendBasicBlock(fn->fn_value, "entry");
for (int i = 0; i < fn->all_labels.length; i += 1) {
LabelTableEntry *label = fn->all_labels.at(i);
- Buf *name = &label->decl_node->data.label.name;
+ Buf *name = label->decl_node->data.label.name;
label->basic_block = LLVMAppendBasicBlock(fn->fn_value, buf_ptr(name));
}
LLVMPositionBuilderAtEnd(g->builder, entry_block);
@@ -4951,7 +4951,7 @@ void codegen_generate_h_file(CodeGen *g) {
buf_appendf(&h_buf, "%s %s %s(",
buf_ptr(export_macro),
buf_ptr(&return_type_c),
- buf_ptr(&fn_proto->name));
+ buf_ptr(fn_proto->name));
Buf param_type_c = BUF_INIT;
if (fn_proto->params.length) {
@@ -4961,7 +4961,7 @@ void codegen_generate_h_file(CodeGen *g) {
to_c_type(g, param_type, ¶m_type_c);
buf_appendf(&h_buf, "%s %s",
buf_ptr(¶m_type_c),
- buf_ptr(¶m_decl_node->data.param_decl.name));
+ buf_ptr(param_decl_node->data.param_decl.name));
if (param_i < fn_proto->params.length - 1)
buf_appendf(&h_buf, ", ");
}
src/eval.cpp
@@ -427,7 +427,7 @@ static EvalVar *find_var(EvalFn *ef, Buf *name) {
static bool eval_symbol_expr(EvalFn *ef, AstNode *node, ConstExprValue *out_val) {
assert(node->type == NodeTypeSymbol);
- Buf *name = &node->data.symbol_expr.symbol;
+ Buf *name = node->data.symbol_expr.symbol;
EvalVar *var = find_var(ef, name);
assert(var);
@@ -924,7 +924,7 @@ static bool eval_field_access_expr(EvalFn *ef, AstNode *node, ConstExprValue *ou
TypeTableEntry *struct_type = get_resolved_expr(struct_expr)->type_entry;
if (struct_type->id == TypeTableEntryIdArray) {
- Buf *name = &node->data.field_access_expr.field_name;
+ Buf *name = node->data.field_access_expr.field_name;
assert(buf_eql_str(name, "len"));
zig_panic("TODO");
} else if (struct_type->id == TypeTableEntryIdStruct || (struct_type->id == TypeTableEntryIdPointer &&
@@ -971,7 +971,7 @@ static bool eval_for_expr(EvalFn *ef, AstNode *node, ConstExprValue *out_val) {
if (eval_expr(ef, array_node, &array_val)) return true;
assert(elem_node->type == NodeTypeSymbol);
- Buf *elem_var_name = &elem_node->data.symbol_expr.symbol;
+ Buf *elem_var_name = elem_node->data.symbol_expr.symbol;
if (node->data.for_expr.elem_is_ptr) {
zig_panic("TODO");
@@ -980,7 +980,7 @@ static bool eval_for_expr(EvalFn *ef, AstNode *node, ConstExprValue *out_val) {
Buf *index_var_name = nullptr;
if (index_node) {
assert(index_node->type == NodeTypeSymbol);
- index_var_name = &index_node->data.symbol_expr.symbol;
+ index_var_name = index_node->data.symbol_expr.symbol;
}
uint64_t it_index = 0;
@@ -1164,7 +1164,7 @@ static bool eval_var_decl_expr(EvalFn *ef, AstNode *node, ConstExprValue *out_va
my_scope->vars.add_one();
EvalVar *var = &my_scope->vars.last();
- var->name = &node->data.variable_declaration.symbol;
+ var->name = node->data.variable_declaration.symbol;
if (eval_expr(ef, node->data.variable_declaration.expr, &var->value)) return true;
@@ -1178,13 +1178,7 @@ static bool eval_number_literal_expr(EvalFn *ef, AstNode *node, ConstExprValue *
assert(!node->data.number_literal.overflow);
out_val->ok = true;
- if (node->data.number_literal.kind == NumLitUInt) {
- bignum_init_unsigned(&out_val->data.x_bignum, node->data.number_literal.data.x_uint);
- } else if (node->data.number_literal.kind == NumLitFloat) {
- bignum_init_float(&out_val->data.x_bignum, node->data.number_literal.data.x_float);
- } else {
- zig_unreachable();
- }
+ bignum_init_bignum(&out_val->data.x_bignum, node->data.number_literal.bignum);
return false;
}
@@ -1339,7 +1333,7 @@ static bool eval_fn_args(EvalFnRoot *efr, FnTableEntry *fn, ConstExprValue *args
root_scope->vars.add_one();
EvalVar *eval_var = &root_scope->vars.last();
- eval_var->name = &decl_param_node->data.param_decl.name;
+ eval_var->name = decl_param_node->data.param_decl.name;
eval_var->value = *src_const_val;
}
src/parseh.cpp
@@ -104,14 +104,14 @@ static AstNode *create_node(Context *c, NodeType type) {
+// Creates a NodeTypeSymbol node whose symbol is a Buf newly created from
+// type_name, and returns the node.
static AstNode *create_symbol_node(Context *c, const char *type_name) {
AstNode *node = create_node(c, NodeTypeSymbol);
- buf_init_from_str(&node->data.symbol_expr.symbol, type_name);
+ node->data.symbol_expr.symbol = buf_create_from_str(type_name);
return node;
}
+// Creates a NodeTypeFieldAccessExpr node for `lhs.rhs`: the struct expression
+// is a symbol node built from lhs and the field name is a Buf newly created
+// from rhs. normalize_parent_ptrs is called on the node before it is returned.
static AstNode *create_field_access_node(Context *c, const char *lhs, const char *rhs) {
AstNode *node = create_node(c, NodeTypeFieldAccessExpr);
node->data.field_access_expr.struct_expr = create_symbol_node(c, lhs);
- buf_init_from_str(&node->data.field_access_expr.field_name, rhs);
+ node->data.field_access_expr.field_name = buf_create_from_str(rhs);
normalize_parent_ptrs(node);
return node;
}
@@ -120,7 +120,7 @@ static AstNode *create_typed_var_decl_node(Context *c, bool is_const, const char
AstNode *type_node, AstNode *init_node)
{
AstNode *node = create_node(c, NodeTypeVariableDeclaration);
- buf_init_from_str(&node->data.variable_declaration.symbol, var_name);
+ node->data.variable_declaration.symbol = buf_create_from_str(var_name);
node->data.variable_declaration.is_const = is_const;
node->data.variable_declaration.top_level_decl.visib_mod = c->visib_mod;
node->data.variable_declaration.expr = init_node;
@@ -146,7 +146,7 @@ static AstNode *create_prefix_node(Context *c, PrefixOp op, AstNode *child_node)
static AstNode *create_struct_field_node(Context *c, const char *name, AstNode *type_node) {
assert(type_node);
AstNode *node = create_node(c, NodeTypeStructField);
- buf_init_from_str(&node->data.struct_field.name, name);
+ node->data.struct_field.name = buf_create_from_str(name);
node->data.struct_field.top_level_decl.visib_mod = VisibModPub;
node->data.struct_field.type = type_node;
@@ -157,7 +157,7 @@ static AstNode *create_struct_field_node(Context *c, const char *name, AstNode *
static AstNode *create_param_decl_node(Context *c, const char *name, AstNode *type_node, bool is_noalias) {
assert(type_node);
AstNode *node = create_node(c, NodeTypeParamDecl);
- buf_init_from_str(&node->data.param_decl.name, name);
+ node->data.param_decl.name = buf_create_from_str(name);
node->data.param_decl.type = type_node;
node->data.param_decl.is_noalias = is_noalias;
@@ -171,17 +171,18 @@ static AstNode *create_char_lit_node(Context *c, uint8_t value) {
return node;
}
+// Creates a NodeTypeStringLiteral node with its `c` flag set to true.
+// accepts ownership of buf
+// The pointer is stored in the node as-is; no copy of buf is made here.
static AstNode *create_str_lit_node(Context *c, Buf *buf) {
AstNode *node = create_node(c, NodeTypeStringLiteral);
- buf_init_from_buf(&node->data.string_literal.buf, buf);
+ node->data.string_literal.buf = buf;
node->data.string_literal.c = true;
return node;
}
+// Creates a NodeTypeNumberLiteral node whose value is a newly allocated
+// BigNum initialized from the double x via bignum_init_float.
+// NOTE(review): allocate_nonzero presumably does not zero the memory, so this
+// relies on bignum_init_float setting every field - confirm.
static AstNode *create_num_lit_float(Context *c, double x) {
AstNode *node = create_node(c, NodeTypeNumberLiteral);
- node->data.number_literal.kind = NumLitFloat;
- node->data.number_literal.data.x_float = x;
+ node->data.number_literal.bignum = allocate_nonzero<BigNum>(1);
+ bignum_init_float(node->data.number_literal.bignum, x);
return node;
}
@@ -193,8 +194,8 @@ static AstNode *create_num_lit_float_negative(Context *c, double x, bool negativ
+// Creates a NodeTypeNumberLiteral node whose value is a newly allocated
+// BigNum initialized from the unsigned integer x via bignum_init_unsigned.
+// NOTE(review): allocate_nonzero presumably does not zero the memory, so this
+// relies on bignum_init_unsigned setting every field - confirm.
static AstNode *create_num_lit_unsigned(Context *c, uint64_t x) {
AstNode *node = create_node(c, NodeTypeNumberLiteral);
- node->data.number_literal.kind = NumLitUInt;
- node->data.number_literal.data.x_uint = x;
+ node->data.number_literal.bignum = allocate_nonzero<BigNum>(1);
+ bignum_init_unsigned(node->data.number_literal.bignum, x);
return node;
}
@@ -221,7 +222,7 @@ static AstNode *create_num_lit_signed(Context *c, int64_t x) {
static AstNode *create_type_decl_node(Context *c, const char *name, AstNode *child_type_node) {
AstNode *node = create_node(c, NodeTypeTypeDecl);
- buf_init_from_str(&node->data.type_decl.symbol, name);
+ node->data.type_decl.symbol = buf_create_from_str(name);
node->data.type_decl.top_level_decl.visib_mod = c->visib_mod;
node->data.type_decl.child_type = child_type_node;
@@ -240,7 +241,7 @@ static AstNode *create_fn_proto_node(Context *c, Buf *name, TypeTableEntry *fn_t
AstNode *node = create_node(c, NodeTypeFnProto);
node->data.fn_proto.is_inline = true;
node->data.fn_proto.top_level_decl.visib_mod = c->visib_mod;
- buf_init_from_buf(&node->data.fn_proto.name, name);
+ node->data.fn_proto.name = name;
node->data.fn_proto.return_type = make_type_node(c, fn_type->data.fn.fn_type_id.return_type);
for (int i = 0; i < fn_type->data.fn.fn_type_id.param_count; i += 1) {
@@ -273,7 +274,7 @@ static AstNode *create_inline_fn_node(Context *c, Buf *fn_name, Buf *var_name, T
fn_call_node->data.fn_call_expr.fn_ref_expr = unwrap_node;
for (int i = 0; i < fn_type->data.fn.fn_type_id.param_count; i += 1) {
AstNode *decl_node = node->data.fn_def.fn_proto->data.fn_proto.params.at(i);
- Buf *param_name = &decl_node->data.param_decl.name;
+ Buf *param_name = decl_node->data.param_decl.name;
fn_call_node->data.fn_call_expr.params.append(create_symbol_node(c, buf_ptr(param_name)));
}
@@ -686,10 +687,9 @@ static TypeTableEntry *resolve_qual_type(Context *c, QualType qt, const Decl *de
}
static void visit_fn_decl(Context *c, const FunctionDecl *fn_decl) {
- Buf fn_name = BUF_INIT;
- buf_init_from_str(&fn_name, decl_name(fn_decl));
+ Buf *fn_name = buf_create_from_str(decl_name(fn_decl));
- if (c->fn_table.maybe_get(&fn_name)) {
+ if (c->fn_table.maybe_get(fn_name)) {
// we already saw this function
return;
}
@@ -697,14 +697,14 @@ static void visit_fn_decl(Context *c, const FunctionDecl *fn_decl) {
TypeTableEntry *fn_type = resolve_qual_type(c, fn_decl->getType(), fn_decl);
if (fn_type->id == TypeTableEntryIdInvalid) {
- emit_warning(c, fn_decl, "ignoring function '%s' - unable to resolve type", buf_ptr(&fn_name));
+ emit_warning(c, fn_decl, "ignoring function '%s' - unable to resolve type", buf_ptr(fn_name));
return;
}
assert(fn_type->id == TypeTableEntryIdFn);
AstNode *node = create_node(c, NodeTypeFnProto);
- buf_init_from_buf(&node->data.fn_proto.name, &fn_name);
+ node->data.fn_proto.name = fn_name;
node->data.fn_proto.is_extern = fn_type->data.fn.fn_type_id.is_extern;
node->data.fn_proto.top_level_decl.visib_mod = c->visib_mod;
@@ -731,7 +731,7 @@ static void visit_fn_decl(Context *c, const FunctionDecl *fn_decl) {
normalize_parent_ptrs(node);
- c->fn_table.put(buf_create_from_buf(&fn_name), true);
+ c->fn_table.put(buf_create_from_buf(fn_name), true);
c->root->data.root.top_level_decls.append(node);
}
@@ -937,7 +937,7 @@ static void visit_enum_decl(Context *c, const EnumDecl *enum_decl) {
if (enum_type->data.enumeration.complete) {
// now create top level decl for the type
AstNode *enum_node = create_node(c, NodeTypeContainerDecl);
- buf_init_from_buf(&enum_node->data.struct_decl.name, &enum_type->name);
+ enum_node->data.struct_decl.name = &enum_type->name;
enum_node->data.struct_decl.kind = ContainerKindEnum;
enum_node->data.struct_decl.top_level_decl.visib_mod = VisibModExport;
enum_node->data.struct_decl.type_entry = enum_type;
@@ -1114,7 +1114,7 @@ static void visit_record_decl(Context *c, const RecordDecl *record_decl) {
if (struct_type->data.structure.complete) {
// now create a top level decl node for the type
AstNode *struct_node = create_node(c, NodeTypeContainerDecl);
- buf_init_from_buf(&struct_node->data.struct_decl.name, &struct_type->name);
+ struct_node->data.struct_decl.name = &struct_type->name;
struct_node->data.struct_decl.kind = ContainerKindStruct;
struct_node->data.struct_decl.top_level_decl.visib_mod = VisibModExport;
struct_node->data.struct_decl.type_entry = struct_type;
@@ -1284,7 +1284,7 @@ static void render_aliases(Context *c) {
for (int i = 0; i < c->aliases.length; i += 1) {
AstNode *alias_node = c->aliases.at(i);
assert(alias_node->type == NodeTypeVariableDeclaration);
- Buf *name = &alias_node->data.variable_declaration.symbol;
+ Buf *name = alias_node->data.variable_declaration.symbol;
if (name_exists(c, name)) {
continue;
}
@@ -1327,7 +1327,7 @@ static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *ch
case CTokIdStrLit:
if (is_last && is_first) {
AstNode *var_node = create_var_decl_node(c, buf_ptr(name),
- create_str_lit_node(c, &tok->data.str_lit));
+ create_str_lit_node(c, buf_create_from_buf(&tok->data.str_lit)));
c->macro_table.put(name, var_node);
}
return;
src/parser.cpp
@@ -21,6 +21,9 @@ struct ParseContext {
ImportTableEntry *owner;
ErrColor err_color;
uint32_t *next_node_index;
+ // These buffers are used frequently so we preallocate them once here.
+ Buf *void_buf;
+ Buf *empty_buf;
};
__attribute__ ((format (printf, 4, 5)))
@@ -29,7 +32,9 @@ static void ast_asm_error(ParseContext *pc, AstNode *node, int offset, const cha
assert(node->type == NodeTypeAsmExpr);
- SrcPos pos = node->data.asm_expr.offset_map.at(offset);
+ // TODO calculate or otherwise keep track of originating line/column number for strings
+ //SrcPos pos = node->data.asm_expr.offset_map.at(offset);
+ SrcPos pos = { node->line, node->column };
va_list ap;
va_start(ap, format);
@@ -83,12 +88,12 @@ static AstNode *ast_create_node(ParseContext *pc, NodeType type, Token *first_to
+// Creates a NodeTypeSymbol node at `token` whose symbol points at the parse
+// context's shared void_buf instead of a per-node buffer.
+// NOTE(review): every node built here aliases the same Buf, so this assumes
+// nothing modifies a symbol buffer in place - confirm.
static AstNode *ast_create_void_type_node(ParseContext *pc, Token *token) {
AstNode *node = ast_create_node(pc, NodeTypeSymbol, token);
- buf_init_from_str(&node->data.symbol_expr.symbol, "void");
+ node->data.symbol_expr.symbol = pc->void_buf;
return node;
}
static void parse_asm_template(ParseContext *pc, AstNode *node) {
- Buf *asm_template = &node->data.asm_expr.asm_template;
+ Buf *asm_template = node->data.asm_expr.asm_template;
enum State {
StateStart,
@@ -170,514 +175,29 @@ static void parse_asm_template(ParseContext *pc, AstNode *node) {
}
}
-static uint8_t parse_char_literal(ParseContext *pc, Token *token) {
- // skip the single quotes at beginning and end
- // convert escape sequences
- bool escape = false;
- int return_count = 0;
- uint8_t return_value;
- for (int i = token->start_pos + 1; i < token->end_pos - 1; i += 1) {
- uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
- if (escape) {
- switch (c) {
- case '\\':
- return_value = '\\';
- return_count += 1;
- break;
- case 'r':
- return_value = '\r';
- return_count += 1;
- break;
- case 'n':
- return_value = '\n';
- return_count += 1;
- break;
- case 't':
- return_value = '\t';
- return_count += 1;
- break;
- case '\'':
- return_value = '\'';
- return_count += 1;
- break;
- default:
- ast_error(pc, token, "invalid escape character");
- }
- escape = false;
- } else if (c == '\\') {
- escape = true;
- } else {
- return_value = c;
- return_count += 1;
- }
- }
- if (return_count == 0) {
- ast_error(pc, token, "character literal too short");
- } else if (return_count > 1) {
- ast_error(pc, token, "character literal too long");
- }
- return return_value;
-}
-
-static uint32_t get_hex_digit(uint8_t c) {
- switch (c) {
- case '0': return 0;
- case '1': return 1;
- case '2': return 2;
- case '3': return 3;
- case '4': return 4;
- case '5': return 5;
- case '6': return 6;
- case '7': return 7;
- case '8': return 8;
- case '9': return 9;
-
- case 'a':
- case 'A':
- return 10;
- case 'b':
- case 'B':
- return 11;
- case 'c':
- case 'C':
- return 12;
- case 'd':
- case 'D':
- return 13;
- case 'e':
- case 'E':
- return 14;
- case 'f':
- case 'F':
- return 15;
- default:
- return UINT32_MAX;
- }
-}
-
-static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool *out_c_str,
- ZigList<SrcPos> *offset_map)
-{
- if (token->raw_string_start > 0) {
- uint8_t c1 = *((uint8_t*)buf_ptr(pc->buf) + token->start_pos);
- uint8_t c2 = *((uint8_t*)buf_ptr(pc->buf) + token->start_pos + 1);
- assert(c1 == 'r');
- if (out_c_str) {
- *out_c_str = (c2 == 'c');
- }
- const char *str = buf_ptr(pc->buf) + token->raw_string_start;
- buf_init_from_mem(buf, str, token->raw_string_end - token->raw_string_start);
- if (offset_map) {
- SrcPos pos = {token->start_line, token->start_column};
- for (int i = token->start_pos; i < token->raw_string_start; i += 1) {
- uint8_t c = buf_ptr(pc->buf)[i];
- if (c == '\n') {
- pos.line += 1;
- pos.column = 0;
- } else {
- pos.column += 1;
- }
- }
- for (int i = token->raw_string_start; i < token->raw_string_end; i += 1) {
- offset_map->append(pos);
-
- uint8_t c = buf_ptr(pc->buf)[i];
- if (c == '\n') {
- pos.line += 1;
- pos.column = 0;
- } else {
- pos.column += 1;
- }
- }
- }
- return;
- }
-
- // skip the double quotes at beginning and end
- // convert escape sequences
- // detect c string literal
-
- enum State {
- StatePre,
- StateSkipQuot,
- StateStart,
- StateEscape,
- StateHex1,
- StateHex2,
- StateUnicode,
- };
-
- buf_resize(buf, 0);
-
- int unicode_index;
- int unicode_end;
-
- State state = StatePre;
- SrcPos pos = {token->start_line, token->start_column};
- uint32_t hex_value = 0;
- for (int i = token->start_pos; i < token->end_pos - 1; i += 1) {
- uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
-
- switch (state) {
- case StatePre:
- switch (c) {
- case '@':
- state = StateSkipQuot;
- break;
- case 'c':
- if (out_c_str) {
- *out_c_str = true;
- } else {
- ast_error(pc, token, "C string literal not allowed here");
- }
- state = StateSkipQuot;
- break;
- case '"':
- state = StateStart;
- break;
- default:
- ast_error(pc, token, "invalid string character");
- }
- break;
- case StateSkipQuot:
- state = StateStart;
- break;
- case StateStart:
- if (c == '\\') {
- state = StateEscape;
- } else {
- buf_append_char(buf, c);
- if (offset_map) offset_map->append(pos);
- }
- break;
- case StateEscape:
- switch (c) {
- case '\\':
- buf_append_char(buf, '\\');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case 'r':
- buf_append_char(buf, '\r');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case 'n':
- buf_append_char(buf, '\n');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case 't':
- buf_append_char(buf, '\t');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case '"':
- buf_append_char(buf, '"');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case '\'':
- buf_append_char(buf, '\'');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case 'x':
- state = StateHex1;
- break;
- case 'u':
- state = StateUnicode;
- unicode_index = 0;
- unicode_end = 4;
- hex_value = 0;
- break;
- case 'U':
- state = StateUnicode;
- unicode_index = 0;
- unicode_end = 6;
- hex_value = 0;
- break;
- default:
- ast_error(pc, token, "invalid escape character");
- }
- break;
- case StateHex1:
- {
- uint32_t hex_digit = get_hex_digit(c);
- if (hex_digit == UINT32_MAX) {
- ast_error(pc, token, "invalid hex digit: '%c'", c);
- }
- hex_value = hex_digit * 16;
- state = StateHex2;
- break;
- }
- case StateHex2:
- {
- uint32_t hex_digit = get_hex_digit(c);
- if (hex_digit == UINT32_MAX) {
- ast_error(pc, token, "invalid hex digit: '%c'", c);
- }
- hex_value += hex_digit;
- assert(hex_value >= 0 && hex_value <= 255);
- buf_append_char(buf, hex_value);
- state = StateStart;
- break;
- }
- case StateUnicode:
- {
- uint32_t hex_digit = get_hex_digit(c);
- if (hex_digit == UINT32_MAX) {
- ast_error(pc, token, "invalid hex digit: '%c'", c);
- }
- hex_value *= 16;
- hex_value += hex_digit;
- unicode_index += 1;
- if (unicode_index >= unicode_end) {
- if (hex_value <= 0x7f) {
- // 00000000 00000000 00000000 0xxxxxxx
- buf_append_char(buf, hex_value);
- } else if (hex_value <= 0x7ff) {
- // 00000000 00000000 00000xxx xx000000
- buf_append_char(buf, (unsigned char)(0xc0 | (hex_value >> 6)));
- // 00000000 00000000 00000000 00xxxxxx
- buf_append_char(buf, (unsigned char)(0x80 | (hex_value & 0x3f)));
- } else if (hex_value <= 0xffff) {
- // 00000000 00000000 xxxx0000 00000000
- buf_append_char(buf, (unsigned char)(0xe0 | (hex_value >> 12)));
- // 00000000 00000000 0000xxxx xx000000
- buf_append_char(buf, (unsigned char)(0x80 | ((hex_value >> 6) & 0x3f)));
- // 00000000 00000000 00000000 00xxxxxx
- buf_append_char(buf, (unsigned char)(0x80 | (hex_value & 0x3f)));
- } else if (hex_value <= 0x10ffff) {
- // 00000000 000xxx00 00000000 00000000
- buf_append_char(buf, (unsigned char)(0xf0 | (hex_value >> 18)));
- // 00000000 000000xx xxxx0000 00000000
- buf_append_char(buf, (unsigned char)(0x80 | ((hex_value >> 12) & 0x3f)));
- // 00000000 00000000 0000xxxx xx000000
- buf_append_char(buf, (unsigned char)(0x80 | ((hex_value >> 6) & 0x3f)));
- // 00000000 00000000 00000000 00xxxxxx
- buf_append_char(buf, (unsigned char)(0x80 | (hex_value & 0x3f)));
- } else {
- ast_error(pc, token, "unicode value out of range: %x", hex_value);
- }
- state = StateStart;
- }
- break;
- }
- }
- if (c == '\n') {
- pos.line += 1;
- pos.column = 0;
- } else {
- pos.column += 1;
- }
- }
- assert(state == StateStart);
- if (offset_map) offset_map->append(pos);
+static Buf *token_buf(Token *token) { // Borrow the text buffer carried by a string-literal or symbol token.
+ assert(token->id == TokenIdStringLiteral || token->id == TokenIdSymbol); // any other token id trips this assert
+ return &token->data.str_lit.str; // address of the Buf stored inside the token; no copy is made
}
-static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
- uint8_t *first_char = (uint8_t *)buf_ptr(pc->buf) + token->start_pos;
- bool at_sign = *first_char == '@';
- if (at_sign) {
- parse_string_literal(pc, token, buf, nullptr, nullptr);
- } else {
- buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos);
- }
+static BigNum *token_bignum(Token *token) { // Borrow the numeric value carried by a number-literal token.
+ assert(token->id == TokenIdNumberLiteral); // only number-literal tokens are accepted
+ return &token->data.num_lit.bignum; // address of the BigNum stored inside the token; no copy is made
}
-
-static unsigned long long parse_int_digits(ParseContext *pc, int digits_start, int digits_end, int radix,
- int skip_index, bool *overflow)
-{
- unsigned long long x = 0;
-
- for (int i = digits_start; i < digits_end; i++) {
- if (i == skip_index)
- continue;
- uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
- unsigned long long digit = get_digit_value(c);
-
- // x *= radix;
- if (__builtin_umulll_overflow(x, radix, &x)) {
- *overflow = true;
- return 0;
- }
-
- // x += digit
- if (__builtin_uaddll_overflow(x, digit, &x)) {
- *overflow = true;
- return 0;
- }
- }
- return x;
+static uint8_t token_char_lit(Token *token) { // Read the byte value carried by a character-literal token.
+ assert(token->id == TokenIdCharLiteral); // only character-literal tokens are accepted
+ return token->data.char_lit.c; // returned by value
}
-static void parse_number_literal(ParseContext *pc, Token *token, AstNodeNumberLiteral *num_lit) {
- assert(token->id == TokenIdNumberLiteral);
-
- int whole_number_start = token->start_pos;
- if (token->radix != 10) {
- // skip the "0x"
- whole_number_start += 2;
- }
-
- int whole_number_end = token->decimal_point_pos;
- if (whole_number_end <= whole_number_start) {
- // TODO: error for empty whole number part
- num_lit->overflow = true;
- return;
- }
-
- if (token->decimal_point_pos == token->end_pos) {
- // integer
- unsigned long long whole_number = parse_int_digits(pc, whole_number_start, whole_number_end,
- token->radix, -1, &num_lit->overflow);
- if (num_lit->overflow) return;
-
- num_lit->data.x_uint = whole_number;
- num_lit->kind = NumLitUInt;
+static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
+ if (token->id == TokenIdSymbol) {
+ buf_init_from_buf(buf, token_buf(token));
} else {
- // float
-
- if (token->radix == 10) {
- // use a third-party base-10 float parser
- char *str_begin = buf_ptr(pc->buf) + whole_number_start;
- char *str_end;
- errno = 0;
- double x = strtod(str_begin, &str_end);
- if (errno) {
- // TODO: forward error to user
- num_lit->overflow = true;
- return;
- }
- assert(str_end == buf_ptr(pc->buf) + token->end_pos);
- num_lit->data.x_float = x;
- num_lit->kind = NumLitFloat;
- return;
- }
-
- if (token->decimal_point_pos < token->exponent_marker_pos) {
- // fraction
- int fraction_start = token->decimal_point_pos + 1;
- int fraction_end = token->exponent_marker_pos;
- if (fraction_end <= fraction_start) {
- // TODO: error for empty fraction part
- num_lit->overflow = true;
- return;
- }
- }
-
- // trim leading and trailing zeros in the significand digit sequence
- int significand_start = whole_number_start;
- for (; significand_start < token->exponent_marker_pos; significand_start++) {
- if (significand_start == token->decimal_point_pos)
- continue;
- uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + significand_start);
- if (c != '0')
- break;
- }
- int significand_end = token->exponent_marker_pos;
- for (; significand_end - 1 > significand_start; significand_end--) {
- if (significand_end - 1 <= token->decimal_point_pos) {
- significand_end = token->decimal_point_pos;
- break;
- }
- uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + significand_end - 1);
- if (c != '0')
- break;
- }
-
- unsigned long long significand_as_int = parse_int_digits(pc, significand_start, significand_end,
- token->radix, token->decimal_point_pos, &num_lit->overflow);
- if (num_lit->overflow) return;
-
- int exponent_in_bin_or_dec = 0;
- if (significand_end > token->decimal_point_pos) {
- exponent_in_bin_or_dec = token->decimal_point_pos + 1 - significand_end;
- if (token->radix == 2) {
- // already good
- } else if (token->radix == 8) {
- exponent_in_bin_or_dec *= 3;
- } else if (token->radix == 10) {
- // already good
- } else if (token->radix == 16) {
- exponent_in_bin_or_dec *= 4;
- } else zig_unreachable();
- }
-
- if (token->exponent_marker_pos < token->end_pos) {
- // exponent
- int exponent_start = token->exponent_marker_pos + 1;
- int exponent_end = token->end_pos;
- if (exponent_end <= exponent_start) {
- // TODO: error for empty exponent part
- num_lit->overflow = true;
- return;
- }
- bool is_exponent_negative = false;
- uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + exponent_start);
- if (c == '+') {
- exponent_start += 1;
- } else if (c == '-') {
- exponent_start += 1;
- is_exponent_negative = true;
- }
-
- if (exponent_end <= exponent_start) {
- // TODO: error for empty exponent part
- num_lit->overflow = true;
- return;
- }
-
- unsigned long long specified_exponent = parse_int_digits(pc, exponent_start, exponent_end,
- 10, -1, &num_lit->overflow);
- // TODO: this check is a little silly
- if (specified_exponent >= LLONG_MAX) {
- num_lit->overflow = true;
- return;
- }
-
- if (is_exponent_negative) {
- exponent_in_bin_or_dec -= specified_exponent;
- } else {
- exponent_in_bin_or_dec += specified_exponent;
- }
- }
-
- uint64_t significand_bits;
- uint64_t exponent_bits;
- if (significand_as_int != 0) {
- // normalize the significand
- if (token->radix == 10) {
- zig_panic("TODO: decimal floats");
- } else {
- int significand_magnitude_in_bin = __builtin_clzll(1) - __builtin_clzll(significand_as_int);
- exponent_in_bin_or_dec += significand_magnitude_in_bin;
- if (!(-1023 <= exponent_in_bin_or_dec && exponent_in_bin_or_dec < 1023)) {
- num_lit->overflow = true;
- return;
- }
-
- // this should chop off exactly one 1 bit from the top.
- significand_bits = ((uint64_t)significand_as_int << (52 - significand_magnitude_in_bin)) & 0xfffffffffffffULL;
- exponent_bits = exponent_in_bin_or_dec + 1023;
- }
- } else {
- // 0 is all 0's
- significand_bits = 0;
- exponent_bits = 0;
- }
-
- uint64_t double_bits = (exponent_bits << 52) | significand_bits;
- double x = *(double *)&double_bits;
-
- num_lit->data.x_float = x;
- num_lit->kind = NumLitFloat;
+ buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos);
}
}
-
__attribute__ ((noreturn))
static void ast_invalid_token_error(ParseContext *pc, Token *token) {
Buf token_value = BUF_INIT;
@@ -723,7 +243,7 @@ static AstNode *ast_parse_directive(ParseContext *pc, int *token_index) {
Token *name_symbol = ast_eat_token(pc, token_index, TokenIdSymbol);
- ast_buf_from_token(pc, name_symbol, &node->data.directive.name);
+ node->data.directive.name = token_buf(name_symbol);
node->data.directive.expr = ast_parse_grouped_expr(pc, token_index, true);
@@ -769,12 +289,12 @@ static AstNode *ast_parse_param_decl(ParseContext *pc, int *token_index) {
token = &pc->tokens->at(*token_index);
}
- buf_resize(&node->data.param_decl.name, 0);
+ node->data.param_decl.name = pc->empty_buf;
if (token->id == TokenIdSymbol) {
Token *next_token = &pc->tokens->at(*token_index + 1);
if (next_token->id == TokenIdColon) {
- ast_buf_from_token(pc, token, &node->data.param_decl.name);
+ node->data.param_decl.name = token_buf(token);
*token_index += 2;
}
}
@@ -915,8 +435,8 @@ static void ast_parse_asm_input_item(ParseContext *pc, int *token_index, AstNode
ast_eat_token(pc, token_index, TokenIdRParen);
AsmInput *asm_input = allocate<AsmInput>(1);
- ast_buf_from_token(pc, alias, &asm_input->asm_symbolic_name);
- parse_string_literal(pc, constraint, &asm_input->constraint, nullptr, nullptr);
+ asm_input->asm_symbolic_name = token_buf(alias);
+ asm_input->constraint = token_buf(constraint);
asm_input->expr = expr_node;
node->data.asm_expr.input_list.append(asm_input);
}
@@ -938,7 +458,7 @@ static void ast_parse_asm_output_item(ParseContext *pc, int *token_index, AstNod
Token *token = &pc->tokens->at(*token_index);
*token_index += 1;
if (token->id == TokenIdSymbol) {
- ast_buf_from_token(pc, token, &asm_output->variable_name);
+ asm_output->variable_name = token_buf(token);
} else if (token->id == TokenIdArrow) {
asm_output->return_type = ast_parse_prefix_op_expr(pc, token_index, true);
} else {
@@ -947,8 +467,8 @@ static void ast_parse_asm_output_item(ParseContext *pc, int *token_index, AstNod
ast_eat_token(pc, token_index, TokenIdRParen);
- ast_buf_from_token(pc, alias, &asm_output->asm_symbolic_name);
- parse_string_literal(pc, constraint, &asm_output->constraint, nullptr, nullptr);
+ asm_output->asm_symbolic_name = token_buf(alias);
+ asm_output->constraint = token_buf(constraint);
node->data.asm_expr.output_list.append(asm_output);
}
@@ -968,8 +488,7 @@ static void ast_parse_asm_clobbers(ParseContext *pc, int *token_index, AstNode *
ast_expect_token(pc, string_tok, TokenIdStringLiteral);
*token_index += 1;
- Buf *clobber_buf = buf_alloc();
- parse_string_literal(pc, string_tok, clobber_buf, nullptr, nullptr);
+ Buf *clobber_buf = token_buf(string_tok);
node->data.asm_expr.clobber_list.append(clobber_buf);
Token *comma = &pc->tokens->at(*token_index);
@@ -1072,19 +591,14 @@ static AstNode *ast_parse_asm_expr(ParseContext *pc, int *token_index, bool mand
ast_expect_token(pc, lparen_tok, TokenIdLParen);
*token_index += 1;
- Token *template_tok = &pc->tokens->at(*token_index);
- ast_expect_token(pc, template_tok, TokenIdStringLiteral);
- *token_index += 1;
+ Token *template_tok = ast_eat_token(pc, token_index, TokenIdStringLiteral);
- parse_string_literal(pc, template_tok, &node->data.asm_expr.asm_template, nullptr,
- &node->data.asm_expr.offset_map);
+ node->data.asm_expr.asm_template = token_buf(template_tok);
parse_asm_template(pc, node);
ast_parse_asm_output(pc, token_index, node);
- Token *rparen_tok = &pc->tokens->at(*token_index);
- ast_expect_token(pc, rparen_tok, TokenIdRParen);
- *token_index += 1;
+ ast_eat_token(pc, token_index, TokenIdRParen);
normalize_parent_ptrs(node);
return node;
@@ -1099,17 +613,19 @@ static AstNode *ast_parse_primary_expr(ParseContext *pc, int *token_index, bool
if (token->id == TokenIdNumberLiteral) {
AstNode *node = ast_create_node(pc, NodeTypeNumberLiteral, token);
- parse_number_literal(pc, token, &node->data.number_literal);
+ node->data.number_literal.bignum = token_bignum(token);
+ node->data.number_literal.overflow = token->data.num_lit.overflow;
*token_index += 1;
return node;
} else if (token->id == TokenIdStringLiteral) {
AstNode *node = ast_create_node(pc, NodeTypeStringLiteral, token);
- parse_string_literal(pc, token, &node->data.string_literal.buf, &node->data.string_literal.c, nullptr);
+ node->data.string_literal.buf = token_buf(token);
+ node->data.string_literal.c = token->data.str_lit.is_c_str;
*token_index += 1;
return node;
} else if (token->id == TokenIdCharLiteral) {
AstNode *node = ast_create_node(pc, NodeTypeCharLiteral, token);
- node->data.char_literal.value = parse_char_literal(pc, token);
+ node->data.char_literal.value = token_char_lit(token);
*token_index += 1;
return node;
} else if (token->id == TokenIdKeywordTrue) {
@@ -1155,7 +671,7 @@ static AstNode *ast_parse_primary_expr(ParseContext *pc, int *token_index, bool
*token_index += 1;
Token *name_tok = ast_eat_token(pc, token_index, TokenIdSymbol);
AstNode *name_node = ast_create_node(pc, NodeTypeSymbol, name_tok);
- ast_buf_from_token(pc, name_tok, &name_node->data.symbol_expr.symbol);
+ name_node->data.symbol_expr.symbol = token_buf(name_tok);
AstNode *node = ast_create_node(pc, NodeTypeFnCallExpr, token);
node->data.fn_call_expr.fn_ref_expr = name_node;
@@ -1168,7 +684,7 @@ static AstNode *ast_parse_primary_expr(ParseContext *pc, int *token_index, bool
} else if (token->id == TokenIdSymbol) {
*token_index += 1;
AstNode *node = ast_create_node(pc, NodeTypeSymbol, token);
- ast_buf_from_token(pc, token, &node->data.symbol_expr.symbol);
+ node->data.symbol_expr.symbol = token_buf(token);
return node;
} else if (token->id == TokenIdKeywordGoto) {
AstNode *node = ast_create_node(pc, NodeTypeGoto, token);
@@ -1178,7 +694,7 @@ static AstNode *ast_parse_primary_expr(ParseContext *pc, int *token_index, bool
*token_index += 1;
ast_expect_token(pc, dest_symbol, TokenIdSymbol);
- ast_buf_from_token(pc, dest_symbol, &node->data.goto_expr.name);
+ node->data.goto_expr.name = token_buf(dest_symbol);
return node;
}
@@ -1243,7 +759,7 @@ static AstNode *ast_parse_curly_suffix_expr(ParseContext *pc, int *token_index,
AstNode *field_node = ast_create_node(pc, NodeTypeStructValueField, token);
- ast_buf_from_token(pc, field_name_tok, &field_node->data.struct_val_field.name);
+ field_node->data.struct_val_field.name = token_buf(field_name_tok);
field_node->data.struct_val_field.expr = ast_parse_expression(pc, token_index, true);
normalize_parent_ptrs(field_node);
@@ -1370,7 +886,7 @@ static AstNode *ast_parse_suffix_op_expr(ParseContext *pc, int *token_index, boo
AstNode *node = ast_create_node(pc, NodeTypeFieldAccessExpr, first_token);
node->data.field_access_expr.struct_expr = primary_expr;
- ast_buf_from_token(pc, name_token, &node->data.field_access_expr.field_name);
+ node->data.field_access_expr.field_name = token_buf(name_token);
normalize_parent_ptrs(node);
primary_expr = node;
@@ -1819,10 +1335,10 @@ static AstNode *ast_parse_if_expr(ParseContext *pc, int *token_index, bool manda
*token_index += 1;
node->data.if_var_expr.var_is_ptr = true;
Token *name_token = ast_eat_token(pc, token_index, TokenIdSymbol);
- ast_buf_from_token(pc, name_token, &node->data.if_var_expr.var_decl.symbol);
+ node->data.if_var_expr.var_decl.symbol = token_buf(name_token);
} else if (star_or_symbol->id == TokenIdSymbol) {
*token_index += 1;
- ast_buf_from_token(pc, star_or_symbol, &node->data.if_var_expr.var_decl.symbol);
+ node->data.if_var_expr.var_decl.symbol = token_buf(star_or_symbol);
} else {
ast_invalid_token_error(pc, star_or_symbol);
}
@@ -1974,7 +1490,7 @@ static AstNode *ast_parse_variable_declaration_expr(ParseContext *pc, int *token
node->data.variable_declaration.top_level_decl.directives = directives;
Token *name_token = ast_eat_token(pc, token_index, TokenIdSymbol);
- ast_buf_from_token(pc, name_token, &node->data.variable_declaration.symbol);
+ node->data.variable_declaration.symbol = token_buf(name_token);
Token *eq_or_colon = &pc->tokens->at(*token_index);
*token_index += 1;
@@ -2067,7 +1583,7 @@ static AstNode *ast_parse_while_expr(ParseContext *pc, int *token_index, bool ma
static AstNode *ast_parse_symbol(ParseContext *pc, int *token_index) {
Token *token = ast_eat_token(pc, token_index, TokenIdSymbol);
AstNode *node = ast_create_node(pc, NodeTypeSymbol, token);
- ast_buf_from_token(pc, token, &node->data.symbol_expr.symbol);
+ node->data.symbol_expr.symbol = token_buf(token);
return node;
}
@@ -2405,7 +1921,7 @@ static AstNode *ast_parse_label(ParseContext *pc, int *token_index, bool mandato
*token_index += 2;
AstNode *node = ast_create_node(pc, NodeTypeLabel, symbol_token);
- ast_buf_from_token(pc, symbol_token, &node->data.label.name);
+ node->data.label.name = token_buf(symbol_token);
return node;
}
@@ -2413,7 +1929,7 @@ static AstNode *ast_create_void_expr(ParseContext *pc, Token *token) {
AstNode *node = ast_create_node(pc, NodeTypeContainerInitExpr, token);
node->data.container_init_expr.type = ast_create_node(pc, NodeTypeSymbol, token);
node->data.container_init_expr.kind = ContainerInitKindArray;
- buf_init_from_str(&node->data.container_init_expr.type->data.symbol_expr.symbol, "void");
+ node->data.container_init_expr.type->data.symbol_expr.symbol = pc->void_buf;
normalize_parent_ptrs(node);
return node;
}
@@ -2508,9 +2024,9 @@ static AstNode *ast_parse_fn_proto(ParseContext *pc, int *token_index, bool mand
Token *fn_name = &pc->tokens->at(*token_index);
if (fn_name->id == TokenIdSymbol) {
*token_index += 1;
- ast_buf_from_token(pc, fn_name, &node->data.fn_proto.name);
+ node->data.fn_proto.name = token_buf(fn_name);
} else {
- buf_resize(&node->data.fn_proto.name, 0);
+ node->data.fn_proto.name = pc->empty_buf;
}
ast_parse_param_decl_list(pc, token_index, &node->data.fn_proto.params, &node->data.fn_proto.is_var_args);
@@ -2663,7 +2179,7 @@ static AstNode *ast_parse_container_decl(ParseContext *pc, int *token_index,
AstNode *node = ast_create_node(pc, NodeTypeContainerDecl, first_token);
node->data.struct_decl.kind = kind;
- ast_buf_from_token(pc, struct_name, &node->data.struct_decl.name);
+ node->data.struct_decl.name = token_buf(struct_name);
node->data.struct_decl.top_level_decl.visib_mod = visib_mod;
node->data.struct_decl.top_level_decl.directives = directives;
@@ -2729,8 +2245,7 @@ static AstNode *ast_parse_container_decl(ParseContext *pc, int *token_index,
field_node->data.struct_field.top_level_decl.visib_mod = visib_mod;
field_node->data.struct_field.top_level_decl.directives = directive_list;
-
- ast_buf_from_token(pc, token, &field_node->data.struct_field.name);
+ field_node->data.struct_field.name = token_buf(token);
Token *expr_or_comma = &pc->tokens->at(*token_index);
if (expr_or_comma->id == TokenIdComma) {
@@ -2772,7 +2287,7 @@ static AstNode *ast_parse_error_value_decl(ParseContext *pc, int *token_index,
AstNode *node = ast_create_node(pc, NodeTypeErrorValueDecl, first_token);
node->data.error_value_decl.top_level_decl.visib_mod = visib_mod;
node->data.error_value_decl.top_level_decl.directives = directives;
- ast_buf_from_token(pc, name_tok, &node->data.error_value_decl.name);
+ node->data.error_value_decl.name = token_buf(name_tok);
normalize_parent_ptrs(node);
return node;
@@ -2795,7 +2310,7 @@ static AstNode *ast_parse_type_decl(ParseContext *pc, int *token_index,
ast_eat_token(pc, token_index, TokenIdEq);
AstNode *node = ast_create_node(pc, NodeTypeTypeDecl, first_token);
- ast_buf_from_token(pc, name_tok, &node->data.type_decl.symbol);
+ node->data.type_decl.symbol = token_buf(name_tok);
node->data.type_decl.child_type = ast_parse_prefix_op_expr(pc, token_index, true);
ast_eat_token(pc, token_index, TokenIdSemicolon);
@@ -2901,6 +2416,8 @@ AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens, ImportTableEntry *owner,
ErrColor err_color, uint32_t *next_node_index)
{
ParseContext pc = {0};
+ pc.void_buf = buf_create_from_str("void");
+ pc.empty_buf = buf_create_from_str("");
pc.err_color = err_color;
pc.owner = owner;
pc.buf = buf;
src/tokenizer.cpp
@@ -11,6 +11,9 @@
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <errno.h>
#define WHITESPACE \
' ': \
@@ -30,7 +33,7 @@
'0': \
case DIGIT_NON_ZERO
-#define ALPHA_EXCEPT_CR \
+#define ALPHA_EXCEPT_C \
'a': \
case 'b': \
/*case 'c':*/ \
@@ -48,7 +51,7 @@
case 'o': \
case 'p': \
case 'q': \
- /*case 'r':*/ \
+ case 'r': \
case 's': \
case 't': \
case 'u': \
@@ -85,77 +88,93 @@
case 'Z'
#define ALPHA \
- ALPHA_EXCEPT_CR: \
- case 'c': \
- case 'r'
-
-#define SYMBOL_CHAR \
- SYMBOL_CHAR_EXCEPT_C: \
+ ALPHA_EXCEPT_C: \
case 'c'
-#define SYMBOL_CHAR_EXCEPT_C \
- ALPHA_EXCEPT_CR: \
- case 'r': \
+#define SYMBOL_CHAR \
+ ALPHA_EXCEPT_C: \
case DIGIT: \
- case '_'
+ case '_': \
+ case 'c'
#define SYMBOL_START \
ALPHA: \
case '_'
-#define HEX_DIGIT \
- 'a': \
- case 'b': \
- case 'c': \
- case 'd': \
- case 'e': \
- case 'f': \
- case 'A': \
- case 'B': \
- case 'C': \
- case 'D': \
- case 'E': \
- case 'F': \
- case DIGIT
+struct ZigKeyword {
+ const char *text;
+ TokenId token_id;
+};
-const char * zig_keywords[] = {
- "true", "false", "null", "fn", "return", "var", "const", "extern",
- "pub", "export", "use", "if", "else", "goto", "asm",
- "volatile", "struct", "enum", "while", "for", "continue", "break",
- "null", "noalias", "switch", "undefined", "error", "type", "inline",
- "defer", "union",
+static const struct ZigKeyword zig_keywords[] = {
+ {"asm", TokenIdKeywordAsm},
+ {"break", TokenIdKeywordBreak},
+ {"const", TokenIdKeywordConst},
+ {"continue", TokenIdKeywordContinue},
+ {"defer", TokenIdKeywordDefer},
+ {"else", TokenIdKeywordElse},
+ {"enum", TokenIdKeywordEnum},
+ {"error", TokenIdKeywordError},
+ {"export", TokenIdKeywordExport},
+ {"extern", TokenIdKeywordExtern},
+ {"false", TokenIdKeywordFalse},
+ {"fn", TokenIdKeywordFn},
+ {"for", TokenIdKeywordFor},
+ {"goto", TokenIdKeywordGoto},
+ {"if", TokenIdKeywordIf},
+ {"inline", TokenIdKeywordInline},
+ {"noalias", TokenIdKeywordNoAlias},
+ {"null", TokenIdKeywordNull},
+ {"pub", TokenIdKeywordPub},
+ {"return", TokenIdKeywordReturn},
+ {"struct", TokenIdKeywordStruct},
+ {"switch", TokenIdKeywordSwitch},
+ {"true", TokenIdKeywordTrue},
+ {"type", TokenIdKeywordType},
+ {"undefined", TokenIdKeywordUndefined},
+ {"union", TokenIdKeywordUnion},
+ {"use", TokenIdKeywordUse},
+ {"var", TokenIdKeywordVar},
+ {"volatile", TokenIdKeywordVolatile},
+ {"while", TokenIdKeywordWhile},
};
bool is_zig_keyword(Buf *buf) {
for (int i = 0; i < array_length(zig_keywords); i += 1) {
- if (buf_eql_str(buf, zig_keywords[i])) {
+ if (buf_eql_str(buf, zig_keywords[i].text)) {
return true;
}
}
return false;
}
+static bool is_symbol_char(uint8_t c) { // True when c matches one of the SYMBOL_CHAR case labels.
+ switch (c) {
+ case SYMBOL_CHAR: // macro expanding to a list of case labels: letters, digits and '_'
+ return true;
+ default:
+ return false;
+ }
+}
+
enum TokenizeState {
TokenizeStateStart,
TokenizeStateSymbol,
- TokenizeStateSymbolFirst,
- TokenizeStateSymbolFirstRaw,
- TokenizeStateFirstR,
+ TokenizeStateSymbolFirstC,
TokenizeStateZero, // "0", which might lead to "0x"
TokenizeStateNumber, // "123", "0x123"
+ TokenizeStateNumberDot,
TokenizeStateFloatFraction, // "123.456", "0x123.456"
TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p"
TokenizeStateFloatExponentNumber, // "123.456e-", "123.456e5", "123.456e5e-5"
TokenizeStateString,
TokenizeStateStringEscape,
- TokenizeStateRawString,
- TokenizeStateRawStringContents,
- TokenizeStateRawStringMaybeEnd,
TokenizeStateCharLiteral,
TokenizeStateCharLiteralEnd,
TokenizeStateSawStar,
TokenizeStateSawStarPercent,
TokenizeStateSawSlash,
+ TokenizeStateSawBackslash,
TokenizeStateSawPercent,
TokenizeStateSawPlus,
TokenizeStateSawPlusPercent,
@@ -167,6 +186,9 @@ enum TokenizeState {
TokenizeStateSawPipe,
TokenizeStateSawPipePipe,
TokenizeStateLineComment,
+ TokenizeStateLineString,
+ TokenizeStateLineStringEnd,
+ TokenizeStateLineStringContinue,
TokenizeStateSawEq,
TokenizeStateSawBang,
TokenizeStateSawLessThan,
@@ -178,7 +200,7 @@ enum TokenizeState {
TokenizeStateSawDotDot,
TokenizeStateSawQuestionMark,
TokenizeStateSawAtSign,
- TokenizeStateHex,
+ TokenizeStateCharCode,
TokenizeStateError,
};
@@ -192,10 +214,16 @@ struct Tokenize {
int column;
Token *cur_tok;
Tokenization *out;
- int raw_string_id_start;
- int raw_string_id_end;
- int raw_string_id_cmp_pos;
- int hex_chars_left;
+ uint32_t radix;
+ int32_t exp_add_amt;
+ bool is_exp_negative;
+ bool is_num_lit_float;
+ size_t char_code_index;
+ size_t char_code_end;
+ bool unicode;
+ uint32_t char_code;
+ int exponent_in_bin_or_dec;
+ BigNum specified_exponent;
};
__attribute__ ((format (printf, 2, 3)))
@@ -216,19 +244,28 @@ static void tokenize_error(Tokenize *t, const char *format, ...) {
va_end(ap);
}
+static void set_token_id(Tokenize *t, Token *token, TokenId id) { // Set token->id and reset the payload fields that belong to that id; t is not referenced in this body.
+ token->id = id;
+
+ if (id == TokenIdNumberLiteral) {
+ token->data.num_lit.overflow = false; // number literals start out not overflowed
+ } else if (id == TokenIdStringLiteral || id == TokenIdSymbol) {
+ memset(&token->data.str_lit.str, 0, sizeof(Buf)); // zero the Buf first, before it is resized below
+ buf_resize(&token->data.str_lit.str, 0); // then give it length 0
+ token->data.str_lit.is_c_str = false;
+ }
+}
+
static void begin_token(Tokenize *t, TokenId id) {
assert(!t->cur_tok);
t->tokens->add_one();
Token *token = &t->tokens->last();
token->start_line = t->line;
token->start_column = t->column;
- token->id = id;
token->start_pos = t->pos;
- token->radix = 0;
- token->decimal_point_pos = 0;
- token->exponent_marker_pos = 0;
- token->raw_string_start = 0;
- token->raw_string_end = 0;
+
+ set_token_id(t, token, id);
+
t->cur_tok = token;
}
@@ -237,83 +274,82 @@ static void cancel_token(Tokenize *t) {
t->cur_tok = nullptr;
}
+// Finish a floating-point number literal for the current token.
+//
+// Marks the token's bignum as BigNumKindFloat and stores the resulting double
+// in cur_tok->data.num_lit.bignum.data.x_float. Whenever the value cannot be
+// produced, cur_tok->data.num_lit.overflow is set to true and the function
+// returns without writing a float value built from partial state.
+//
+// Reads t->radix, t->specified_exponent, t->is_exp_negative and
+// t->exponent_in_bin_or_dec, and updates t->exponent_in_bin_or_dec in place.
+static void end_float_token(Tokenize *t) {
+    t->cur_tok->data.num_lit.bignum.kind = BigNumKindFloat;
+
+    if (t->radix == 10) {
+        // Decimal literals are parsed by strtod directly from the source text,
+        // starting at the token's first byte.
+        char *str_begin = buf_ptr(t->buf) + t->cur_tok->start_pos;
+        char *str_end;
+        errno = 0;
+        t->cur_tok->data.num_lit.bignum.data.x_float = strtod(str_begin, &str_end);
+        if (errno) {
+            t->cur_tok->data.num_lit.overflow = true;
+            return;
+        }
+        // strtod must have consumed exactly the token's text.
+        assert(str_end == buf_ptr(t->buf) + t->cur_tok->end_pos);
+        return;
+    }
+
+    // Non-decimal radix: the bit pattern of the double is assembled by hand
+    // from the integer significand and the accumulated exponent.
+    // NOTE(review): this presumes `double` uses the IEEE 754 binary64 layout
+    // (52 fraction bits, exponent bias 1023) - confirm for new targets.
+    if (t->specified_exponent.data.x_uint >= INT_MAX) {
+        t->cur_tok->data.num_lit.overflow = true;
+        return;
+    }
+
+    int64_t specified_exponent = t->specified_exponent.data.x_uint;
+    if (t->is_exp_negative) {
+        specified_exponent = -specified_exponent;
+    }
+    t->exponent_in_bin_or_dec += specified_exponent;
+
+    uint64_t significand = t->cur_tok->data.num_lit.bignum.data.x_uint;
+    // Zero-initialised so that a zero significand encodes as all-zero bits.
+    uint64_t significand_bits = 0;
+    uint64_t exponent_bits = 0;
+    if (significand != 0) {
+        // normalize the significand
+        if (t->radix == 10) {
+            // Not reachable today: radix 10 already returned above.
+            zig_panic("TODO: decimal floats");
+        } else {
+            // Index of the highest set bit of the significand.
+            int significand_magnitude_in_bin = __builtin_clzll(1) - __builtin_clzll(significand);
+            t->exponent_in_bin_or_dec += significand_magnitude_in_bin;
+            if (!(-1023 <= t->exponent_in_bin_or_dec && t->exponent_in_bin_or_dec < 1023)) {
+                // Exponent not representable: report overflow and stop here so
+                // no bit pattern is built from values that were never computed.
+                t->cur_tok->data.num_lit.overflow = true;
+                return;
+            }
+
+            // Move the top bit to position 52 and mask it off, which chops
+            // exactly one 1 bit from the top.
+            if (significand_magnitude_in_bin <= 52) {
+                significand_bits = (significand << (52 - significand_magnitude_in_bin)) & 0xfffffffffffffULL;
+            } else {
+                // More than 53 significant bits: shifting left by a negative
+                // amount is undefined, so shift right instead. The low bits
+                // are truncated, not rounded.
+                significand_bits = (significand >> (significand_magnitude_in_bin - 52)) & 0xfffffffffffffULL;
+            }
+            exponent_bits = t->exponent_in_bin_or_dec + 1023;
+        }
+    }
+
+    uint64_t double_bits = (exponent_bits << 52) | significand_bits;
+    // memcpy rather than a pointer cast to reinterpret the bits as a double.
+    memcpy(&t->cur_tok->data.num_lit.bignum.data.x_float, &double_bits, sizeof(double));
+}
+
static void end_token(Tokenize *t) {
assert(t->cur_tok);
t->cur_tok->end_pos = t->pos + 1;
- // normalize number literal parsing stuff
if (t->cur_tok->id == TokenIdNumberLiteral) {
- if (t->cur_tok->exponent_marker_pos == 0) {
- t->cur_tok->exponent_marker_pos = t->cur_tok->end_pos;
+ if (t->cur_tok->data.num_lit.overflow) {
+ return;
}
- if (t->cur_tok->decimal_point_pos == 0) {
- t->cur_tok->decimal_point_pos = t->cur_tok->exponent_marker_pos;
+ if (t->is_num_lit_float) {
+ end_float_token(t);
}
- }
-
- char *token_mem = buf_ptr(t->buf) + t->cur_tok->start_pos;
- int token_len = t->cur_tok->end_pos - t->cur_tok->start_pos;
+ } else if (t->cur_tok->id == TokenIdSymbol) {
+ char *token_mem = buf_ptr(t->buf) + t->cur_tok->start_pos;
+ int token_len = t->cur_tok->end_pos - t->cur_tok->start_pos;
- if (mem_eql_str(token_mem, token_len, "fn")) {
- t->cur_tok->id = TokenIdKeywordFn;
- } else if (mem_eql_str(token_mem, token_len, "return")) {
- t->cur_tok->id = TokenIdKeywordReturn;
- } else if (mem_eql_str(token_mem, token_len, "var")) {
- t->cur_tok->id = TokenIdKeywordVar;
- } else if (mem_eql_str(token_mem, token_len, "const")) {
- t->cur_tok->id = TokenIdKeywordConst;
- } else if (mem_eql_str(token_mem, token_len, "extern")) {
- t->cur_tok->id = TokenIdKeywordExtern;
- } else if (mem_eql_str(token_mem, token_len, "pub")) {
- t->cur_tok->id = TokenIdKeywordPub;
- } else if (mem_eql_str(token_mem, token_len, "export")) {
- t->cur_tok->id = TokenIdKeywordExport;
- } else if (mem_eql_str(token_mem, token_len, "use")) {
- t->cur_tok->id = TokenIdKeywordUse;
- } else if (mem_eql_str(token_mem, token_len, "true")) {
- t->cur_tok->id = TokenIdKeywordTrue;
- } else if (mem_eql_str(token_mem, token_len, "false")) {
- t->cur_tok->id = TokenIdKeywordFalse;
- } else if (mem_eql_str(token_mem, token_len, "if")) {
- t->cur_tok->id = TokenIdKeywordIf;
- } else if (mem_eql_str(token_mem, token_len, "else")) {
- t->cur_tok->id = TokenIdKeywordElse;
- } else if (mem_eql_str(token_mem, token_len, "goto")) {
- t->cur_tok->id = TokenIdKeywordGoto;
- } else if (mem_eql_str(token_mem, token_len, "volatile")) {
- t->cur_tok->id = TokenIdKeywordVolatile;
- } else if (mem_eql_str(token_mem, token_len, "asm")) {
- t->cur_tok->id = TokenIdKeywordAsm;
- } else if (mem_eql_str(token_mem, token_len, "struct")) {
- t->cur_tok->id = TokenIdKeywordStruct;
- } else if (mem_eql_str(token_mem, token_len, "enum")) {
- t->cur_tok->id = TokenIdKeywordEnum;
- } else if (mem_eql_str(token_mem, token_len, "union")) {
- t->cur_tok->id = TokenIdKeywordUnion;
- } else if (mem_eql_str(token_mem, token_len, "for")) {
- t->cur_tok->id = TokenIdKeywordFor;
- } else if (mem_eql_str(token_mem, token_len, "while")) {
- t->cur_tok->id = TokenIdKeywordWhile;
- } else if (mem_eql_str(token_mem, token_len, "continue")) {
- t->cur_tok->id = TokenIdKeywordContinue;
- } else if (mem_eql_str(token_mem, token_len, "break")) {
- t->cur_tok->id = TokenIdKeywordBreak;
- } else if (mem_eql_str(token_mem, token_len, "null")) {
- t->cur_tok->id = TokenIdKeywordNull;
- } else if (mem_eql_str(token_mem, token_len, "noalias")) {
- t->cur_tok->id = TokenIdKeywordNoAlias;
- } else if (mem_eql_str(token_mem, token_len, "switch")) {
- t->cur_tok->id = TokenIdKeywordSwitch;
- } else if (mem_eql_str(token_mem, token_len, "undefined")) {
- t->cur_tok->id = TokenIdKeywordUndefined;
- } else if (mem_eql_str(token_mem, token_len, "error")) {
- t->cur_tok->id = TokenIdKeywordError;
- } else if (mem_eql_str(token_mem, token_len, "type")) {
- t->cur_tok->id = TokenIdKeywordType;
- } else if (mem_eql_str(token_mem, token_len, "inline")) {
- t->cur_tok->id = TokenIdKeywordInline;
- } else if (mem_eql_str(token_mem, token_len, "defer")) {
- t->cur_tok->id = TokenIdKeywordDefer;
+ for (size_t i = 0; i < array_length(zig_keywords); i += 1) {
+ if (mem_eql_str(token_mem, token_len, zig_keywords[i].text)) {
+ t->cur_tok->id = zig_keywords[i].token_id;
+ break;
+ }
+ }
}
t->cur_tok = nullptr;
@@ -327,7 +363,7 @@ static bool is_exponent_signifier(uint8_t c, int radix) {
}
}
-int get_digit_value(uint8_t c) {
+static uint32_t get_digit_value(uint8_t c) {
if ('0' <= c && c <= '9') {
return c - '0';
}
@@ -337,7 +373,19 @@ int get_digit_value(uint8_t c) {
if ('a' <= c && c <= 'z') {
return c - 'a' + 10;
}
- return -1;
+ return UINT32_MAX;
+}
+
+// Delivers one already-decoded byte `c` to the token currently being built and moves
+// the tokenizer to the state that follows an escape sequence:
+//  - character literal: the byte becomes the literal's value and the closing quote is
+//    expected next (TokenizeStateCharLiteralEnd);
+//  - string literal or symbol: the byte is appended to the token's string buffer and
+//    tokenizing continues inside the string (TokenizeStateString).
+// Any other token id is a tokenizer bug.
+void handle_string_escape(Tokenize *t, uint8_t c) {
+    switch (t->cur_tok->id) {
+        case TokenIdCharLiteral:
+            t->cur_tok->data.char_lit.c = c;
+            t->state = TokenizeStateCharLiteralEnd;
+            return;
+        case TokenIdStringLiteral:
+        case TokenIdSymbol:
+            buf_append_char(&t->cur_tok->data.str_lit.str, c);
+            t->state = TokenizeStateString;
+            return;
+        default:
+            zig_unreachable();
+    }
}
void tokenize(Buf *buf, Tokenization *out) {
@@ -359,27 +407,35 @@ void tokenize(Buf *buf, Tokenization *out) {
case WHITESPACE:
break;
case 'c':
- t.state = TokenizeStateSymbolFirst;
+ t.state = TokenizeStateSymbolFirstC;
begin_token(&t, TokenIdSymbol);
+ buf_append_char(&t.cur_tok->data.str_lit.str, c);
break;
- case 'r':
- t.state = TokenizeStateFirstR;
- begin_token(&t, TokenIdSymbol);
- break;
- case ALPHA_EXCEPT_CR:
+ case ALPHA_EXCEPT_C:
case '_':
t.state = TokenizeStateSymbol;
begin_token(&t, TokenIdSymbol);
+ buf_append_char(&t.cur_tok->data.str_lit.str, c);
break;
case '0':
t.state = TokenizeStateZero;
begin_token(&t, TokenIdNumberLiteral);
- t.cur_tok->radix = 10;
+ t.radix = 10;
+ t.exp_add_amt = 1;
+ t.exponent_in_bin_or_dec = 0;
+ t.is_num_lit_float = false;
+ bignum_init_unsigned(&t.cur_tok->data.num_lit.bignum, 0);
+ bignum_init_unsigned(&t.specified_exponent, 0);
break;
case DIGIT_NON_ZERO:
t.state = TokenizeStateNumber;
begin_token(&t, TokenIdNumberLiteral);
- t.cur_tok->radix = 10;
+ t.radix = 10;
+ t.exp_add_amt = 1;
+ t.exponent_in_bin_or_dec = 0;
+ t.is_num_lit_float = false;
+ bignum_init_unsigned(&t.cur_tok->data.num_lit.bignum, get_digit_value(c));
+ bignum_init_unsigned(&t.specified_exponent, 0);
break;
case '"':
begin_token(&t, TokenIdStringLiteral);
@@ -437,6 +493,10 @@ void tokenize(Buf *buf, Tokenization *out) {
begin_token(&t, TokenIdSlash);
t.state = TokenizeStateSawSlash;
break;
+ case '\\':
+ begin_token(&t, TokenIdStringLiteral);
+ t.state = TokenizeStateSawBackslash;
+ break;
case '%':
begin_token(&t, TokenIdPercent);
t.state = TokenizeStateSawPercent;
@@ -500,12 +560,12 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawQuestionMark:
switch (c) {
case '?':
- t.cur_tok->id = TokenIdDoubleQuestion;
+ set_token_id(&t, t.cur_tok, TokenIdDoubleQuestion);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '=':
- t.cur_tok->id = TokenIdMaybeAssign;
+ set_token_id(&t, t.cur_tok, TokenIdMaybeAssign);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -520,7 +580,7 @@ void tokenize(Buf *buf, Tokenization *out) {
switch (c) {
case '.':
t.state = TokenizeStateSawDotDot;
- t.cur_tok->id = TokenIdEllipsis;
+ set_token_id(&t, t.cur_tok, TokenIdEllipsis);
break;
default:
t.pos -= 1;
@@ -542,12 +602,12 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawGreaterThan:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdCmpGreaterOrEq;
+ set_token_id(&t, t.cur_tok, TokenIdCmpGreaterOrEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '>':
- t.cur_tok->id = TokenIdBitShiftRight;
+ set_token_id(&t, t.cur_tok, TokenIdBitShiftRight);
t.state = TokenizeStateSawGreaterThanGreaterThan;
break;
default:
@@ -560,7 +620,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawGreaterThanGreaterThan:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdBitShiftRightEq;
+ set_token_id(&t, t.cur_tok, TokenIdBitShiftRightEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -574,12 +634,12 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawLessThan:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdCmpLessOrEq;
+ set_token_id(&t, t.cur_tok, TokenIdCmpLessOrEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '<':
- t.cur_tok->id = TokenIdBitShiftLeft;
+ set_token_id(&t, t.cur_tok, TokenIdBitShiftLeft);
t.state = TokenizeStateSawLessThanLessThan;
break;
default:
@@ -592,12 +652,12 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawLessThanLessThan:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdBitShiftLeftEq;
+ set_token_id(&t, t.cur_tok, TokenIdBitShiftLeftEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '%':
- t.cur_tok->id = TokenIdBitShiftLeftPercent;
+ set_token_id(&t, t.cur_tok, TokenIdBitShiftLeftPercent);
t.state = TokenizeStateSawShiftLeftPercent;
break;
default:
@@ -610,7 +670,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawShiftLeftPercent:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdBitShiftLeftPercentEq;
+ set_token_id(&t, t.cur_tok, TokenIdBitShiftLeftPercentEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -624,7 +684,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawBang:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdCmpNotEq;
+ set_token_id(&t, t.cur_tok, TokenIdCmpNotEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -638,12 +698,12 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawEq:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdCmpEq;
+ set_token_id(&t, t.cur_tok, TokenIdCmpEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '>':
- t.cur_tok->id = TokenIdFatArrow;
+ set_token_id(&t, t.cur_tok, TokenIdFatArrow);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -657,17 +717,17 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawStar:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdTimesEq;
+ set_token_id(&t, t.cur_tok, TokenIdTimesEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '*':
- t.cur_tok->id = TokenIdStarStar;
+ set_token_id(&t, t.cur_tok, TokenIdStarStar);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '%':
- t.cur_tok->id = TokenIdTimesPercent;
+ set_token_id(&t, t.cur_tok, TokenIdTimesPercent);
t.state = TokenizeStateSawStarPercent;
break;
default:
@@ -680,7 +740,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawStarPercent:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdTimesPercentEq;
+ set_token_id(&t, t.cur_tok, TokenIdTimesPercentEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -694,17 +754,17 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawPercent:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdModEq;
+ set_token_id(&t, t.cur_tok, TokenIdModEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '.':
- t.cur_tok->id = TokenIdPercentDot;
+ set_token_id(&t, t.cur_tok, TokenIdPercentDot);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '%':
- t.cur_tok->id = TokenIdPercentPercent;
+ set_token_id(&t, t.cur_tok, TokenIdPercentPercent);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -718,17 +778,17 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawPlus:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdPlusEq;
+ set_token_id(&t, t.cur_tok, TokenIdPlusEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '+':
- t.cur_tok->id = TokenIdPlusPlus;
+ set_token_id(&t, t.cur_tok, TokenIdPlusPlus);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '%':
- t.cur_tok->id = TokenIdPlusPercent;
+ set_token_id(&t, t.cur_tok, TokenIdPlusPercent);
t.state = TokenizeStateSawPlusPercent;
break;
default:
@@ -741,7 +801,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawPlusPercent:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdPlusPercentEq;
+ set_token_id(&t, t.cur_tok, TokenIdPlusPercentEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -755,11 +815,11 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawAmpersand:
switch (c) {
case '&':
- t.cur_tok->id = TokenIdBoolAnd;
+ set_token_id(&t, t.cur_tok, TokenIdBoolAnd);
t.state = TokenizeStateSawAmpersandAmpersand;
break;
case '=':
- t.cur_tok->id = TokenIdBitAndEq;
+ set_token_id(&t, t.cur_tok, TokenIdBitAndEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -773,7 +833,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawAmpersandAmpersand:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdBoolAndEq;
+ set_token_id(&t, t.cur_tok, TokenIdBoolAndEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -787,7 +847,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawCaret:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdBitXorEq;
+ set_token_id(&t, t.cur_tok, TokenIdBitXorEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -801,11 +861,11 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawPipe:
switch (c) {
case '|':
- t.cur_tok->id = TokenIdBoolOr;
+ set_token_id(&t, t.cur_tok, TokenIdBoolOr);
t.state = TokenizeStateSawPipePipe;
break;
case '=':
- t.cur_tok->id = TokenIdBitOrEq;
+ set_token_id(&t, t.cur_tok, TokenIdBitOrEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -819,7 +879,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawPipePipe:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdBoolOrEq;
+ set_token_id(&t, t.cur_tok, TokenIdBoolOrEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -837,7 +897,7 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateLineComment;
break;
case '=':
- t.cur_tok->id = TokenIdDivEq;
+ set_token_id(&t, t.cur_tok, TokenIdDivEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -848,24 +908,32 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
- case TokenizeStateLineComment:
+ case TokenizeStateSawBackslash:
+ switch (c) {
+ case '\\':
+ t.state = TokenizeStateLineString;
+ break;
+ default:
+ tokenize_error(&t, "invalid character: '%c'", c);
+ break;
+ }
+ break;
+ case TokenizeStateLineString:
switch (c) {
case '\n':
- t.state = TokenizeStateStart;
+ t.state = TokenizeStateLineStringEnd;
break;
default:
- // do nothing
+ buf_append_char(&t.cur_tok->data.str_lit.str, c);
break;
}
break;
- case TokenizeStateSymbolFirst:
+ case TokenizeStateLineStringEnd:
switch (c) {
- case '"':
- t.cur_tok->id = TokenIdStringLiteral;
- t.state = TokenizeStateString;
+ case WHITESPACE:
break;
- case SYMBOL_CHAR:
- t.state = TokenizeStateSymbol;
+ case '\\':
+ t.state = TokenizeStateLineStringContinue;
break;
default:
t.pos -= 1;
@@ -874,29 +942,38 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
- case TokenizeStateSymbolFirstRaw:
+ case TokenizeStateLineStringContinue:
switch (c) {
- case '"':
- t.cur_tok->id = TokenIdStringLiteral;
- t.state = TokenizeStateRawString;
- t.raw_string_id_start = t.pos + 1;
- break;
- case SYMBOL_CHAR:
- t.state = TokenizeStateSymbol;
+ case '\\':
+ t.state = TokenizeStateLineString;
+ buf_append_char(&t.cur_tok->data.str_lit.str, '\n');
break;
default:
- t.pos -= 1;
- end_token(&t);
+ tokenize_error(&t, "invalid character: '%c'", c);
+ break;
+ }
+ break;
+ case TokenizeStateLineComment:
+ switch (c) {
+ case '\n':
t.state = TokenizeStateStart;
- continue;
+ break;
+ default:
+ // do nothing
+ break;
}
break;
- case TokenizeStateSawAtSign:
+ case TokenizeStateSymbolFirstC:
switch (c) {
case '"':
- t.cur_tok->id = TokenIdSymbol;
+ set_token_id(&t, t.cur_tok, TokenIdStringLiteral);
+ t.cur_tok->data.str_lit.is_c_str = true;
t.state = TokenizeStateString;
break;
+ case SYMBOL_CHAR:
+ t.state = TokenizeStateSymbol;
+ buf_append_char(&t.cur_tok->data.str_lit.str, c);
+ break;
default:
t.pos -= 1;
end_token(&t);
@@ -904,18 +981,11 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
- case TokenizeStateFirstR:
+ case TokenizeStateSawAtSign:
switch (c) {
case '"':
- t.cur_tok->id = TokenIdStringLiteral;
- t.state = TokenizeStateRawString;
- t.raw_string_id_start = t.pos + 1;
- break;
- case 'c':
- t.state = TokenizeStateSymbolFirstRaw;
- break;
- case SYMBOL_CHAR_EXCEPT_C:
- t.state = TokenizeStateSymbol;
+ set_token_id(&t, t.cur_tok, TokenIdSymbol);
+ t.state = TokenizeStateString;
break;
default:
t.pos -= 1;
@@ -927,6 +997,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSymbol:
switch (c) {
case SYMBOL_CHAR:
+ buf_append_char(&t.cur_tok->data.str_lit.str, c);
break;
default:
t.pos -= 1;
@@ -942,108 +1013,124 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart;
break;
case '\n':
- tokenize_error(&t, "use raw string for multiline string literal");
+ tokenize_error(&t, "newline not allowed in string literal");
break;
case '\\':
t.state = TokenizeStateStringEscape;
break;
default:
+ buf_append_char(&t.cur_tok->data.str_lit.str, c);
break;
}
break;
case TokenizeStateStringEscape:
switch (c) {
case 'x':
- t.state = TokenizeStateHex;
- t.hex_chars_left = 2;
+ t.state = TokenizeStateCharCode;
+ t.radix = 16;
+ t.char_code = 0;
+ t.char_code_index = 0;
+ t.char_code_end = 2;
+ t.unicode = false;
break;
case 'u':
- t.state = TokenizeStateHex;
- t.hex_chars_left = 4;
+ t.state = TokenizeStateCharCode;
+ t.radix = 16;
+ t.char_code = 0;
+ t.char_code_index = 0;
+ t.char_code_end = 4;
+ t.unicode = true;
break;
case 'U':
- t.state = TokenizeStateHex;
- t.hex_chars_left = 6;
+ t.state = TokenizeStateCharCode;
+ t.radix = 16;
+ t.char_code = 0;
+ t.char_code_index = 0;
+ t.char_code_end = 6;
+ t.unicode = true;
break;
case 'n':
+ handle_string_escape(&t, '\n');
+ break;
case 'r':
+ handle_string_escape(&t, '\r');
+ break;
case '\\':
+ handle_string_escape(&t, '\\');
+ break;
case 't':
+ handle_string_escape(&t, '\t');
+ break;
case '\'':
+ handle_string_escape(&t, '\'');
+ break;
case '"':
- if (t.cur_tok->id == TokenIdCharLiteral) {
- t.state = TokenizeStateCharLiteralEnd;
- } else if (t.cur_tok->id == TokenIdStringLiteral) {
- t.state = TokenizeStateString;
- } else {
- zig_unreachable();
- }
+ handle_string_escape(&t, '\"');
break;
default:
tokenize_error(&t, "invalid character: '%c'", c);
}
break;
- case TokenizeStateHex:
- switch (c) {
- case HEX_DIGIT:
- t.hex_chars_left -= 1;
- if (t.hex_chars_left == 0) {
- if (t.cur_tok->id == TokenIdCharLiteral) {
- t.state = TokenizeStateCharLiteralEnd;
- } else if (t.cur_tok->id == TokenIdStringLiteral) {
- t.state = TokenizeStateString;
- } else if (t.cur_tok->id == TokenIdSymbol) {
- t.state = TokenizeStateString;
+ case TokenizeStateCharCode:
+ {
+ uint32_t digit_value = get_digit_value(c);
+ if (digit_value >= t.radix) {
+ tokenize_error(&t, "invalid digit: '%c'", c);
+ }
+ t.char_code *= t.radix;
+ t.char_code += digit_value;
+ t.char_code_index += 1;
+
+ if (t.char_code_index >= t.char_code_end) {
+ if (t.unicode) {
+ if (t.char_code <= 0x7f) {
+ // 00000000 00000000 00000000 0xxxxxxx
+ handle_string_escape(&t, t.char_code);
+ } else if (t.cur_tok->id == TokenIdCharLiteral) {
+ tokenize_error(&t, "unicode value too large for character literal: %x", t.char_code);
+ } else if (t.char_code <= 0x7ff) {
+ // 00000000 00000000 00000xxx xx000000
+ handle_string_escape(&t, 0xc0 | (t.char_code >> 6));
+ // 00000000 00000000 00000000 00xxxxxx
+ handle_string_escape(&t, 0x80 | (t.char_code & 0x3f));
+ } else if (t.char_code <= 0xffff) {
+ // 00000000 00000000 xxxx0000 00000000
+ handle_string_escape(&t, 0xe0 | (t.char_code >> 12));
+ // 00000000 00000000 0000xxxx xx000000
+ handle_string_escape(&t, 0x80 | ((t.char_code >> 6) & 0x3f));
+ // 00000000 00000000 00000000 00xxxxxx
+ handle_string_escape(&t, 0x80 | (t.char_code & 0x3f));
+ } else if (t.char_code <= 0x10ffff) {
+ // 00000000 000xxx00 00000000 00000000
+ handle_string_escape(&t, 0xf0 | (t.char_code >> 18));
+ // 00000000 000000xx xxxx0000 00000000
+ handle_string_escape(&t, 0x80 | ((t.char_code >> 12) & 0x3f));
+ // 00000000 00000000 0000xxxx xx000000
+ handle_string_escape(&t, 0x80 | ((t.char_code >> 6) & 0x3f));
+ // 00000000 00000000 00000000 00xxxxxx
+ handle_string_escape(&t, 0x80 | (t.char_code & 0x3f));
} else {
- zig_unreachable();
+ tokenize_error(&t, "unicode value out of range: %x", t.char_code);
}
+ } else {
+ if (t.cur_tok->id == TokenIdCharLiteral && t.char_code >= sizeof(uint8_t)) {
+ tokenize_error(&t, "value too large for character literal: '%x'",
+ t.char_code);
+ }
+ handle_string_escape(&t, t.char_code);
}
- break;
- default:
- tokenize_error(&t, "invalid character: '%c'", c);
- }
- break;
- case TokenizeStateRawString:
- if (c == '(') {
- t.raw_string_id_end = t.pos;
- t.cur_tok->raw_string_start = t.pos + 1;
- t.state = TokenizeStateRawStringContents;
- }
- break;
- case TokenizeStateRawStringContents:
- if (c == ')') {
- t.state = TokenizeStateRawStringMaybeEnd;
- t.raw_string_id_cmp_pos = t.raw_string_id_start;
- t.cur_tok->raw_string_end = t.pos;
- }
- break;
- case TokenizeStateRawStringMaybeEnd:
- if (t.raw_string_id_cmp_pos >= t.raw_string_id_end &&
- c == '"')
- {
- end_token(&t);
- t.state = TokenizeStateStart;
- } else if (c != buf_ptr(t.buf)[t.raw_string_id_cmp_pos]) {
- if (c == ')') {
- t.raw_string_id_cmp_pos = t.raw_string_id_start;
- t.cur_tok->raw_string_end = t.pos;
- } else {
- t.state = TokenizeStateRawStringContents;
}
- } else {
- t.raw_string_id_cmp_pos += 1;
}
break;
case TokenizeStateCharLiteral:
switch (c) {
case '\'':
- end_token(&t);
- t.state = TokenizeStateStart;
- break;
+ tokenize_error(&t, "expected character");
case '\\':
t.state = TokenizeStateStringEscape;
break;
default:
+ t.cur_tok->data.char_lit.c = c;
t.state = TokenizeStateCharLiteralEnd;
break;
}
@@ -1061,15 +1148,17 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateZero:
switch (c) {
case 'b':
- t.cur_tok->radix = 2;
+ t.radix = 2;
t.state = TokenizeStateNumber;
break;
case 'o':
- t.cur_tok->radix = 8;
+ t.radix = 8;
+ t.exp_add_amt = 3;
t.state = TokenizeStateNumber;
break;
case 'x':
- t.cur_tok->radix = 16;
+ t.radix = 16;
+ t.exp_add_amt = 4;
t.state = TokenizeStateNumber;
break;
default:
@@ -1082,113 +1171,127 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateNumber:
{
if (c == '.') {
- if (t.pos + 1 < buf_len(t.buf)) {
- uint8_t next_c = buf_ptr(t.buf)[t.pos + 1];
- if (next_c == '.') {
- t.pos -= 1;
- end_token(&t);
- t.state = TokenizeStateStart;
- continue;
- }
- }
- t.cur_tok->decimal_point_pos = t.pos;
- t.state = TokenizeStateFloatFraction;
+ t.state = TokenizeStateNumberDot;
break;
}
- if (is_exponent_signifier(c, t.cur_tok->radix)) {
- t.cur_tok->exponent_marker_pos = t.pos;
+ if (is_exponent_signifier(c, t.radix)) {
t.state = TokenizeStateFloatExponentUnsigned;
+ t.is_num_lit_float = true;
break;
}
- if (c == '_') {
- tokenize_error(&t, "invalid character: '%c'", c);
- break;
- }
- int digit_value = get_digit_value(c);
- if (digit_value >= 0) {
- if (digit_value >= t.cur_tok->radix) {
+ uint32_t digit_value = get_digit_value(c);
+ if (digit_value >= t.radix) {
+ if (is_symbol_char(c)) {
tokenize_error(&t, "invalid character: '%c'", c);
- break;
}
- // normal digit
- } else {
// not my char
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
+ t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+ bignum_multiply_by_scalar(&t.cur_tok->data.num_lit.bignum, t.radix);
+ t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+ bignum_increment_by_scalar(&t.cur_tok->data.num_lit.bignum, digit_value);
break;
}
+ case TokenizeStateNumberDot:
+ if (c == '.') {
+ t.pos -= 2;
+ end_token(&t);
+ t.state = TokenizeStateStart;
+ continue;
+ }
+ t.pos -= 1;
+ t.state = TokenizeStateFloatFraction;
+ t.is_num_lit_float = true;
+ continue;
case TokenizeStateFloatFraction:
{
- if (is_exponent_signifier(c, t.cur_tok->radix)) {
- t.cur_tok->exponent_marker_pos = t.pos;
+ if (is_exponent_signifier(c, t.radix)) {
t.state = TokenizeStateFloatExponentUnsigned;
break;
}
- if (c == '_') {
- tokenize_error(&t, "invalid character: '%c'", c);
- break;
- }
- int digit_value = get_digit_value(c);
- if (digit_value >= 0) {
- if (digit_value >= t.cur_tok->radix) {
+ uint32_t digit_value = get_digit_value(c);
+ if (digit_value >= t.radix) {
+ if (is_symbol_char(c)) {
tokenize_error(&t, "invalid character: '%c'", c);
- break;
}
- // normal digit
- } else {
// not my char
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
+ t.exponent_in_bin_or_dec -= t.exp_add_amt;
+ if (t.radix == 10) {
+ // For now we use strtod to parse decimal floats, so we just have to get to the
+ // end of the token.
+ break;
+ }
+ t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+ bignum_multiply_by_scalar(&t.cur_tok->data.num_lit.bignum, t.radix);
+ t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+ bignum_increment_by_scalar(&t.cur_tok->data.num_lit.bignum, digit_value);
break;
}
case TokenizeStateFloatExponentUnsigned:
switch (c) {
case '+':
+ t.is_exp_negative = false;
+ t.state = TokenizeStateFloatExponentNumber;
+ break;
case '-':
+ t.is_exp_negative = true;
t.state = TokenizeStateFloatExponentNumber;
break;
default:
// reinterpret as normal exponent number
t.pos -= 1;
+ t.is_exp_negative = false;
t.state = TokenizeStateFloatExponentNumber;
continue;
}
break;
case TokenizeStateFloatExponentNumber:
- switch (c) {
- case DIGIT:
- break;
- case ALPHA:
- case '_':
- tokenize_error(&t, "invalid character: '%c'", c);
- break;
- default:
+ {
+ uint32_t digit_value = get_digit_value(c);
+ if (digit_value >= t.radix) {
+ if (is_symbol_char(c)) {
+ tokenize_error(&t, "invalid character: '%c'", c);
+ }
+ // not my char
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
+ }
+ if (t.radix == 10) {
+ // For now we use strtod to parse decimal floats, so we just have to get to the
+ // end of the token.
+ break;
+ }
+ t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+ bignum_multiply_by_scalar(&t.specified_exponent, 10);
+ t.cur_tok->data.num_lit.overflow = t.cur_tok->data.num_lit.overflow ||
+ bignum_increment_by_scalar(&t.specified_exponent, digit_value);
}
break;
case TokenizeStateSawDash:
switch (c) {
case '>':
- t.cur_tok->id = TokenIdArrow;
+ set_token_id(&t, t.cur_tok, TokenIdArrow);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '=':
- t.cur_tok->id = TokenIdMinusEq;
+ set_token_id(&t, t.cur_tok, TokenIdMinusEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
case '%':
- t.cur_tok->id = TokenIdMinusPercent;
+ set_token_id(&t, t.cur_tok, TokenIdMinusPercent);
t.state = TokenizeStateSawMinusPercent;
break;
default:
@@ -1201,7 +1304,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawMinusPercent:
switch (c) {
case '=':
- t.cur_tok->id = TokenIdMinusPercentEq;
+ set_token_id(&t, t.cur_tok, TokenIdMinusPercentEq);
end_token(&t);
t.state = TokenizeStateStart;
break;
@@ -1226,11 +1329,14 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateStart:
case TokenizeStateError:
break;
+ case TokenizeStateNumberDot:
+ tokenize_error(&t, "unterminated number literal");
+ break;
case TokenizeStateString:
tokenize_error(&t, "unterminated string");
break;
case TokenizeStateStringEscape:
- case TokenizeStateHex:
+ case TokenizeStateCharCode:
if (t.cur_tok->id == TokenIdStringLiteral) {
tokenize_error(&t, "unterminated string");
} else if (t.cur_tok->id == TokenIdCharLiteral) {
@@ -1239,19 +1345,12 @@ void tokenize(Buf *buf, Tokenization *out) {
zig_unreachable();
}
break;
- case TokenizeStateRawString:
- case TokenizeStateRawStringContents:
- case TokenizeStateRawStringMaybeEnd:
- tokenize_error(&t, "unterminated raw string");
- break;
case TokenizeStateCharLiteral:
case TokenizeStateCharLiteralEnd:
tokenize_error(&t, "unterminated character literal");
break;
case TokenizeStateSymbol:
- case TokenizeStateSymbolFirst:
- case TokenizeStateSymbolFirstRaw:
- case TokenizeStateFirstR:
+ case TokenizeStateSymbolFirstC:
case TokenizeStateZero:
case TokenizeStateNumber:
case TokenizeStateFloatFraction:
@@ -1280,9 +1379,13 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawPlusPercent:
case TokenizeStateSawMinusPercent:
case TokenizeStateSawShiftLeftPercent:
+ case TokenizeStateLineString:
+ case TokenizeStateLineStringEnd:
end_token(&t);
break;
case TokenizeStateSawDotDot:
+ case TokenizeStateSawBackslash:
+ case TokenizeStateLineStringContinue:
tokenize_error(&t, "unexpected EOF");
break;
case TokenizeStateLineComment:
src/tokenizer.hpp
@@ -9,6 +9,7 @@
#define ZIG_TOKENIZER_HPP
#include "buffer.hpp"
+#include "bignum.hpp"
enum TokenId {
TokenIdEof,
@@ -111,6 +112,22 @@ enum TokenId {
TokenIdPercentDot,
};
+// Payload of a TokenIdNumberLiteral token (the num_lit member of Token's data union).
+struct TokenNumLit {
+ // Parsed value of the literal. The tokenizer initializes it as an unsigned integer
+ // and switches its kind to BigNumKindFloat when the literal turns out to be a float.
+ BigNum bignum;
+ // overflow is true if when parsing the number, we discovered it would not
+ // fit without losing data in a uint64_t or double
+ bool overflow;
+};
+
+// Payload of a TokenIdStringLiteral or TokenIdSymbol token (the str_lit member of
+// Token's data union).
+struct TokenStrLit {
+ // Contents of the token, appended byte by byte by the tokenizer; escape sequences
+ // are stored as the bytes they decode to.
+ Buf str;
+ // Set to true by the tokenizer when a string literal is introduced by the `c` prefix (c"...").
+ bool is_c_str;
+};
+
+// Payload of a TokenIdCharLiteral token (the char_lit member of Token's data union).
+struct TokenCharLit {
+ // The single byte the character literal denotes, after any escape has been decoded.
+ uint8_t c;
+};
+
struct Token {
TokenId id;
int start_pos;
@@ -118,14 +135,16 @@ struct Token {
int start_line;
int start_column;
- // for id == TokenIdNumberLiteral
- int radix; // if != 10, then skip the first 2 characters
- int decimal_point_pos; // either exponent_marker_pos or the position of the '.'
- int exponent_marker_pos; // either end_pos or the position of the 'e'/'p'
+ union {
+ // TokenIdNumberLiteral
+ TokenNumLit num_lit;
- // for id == TokenIdStringLiteral
- int raw_string_start;
- int raw_string_end;
+ // TokenIdStringLiteral or TokenIdSymbol
+ TokenStrLit str_lit;
+
+ // TokenIdCharLiteral
+ TokenCharLit char_lit;
+ } data;
};
struct Tokenization {
@@ -142,8 +161,6 @@ void tokenize(Buf *buf, Tokenization *out_tokenization);
void print_tokens(Buf *buf, ZigList<Token> *tokens);
-int get_digit_value(uint8_t c);
-
const char * token_name(TokenId id);
bool valid_symbol_starter(uint8_t c);
test/run_tests.cpp
@@ -1173,7 +1173,7 @@ fn f() {
add_compile_fail_case("normal string with newline", R"SOURCE(
const foo = "a
b";
- )SOURCE", 1, ".tmp_source.zig:2:13: error: use raw string for multiline string literal");
+ )SOURCE", 1, ".tmp_source.zig:2:13: error: newline not allowed in string literal");
add_compile_fail_case("invalid comparison for function pointers", R"SOURCE(
fn foo() {}
@@ -1760,7 +1760,7 @@ struct type {
)SOURCE", 3,
R"(pub const FOO = c"aoeu\x13 derp")",
R"(pub const FOO2 = c"aoeu\x134 derp")",
- R"(pub const FOO_CHAR = '\x3f')");
+ R"(pub const FOO_CHAR = '?')");
}
static void run_self_hosted_test(bool is_release_mode) {
test/self_hosted.zig
@@ -684,17 +684,13 @@ fn count_trailing_zeroes() {
#attribute("test")
fn multiline_string() {
- const s1 = r"AOEU(
-one
-two)
-three)AOEU";
- const s2 = "\none\ntwo)\nthree";
- const s3 = r"(
-one
-two)
-three)";
+ const s1 =
+ \\one
+ \\two)
+ \\three
+ ;
+ const s2 = "one\ntwo)\nthree";
assert(str.eql(s1, s2));
- assert(str.eql(s3, s2));
}