Commit c0f9012bed

Andrew Kelley <superjoe30@gmail.com>
2017-06-16 20:34:38
parseh: fix not recognizing integer suffixes on hex numbers
1 parent 865b53f
src/c_tokenizer.cpp
@@ -107,8 +107,11 @@ static void begin_token(CTokenize *ctok, CTokId id) {
             memset(&ctok->cur_tok->data.symbol, 0, sizeof(Buf));
             buf_resize(&ctok->cur_tok->data.symbol, 0);
             break;
-        case CTokIdCharLit:
         case CTokIdNumLitInt:
+            ctok->cur_tok->data.num_lit_int.x = 0;
+            ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixNone;
+            break;
+        case CTokIdCharLit:
         case CTokIdNumLitFloat:
         case CTokIdMinus:
             break;
@@ -138,9 +141,9 @@ static void add_char(CTokenize *ctok, uint8_t c) {
 
 static void hex_digit(CTokenize *ctok, uint8_t value) {
     // TODO @mul_with_overflow
-    ctok->cur_tok->data.num_lit_int *= 16;
+    ctok->cur_tok->data.num_lit_int.x *= 16;
     // TODO @add_with_overflow
-    ctok->cur_tok->data.num_lit_int += value;
+    ctok->cur_tok->data.num_lit_int.x += value;
 
     static const uint8_t hex_digit[] = "0123456789abcdef";
     buf_append_char(&ctok->buf, hex_digit[value]);
@@ -194,19 +197,15 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) {
                         break;
                     case DIGIT_NON_ZERO:
                         ctok->state = CTokStateDecimal;
-                        ctok->unsigned_suffix = false;
-                        ctok->long_suffix = false;
                         begin_token(ctok, CTokIdNumLitInt);
-                        ctok->cur_tok->data.num_lit_int = *c - '0';
+                        ctok->cur_tok->data.num_lit_int.x = *c - '0';
                         buf_resize(&ctok->buf, 0);
                         buf_append_char(&ctok->buf, *c);
                         break;
                     case '0':
                         ctok->state = CTokStateGotZero;
-                        ctok->unsigned_suffix = false;
-                        ctok->long_suffix = false;
                         begin_token(ctok, CTokIdNumLitInt);
-                        ctok->cur_tok->data.num_lit_int = 0;
+                        ctok->cur_tok->data.num_lit_int.x = 0;
                         buf_resize(&ctok->buf, 0);
                         buf_append_char(&ctok->buf, '0');
                         break;
@@ -289,21 +288,21 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) {
                         buf_append_char(&ctok->buf, *c);
 
                         // TODO @mul_with_overflow
-                        ctok->cur_tok->data.num_lit_int *= 10;
+                        ctok->cur_tok->data.num_lit_int.x *= 10;
                         // TODO @add_with_overflow
-                        ctok->cur_tok->data.num_lit_int += *c - '0';
+                        ctok->cur_tok->data.num_lit_int.x += *c - '0';
                         break;
                     case '\'':
                         break;
                     case 'u':
                     case 'U':
-                        ctok->unsigned_suffix = true;
-                        ctok->state = CTokStateIntSuffix;
+                        ctok->state = CTokStateNumLitIntSuffixU;
+                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixU;
                         break;
                     case 'l':
                     case 'L':
-                        ctok->long_suffix = true;
-                        ctok->state = CTokStateIntSuffixLong;
+                        ctok->state = CTokStateNumLitIntSuffixL;
+                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixL;
                         break;
                     case '.':
                         buf_append_char(&ctok->buf, '.');
@@ -317,50 +316,6 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) {
                         continue;
                 }
                 break;
-            case CTokStateIntSuffix:
-                switch (*c) {
-                    case 'l':
-                    case 'L':
-                        if (ctok->long_suffix) {
-                            return mark_error(ctok);
-                        }
-                        ctok->long_suffix = true;
-                        ctok->state = CTokStateIntSuffixLong;
-                        break;
-                    case 'u':
-                    case 'U':
-                        if (ctok->unsigned_suffix) {
-                            return mark_error(ctok);
-                        }
-                        ctok->unsigned_suffix = true;
-                        break;
-                    default:
-                        c -= 1;
-                        end_token(ctok);
-                        ctok->state = CTokStateStart;
-                        continue;
-                }
-                break;
-            case CTokStateIntSuffixLong:
-                switch (*c) {
-                    case 'l':
-                    case 'L':
-                        ctok->state = CTokStateIntSuffix;
-                        break;
-                    case 'u':
-                    case 'U':
-                        if (ctok->unsigned_suffix) {
-                            return mark_error(ctok);
-                        }
-                        ctok->unsigned_suffix = true;
-                        break;
-                    default:
-                        c -= 1;
-                        end_token(ctok);
-                        ctok->state = CTokStateStart;
-                        continue;
-                }
-                break;
             case CTokStateGotZero:
                 switch (*c) {
                     case 'x':
@@ -389,9 +344,9 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) {
                     case '6':
                     case '7':
                         // TODO @mul_with_overflow
-                        ctok->cur_tok->data.num_lit_int *= 8;
+                        ctok->cur_tok->data.num_lit_int.x *= 8;
                         // TODO @add_with_overflow
-                        ctok->cur_tok->data.num_lit_int += *c - '0';
+                        ctok->cur_tok->data.num_lit_int.x += *c - '0';
                         break;
                     case '8':
                     case '9':
@@ -466,6 +421,82 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) {
                         ctok->cur_tok->id = CTokIdNumLitFloat;
                         ctok->state = CTokStateExpSign;
                         break;
+                    case 'u':
+                    case 'U':
+                        // marks the number literal as unsigned
+                        ctok->state = CTokStateNumLitIntSuffixU;
+                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixU;
+                        break;
+                    case 'l':
+                    case 'L':
+                        // marks the number literal as long
+                        ctok->state = CTokStateNumLitIntSuffixL;
+                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixL;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateNumLitIntSuffixU:
+                switch (*c) {
+                    case 'l':
+                    case 'L':
+                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLU;
+                        ctok->state = CTokStateNumLitIntSuffixUL;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateNumLitIntSuffixL:
+                switch (*c) {
+                    case 'l':
+                    case 'L':
+                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLL;
+                        ctok->state = CTokStateNumLitIntSuffixLL;
+                        break;
+                    case 'u':
+                    case 'U':
+                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLU;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateNumLitIntSuffixLL:
+                switch (*c) {
+                    case 'u':
+                    case 'U':
+                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLLU;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateNumLitIntSuffixUL:
+                switch (*c) {
+                    case 'l':
+                    case 'L':
+                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLLU;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        break;
                     default:
                         c -= 1;
                         end_token(ctok);
@@ -681,8 +712,10 @@ found_end_of_macro:
         case CTokStateHex:
         case CTokStateOctal:
         case CTokStateGotZero:
-        case CTokStateIntSuffix:
-        case CTokStateIntSuffixLong:
+        case CTokStateNumLitIntSuffixU:
+        case CTokStateNumLitIntSuffixL:
+        case CTokStateNumLitIntSuffixUL:
+        case CTokStateNumLitIntSuffixLL:
             end_token(ctok);
             break;
         case CTokStateFloat:
src/c_tokenizer.hpp
@@ -20,12 +20,26 @@ enum CTokId {
     CTokIdMinus,
 };
 
+enum CNumLitSuffix { // integer-literal suffix recorded by the C macro tokenizer; parseh.cpp switches on it to pick the constant's type
+    CNumLitSuffixNone, // no suffix; begin_token() resets the token to this
+    CNumLitSuffixL, // a single 'l' / 'L'
+    CNumLitSuffixU, // a single 'u' / 'U'
+    CNumLitSuffixLU, // one 'l' plus 'u', in either order ("lu" or "ul", any letter case)
+    CNumLitSuffixLL, // two 'l' characters
+    CNumLitSuffixLLU, // two 'l' plus 'u' ("llu" or "ull", any letter case)
+};
+
+struct CNumLitInt { // payload of a CTokIdNumLitInt token
+    uint64_t x; // literal value, accumulated digit by digit by the tokenizer (overflow is not checked yet, see its TODOs)
+    CNumLitSuffix suffix; // suffix seen after the digits; CNumLitSuffixNone when there is none
+};
+
 struct CTok {
     enum CTokId id;
     union {
         uint8_t char_lit;
         Buf str_lit;
-        uint64_t num_lit_int;
+        CNumLitInt num_lit_int;
         double num_lit_float;
         Buf symbol;
     } data;
@@ -47,13 +61,15 @@ enum CTokState {
     CTokStateOctal,
     CTokStateGotZero,
     CTokStateHex,
-    CTokStateIntSuffix,
-    CTokStateIntSuffixLong,
     CTokStateFloat,
     CTokStateExpSign,
     CTokStateFloatExp,
     CTokStateFloatExpFirst,
     CTokStateStrOctal,
+    CTokStateNumLitIntSuffixU,
+    CTokStateNumLitIntSuffixL,
+    CTokStateNumLitIntSuffixLL,
+    CTokStateNumLitIntSuffixUL,
 };
 
 struct CTokenize {
@@ -62,8 +78,6 @@ struct CTokenize {
     bool error;
     CTok *cur_tok;
     Buf buf;
-    bool unsigned_suffix;
-    bool long_suffix;
     uint8_t cur_char;
     int octal_index;
 };
src/parseh.cpp
@@ -162,12 +162,16 @@ static Tld *create_global_str_lit_var(Context *c, Buf *name, Buf *value) {
     return &tld_var->base;
 }
 
-static Tld *create_global_num_lit_unsigned_negative(Context *c, Buf *name, uint64_t x, bool negative) {
-    ConstExprValue *var_val = create_const_unsigned_negative(c->codegen->builtin_types.entry_num_lit_int, x, negative);
+static Tld *create_global_num_lit_unsigned_negative_type(Context *c, Buf *name, uint64_t x, bool negative, TypeTableEntry *type_entry) {
+    ConstExprValue *var_val = create_const_unsigned_negative(type_entry, x, negative); // constant is built with the caller-supplied type_entry instead of a hard-coded entry_num_lit_int
     TldVar *tld_var = create_global_var(c, name, var_val, true);
     return &tld_var->base;
 }
 
+static Tld *create_global_num_lit_unsigned_negative(Context *c, Buf *name, uint64_t x, bool negative) {
+    return create_global_num_lit_unsigned_negative_type(c, name, x, negative, c->codegen->builtin_types.entry_num_lit_int); // keeps the pre-existing behavior: forwards with entry_num_lit_int as the type
+}
+
 static Tld *create_global_num_lit_float(Context *c, Buf *name, double value) {
     ConstExprValue *var_val = create_const_float(c->codegen->builtin_types.entry_num_lit_float, value);
     TldVar *tld_var = create_global_var(c, name, var_val, true);
@@ -1149,7 +1153,32 @@ static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *ch
                 return;
             case CTokIdNumLitInt:
                 if (is_last) {
-                    Tld *tld = create_global_num_lit_unsigned_negative(c, name, tok->data.num_lit_int, negate);
+                    Tld *tld;
+                    switch (tok->data.num_lit_int.suffix) {
+                        case CNumLitSuffixNone:
+                            tld = create_global_num_lit_unsigned_negative(c, name, tok->data.num_lit_int.x, negate);
+                            break;
+                        case CNumLitSuffixL:
+                            tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate,
+                                    c->codegen->builtin_types.entry_c_int[CIntTypeLong]);
+                            break;
+                        case CNumLitSuffixU:
+                            tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate,
+                                    c->codegen->builtin_types.entry_c_int[CIntTypeUInt]);
+                            break;
+                        case CNumLitSuffixLU:
+                            tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate,
+                                    c->codegen->builtin_types.entry_c_int[CIntTypeULong]);
+                            break;
+                        case CNumLitSuffixLL:
+                            tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate,
+                                    c->codegen->builtin_types.entry_c_int[CIntTypeLongLong]);
+                            break;
+                        case CNumLitSuffixLLU:
+                            tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate,
+                                    c->codegen->builtin_types.entry_c_int[CIntTypeULongLong]);
+                            break;
+                    }
                     c->macro_table.put(name, tld);
                 }
                 return;
test/parseh.zig
@@ -217,6 +217,48 @@ pub fn addCases(cases: &tests.ParseHContext) {
         \\pub const SDL_INIT_VIDEO = 32;
     );
 
+    cases.add("u integer suffix after hex literal",
+        \\#define SDL_INIT_VIDEO 0x00000020u  /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+    ,
+        \\pub const SDL_INIT_VIDEO: c_uint = 32;
+    );
+
+    cases.add("l integer suffix after hex literal",
+        \\#define SDL_INIT_VIDEO 0x00000020l  /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+    ,
+        \\pub const SDL_INIT_VIDEO: c_long = 32;
+    );
+
+    cases.add("ul integer suffix after hex literal",
+        \\#define SDL_INIT_VIDEO 0x00000020ul  /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+    ,
+        \\pub const SDL_INIT_VIDEO: c_ulong = 32;
+    );
+
+    cases.add("lu integer suffix after hex literal",
+        \\#define SDL_INIT_VIDEO 0x00000020lu  /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+    ,
+        \\pub const SDL_INIT_VIDEO: c_ulong = 32;
+    );
+
+    cases.add("ll integer suffix after hex literal",
+        \\#define SDL_INIT_VIDEO 0x00000020ll  /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+    ,
+        \\pub const SDL_INIT_VIDEO: c_longlong = 32;
+    );
+
+    cases.add("ull integer suffix after hex literal",
+        \\#define SDL_INIT_VIDEO 0x00000020ull  /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+    ,
+        \\pub const SDL_INIT_VIDEO: c_ulonglong = 32;
+    );
+
+    cases.add("llu integer suffix after hex literal",
+        \\#define SDL_INIT_VIDEO 0x00000020llu  /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+    ,
+        \\pub const SDL_INIT_VIDEO: c_ulonglong = 32;
+    );
+
     cases.add("zig keywords in C code",
         \\struct comptime {
         \\    int defer;