Commit 19dbc5805c

r00ster91 <r00ster91@proton.me>
2022-10-16 17:44:31
fix(perf): remove LUT
This makes it so that we no longer use a LUT (look-up table) for the ASCII character-class functions:

* The code is much simpler and easier to understand now.
* Using a LUT means we rely on a warm cache. Relying on the cache like this results in inconsistent performance, and in many cases codegen will be worse. Also, as @topolarity once pointed out, in some cases the code looks like it may branch but actually doesn't: https://github.com/ziglang/zig/pull/11629#issuecomment-1213641429
* Other languages' standard libraries don't do this either.

Just for fun, I compared the code that other languages generate with what we generate now: https://rust.godbolt.org/z/Te4ax9Edf, https://zig.godbolt.org/z/nTbYedWKv
So we are now pretty much on par with, or better than, other languages.
1 parent 626e02a
Changed files (1)
lib
lib/std/ascii.zig
@@ -12,7 +12,7 @@ const std = @import("std");
 
 /// The C0 control codes of the ASCII encoding.
 ///
-/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`.
+/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`
 pub const control_code = struct {
     /// Null.
     pub const nul = 0x00;
@@ -88,188 +88,63 @@ pub const control_code = struct {
     pub const xoff = dc3;
 };
 
-const tIndex = enum(u3) {
-    Alpha,
-    Hex,
-    Space,
-    Digit,
-    Lower,
-    Upper,
-    // Ctrl, < 0x20 || == DEL
-    // Print, = Graph || == ' '. NOT '\t' et cetera
-    Punct,
-    Graph,
-    //ASCII, | ~0b01111111
-    //isBlank, == ' ' || == '\x09'
-};
-
-const combinedTable = init: {
-    comptime var table: [256]u8 = undefined;
-
-    const mem = std.mem;
-
-    const alpha = [_]u1{
-        //  0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
-        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
-    };
-    const lower = [_]u1{
-        //  0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
-    };
-    const upper = [_]u1{
-        //  0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    };
-    const digit = [_]u1{
-        //  0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
-
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    };
-    const hex = [_]u1{
-        //  0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
-
-        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    };
-    const space = [_]u1{
-        //  0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    };
-    const punct = [_]u1{
-        //  0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
-
-        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
-        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
-    };
-    const graph = [_]u1{
-        //  0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
-    };
-
-    comptime var i = 0;
-    inline while (i < 128) : (i += 1) {
-        table[i] =
-            @as(u8, alpha[i]) << @enumToInt(tIndex.Alpha) |
-            @as(u8, hex[i]) << @enumToInt(tIndex.Hex) |
-            @as(u8, space[i]) << @enumToInt(tIndex.Space) |
-            @as(u8, digit[i]) << @enumToInt(tIndex.Digit) |
-            @as(u8, lower[i]) << @enumToInt(tIndex.Lower) |
-            @as(u8, upper[i]) << @enumToInt(tIndex.Upper) |
-            @as(u8, punct[i]) << @enumToInt(tIndex.Punct) |
-            @as(u8, graph[i]) << @enumToInt(tIndex.Graph);
-    }
-    mem.set(u8, table[128..256], 0);
-    break :init table;
-};
-
-fn inTable(c: u8, t: tIndex) bool {
-    return (combinedTable[c] & (@as(u8, 1) << @enumToInt(t))) != 0;
-}
-
-/// Returns whether the character is alphanumeric.
+/// Returns whether the character is alphanumeric: A-Z, a-z, or 0-9.
 pub fn isAlphanumeric(c: u8) bool {
-    return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) |
-        @as(u8, 1) << @enumToInt(tIndex.Digit))) != 0;
+    return switch (c) {
+        'A'...'Z', 'a'...'z', '0'...'9' => true,
+        else => false,
+    };
 }
 
-/// Returns whether the character is alphabetic.
+/// Returns whether the character is alphabetic: A-Z or a-z.
 pub fn isAlphabetic(c: u8) bool {
-    return inTable(c, tIndex.Alpha);
+    return switch (c) {
+        'A'...'Z', 'a'...'z' => true,
+        else => false,
+    };
 }
 
 /// Returns whether the character is a control character.
-/// This is the same as `!isPrint(c)`.
 ///
-/// See also: `control_code`.
+/// See also: `control_code`
 pub fn isControl(c: u8) bool {
     return c <= control_code.us or c == control_code.del;
 }
 
 /// Returns whether the character is a digit.
 pub fn isDigit(c: u8) bool {
-    return inTable(c, tIndex.Digit);
+    return switch (c) {
+        '0'...'9' => true,
+        else => false,
+    };
 }
 
-/// Returns whether the character is a lowercased letter.
+/// Returns whether the character is a lowercase letter.
 pub fn isLower(c: u8) bool {
-    return inTable(c, tIndex.Lower);
+    return switch (c) {
+        'a'...'z' => true,
+        else => false,
+    };
 }
 
-/// Returns whether the character is printable and has some graphical representation.
-/// This also returns `true` for the space character.
-/// This is the same as `!isControl(c)`.
+/// Returns whether the character is printable and has some graphical representation,
+/// including the space character.
 pub fn isPrint(c: u8) bool {
-    return inTable(c, tIndex.Graph) or c == ' ';
+    return isASCII(c) and !isControl(c);
 }
 
 /// Returns whether this character is included in `whitespace`.
 pub fn isWhitespace(c: u8) bool {
-    return inTable(c, tIndex.Space);
+    return for (whitespace) |other| {
+        if (c == other)
+            break true;
+    } else false;
 }
 
 /// Whitespace for general use.
 /// This may be used with e.g. `std.mem.trim` to trim whitespace.
 ///
-/// See also: `isWhitespace`.
+/// See also: `isWhitespace`
 pub const whitespace = [_]u8{ ' ', '\t', '\n', '\r', control_code.vt, control_code.ff };
 
 test "whitespace" {
@@ -281,14 +156,20 @@ test "whitespace" {
     }
 }
 
-/// Returns whether the character is an uppercased letter.
+/// Returns whether the character is an uppercase letter.
 pub fn isUpper(c: u8) bool {
-    return inTable(c, tIndex.Upper);
+    return switch (c) {
+        'A'...'Z' => true,
+        else => false,
+    };
 }
 
-/// Returns whether the character is a hexadecimal digit. Case-insensitive.
+/// Returns whether the character is a hexadecimal digit: A-F, a-f, or 0-9.
 pub fn isHex(c: u8) bool {
-    return inTable(c, tIndex.Hex);
+    return switch (c) {
+        'A'...'F', 'a'...'f', '0'...'9' => true,
+        else => false,
+    };
 }
 
 /// Returns whether the character is a 7-bit ASCII character.
@@ -322,6 +203,8 @@ test "ASCII character classes" {
     try testing.expect(isControl(control_code.nul));
     try testing.expect(isControl(control_code.ff));
     try testing.expect(isControl(control_code.us));
+    try testing.expect(!isControl(0x80));
+    try testing.expect(!isControl(0xff));
 
     try testing.expect('C' == toUpper('c'));
     try testing.expect(':' == toUpper(':'));
@@ -351,6 +234,7 @@ test "ASCII character classes" {
 
     try testing.expect(!isHex('g'));
     try testing.expect(isHex('b'));
+    try testing.expect(isHex('F'));
     try testing.expect(isHex('9'));
 
     try testing.expect(!isDigit('~'));
@@ -361,6 +245,8 @@ test "ASCII character classes" {
     try testing.expect(isPrint('@'));
     try testing.expect(isPrint('~'));
     try testing.expect(!isPrint(control_code.esc));
+    try testing.expect(!isPrint(0x80));
+    try testing.expect(!isPrint(0xff));
 }
 
 /// Writes a lower case copy of `ascii_string` to `output`.