Commit `8c31eaf2a8`

Andrew Kelley <superjoe30@gmail.com>

2018-02-10 20:52:39

std zig tokenizer: don't require 3 newlines at the end of the source

master

1 parent a2bd9f8

Changed files (4)

src-self-hosted

main.zig

module.zig

std

zig

tokenizer.zig

io.zig

@@ -565,6 +565,15 @@ fn fmtMain(allocator: &mem.Allocator, file_paths: []const []const u8) !void {
         var file = try io.File.openRead(allocator, file_path);
         defer file.close();
 
+        const source_code = io.readFileAlloc(allocator, file_path) catch |err| {
+            warn("unable to open '{}': {}", file_path, err);
+            continue;
+        };
+        defer allocator.free(source_code);
+
+        var tokenizer = std.zig.Tokenizer.init(source_code);
+        var parser = std.zig.Parser.init(&tokenizer, allocator, file_path);
+        defer parser.deinit();
         warn("opened {} (todo tokenize and parse and render)\n", file_path);
     }
 }

@@ -213,14 +213,11 @@ pub const Module = struct {
         };
         errdefer self.allocator.free(root_src_real_path);
 
-        const source_code = io.readFileAllocExtra(self.allocator, root_src_real_path, 3) catch |err| {
+        const source_code = io.readFileAlloc(self.allocator, root_src_real_path) catch |err| {
             try printError("unable to open '{}': {}", root_src_real_path, err);
             return err;
         };
         errdefer self.allocator.free(source_code);
-        source_code[source_code.len - 3] = '\n';
-        source_code[source_code.len - 2] = '\n';
-        source_code[source_code.len - 1] = '\n';
 
         warn("====input:====\n");

@@ -175,12 +175,7 @@ pub const Tokenizer = struct {
         std.debug.warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
     }
 
-    /// buffer must end with "\n\n\n". This is so that attempting to decode
-    /// a the 3 trailing bytes of a 4-byte utf8 sequence is never a buffer overflow.
     pub fn init(buffer: []const u8) Tokenizer {
-        std.debug.assert(buffer[buffer.len - 1] == '\n');
-        std.debug.assert(buffer[buffer.len - 2] == '\n');
-        std.debug.assert(buffer[buffer.len - 3] == '\n');
         return Tokenizer {
             .buffer = buffer,
             .index = 0,
@@ -556,8 +551,9 @@ pub const Tokenizer = struct {
         } else {
             // check utf8-encoded character.
             const length = std.unicode.utf8ByteSequenceLength(c0) catch return 1;
-            // the last 3 bytes in the buffer are guaranteed to be '\n',
-            // which means we don't need to do any bounds checking here.
+            if (self.index + length >= self.buffer.len) {
+                return u3(self.buffer.len - self.index);
+            }
             const bytes = self.buffer[self.index..self.index + length];
             switch (length) {
                 2 => {

@@ -524,16 +524,11 @@ pub fn writeFile(allocator: &mem.Allocator, path: []const u8, data: []const u8)
 
 /// On success, caller owns returned buffer.
 pub fn readFileAlloc(allocator: &mem.Allocator, path: []const u8) ![]u8 {
-    return readFileAllocExtra(allocator, path, 0);
-}
-/// On success, caller owns returned buffer.
-/// Allocates extra_len extra bytes at the end of the file buffer, which are uninitialized.
-pub fn readFileAllocExtra(allocator: &mem.Allocator, path: []const u8, extra_len: usize) ![]u8 {
     var file = try File.openRead(allocator, path);
     defer file.close();
 
     const size = try file.getEndPos();
-    const buf = try allocator.alloc(u8, size + extra_len);
+    const buf = try allocator.alloc(u8, size);
     errdefer allocator.free(buf);
 
     var adapter = FileInStream.init(&file);

Commit 8c31eaf2a8

Commit `8c31eaf2a8`