Commit 173fc842c4

Andrew Kelley <superjoe30@gmail.com>
2018-09-10 00:07:11
basic compiler id hash working
1 parent b4d5d4d
src/buffer.hpp
@@ -78,6 +78,10 @@ static inline Buf *buf_create_from_mem(const char *ptr, size_t len) {
     return buf;
 }
 
+static inline Buf *buf_create_from_slice(Slice<uint8_t> slice) {
+    return buf_create_from_mem((const char *)slice.ptr, slice.len);
+}
+
 static inline Buf *buf_create_from_str(const char *str) {
     return buf_create_from_mem(str, strlen(str));
 }
src/cache_hash.cpp
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2018 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
+#include "cache_hash.hpp"
+#include "buffer.hpp"
+#include "os.hpp"
+
+#include <stdio.h>
+
+void cache_init(CacheHash *ch, Buf *manifest_dir) {
+    int rc = blake2b_init(&ch->blake, 48);
+    assert(rc == 0);
+    ch->files = {};
+    ch->manifest_dir = manifest_dir;
+    ch->manifest_file_path = nullptr;
+    ch->manifest_dirty = false;
+}
+
+void cache_str(CacheHash *ch, const char *ptr) {
+    assert(ch->manifest_file_path == nullptr);
+    assert(ptr != nullptr);
+    // + 1 to include the null byte
+    blake2b_update(&ch->blake, ptr, strlen(ptr) + 1);
+}
+
+void cache_int(CacheHash *ch, int x) {
+    assert(ch->manifest_file_path == nullptr);
+    // + 1 to include the null byte
+    uint8_t buf[sizeof(int) + 1];
+    memcpy(buf, &x, sizeof(int));
+    buf[sizeof(int)] = 0;
+    blake2b_update(&ch->blake, buf, sizeof(int) + 1);
+}
+
+void cache_buf(CacheHash *ch, Buf *buf) {
+    assert(ch->manifest_file_path == nullptr);
+    assert(buf != nullptr);
+    // + 1 to include the null byte
+    blake2b_update(&ch->blake, buf_ptr(buf), buf_len(buf) + 1);
+}
+
+void cache_buf_opt(CacheHash *ch, Buf *buf) {
+    assert(ch->manifest_file_path == nullptr);
+    if (buf == nullptr) {
+        cache_str(ch, "");
+        cache_str(ch, "");
+    } else {
+        cache_buf(ch, buf);
+    }
+}
+
+void cache_list_of_link_lib(CacheHash *ch, LinkLib **ptr, size_t len) {
+    assert(ch->manifest_file_path == nullptr);
+    for (size_t i = 0; i < len; i += 1) {
+        LinkLib *lib = ptr[i];
+        if (lib->provided_explicitly) {
+            cache_buf(ch, lib->name);
+        }
+    }
+    cache_str(ch, "");
+}
+
+void cache_list_of_buf(CacheHash *ch, Buf **ptr, size_t len) {
+    assert(ch->manifest_file_path == nullptr);
+    for (size_t i = 0; i < len; i += 1) {
+        Buf *buf = ptr[i];
+        cache_buf(ch, buf);
+    }
+    cache_str(ch, "");
+}
+
+void cache_file(CacheHash *ch, Buf *file_path) {
+    assert(ch->manifest_file_path == nullptr);
+    assert(file_path != nullptr);
+    Buf *resolved_path = buf_alloc();
+    *resolved_path = os_path_resolve(&file_path, 1);
+    CacheHashFile *chf = ch->files.add_one();
+    chf->path = resolved_path;
+    cache_buf(ch, resolved_path);
+}
+
+void cache_file_opt(CacheHash *ch, Buf *file_path) {
+    assert(ch->manifest_file_path == nullptr);
+    if (file_path == nullptr) {
+        cache_str(ch, "");
+        cache_str(ch, "");
+    } else {
+        cache_file(ch, file_path);
+    }
+}
+
+// Ported from std/base64.zig
+static uint8_t base64_fs_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+static void base64_encode(Slice<uint8_t> dest, Slice<uint8_t> source) {
+    size_t dest_len = ((source.len + 2) / 3) * 4;
+    assert(dest.len == dest_len);
+
+    size_t i = 0;
+    size_t out_index = 0;
+    for (; i + 2 < source.len; i += 3) {
+        dest.ptr[out_index] = base64_fs_alphabet[(source.ptr[i] >> 2) & 0x3f];
+        out_index += 1;
+
+        dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i] & 0x3) << 4) | ((source.ptr[i + 1] & 0xf0) >> 4)];
+        out_index += 1;
+
+        dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i + 1] & 0xf) << 2) | ((source.ptr[i + 2] & 0xc0) >> 6)];
+        out_index += 1;
+
+        dest.ptr[out_index] = base64_fs_alphabet[source.ptr[i + 2] & 0x3f];
+        out_index += 1;
+    }
+
+    // Assert that we never need pad characters.
+    assert(i == source.len);
+}
+
+// Ported from std/base64.zig
+static Error base64_decode(Slice<uint8_t> dest, Slice<uint8_t> source) {
+    assert(source.len % 4 == 0);
+    assert(dest.len == (source.len / 4) * 3);
+
+    // In Zig this is comptime computed. In C++ it's not worth it to do that.
+    uint8_t char_to_index[256];
+    bool char_in_alphabet[256] = {0};
+    for (size_t i = 0; i < 64; i += 1) {
+        uint8_t c = base64_fs_alphabet[i];
+        assert(!char_in_alphabet[c]);
+        char_in_alphabet[c] = true;
+        char_to_index[c] = i;
+    }
+
+    size_t src_cursor = 0;
+    size_t dest_cursor = 0;
+
+    for (;src_cursor < source.len; src_cursor += 4) {
+        if (!char_in_alphabet[source.ptr[src_cursor + 0]]) return ErrorInvalidFormat;
+        if (!char_in_alphabet[source.ptr[src_cursor + 1]]) return ErrorInvalidFormat;
+        if (!char_in_alphabet[source.ptr[src_cursor + 2]]) return ErrorInvalidFormat;
+        if (!char_in_alphabet[source.ptr[src_cursor + 3]]) return ErrorInvalidFormat;
+        dest.ptr[dest_cursor + 0] = (char_to_index[source.ptr[src_cursor + 0]] << 2) | (char_to_index[source.ptr[src_cursor + 1]] >> 4);
+        dest.ptr[dest_cursor + 1] = (char_to_index[source.ptr[src_cursor + 1]] << 4) | (char_to_index[source.ptr[src_cursor + 2]] >> 2);
+        dest.ptr[dest_cursor + 2] = (char_to_index[source.ptr[src_cursor + 2]] << 6) | (char_to_index[source.ptr[src_cursor + 3]]);
+        dest_cursor += 3;
+    }
+
+    assert(src_cursor == source.len);
+    assert(dest_cursor == dest.len);
+    return ErrorNone;
+}
+
+static Error hash_file(uint8_t *digest, OsFile handle) {
+    Error err;
+
+    blake2b_state blake;
+    int rc = blake2b_init(&blake, 48);
+    assert(rc == 0);
+
+    for (;;) {
+        uint8_t buf[4096];
+        size_t amt = 4096;
+        if ((err = os_file_read(handle, buf, &amt)))
+            return err;
+        if (amt == 0) {
+            rc = blake2b_final(&blake, digest, 48);
+            assert(rc == 0);
+            return ErrorNone;
+        }
+        blake2b_update(&blake, buf, amt);
+    }
+}
+
+static Error populate_file_hash(CacheHash *ch, CacheHashFile *chf) {
+    Error err;
+
+    assert(chf->path != nullptr);
+
+    OsFile this_file;
+    if ((err = os_file_open_r(chf->path, &this_file)))
+        return err;
+
+    if ((err = os_file_mtime(this_file, &chf->mtime))) {
+        os_file_close(this_file);
+        return err;
+    }
+
+    if ((err = hash_file(chf->bin_digest, this_file))) {
+        os_file_close(this_file);
+        return err;
+    }
+    os_file_close(this_file);
+
+    blake2b_update(&ch->blake, chf->bin_digest, 48);
+
+    return ErrorNone;
+}
+
+Error cache_hit(CacheHash *ch, Buf *out_digest) {
+    Error err;
+
+    uint8_t bin_digest[48];
+    int rc = blake2b_final(&ch->blake, bin_digest, 48);
+    assert(rc == 0);
+
+    Buf b64_digest = BUF_INIT;
+    buf_resize(&b64_digest, 64);
+    base64_encode(buf_to_slice(&b64_digest), {bin_digest, 48});
+
+    rc = blake2b_init(&ch->blake, 48);
+    assert(rc == 0);
+    blake2b_update(&ch->blake, bin_digest, 48);
+
+    ch->manifest_file_path = buf_alloc();
+    os_path_join(ch->manifest_dir, &b64_digest, ch->manifest_file_path);
+
+    buf_append_str(ch->manifest_file_path, ".txt");
+
+    if ((err = os_file_open_lock_rw(ch->manifest_file_path, &ch->manifest_file)))
+        return err;
+
+    Buf line_buf = BUF_INIT;
+    buf_resize(&line_buf, 512);
+    if ((err = os_file_read_all(ch->manifest_file, &line_buf))) {
+        os_file_close(ch->manifest_file);
+        return err;
+    }
+
+    size_t input_file_count = ch->files.length;
+    bool any_file_changed = false;
+    size_t file_i = 0;
+    SplitIterator line_it = memSplit(buf_to_slice(&line_buf), str("\n"));
+    for (;; file_i += 1) {
+        Optional<Slice<uint8_t>> opt_line = SplitIterator_next(&line_it);
+        if (!opt_line.is_some)
+            break;
+
+        CacheHashFile *chf;
+        if (file_i < input_file_count) {
+            chf = &ch->files.at(file_i);
+        } else if (any_file_changed) {
+            // cache miss.
+            // keep the the manifest file open with the rw lock
+            // reset the hash
+            rc = blake2b_init(&ch->blake, 48);
+            assert(rc == 0);
+            blake2b_update(&ch->blake, bin_digest, 48);
+            ch->files.resize(input_file_count);
+            // bring the hash up to the input file hashes
+            for (file_i = 0; file_i < input_file_count; file_i += 1) {
+                blake2b_update(&ch->blake, ch->files.at(file_i).bin_digest, 48);
+            }
+            // caller can notice that out_digest is unmodified.
+            return ErrorNone;
+        } else {
+            chf = ch->files.add_one();
+            chf->path = nullptr;
+        }
+
+        SplitIterator it = memSplit(opt_line.value, str(" "));
+
+        Optional<Slice<uint8_t>> opt_mtime_sec = SplitIterator_next(&it);
+        if (!opt_mtime_sec.is_some) {
+            os_file_close(ch->manifest_file);
+            return ErrorInvalidFormat;
+        }
+        chf->mtime.sec = strtoull((const char *)opt_mtime_sec.value.ptr, nullptr, 10);
+
+        Optional<Slice<uint8_t>> opt_mtime_nsec = SplitIterator_next(&it);
+        if (!opt_mtime_nsec.is_some) {
+            os_file_close(ch->manifest_file);
+            return ErrorInvalidFormat;
+        }
+        chf->mtime.nsec = strtoull((const char *)opt_mtime_nsec.value.ptr, nullptr, 10);
+
+        Optional<Slice<uint8_t>> opt_digest = SplitIterator_next(&it);
+        if (!opt_digest.is_some) {
+            os_file_close(ch->manifest_file);
+            return ErrorInvalidFormat;
+        }
+        if ((err = base64_decode({chf->bin_digest, 48}, opt_digest.value))) {
+            os_file_close(ch->manifest_file);
+            return ErrorInvalidFormat;
+        }
+
+        Optional<Slice<uint8_t>> opt_file_path = SplitIterator_next(&it);
+        if (!opt_file_path.is_some) {
+            os_file_close(ch->manifest_file);
+            return ErrorInvalidFormat;
+        }
+        Buf *this_path = buf_create_from_slice(opt_file_path.value);
+        if (chf->path != nullptr && !buf_eql_buf(this_path, chf->path)) {
+            os_file_close(ch->manifest_file);
+            return ErrorInvalidFormat;
+        }
+        chf->path = this_path;
+
+        // if the mtime matches we can trust the digest
+        OsFile this_file;
+        if ((err = os_file_open_r(chf->path, &this_file))) {
+            os_file_close(ch->manifest_file);
+            return err;
+        }
+        OsTimeStamp actual_mtime;
+        if ((err = os_file_mtime(this_file, &actual_mtime))) {
+            os_file_close(this_file);
+            os_file_close(ch->manifest_file);
+            return err;
+        }
+        if (chf->mtime.sec == actual_mtime.sec && chf->mtime.nsec == actual_mtime.nsec) {
+            os_file_close(this_file);
+        } else {
+            // we have to recompute the digest.
+            // later we'll rewrite the manifest with the new mtime/digest values
+            ch->manifest_dirty = true;
+            chf->mtime = actual_mtime;
+
+            uint8_t actual_digest[48];
+            if ((err = hash_file(actual_digest, this_file))) {
+                os_file_close(this_file);
+                os_file_close(ch->manifest_file);
+                return err;
+            }
+            os_file_close(this_file);
+            if (memcmp(chf->bin_digest, actual_digest, 48) != 0) {
+                memcpy(chf->bin_digest, actual_digest, 48);
+                // keep going until we have the input file digests
+                any_file_changed = true;
+            }
+        }
+        if (!any_file_changed) {
+            blake2b_update(&ch->blake, chf->bin_digest, 48);
+        }
+    }
+    if (file_i < input_file_count) {
+        // manifest file is empty or missing entries, so this is a cache miss
+        ch->manifest_dirty = true;
+        for (; file_i < input_file_count; file_i += 1) {
+            CacheHashFile *chf = &ch->files.at(file_i);
+            if ((err = populate_file_hash(ch, chf))) {
+                os_file_close(ch->manifest_file);
+                return err;
+            }
+        }
+        return ErrorNone;
+    }
+    // Cache Hit
+    return cache_final(ch, out_digest);
+}
+
+Error cache_add_file(CacheHash *ch, Buf *path) {
+    Error err;
+
+    assert(ch->manifest_file_path != nullptr);
+    CacheHashFile *chf = ch->files.add_one();
+    chf->path = path;
+    if ((err = populate_file_hash(ch, chf))) {
+        os_file_close(ch->manifest_file);
+        return err;
+    }
+
+    return ErrorNone;
+}
+
+static Error write_manifest_file(CacheHash *ch) {
+    Error err;
+    Buf contents = BUF_INIT;
+    buf_resize(&contents, 0);
+    uint8_t encoded_digest[65];
+    encoded_digest[64] = 0;
+    for (size_t i = 0; i < ch->files.length; i += 1) {
+        CacheHashFile *chf = &ch->files.at(i);
+        base64_encode({encoded_digest, 64}, {chf->bin_digest, 48});
+        buf_appendf(&contents, "%" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %s %s\n",
+            chf->mtime.sec, chf->mtime.nsec, encoded_digest, buf_ptr(chf->path));
+    }
+    fprintf(stderr, "overwrite with\n%s\n", buf_ptr(&contents));
+    if ((err = os_file_overwrite(ch->manifest_file, &contents)))
+        return err;
+
+    return ErrorNone;
+}
+
+Error cache_final(CacheHash *ch, Buf *out_digest) {
+    Error err;
+
+    assert(ch->manifest_file_path != nullptr);
+
+    if (ch->manifest_dirty) {
+        if ((err = write_manifest_file(ch))) {
+            fprintf(stderr, "Warning: Unable to write cache file '%s': %s\n",
+                    buf_ptr(ch->manifest_file_path), err_str(err));
+        }
+    }
+    os_file_close(ch->manifest_file);
+
+    uint8_t bin_digest[48];
+    int rc = blake2b_final(&ch->blake, bin_digest, 48);
+    assert(rc == 0);
+    buf_resize(out_digest, 64);
+    base64_encode(buf_to_slice(out_digest), {bin_digest, 48});
+
+    return ErrorNone;
+}
src/cache_hash.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
+#ifndef ZIG_CACHE_HASH_HPP
+#define ZIG_CACHE_HASH_HPP
+
+#include "all_types.hpp"
+#include "blake2.h"
+#include "os.hpp"
+
+struct CacheHashFile {
+    Buf *path;
+    OsTimeStamp mtime;
+    uint8_t bin_digest[48];
+};
+
+struct CacheHash {
+    blake2b_state blake;
+    ZigList<CacheHashFile> files;
+    Buf *manifest_dir;
+    Buf *manifest_file_path;
+    OsFile manifest_file;
+    bool manifest_dirty;
+};
+
+// Always call this first to set up.
+void cache_init(CacheHash *ch, Buf *manifest_dir);
+
+// Next, use the hash population functions to add the initial parameters.
+void cache_str(CacheHash *ch, const char *ptr);
+void cache_int(CacheHash *ch, int x);
+void cache_buf(CacheHash *ch, Buf *buf);
+void cache_buf_opt(CacheHash *ch, Buf *buf);
+void cache_list_of_link_lib(CacheHash *ch, LinkLib **ptr, size_t len);
+void cache_list_of_buf(CacheHash *ch, Buf **ptr, size_t len);
+void cache_file(CacheHash *ch, Buf *path);
+void cache_file_opt(CacheHash *ch, Buf *path);
+
+// Then call cache_hit when you're ready to see if you can skip the next step.
+// out_b64_digest will be left unchanged if it was a cache miss
+Error ATTRIBUTE_MUST_USE cache_hit(CacheHash *ch, Buf *out_b64_digest);
+
+// If you got a cache hit, the flow is done. No more functions to call.
+// Next call this function for every file that is depended on.
+Error ATTRIBUTE_MUST_USE cache_add_file(CacheHash *ch, Buf *path);
+
+// If you did not get a cache hit, use the hash population functions again
+// and do all the actual work. When done use cache_final to save the cache
+// for next time.
+Error ATTRIBUTE_MUST_USE cache_final(CacheHash *ch, Buf *out_digest);
+
+#endif
src/codegen.cpp
@@ -19,7 +19,6 @@
 #include "target.hpp"
 #include "util.hpp"
 #include "zig_llvm.h"
-#include "blake2.h"
 
 #include <stdio.h>
 #include <errno.h>
@@ -7675,130 +7674,45 @@ void codegen_add_time_event(CodeGen *g, const char *name) {
     g->timing_events.append({os_get_time(), name});
 }
 
-static void add_cache_str(blake2b_state *blake, const char *ptr) {
-    assert(ptr != nullptr);
-    // + 1 to include the null byte
-    blake2b_update(blake, ptr, strlen(ptr) + 1);
-}
-
-static void add_cache_int(blake2b_state *blake, int x) {
-    // + 1 to include the null byte
-    uint8_t buf[sizeof(int) + 1];
-    memcpy(buf, &x, sizeof(int));
-    buf[sizeof(int)] = 0;
-    blake2b_update(blake, buf, sizeof(int) + 1);
-}
 
-static void add_cache_buf(blake2b_state *blake, Buf *buf) {
-    assert(buf != nullptr);
-    // + 1 to include the null byte
-    blake2b_update(blake, buf_ptr(buf), buf_len(buf) + 1);
-}
-
-static void add_cache_buf_opt(blake2b_state *blake, Buf *buf) {
-    if (buf == nullptr) {
-        add_cache_str(blake, "");
-        add_cache_str(blake, "");
-    } else {
-        add_cache_buf(blake, buf);
-    }
-}
-
-static void add_cache_list_of_link_lib(blake2b_state *blake, LinkLib **ptr, size_t len) {
-    for (size_t i = 0; i < len; i += 1) {
-        LinkLib *lib = ptr[i];
-        if (lib->provided_explicitly) {
-            add_cache_buf(blake, lib->name);
-        }
-    }
-    add_cache_str(blake, "");
-}
-
-static void add_cache_list_of_buf(blake2b_state *blake, Buf **ptr, size_t len) {
-    for (size_t i = 0; i < len; i += 1) {
-        Buf *buf = ptr[i];
-        add_cache_buf(blake, buf);
-    }
-    add_cache_str(blake, "");
-}
-
-//static void add_cache_file(CodeGen *g, blake2b_state *blake, Buf *resolved_path) {
-//    assert(file_name != nullptr);
-//    g->cache_files.append(resolved_path);
-//}
+//// Called before init()
+//static bool build_with_cache(CodeGen *g) {
+//    // TODO zig exe & dynamic libraries
+//    // should be in main.cpp I think. only needs to happen
+//    // once on startup.
+//
+//    CacheHash comp;
+//    cache_init(&comp);
+//
+//    add_cache_buf(&blake, g->root_out_name);
+//    add_cache_buf_opt(&blake, get_resolved_root_src_path(g)); // Root source file
+//    add_cache_list_of_link_lib(&blake, g->link_libs_list.items, g->link_libs_list.length);
+//    add_cache_list_of_buf(&blake, g->darwin_frameworks.items, g->darwin_frameworks.length);
+//    add_cache_list_of_buf(&blake, g->rpath_list.items, g->rpath_list.length);
+//    add_cache_int(&blake, g->emit_file_type);
+//    add_cache_int(&blake, g->build_mode);
+//    add_cache_int(&blake, g->out_type);
+//    // TODO the rest of the struct CodeGen fields
+//
+//    uint8_t bin_digest[48];
+//    rc = blake2b_final(&blake, bin_digest, 48);
+//    assert(rc == 0);
+//
+//    Buf b64_digest = BUF_INIT;
+//    buf_resize(&b64_digest, 64);
+//    base64_encode(buf_to_slice(&b64_digest), {bin_digest, 48});
 //
-//static void add_cache_file_opt(CodeGen *g, blake2b_state *blake, Buf *resolved_path) {
-//    if (resolved_path == nullptr) {
-//        add_cache_str(blake, "");
-//        add_cache_str(blake, "");
-//    } else {
-//        add_cache_file(g, blake, resolved_path);
-//    }
+//    fprintf(stderr, "input params hash: %s\n", buf_ptr(&b64_digest));
+//    // TODO next look for a manifest file that has all the files from the input parameters
+//    // use that to construct the real hash, which looks up the output directory and another manifest file
+//
+//    return false;
 //}
 
-// Ported from std/base64.zig
-static uint8_t base64_fs_alphabet[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_";
-static void base64_encode(Slice<uint8_t> dest, Slice<uint8_t> source) {
-    size_t dest_len = ((source.len + 2) / 3) * 4;
-    assert(dest.len == dest_len);
-
-    size_t i = 0;
-    size_t out_index = 0;
-    for (; i + 2 < source.len; i += 3) {
-        dest.ptr[out_index] = base64_fs_alphabet[(source.ptr[i] >> 2) & 0x3f];
-        out_index += 1;
-
-        dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i] & 0x3) << 4) | ((source.ptr[i + 1] & 0xf0) >> 4)];
-        out_index += 1;
-
-        dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i + 1] & 0xf) << 2) | ((source.ptr[i + 2] & 0xc0) >> 6)];
-        out_index += 1;
-
-        dest.ptr[out_index] = base64_fs_alphabet[source.ptr[i + 2] & 0x3f];
-        out_index += 1;
-    }
-
-    // Assert that we never need pad characters.
-    assert(i == source.len);
-}
-
-// Called before init()
-static bool build_with_cache(CodeGen *g) {
-    blake2b_state blake;
-    int rc = blake2b_init(&blake, 48);
-    assert(rc == 0);
-
-    // TODO zig exe & dynamic libraries
-
-    add_cache_buf(&blake, g->root_out_name);
-    add_cache_buf_opt(&blake, get_resolved_root_src_path(g)); // Root source file
-    add_cache_list_of_link_lib(&blake, g->link_libs_list.items, g->link_libs_list.length);
-    add_cache_list_of_buf(&blake, g->darwin_frameworks.items, g->darwin_frameworks.length);
-    add_cache_list_of_buf(&blake, g->rpath_list.items, g->rpath_list.length);
-    add_cache_int(&blake, g->emit_file_type);
-    add_cache_int(&blake, g->build_mode);
-    add_cache_int(&blake, g->out_type);
-    // TODO the rest of the struct CodeGen fields
-
-    uint8_t bin_digest[48];
-    rc = blake2b_final(&blake, bin_digest, 48);
-    assert(rc == 0);
-
-    Buf b64_digest = BUF_INIT;
-    buf_resize(&b64_digest, 64);
-    base64_encode(buf_to_slice(&b64_digest), {bin_digest, 48});
-
-    fprintf(stderr, "input params hash: %s\n", buf_ptr(&b64_digest));
-    // TODO next look for a manifest file that has all the files from the input parameters
-    // use that to construct the real hash, which looks up the output directory and another manifest file
-
-    return false;
-}
-
 void codegen_build(CodeGen *g) {
     assert(g->out_type != OutTypeUnknown);
-    if (build_with_cache(g))
-        return;
+    //if (build_with_cache(g))
+    //    return;
     init(g);
 
     codegen_add_time_event(g, "Semantic Analysis");
src/error.cpp
@@ -27,6 +27,8 @@ const char *err_str(int err) {
         case ErrorNegativeDenominator: return "negative denominator";
         case ErrorShiftedOutOneBits: return "exact shift shifted out one bits";
         case ErrorCCompileErrors: return "C compile errors";
+        case ErrorEndOfFile: return "end of file";
+        case ErrorIsDir: return "is directory";
     }
     return "(invalid error)";
 }
src/error.hpp
@@ -27,6 +27,8 @@ enum Error {
     ErrorNegativeDenominator,
     ErrorShiftedOutOneBits,
     ErrorCCompileErrors,
+    ErrorEndOfFile,
+    ErrorIsDir,
 };
 
 const char *err_str(int err);
src/main.cpp
@@ -13,6 +13,7 @@
 #include "link.hpp"
 #include "os.hpp"
 #include "target.hpp"
+#include "cache_hash.hpp"
 
 #include <stdio.h>
 
@@ -270,6 +271,66 @@ int main(int argc, char **argv) {
         return 0;
     }
 
+    if (argc == 2 && strcmp(argv[1], "TEST") == 0) {
+        Error err;
+        Buf app_data_dir = BUF_INIT;
+        if ((err = os_get_app_data_dir(&app_data_dir, "zig"))) {
+            fprintf(stderr, "get app dir: %s\n", err_str(err));
+            return 1;
+        }
+        Buf *stage1_dir = buf_alloc();
+        os_path_join(&app_data_dir, buf_create_from_str("stage1"), stage1_dir);
+        Buf *manifest_dir = buf_alloc();
+        os_path_join(stage1_dir, buf_create_from_str("exe"), manifest_dir);
+
+        if ((err = os_make_path(manifest_dir))) {
+            fprintf(stderr, "make path: %s\n", err_str(err));
+            return 1;
+        }
+        CacheHash cache_hash;
+        CacheHash *ch = &cache_hash;
+        cache_init(ch, manifest_dir);
+        Buf self_exe_path = BUF_INIT;
+        if ((err = os_self_exe_path(&self_exe_path))) {
+            fprintf(stderr, "self exe path: %s\n", err_str(err));
+            return 1;
+        }
+
+        cache_file(ch, &self_exe_path);
+
+        Buf exe_digest = BUF_INIT;
+        buf_resize(&exe_digest, 0);
+        if ((err = cache_hit(ch, &exe_digest))) {
+            fprintf(stderr, "cache hit error: %s\n", err_str(err));
+            return 1;
+        }
+        if (buf_len(&exe_digest) != 0) {
+            fprintf(stderr, "cache hit: %s\n", buf_ptr(&exe_digest));
+            return 0;
+        }
+        fprintf(stderr, "cache miss\n");
+        ZigList<Buf *> lib_paths = {};
+        if ((err = os_self_exe_shared_libs(lib_paths))) {
+            fprintf(stderr, "finding out shared libs: %s\n", err_str(err));
+            return 1;
+        }
+        for (size_t i = 0; i < lib_paths.length; i += 1) {
+            Buf *lib_path = lib_paths.at(i);
+            if ((err = cache_add_file(ch, lib_path))) {
+                fprintf(stderr, "cache add file %s: %s", buf_ptr(lib_path), err_str(err));
+                return 1;
+            }
+        }
+        if ((err = cache_final(ch, &exe_digest))) {
+            fprintf(stderr, "final: %s\n", err_str(err));
+            return 1;
+        }
+
+
+        fprintf(stderr, "computed2: %s\n", buf_ptr(&exe_digest));
+        return 0;
+    }
+
     os_init();
 
     char *arg0 = argv[0];
src/os.cpp
@@ -40,6 +40,10 @@ typedef SSIZE_T ssize_t;
 
 #endif
 
+#if defined(ZIG_OS_LINUX)
+#include <link.h>
+#endif
+
 
 #if defined(__MACH__)
 #include <mach/clock.h>
@@ -57,54 +61,6 @@ static clock_serv_t cclock;
 #include <errno.h>
 #include <time.h>
 
-// Ported from std/mem.zig.
-// Coordinate struct fields with memSplit function
-struct SplitIterator {
-    size_t index;
-    Slice<uint8_t> buffer;
-    Slice<uint8_t> split_bytes;
-};
-
-// Ported from std/mem.zig.
-static bool SplitIterator_isSplitByte(SplitIterator *self, uint8_t byte) {
-    for (size_t i = 0; i < self->split_bytes.len; i += 1) {
-        if (byte == self->split_bytes.ptr[i]) {
-            return true;
-        }
-    }
-    return false;
-}
-
-// Ported from std/mem.zig.
-static Optional<Slice<uint8_t>> SplitIterator_next(SplitIterator *self) {
-    // move to beginning of token
-    while (self->index < self->buffer.len &&
-        SplitIterator_isSplitByte(self, self->buffer.ptr[self->index]))
-    {
-        self->index += 1;
-    }
-    size_t start = self->index;
-    if (start == self->buffer.len) {
-        return {};
-    }
-
-    // move to end of token
-    while (self->index < self->buffer.len &&
-        !SplitIterator_isSplitByte(self, self->buffer.ptr[self->index]))
-    {
-        self->index += 1;
-    }
-    size_t end = self->index;
-
-    return Optional<Slice<uint8_t>>::some(self->buffer.slice(start, end));
-}
-
-// Ported from std/mem.zig
-static SplitIterator memSplit(Slice<uint8_t> buffer, Slice<uint8_t> split_bytes) {
-    return SplitIterator{0, buffer, split_bytes};
-}
-
-
 #if defined(ZIG_OS_POSIX)
 static void populate_termination(Termination *term, int status) {
     if (WIFEXITED(status)) {
@@ -1368,16 +1324,16 @@ double os_get_time(void) {
 #endif
 }
 
-int os_make_path(Buf *path) {
+Error os_make_path(Buf *path) {
     Buf resolved_path = os_path_resolve(&path, 1);
 
     size_t end_index = buf_len(&resolved_path);
-    int err;
+    Error err;
     while (true) {
         if ((err = os_make_dir(buf_slice(&resolved_path, 0, end_index)))) {
             if (err == ErrorPathAlreadyExists) {
                 if (end_index == buf_len(&resolved_path))
-                    return 0;
+                    return ErrorNone;
             } else if (err == ErrorFileNotFound) {
                 // march end_index backward until next path component
                 while (true) {
@@ -1391,7 +1347,7 @@ int os_make_path(Buf *path) {
             }
         }
         if (end_index == buf_len(&resolved_path))
-            return 0;
+            return ErrorNone;
         // march end_index forward until next path component
         while (true) {
             end_index += 1;
@@ -1399,10 +1355,10 @@ int os_make_path(Buf *path) {
                 break;
         }
     }
-    return 0;
+    return ErrorNone;
 }
 
-int os_make_dir(Buf *path) {
+Error os_make_dir(Buf *path) {
 #if defined(ZIG_OS_WINDOWS)
     if (!CreateDirectory(buf_ptr(path), NULL)) {
         if (GetLastError() == ERROR_ALREADY_EXISTS)
@@ -1413,7 +1369,7 @@ int os_make_dir(Buf *path) {
             return ErrorAccess;
         return ErrorUnexpected;
     }
-    return 0;
+    return ErrorNone;
 #else
     if (mkdir(buf_ptr(path), 0755) == -1) {
         if (errno == EEXIST)
@@ -1424,7 +1380,7 @@ int os_make_dir(Buf *path) {
             return ErrorAccess;
         return ErrorUnexpected;
     }
-    return 0;
+    return ErrorNone;
 #endif
 }
 
@@ -1447,7 +1403,7 @@ int os_init(void) {
     return 0;
 }
 
-int os_self_exe_path(Buf *out_path) {
+Error os_self_exe_path(Buf *out_path) {
 #if defined(ZIG_OS_WINDOWS)
     buf_resize(out_path, 256);
     for (;;) {
@@ -1480,27 +1436,21 @@ int os_self_exe_path(Buf *out_path) {
     char *real_path = realpath(buf_ptr(tmp), buf_ptr(out_path));
     if (!real_path) {
         buf_init_from_buf(out_path, tmp);
-        return 0;
+        return ErrorNone;
     }
 
     // Resize out_path for the correct length.
     buf_resize(out_path, strlen(buf_ptr(out_path)));
 
-    return 0;
+    return ErrorNone;
 #elif defined(ZIG_OS_LINUX)
-    buf_resize(out_path, 256);
-    for (;;) {
-        ssize_t amt = readlink("/proc/self/exe", buf_ptr(out_path), buf_len(out_path));
-        if (amt == -1) {
-            return ErrorUnexpected;
-        }
-        if (amt == (ssize_t)buf_len(out_path)) {
-            buf_resize(out_path, buf_len(out_path) * 2);
-            continue;
-        }
-        buf_resize(out_path, amt);
-        return 0;
+    buf_resize(out_path, PATH_MAX);
+    ssize_t amt = readlink("/proc/self/exe", buf_ptr(out_path), buf_len(out_path));
+    if (amt == -1) {
+        return ErrorUnexpected;
     }
+    buf_resize(out_path, amt);
+    return ErrorNone;
 #endif
     return ErrorFileNotFound;
 }
@@ -1685,3 +1635,225 @@ int os_get_win32_kern32_path(ZigWindowsSDK *sdk, Buf* output_buf, ZigLLVM_ArchTy
     return ErrorFileNotFound;
 #endif
 }
+
+// Ported from std/os/get_app_data_dir.zig
+Error os_get_app_data_dir(Buf *out_path, const char *appname) {
+#if defined(ZIG_OS_WINDOWS)
+#error "Unimplemented"
+#elif defined(ZIG_OS_DARWIN)
+    const char *home_dir = getenv("HOME");
+    if (home_dir == nullptr) {
+        // TODO use /etc/passwd
+        return ErrorFileNotFound;
+    }
+    buf_resize(out_path, 0);
+    buf_appendf(out_path, "%s/Library/Application Support/%s", home_dir, appname);
+    return ErrorNone;
+#elif defined(ZIG_OS_LINUX)
+    const char *home_dir = getenv("HOME");
+    if (home_dir == nullptr) {
+        // TODO use /etc/passwd
+        return ErrorFileNotFound;
+    }
+    buf_resize(out_path, 0);
+    buf_appendf(out_path, "%s/.local/share/%s", home_dir, appname);
+    return ErrorNone;
+#endif
+}
+
+
+#if defined(ZIG_OS_LINUX)
+static int self_exe_shared_libs_callback(struct dl_phdr_info *info, size_t size, void *data) {
+    ZigList<Buf *> *libs = reinterpret_cast< ZigList<Buf *> *>(data);
+    if (info->dlpi_name[0] == '/') {
+        libs->append(buf_create_from_str(info->dlpi_name));
+    }
+    return 0;
+}
+#endif
+
+Error os_self_exe_shared_libs(ZigList<Buf *> &paths) {
+#if defined(ZIG_OS_LINUX)
+    paths.resize(0);
+    dl_iterate_phdr(self_exe_shared_libs_callback, &paths);
+    return ErrorNone;
+#else
+#error "unimplemented"
+#endif
+}
+
+Error os_file_open_r(Buf *full_path, OsFile *out_file) {
+#if defined(ZIG_OS_WINDOWS)
+#error "unimplemented"
+#else
+    for (;;) {
+        int fd = open(buf_ptr(full_path), O_RDONLY|O_CLOEXEC);
+        if (fd == -1) {
+            switch (errno) {
+                case EINTR:
+                    continue;
+                case EINVAL:
+                    zig_unreachable();
+                case EFAULT:
+                    zig_unreachable();
+                case EACCES:
+                    return ErrorAccess;
+                case EISDIR:
+                    return ErrorIsDir;
+                case ENOENT:
+                    return ErrorFileNotFound;
+                default:
+                    return ErrorFileSystem;
+            }
+        }
+        *out_file = fd;
+        return ErrorNone;
+    }
+#endif
+}
+
+Error os_file_open_lock_rw(Buf *full_path, OsFile *out_file) {
+#if defined(ZIG_OS_WINDOWS)
+#error "unimplemented"
+#else
+    int fd;
+    for (;;) {
+        fd = open(buf_ptr(full_path), O_RDWR|O_CLOEXEC|O_CREAT, 0666);
+        if (fd == -1) {
+            switch (errno) {
+                case EINTR:
+                    continue;
+                case EINVAL:
+                    zig_unreachable();
+                case EFAULT:
+                    zig_unreachable();
+                case EACCES:
+                    return ErrorAccess;
+                case EISDIR:
+                    return ErrorIsDir;
+                case ENOENT:
+                    return ErrorFileNotFound;
+                default:
+                    return ErrorFileSystem;
+            }
+        }
+        break;
+    }
+    for (;;) {
+        struct flock lock;
+        lock.l_type = F_WRLCK;
+        lock.l_whence = SEEK_SET;
+        lock.l_start = 0;
+        lock.l_len = 0;
+        if (fcntl(fd, F_SETLKW, &lock) == -1) {
+            switch (errno) {
+                case EINTR:
+                    continue;
+                case EBADF:
+                    zig_unreachable();
+                case EFAULT:
+                    zig_unreachable();
+                case EINVAL:
+                    zig_unreachable();
+                default:
+                    close(fd);
+                    return ErrorFileSystem;
+            }
+        }
+        break;
+    }
+    *out_file = fd;
+    return ErrorNone;
+#endif
+}
+
+Error os_file_mtime(OsFile file, OsTimeStamp *mtime) {
+#if defined(ZIG_OS_WINDOWS)
+#error unimplemented
+#else
+    struct stat statbuf;
+    if (fstat(file, &statbuf) == -1)
+        return ErrorFileSystem;
+
+    mtime->sec = statbuf.st_mtim.tv_sec;
+    mtime->nsec = statbuf.st_mtim.tv_nsec;
+    return ErrorNone;
+#endif
+}
+
+Error os_file_read(OsFile file, void *ptr, size_t *len) {
+#if defined(ZIG_OS_WINDOWS)
+#error unimplemented
+#else
+    for (;;) {
+        ssize_t rc = read(file, ptr, *len);
+        if (rc == -1) {
+            switch (errno) {
+                case EINTR:
+                    continue;
+                case EBADF:
+                    zig_unreachable();
+                case EFAULT:
+                    zig_unreachable();
+                case EISDIR:
+                    zig_unreachable();
+                default:
+                    return ErrorFileSystem;
+            }
+        }
+        *len = rc;
+        return ErrorNone;
+    }
+#endif
+}
+
+Error os_file_read_all(OsFile file, Buf *contents) {
+    Error err;
+    size_t index = 0;
+    for (;;) {
+        size_t amt = buf_len(contents) - index;
+
+        if (amt < 512) {
+            buf_resize(contents, buf_len(contents) + 512);
+            amt += 512;
+        }
+
+        if ((err = os_file_read(file, buf_ptr(contents) + index, &amt)))
+            return err;
+
+        if (amt == 0) {
+            buf_resize(contents, index);
+            return ErrorNone;
+        }
+
+        index += amt;
+    }
+}
+
+Error os_file_overwrite(OsFile file, Buf *contents) {
+#if defined(ZIG_OS_WINDOWS)
+#error unimplemented
+#else
+    if (lseek(file, 0, SEEK_SET) == -1)
+        return ErrorFileSystem;
+    for (;;) {
+        if (write(file, buf_ptr(contents), buf_len(contents)) == -1) {
+            switch (errno) {
+                case EINTR:
+                    continue;
+                case EINVAL:
+                    zig_unreachable();
+                case EBADF:
+                    zig_unreachable();
+                default:
+                    return ErrorFileSystem;
+            }
+        }
+        return ErrorNone;
+    }
+#endif
+}
+
+void os_file_close(OsFile file) {
+    close(file);
+}
src/os.hpp
@@ -13,10 +13,43 @@
 #include "error.hpp"
 #include "zig_llvm.h"
 #include "windows_sdk.h"
+#include "result.hpp"
 
 #include <stdio.h>
 #include <inttypes.h>
 
+#if defined(__APPLE__)
+#define ZIG_OS_DARWIN
+#elif defined(_WIN32)
+#define ZIG_OS_WINDOWS
+#elif defined(__linux__)
+#define ZIG_OS_LINUX
+#else
+#define ZIG_OS_UNKNOWN
+#endif
+
+#if defined(__x86_64__)
+#define ZIG_ARCH_X86_64
+#else
+#define ZIG_ARCH_UNKNOWN
+#endif
+
+#if defined(ZIG_OS_WINDOWS)
+#define ZIG_PRI_usize "I64u"
+#define ZIG_PRI_u64 "I64u"
+#define ZIG_PRI_llu "I64u"
+#define ZIG_PRI_x64 "I64x"
+#define OS_SEP "\\"
+#define ZIG_OS_SEP_CHAR '\\'
+#else
+#define ZIG_PRI_usize "zu"
+#define ZIG_PRI_u64 PRIu64
+#define ZIG_PRI_llu "llu"
+#define ZIG_PRI_x64 PRIx64
+#define OS_SEP "/"
+#define ZIG_OS_SEP_CHAR '/'
+#endif
+
 enum TermColor {
     TermColorRed,
     TermColorGreen,
@@ -38,6 +71,17 @@ struct Termination {
     int code;
 };
 
+#if defined(ZIG_OS_WINDOWS)
+#define OsFile (void *)
+#else
+#define OsFile int
+#endif
+
+struct OsTimeStamp {
+    uint64_t sec;
+    uint64_t nsec;
+};
+
 int os_init(void);
 
 void os_spawn_process(const char *exe, ZigList<const char *> &args, Termination *term);
@@ -54,8 +98,16 @@ bool os_path_is_absolute(Buf *path);
 
 int os_get_global_cache_directory(Buf *out_tmp_path);
 
-int os_make_path(Buf *path);
-int os_make_dir(Buf *path);
+Error ATTRIBUTE_MUST_USE os_make_path(Buf *path);
+Error ATTRIBUTE_MUST_USE os_make_dir(Buf *path);
+
+Error ATTRIBUTE_MUST_USE os_file_open_r(Buf *full_path, OsFile *out_file);
+Error ATTRIBUTE_MUST_USE os_file_open_lock_rw(Buf *full_path, OsFile *out_file);
+Error ATTRIBUTE_MUST_USE os_file_mtime(OsFile file, OsTimeStamp *mtime);
+Error ATTRIBUTE_MUST_USE os_file_read(OsFile file, void *ptr, size_t *len);
+Error ATTRIBUTE_MUST_USE os_file_read_all(OsFile file, Buf *contents);
+Error ATTRIBUTE_MUST_USE os_file_overwrite(OsFile file, Buf *contents);
+void os_file_close(OsFile file);
 
 void os_write_file(Buf *full_path, Buf *contents);
 int os_copy_file(Buf *src_path, Buf *dest_path);
@@ -78,42 +130,14 @@ double os_get_time(void);
 
 bool os_is_sep(uint8_t c);
 
-int os_self_exe_path(Buf *out_path);
+Error ATTRIBUTE_MUST_USE os_self_exe_path(Buf *out_path);
+
+Error ATTRIBUTE_MUST_USE os_get_app_data_dir(Buf *out_path, const char *appname);
 
 int os_get_win32_ucrt_include_path(ZigWindowsSDK *sdk, Buf *output_buf);
 int os_get_win32_ucrt_lib_path(ZigWindowsSDK *sdk, Buf *output_buf, ZigLLVM_ArchType platform_type);
 int os_get_win32_kern32_path(ZigWindowsSDK *sdk, Buf *output_buf, ZigLLVM_ArchType platform_type);
 
-#if defined(__APPLE__)
-#define ZIG_OS_DARWIN
-#elif defined(_WIN32)
-#define ZIG_OS_WINDOWS
-#elif defined(__linux__)
-#define ZIG_OS_LINUX
-#else
-#define ZIG_OS_UNKNOWN
-#endif
-
-#if defined(__x86_64__)
-#define ZIG_ARCH_X86_64
-#else
-#define ZIG_ARCH_UNKNOWN
-#endif
-
-#if defined(ZIG_OS_WINDOWS)
-#define ZIG_PRI_usize "I64u"
-#define ZIG_PRI_u64 "I64u"
-#define ZIG_PRI_llu "I64u"
-#define ZIG_PRI_x64 "I64x"
-#define OS_SEP "\\"
-#define ZIG_OS_SEP_CHAR '\\'
-#else
-#define ZIG_PRI_usize "zu"
-#define ZIG_PRI_u64 PRIu64
-#define ZIG_PRI_llu "llu"
-#define ZIG_PRI_x64 PRIx64
-#define OS_SEP "/"
-#define ZIG_OS_SEP_CHAR '/'
-#endif
+Error ATTRIBUTE_MUST_USE os_self_exe_shared_libs(ZigList<Buf *> &paths);
 
 #endif
src/util.cpp
@@ -43,3 +43,42 @@ uint32_t ptr_hash(const void *ptr) {
 bool ptr_eq(const void *a, const void *b) {
     return a == b;
 }
+
+// Ported from std/mem.zig.
+bool SplitIterator_isSplitByte(SplitIterator *self, uint8_t byte) {
+    for (size_t i = 0; i < self->split_bytes.len; i += 1) {
+        if (byte == self->split_bytes.ptr[i]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Ported from std/mem.zig.
+Optional<Slice<uint8_t>> SplitIterator_next(SplitIterator *self) {
+    // move to beginning of token
+    while (self->index < self->buffer.len &&
+        SplitIterator_isSplitByte(self, self->buffer.ptr[self->index]))
+    {
+        self->index += 1;
+    }
+    size_t start = self->index;
+    if (start == self->buffer.len) {
+        return {};
+    }
+
+    // move to end of token
+    while (self->index < self->buffer.len &&
+        !SplitIterator_isSplitByte(self, self->buffer.ptr[self->index]))
+    {
+        self->index += 1;
+    }
+    size_t end = self->index;
+
+    return Optional<Slice<uint8_t>>::some(self->buffer.slice(start, end));
+}
+
+// Ported from std/mem.zig
+SplitIterator memSplit(Slice<uint8_t> buffer, Slice<uint8_t> split_bytes) {
+    return SplitIterator{0, buffer, split_bytes};
+}
src/util.hpp
@@ -254,4 +254,16 @@ static inline void memCopy(Slice<T> dest, Slice<T> src) {
     memcpy(dest.ptr, src.ptr, src.len * sizeof(T));
 }
 
+// Ported from std/mem.zig.
+// Coordinate struct fields with memSplit function
+struct SplitIterator {
+    size_t index;
+    Slice<uint8_t> buffer;
+    Slice<uint8_t> split_bytes;
+};
+
+bool SplitIterator_isSplitByte(SplitIterator *self, uint8_t byte);
+Optional<Slice<uint8_t>> SplitIterator_next(SplitIterator *self);
+SplitIterator memSplit(Slice<uint8_t> buffer, Slice<uint8_t> split_bytes);
+
 #endif
CMakeLists.txt
@@ -409,6 +409,7 @@ set(ZIG_SOURCES
     "${CMAKE_SOURCE_DIR}/src/bigint.cpp"
     "${CMAKE_SOURCE_DIR}/src/blake2b.cpp"
     "${CMAKE_SOURCE_DIR}/src/buffer.cpp"
+    "${CMAKE_SOURCE_DIR}/src/cache_hash.cpp"
     "${CMAKE_SOURCE_DIR}/src/c_tokenizer.cpp"
     "${CMAKE_SOURCE_DIR}/src/codegen.cpp"
     "${CMAKE_SOURCE_DIR}/src/errmsg.cpp"