Commit 3a6f19de48

Andrew Kelley <andrew@ziglang.org>
2019-03-11 15:26:08
stage1 caching system: detect problematic mtimes
closes #2045
1 parent fec4555
src/cache_hash.cpp
@@ -158,8 +158,10 @@ static void base64_encode(Slice<uint8_t> dest, Slice<uint8_t> source) {
 
 // Ported from std/base64.zig
 static Error base64_decode(Slice<uint8_t> dest, Slice<uint8_t> source) {
-    assert(source.len % 4 == 0);
-    assert(dest.len == (source.len / 4) * 3);
+    if (source.len % 4 != 0)
+        return ErrorInvalidFormat;
+    if (dest.len != (source.len / 4) * 3)
+        return ErrorInvalidFormat;
 
     // In Zig this is comptime computed. In C++ it's not worth it to do that.
     uint8_t char_to_index[256];
@@ -218,15 +220,41 @@ static Error hash_file(uint8_t *digest, OsFile handle, Buf *contents) {
     }
 }
 
+// If the wall clock time, rounded to the same precision as the
+// mtime, is equal to the mtime, then we cannot rely on this mtime
+// yet. We will instead save an mtime value that indicates the hash
+// must be unconditionally computed.
+static bool is_problematic_timestamp(const OsTimeStamp *fs_clock) {
+    OsTimeStamp wall_clock = os_timestamp_calendar();
+    // First make all the least significant zero bits in the fs_clock, also zero bits in the wall clock.
+    if (fs_clock->nsec == 0) {
+        wall_clock.nsec = 0;
+        if (fs_clock->sec == 0) {
+            wall_clock.sec = 0;
+        } else {
+            wall_clock.sec &= (-1ull) << ctzll(fs_clock->sec);
+        }
+    } else {
+        wall_clock.nsec &= (-1ull) << ctzll(fs_clock->nsec);
+    }
+    return wall_clock.nsec == fs_clock->nsec && wall_clock.sec == fs_clock->sec;
+}
+
 static Error populate_file_hash(CacheHash *ch, CacheHashFile *chf, Buf *contents) {
     Error err;
 
     assert(chf->path != nullptr);
 
     OsFile this_file;
-    if ((err = os_file_open_r(chf->path, &this_file, &chf->mtime)))
+    if ((err = os_file_open_r(chf->path, &this_file, &chf->attr)))
         return err;
 
+    if (is_problematic_timestamp(&chf->attr.mtime)) {
+        chf->attr.mtime.sec = 0;
+        chf->attr.mtime.nsec = 0;
+        chf->attr.inode = 0;
+    }
+
     if ((err = hash_file(chf->bin_digest, this_file, contents))) {
         os_file_close(this_file);
         return err;
@@ -278,6 +306,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
 
     size_t input_file_count = ch->files.length;
     bool any_file_changed = false;
+    Error return_code = ErrorNone;
     size_t file_i = 0;
     SplitIterator line_it = memSplit(buf_to_slice(&line_buf), str("\n"));
     for (;; file_i += 1) {
@@ -299,7 +328,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
                 blake2b_update(&ch->blake, ch->files.at(file_i).bin_digest, 48);
             }
             // caller can notice that out_digest is unmodified.
-            return ErrorNone;
+            return return_code;
         } else if (!opt_line.is_some) {
             break;
         } else {
@@ -312,57 +341,73 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
 
         SplitIterator it = memSplit(opt_line.value, str(" "));
 
+        Optional<Slice<uint8_t>> opt_inode = SplitIterator_next(&it);
+        if (!opt_inode.is_some) {
+            return_code = ErrorInvalidFormat;
+            break;
+        }
+        chf->attr.inode = strtoull((const char *)opt_inode.value.ptr, nullptr, 10);
+
         Optional<Slice<uint8_t>> opt_mtime_sec = SplitIterator_next(&it);
         if (!opt_mtime_sec.is_some) {
-            os_file_close(ch->manifest_file);
-            return ErrorInvalidFormat;
+            return_code = ErrorInvalidFormat;
+            break;
         }
-        chf->mtime.sec = strtoull((const char *)opt_mtime_sec.value.ptr, nullptr, 10);
+        chf->attr.mtime.sec = strtoull((const char *)opt_mtime_sec.value.ptr, nullptr, 10);
 
         Optional<Slice<uint8_t>> opt_mtime_nsec = SplitIterator_next(&it);
         if (!opt_mtime_nsec.is_some) {
-            os_file_close(ch->manifest_file);
-            return ErrorInvalidFormat;
+            return_code = ErrorInvalidFormat;
+            break;
         }
-        chf->mtime.nsec = strtoull((const char *)opt_mtime_nsec.value.ptr, nullptr, 10);
+        chf->attr.mtime.nsec = strtoull((const char *)opt_mtime_nsec.value.ptr, nullptr, 10);
 
         Optional<Slice<uint8_t>> opt_digest = SplitIterator_next(&it);
         if (!opt_digest.is_some) {
-            os_file_close(ch->manifest_file);
-            return ErrorInvalidFormat;
+            return_code = ErrorInvalidFormat;
+            break;
         }
         if ((err = base64_decode({chf->bin_digest, 48}, opt_digest.value))) {
-            os_file_close(ch->manifest_file);
-            return ErrorInvalidFormat;
+            return_code = ErrorInvalidFormat;
+            break;
         }
 
         Slice<uint8_t> file_path = SplitIterator_rest(&it);
         if (file_path.len == 0) {
-            os_file_close(ch->manifest_file);
-            return ErrorInvalidFormat;
+            return_code = ErrorInvalidFormat;
+            break;
         }
         Buf *this_path = buf_create_from_slice(file_path);
         if (chf->path != nullptr && !buf_eql_buf(this_path, chf->path)) {
-            os_file_close(ch->manifest_file);
-            return ErrorInvalidFormat;
+            return_code = ErrorInvalidFormat;
+            break;
         }
         chf->path = this_path;
 
         // if the mtime matches we can trust the digest
         OsFile this_file;
-        OsTimeStamp actual_mtime;
-        if ((err = os_file_open_r(chf->path, &this_file, &actual_mtime))) {
+        OsFileAttr actual_attr;
+        if ((err = os_file_open_r(chf->path, &this_file, &actual_attr))) {
             fprintf(stderr, "Unable to open %s\n: %s", buf_ptr(chf->path), err_str(err));
             os_file_close(ch->manifest_file);
             return ErrorCacheUnavailable;
         }
-        if (chf->mtime.sec == actual_mtime.sec && chf->mtime.nsec == actual_mtime.nsec) {
+        if (chf->attr.mtime.sec == actual_attr.mtime.sec &&
+            chf->attr.mtime.nsec == actual_attr.mtime.nsec &&
+            chf->attr.inode == actual_attr.inode)
+        {
             os_file_close(this_file);
         } else {
             // we have to recompute the digest.
             // later we'll rewrite the manifest with the new mtime/digest values
             ch->manifest_dirty = true;
-            chf->mtime = actual_mtime;
+            chf->attr = actual_attr;
+
+            if (is_problematic_timestamp(&actual_attr.mtime)) {
+                chf->attr.mtime.sec = 0;
+                chf->attr.mtime.nsec = 0;
+                chf->attr.inode = 0;
+            }
 
             uint8_t actual_digest[48];
             if ((err = hash_file(actual_digest, this_file, nullptr))) {
@@ -381,7 +426,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
             blake2b_update(&ch->blake, chf->bin_digest, 48);
         }
     }
-    if (file_i < input_file_count || file_i == 0) {
+    if (file_i < input_file_count || file_i == 0 || return_code != ErrorNone) {
         // manifest file is empty or missing entries, so this is a cache miss
         ch->manifest_dirty = true;
         for (; file_i < input_file_count; file_i += 1) {
@@ -392,7 +437,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
                 return ErrorCacheUnavailable;
             }
         }
-        return ErrorNone;
+        return return_code;
     }
     // Cache Hit
     return cache_final(ch, out_digest);
@@ -499,8 +544,8 @@ static Error write_manifest_file(CacheHash *ch) {
     for (size_t i = 0; i < ch->files.length; i += 1) {
         CacheHashFile *chf = &ch->files.at(i);
         base64_encode({encoded_digest, 64}, {chf->bin_digest, 48});
-        buf_appendf(&contents, "%" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %s %s\n",
-            chf->mtime.sec, chf->mtime.nsec, encoded_digest, buf_ptr(chf->path));
+        buf_appendf(&contents, "%" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %s %s\n",
+            chf->attr.inode, chf->attr.mtime.sec, chf->attr.mtime.nsec, encoded_digest, buf_ptr(chf->path));
     }
     if ((err = os_file_overwrite(ch->manifest_file, &contents)))
         return err;
src/cache_hash.hpp
@@ -15,7 +15,7 @@ struct LinkLib;
 
 struct CacheHashFile {
     Buf *path;
-    OsTimeStamp mtime;
+    OsFileAttr attr;
     uint8_t bin_digest[48];
     Buf *contents;
 };
@@ -57,6 +57,8 @@ void cache_file_opt(CacheHash *ch, Buf *path);
 // added any files before calling cache_hit. CacheHash::b64_digest becomes
 // available for use after this call, even in the case of a miss, and it
 // is a hash of the input parameters only.
+// If this function returns ErrorInvalidFormat, that error may be treated
+// as a cache miss.
 Error ATTRIBUTE_MUST_USE cache_hit(CacheHash *ch, Buf *out_b64_digest);
 
 // If you did not get a cache hit, call this function for every file
src/codegen.cpp
@@ -7724,8 +7724,11 @@ static Error define_builtin_compile_vars(CodeGen *g) {
 
     Buf digest = BUF_INIT;
     buf_resize(&digest, 0);
-    if ((err = cache_hit(&cache_hash, &digest)))
-        return err;
+    if ((err = cache_hit(&cache_hash, &digest))) {
+        // Treat an invalid format error as a cache miss.
+        if (err != ErrorInvalidFormat)
+            return err;
+    }
 
     // We should always get a cache hit because there are no
     // files in the input hash.
@@ -8342,12 +8345,14 @@ static void gen_c_object(CodeGen *g, Buf *self_exe_path, CFile *c_file) {
     Buf digest = BUF_INIT;
     buf_resize(&digest, 0);
     if ((err = cache_hit(cache_hash, &digest))) {
-        if (err == ErrorCacheUnavailable) {
-            // already printed error
-        } else {
-            fprintf(stderr, "unable to check cache when compiling C object: %s\n", err_str(err));
+        if (err != ErrorInvalidFormat) {
+            if (err == ErrorCacheUnavailable) {
+                // already printed error
+            } else {
+                fprintf(stderr, "unable to check cache when compiling C object: %s\n", err_str(err));
+            }
+            exit(1);
         }
-        exit(1);
     }
     bool is_cache_miss = (buf_len(&digest) == 0);
     if (is_cache_miss) {
@@ -8993,7 +8998,10 @@ void codegen_print_timing_report(CodeGen *g, FILE *f) {
 }
 
 void codegen_add_time_event(CodeGen *g, const char *name) {
-    g->timing_events.append({os_get_time(), name});
+    OsTimeStamp timestamp = os_timestamp_monotonic();
+    double seconds = (double)timestamp.sec;
+    seconds += ((double)timestamp.nsec) / 1000000000.0;
+    g->timing_events.append({seconds, name});
 }
 
 static void add_cache_pkg(CodeGen *g, CacheHash *ch, ZigPackage *pkg) {
@@ -9090,8 +9098,10 @@ static Error check_cache(CodeGen *g, Buf *manifest_dir, Buf *digest) {
     cache_list_of_file(ch, g->link_objects.items, g->link_objects.length);
 
     buf_resize(digest, 0);
-    if ((err = cache_hit(ch, digest)))
-        return err;
+    if ((err = cache_hit(ch, digest))) {
+        if (err != ErrorInvalidFormat)
+            return err;
+    }
 
     if (ch->manifest_file_path != nullptr) {
         g->caches_to_release.append(ch);
src/compiler.cpp
@@ -75,8 +75,10 @@ Error get_compiler_id(Buf **result) {
     cache_file(ch, &self_exe_path);
 
     buf_resize(&saved_compiler_id, 0);
-    if ((err = cache_hit(ch, &saved_compiler_id)))
-        return err;
+    if ((err = cache_hit(ch, &saved_compiler_id))) {
+        if (err != ErrorInvalidFormat)
+            return err;
+    }
     if (buf_len(&saved_compiler_id) != 0) {
         cache_release(ch);
         *result = &saved_compiler_id;
src/ir.cpp
@@ -18734,8 +18734,10 @@ static IrInstruction *ir_analyze_instruction_c_import(IrAnalyze *ira, IrInstruct
     Buf tmp_c_file_digest = BUF_INIT;
     buf_resize(&tmp_c_file_digest, 0);
     if ((err = cache_hit(cache_hash, &tmp_c_file_digest))) {
-        ir_add_error_node(ira, node, buf_sprintf("C import failed: unable to check cache: %s", err_str(err)));
-        return ira->codegen->invalid_instruction;
+        if (err != ErrorInvalidFormat) {
+            ir_add_error_node(ira, node, buf_sprintf("C import failed: unable to check cache: %s", err_str(err)));
+            return ira->codegen->invalid_instruction;
+        }
     }
     ira->codegen->caches_to_release.append(cache_hash);
 
src/os.cpp
@@ -71,8 +71,10 @@ typedef SSIZE_T ssize_t;
 
 #if defined(ZIG_OS_WINDOWS)
 static double win32_time_resolution;
+static LARGE_INTEGER windows_perf_freq;
 #elif defined(__MACH__)
-static clock_serv_t cclock;
+static clock_serv_t macos_calendar_clock;
+static clock_serv_t macos_monotonic_clock;
 #endif
 
 #include <stdlib.h>
@@ -1233,28 +1235,60 @@ Error os_rename(Buf *src_path, Buf *dest_path) {
     return ErrorNone;
 }
 
-double os_get_time(void) {
 #if defined(ZIG_OS_WINDOWS)
-    unsigned __int64 time;
-    QueryPerformanceCounter((LARGE_INTEGER*) &time);
-    return time * win32_time_resolution;
+static void windows_filetime_to_os_timestamp(FILETIME *ft, OsTimeStamp *mtime) {
+    mtime->sec = (((ULONGLONG) ft->dwHighDateTime) << 32) + ft->dwLowDateTime;
+    mtime->nsec = 0;
+}
+#endif
+
+OsTimeStamp os_timestamp_calendar(void) {
+    OsTimeStamp result;
+#if defined(ZIG_OS_WINDOWS)
+    FILETIME ft;
+    GetSystemTimeAsFileTime(&ft);
+    windows_filetime_to_os_timestamp(&ft, &result);
 #elif defined(__MACH__)
     mach_timespec_t mts;
 
-    kern_return_t err = clock_get_time(cclock, &mts);
+    kern_return_t err = clock_get_time(macos_calendar_clock, &mts);
     assert(!err);
 
-    double seconds = (double)mts.tv_sec;
-    seconds += ((double)mts.tv_nsec) / 1000000000.0;
+    result.sec = mts.tv_sec;
+    result.nsec = mts.tv_nsec;
+#else
+    struct timespec tms;
+    clock_gettime(CLOCK_REALTIME, &tms);
+
+    result.sec = tms.tv_sec;
+    result.nsec = tms.tv_nsec;
+#endif
+    return result;
+}
+
+OsTimeStamp os_timestamp_monotonic(void) {
+    OsTimeStamp result;
+#if defined(ZIG_OS_WINDOWS)
+    LARGE_INTEGER counts;
+    QueryPerformanceCounter(&counts);
+    result.sec = counts / windows_perf_freq;
+    result.nsec = (counts % windows_perf_freq) * 1000000000u / windows_perf_freq;
+#elif defined(__MACH__)
+    mach_timespec_t mts;
 
-    return seconds;
+    kern_return_t err = clock_get_time(macos_monotonic_clock, &mts);
+    assert(!err);
+
+    result.sec = mts.tv_sec;
+    result.nsec = mts.tv_nsec;
 #else
     struct timespec tms;
     clock_gettime(CLOCK_MONOTONIC, &tms);
-    double seconds = (double)tms.tv_sec;
-    seconds += ((double)tms.tv_nsec) / 1000000000.0;
-    return seconds;
+
+    result.sec = tms.tv_sec;
+    result.nsec = tms.tv_nsec;
 #endif
+    return result;
 }
 
 Error os_make_path(Buf *path) {
@@ -1352,14 +1386,14 @@ int os_init(void) {
 #if defined(ZIG_OS_WINDOWS)
     _setmode(fileno(stdout), _O_BINARY);
     _setmode(fileno(stderr), _O_BINARY);
-    unsigned __int64 frequency;
-    if (QueryPerformanceFrequency((LARGE_INTEGER*) &frequency)) {
-        win32_time_resolution = 1.0 / (double) frequency;
+    if (QueryPerformanceFrequency(&windows_perf_freq)) {
+        win32_time_resolution = 1.0 / (double) windows_perf_freq;
     } else {
         return ErrorSystemResources;
     }
 #elif defined(__MACH__)
-    host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock);
+    host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &macos_monotonic_clock);
+    host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &macos_calendar_clock);
 #endif
     return 0;
 }
@@ -1780,7 +1814,7 @@ Error os_self_exe_shared_libs(ZigList<Buf *> &paths) {
 #endif
 }
 
-Error os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime) {
+Error os_file_open_r(Buf *full_path, OsFile *out_file, OsFileAttr *attr) {
 #if defined(ZIG_OS_WINDOWS)
     // TODO use CreateFileW
     HANDLE result = CreateFileA(buf_ptr(full_path), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
@@ -1808,14 +1842,14 @@ Error os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime) {
     }
     *out_file = result;
 
-    if (mtime != nullptr) {
-        FILETIME last_write_time;
-        if (!GetFileTime(result, nullptr, nullptr, &last_write_time)) {
+    if (attr != nullptr) {
+        BY_HANDLE_FILE_INFORMATION file_info;
+        if (!GetFileInformationByHandle(result, &file_info)) {
             CloseHandle(result);
             return ErrorUnexpected;
         }
-        mtime->sec = (((ULONGLONG) last_write_time.dwHighDateTime) << 32) + last_write_time.dwLowDateTime;
-        mtime->nsec = 0;
+        windows_filetime_to_os_timestamp(&file_info.ftLastWriteTime, &attr->mtime);
+        attr->inode = (((uint64_t)file_info.nFileIndexHigh) << 32) | file_info.nFileIndexLow;
     }
 
     return ErrorNone;
@@ -1851,13 +1885,14 @@ Error os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime) {
         }
         *out_file = fd;
 
-        if (mtime != nullptr) {
+        if (attr != nullptr) {
+            attr->inode = statbuf.st_ino;
 #if defined(ZIG_OS_DARWIN)
-            mtime->sec = statbuf.st_mtimespec.tv_sec;
-            mtime->nsec = statbuf.st_mtimespec.tv_nsec;
+            attr->mtime.sec = statbuf.st_mtimespec.tv_sec;
+            attr->mtime.nsec = statbuf.st_mtimespec.tv_nsec;
 #else
-            mtime->sec = statbuf.st_mtim.tv_sec;
-            mtime->nsec = statbuf.st_mtim.tv_nsec;
+            attr->mtime.sec = statbuf.st_mtim.tv_sec;
+            attr->mtime.nsec = statbuf.st_mtim.tv_nsec;
 #endif
         }
         return ErrorNone;
src/os.hpp
@@ -85,6 +85,11 @@ struct OsTimeStamp {
     uint64_t nsec;
 };
 
+struct OsFileAttr {
+    OsTimeStamp mtime;
+    uint64_t inode;
+};
+
 int os_init(void);
 
 void os_spawn_process(const char *exe, ZigList<const char *> &args, Termination *term);
@@ -103,7 +108,7 @@ bool os_path_is_absolute(Buf *path);
 Error ATTRIBUTE_MUST_USE os_make_path(Buf *path);
 Error ATTRIBUTE_MUST_USE os_make_dir(Buf *path);
 
-Error ATTRIBUTE_MUST_USE os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime);
+Error ATTRIBUTE_MUST_USE os_file_open_r(Buf *full_path, OsFile *out_file, OsFileAttr *attr);
 Error ATTRIBUTE_MUST_USE os_file_open_lock_rw(Buf *full_path, OsFile *out_file);
 Error ATTRIBUTE_MUST_USE os_file_read(OsFile file, void *ptr, size_t *len);
 Error ATTRIBUTE_MUST_USE os_file_read_all(OsFile file, Buf *contents);
@@ -126,7 +131,8 @@ Error os_delete_file(Buf *path);
 Error ATTRIBUTE_MUST_USE os_file_exists(Buf *full_path, bool *result);
 
 Error os_rename(Buf *src_path, Buf *dest_path);
-double os_get_time(void);
+OsTimeStamp os_timestamp_monotonic(void);
+OsTimeStamp os_timestamp_calendar(void);
 
 bool os_is_sep(uint8_t c);
 
src/util.hpp
@@ -63,10 +63,27 @@ static inline int clzll(unsigned long long mask) {
     return 63 - lz;
 #endif
 }
+static inline int ctzll(unsigned long long mask) {
+    unsigned long result;
+#if defined(_WIN64)
+    if (_BitScanForward64(&result, mask))
+        return result;
+    zig_unreachable();
+#else
+    if (_BitScanForward(&result, mask & 0xffffffff))
+        return result;
+    }
+    if (_BitScanForward(&result, mask >> 32))
+        return 32 + result;
+    zig_unreachable();
+#endif
+}
 #else
 #define clzll(x) __builtin_clzll(x)
+#define ctzll(x) __builtin_ctzll(x)
 #endif
 
+
 template<typename T>
 ATTRIBUTE_RETURNS_NOALIAS static inline T *allocate_nonzero(size_t count) {
 #ifndef NDEBUG