From e36dac9fb6d84f4072e433733aff9894f38c19d0 Mon Sep 17 00:00:00 2001 From: STerliakov Date: Sat, 8 Nov 2025 00:12:01 +0100 Subject: [PATCH 1/4] Reject cached files with incompatible mtimes. The crash can be consistently reproduced by adding `__import__("time").sleep(0.2)` to `State.load_tree` --- mypy/build.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index e9c50ce6b224..9b576ca53741 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -2106,7 +2106,18 @@ def load_tree(self, temporary: bool = False) -> None: self.meta.data_file, self.manager, "Load tree ", "Could not load tree: " ) if data is None: - return + return False + + if not self.options.bazel and self.meta.data_mtime != 0: + # A lot of time might have passed since we have loaded meta. + # If the mtime is inconsistent, we should discard the cache entry. + # We do this **after** reading the file: if avoids the race condition. + # Discarding is safe, we'll just reprocess everything if someone wrote + # to that file since we have read from it. + actual_mtime = self.manager.getmtime(self.meta.data_file) + if actual_mtime != self.meta.data_mtime: + self.manager.log(f"Discarding {self.meta.data_file}: too fresh") + return False t0 = time.time() # TODO: Assert data file wasn't changed. @@ -2120,6 +2131,7 @@ def load_tree(self, temporary: bool = False) -> None: if not temporary: self.manager.modules[self.id] = self.tree self.manager.add_stats(fresh_trees=1) + return True def fix_cross_refs(self) -> None: assert self.tree is not None, "Internal error: method must be called on parsed file only" @@ -3390,20 +3402,24 @@ def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_INDIRECT return [s for ss in sccs for s in order_ascc(graph, ss, pri_max)] -def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManager) -> None: +def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManager) -> bool: """Process the modules in one group of modules from their cached data. This can be used to process an SCC of modules. This involves loading the tree (i.e. module symbol tables) from cache file and then fixing cross-references in the symbols. """ t0 = time.time() - for id in modules: - graph[id].load_tree() + for i, id in enumerate(modules): + if not graph[id].load_tree(): + for id in modules[i:]: + graph[id].tree = None + return False t1 = time.time() for id in modules: graph[id].fix_cross_refs() t2 = time.time() manager.add_stats(process_fresh_time=t2 - t0, load_tree_time=t1 - t0) + return True def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: @@ -3441,7 +3457,8 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: gc.disable() for prev_scc in fresh_sccs_to_load: manager.done_sccs.add(prev_scc.id) - process_fresh_modules(graph, sorted(prev_scc.mod_ids), manager) + if not process_fresh_modules(graph, sorted(prev_scc.mod_ids), manager): + process_stale_scc(graph, prev_scc, manager) if ( not manager.options.test_env and platform.python_implementation() == "CPython" From 73ff145c9b7a71fb83b6fa53c3fa70d7ab455d9c Mon Sep 17 00:00:00 2001 From: STerliakov Date: Sat, 8 Nov 2025 00:13:49 +0100 Subject: [PATCH 2/4] We can skip premature mtime check --- mypy/build.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 9b576ca53741..251d3b1a0677 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1431,16 +1431,6 @@ def validate_meta( t0 = time.time() bazel = manager.options.bazel assert path is not None, "Internal error: meta was provided without a path" - if not manager.options.skip_cache_mtime_checks: - # Check data_file; assume if its mtime matches it's good. - try: - data_mtime = manager.getmtime(meta.data_file) - except OSError: - manager.log(f"Metadata abandoned for {id}: failed to stat data_file") - return None - if data_mtime != meta.data_mtime: - manager.log(f"Metadata abandoned for {id}: data cache is modified") - return None if bazel: # Normalize path under bazel to make sure it isn't absolute @@ -2108,7 +2098,7 @@ def load_tree(self, temporary: bool = False) -> None: if data is None: return False - if not self.options.bazel and self.meta.data_mtime != 0: + if not self.manager.options.skip_cache_mtime_checks and self.meta.data_mtime != 0: # A lot of time might have passed since we have loaded meta. # If the mtime is inconsistent, we should discard the cache entry. # We do this **after** reading the file: if avoids the race condition. From 87e2131b5b8e0c47ac97e0d9067082bc0da81db3 Mon Sep 17 00:00:00 2001 From: STerliakov Date: Sat, 8 Nov 2025 00:30:03 +0100 Subject: [PATCH 3/4] Type fix --- mypy/build.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index 251d3b1a0677..b34f7136ba32 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -2083,7 +2083,11 @@ def wrap_context(self, check_blockers: bool = True) -> Iterator[None]: def load_fine_grained_deps(self) -> dict[str, set[str]]: return self.manager.load_fine_grained_deps(self.id) - def load_tree(self, temporary: bool = False) -> None: + def load_tree(self, temporary: bool = False) -> bool: + """Load the cached tree. + + Returns True if the load was successful, False otherwise. + """ assert ( self.meta is not None ), "Internal error: this method must be called only for cached modules" From 07515bb3820c013edf85ddb7856fa6c1ee499079 Mon Sep 17 00:00:00 2001 From: STerliakov Date: Sat, 8 Nov 2025 00:35:16 +0100 Subject: [PATCH 4/4] Add try/except around mtime reading --- mypy/build.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index b34f7136ba32..4e62275a350b 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -2108,9 +2108,15 @@ def load_tree(self, temporary: bool = False) -> bool: # We do this **after** reading the file: if avoids the race condition. # Discarding is safe, we'll just reprocess everything if someone wrote # to that file since we have read from it. - actual_mtime = self.manager.getmtime(self.meta.data_file) + try: + actual_mtime = self.manager.getmtime(self.meta.data_file) + except OSError: + self.manager.log(f"Cache data abandoned for {self.id}: failed to stat data_file") + return False if actual_mtime != self.meta.data_mtime: - self.manager.log(f"Discarding {self.meta.data_file}: too fresh") + self.manager.log( + f"Cache data abandoned for {self.id}: inconsistent data_file mtime" + ) return False t0 = time.time()