Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 33 additions & 16 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -1431,16 +1431,6 @@ def validate_meta(
t0 = time.time()
bazel = manager.options.bazel
assert path is not None, "Internal error: meta was provided without a path"
if not manager.options.skip_cache_mtime_checks:
# Check data_file; assume if its mtime matches it's good.
try:
data_mtime = manager.getmtime(meta.data_file)
except OSError:
manager.log(f"Metadata abandoned for {id}: failed to stat data_file")
return None
if data_mtime != meta.data_mtime:
manager.log(f"Metadata abandoned for {id}: data cache is modified")
return None

if bazel:
# Normalize path under bazel to make sure it isn't absolute
Expand Down Expand Up @@ -2093,7 +2083,11 @@ def wrap_context(self, check_blockers: bool = True) -> Iterator[None]:
def load_fine_grained_deps(self) -> dict[str, set[str]]:
return self.manager.load_fine_grained_deps(self.id)

def load_tree(self, temporary: bool = False) -> None:
def load_tree(self, temporary: bool = False) -> bool:
"""Load the cached tree.

Returns True if the load was successful, False otherwise.
"""
assert (
self.meta is not None
), "Internal error: this method must be called only for cached modules"
Expand All @@ -2106,7 +2100,24 @@ def load_tree(self, temporary: bool = False) -> None:
self.meta.data_file, self.manager, "Load tree ", "Could not load tree: "
)
if data is None:
return
return False

if not self.manager.options.skip_cache_mtime_checks and self.meta.data_mtime != 0:
# A lot of time might have passed since we have loaded meta.
# If the mtime is inconsistent, we should discard the cache entry.
# We do this **after** reading the file: if avoids the race condition.
# Discarding is safe, we'll just reprocess everything if someone wrote
# to that file since we have read from it.
try:
actual_mtime = self.manager.getmtime(self.meta.data_file)
except OSError:
self.manager.log(f"Cache data abandoned for {self.id}: failed to stat data_file")
return False
if actual_mtime != self.meta.data_mtime:
self.manager.log(
f"Cache data abandoned for {self.id}: inconsistent data_file mtime"
)
return False

t0 = time.time()
# TODO: Assert data file wasn't changed.
Expand All @@ -2120,6 +2131,7 @@ def load_tree(self, temporary: bool = False) -> None:
if not temporary:
self.manager.modules[self.id] = self.tree
self.manager.add_stats(fresh_trees=1)
return True

def fix_cross_refs(self) -> None:
assert self.tree is not None, "Internal error: method must be called on parsed file only"
Expand Down Expand Up @@ -3390,20 +3402,24 @@ def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_INDIRECT
return [s for ss in sccs for s in order_ascc(graph, ss, pri_max)]


def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManager) -> None:
def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManager) -> bool:
"""Process the modules in one group of modules from their cached data.

This can be used to process an SCC of modules. This involves loading the tree (i.e.
module symbol tables) from cache file and then fixing cross-references in the symbols.
"""
t0 = time.time()
for id in modules:
graph[id].load_tree()
for i, id in enumerate(modules):
if not graph[id].load_tree():
for id in modules[i:]:
graph[id].tree = None
return False
t1 = time.time()
for id in modules:
graph[id].fix_cross_refs()
t2 = time.time()
manager.add_stats(process_fresh_time=t2 - t0, load_tree_time=t1 - t0)
return True


def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
Expand Down Expand Up @@ -3441,7 +3457,8 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
gc.disable()
for prev_scc in fresh_sccs_to_load:
manager.done_sccs.add(prev_scc.id)
process_fresh_modules(graph, sorted(prev_scc.mod_ids), manager)
if not process_fresh_modules(graph, sorted(prev_scc.mod_ids), manager):
process_stale_scc(graph, prev_scc, manager)
Copy link
Member

@ilevkivskyi ilevkivskyi Nov 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a bad idea to do this while GC is (potentially) frozen. In general, I think we should not complicate logic in build.py for some rare/niche use cases, it is already complicated and tricky enough. I think we should simply add try/except with an error message suggesting using different --cache-dir per invocation when running mypy in parallel.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a bad idea to do this while GC is (potentially) frozen.

This is already an exceptional case, we can only reach this branch if multiple processes are running concurrently. Note that this also fixes an (unreported) crash that is almost guaranteed to happen if the existing data is None early return branch is taken in State.load_tree (and I see how that can reasonably happen even without concurrency - network error on a mapped drive, for example, or just a disk fault).

If I understand correctly, the worst possible outcome in this case is OOM (which is a very good way to say "please stop running multiple instances concurrently") or increased mem usage, so what do we risk by allowing this to happen?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not super happy about try/except because it may actually not happen even when the data is stale - it may be compatible by interface but not semantically, - resulting in inconsistent state somewhere and unpredictable heisenbugs no one can reproduce.

Or let's add a checkbox to the issue template asking if the OP is running some IDE integration without explicit --cache-dir configuration...

if (
not manager.options.test_env
and platform.python_implementation() == "CPython"
Expand Down