1111from semmle .profiling import get_profiler
1212from semmle .path_rename import renamer_from_options_and_env
1313from semmle .logging import WARN , recursion_error_message , internal_error_message , Logger
14+ from semmle .util import FileExtractable , FolderExtractable
1415
class ExtractorFailure(Exception):
    """Generic exception representing the failure of an extractor."""
@@ -19,17 +20,32 @@ class ExtractorFailure(Exception):
1920
2021class ModuleImportGraph (object ):
2122
22- def __init__ (self , max_depth ):
23+ def __init__ (self , max_depth , logger : Logger ):
2324 self .modules = {}
2425 self .succ = defaultdict (set )
2526 self .todo = set ()
2627 self .done = set ()
2728 self .max_depth = max_depth
29+ self .logger = logger
30+
31+ # During overlay extraction, only traverse the files that were changed.
32+ self .overlay_changes = None
33+ if 'CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES' in os .environ :
34+ overlay_changes_file = os .environ ['CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES' ]
35+ logger .info ("Overlay extraction mode: only extracting files changed according to '%s'" , overlay_changes_file )
36+ try :
37+ with open (overlay_changes_file , 'r' , encoding = 'utf-8' ) as f :
38+ data = json .load (f )
39+ changed_paths = data .get ('changes' , [])
40+ self .overlay_changes = { os .path .abspath (p ) for p in changed_paths }
41+ except (IOError , ValueError ) as e :
42+ logger .warn ("Failed to read overlay changes from '%s' (falling back to full extraction): %s" , overlay_changes_file , e )
43+ self .overlay_changes = None
2844
2945 def add_root (self , mod ):
3046 self .modules [mod ] = 0
3147 if mod not in self .done :
32- self .todo . add (mod )
48+ self .add_todo (mod )
3349
3450 def add_import (self , mod , imported ):
3551 assert mod in self .modules
@@ -39,7 +55,7 @@ def add_import(self, mod, imported):
3955 self ._reduce_depth (imported , self .modules [mod ] + 1 )
4056 else :
4157 if self .modules [mod ] < self .max_depth and imported not in self .done :
42- self .todo . add (imported )
58+ self .add_todo (imported )
4359 self .modules [imported ] = self .modules [mod ] + 1
4460
4561 def _reduce_depth (self , mod , depth ):
@@ -48,7 +64,7 @@ def _reduce_depth(self, mod, depth):
4864 if depth > self .max_depth :
4965 return
5066 if mod not in self .done :
51- self .todo . add (mod )
67+ self .add_todo (mod )
5268 self .modules [mod ] = depth
5369 for imp in self .succ [mod ]:
5470 self ._reduce_depth (imp , depth + 1 )
@@ -61,11 +77,25 @@ def get(self):
6177
6278 def push_back (self , mod ):
6379 self .done .remove (mod )
64- self .todo . add (mod )
80+ self .add_todo (mod )
6581
6682 def empty (self ):
6783 return not self .todo
6884
85+ def add_todo (self , mod ):
86+ if not self ._module_in_overlay_changes (mod ):
87+ self .logger .debug ("Skipping module '%s' as it was not changed in overlay extraction." , mod )
88+ return
89+ self .todo .add (mod )
90+
91+ def _module_in_overlay_changes (self , mod ):
92+ if self .overlay_changes is not None :
93+ if isinstance (mod , FileExtractable ):
94+ return mod .path in self .overlay_changes
95+ if isinstance (mod , FolderExtractable ):
96+ return mod .path + '/__init__.py' in self .overlay_changes
97+ return True
98+
6999class ExtractorPool (object ):
70100 '''Pool of worker processes running extractors'''
71101
@@ -90,7 +120,7 @@ def __init__(self, outdir, archive, proc_count, options, logger: Logger):
90120 self .enqueued = set ()
91121 self .done = set ()
92122 self .requirements = {}
93- self .import_graph = ModuleImportGraph (options .max_import_depth )
123+ self .import_graph = ModuleImportGraph (options .max_import_depth , logger )
94124 logger .debug ("Source archive: %s" , archive )
95125 self .logger = logger
96126 DiagnosticsWriter .create_output_dir ()
0 commit comments