11use clap:: Args ;
22use lazy_static:: lazy_static;
33use rayon:: prelude:: * ;
4+ use serde_json;
45use std:: borrow:: Cow ;
6+ use std:: collections:: HashSet ;
57use std:: fs;
68use std:: io:: BufRead ;
79use std:: path:: { Path , PathBuf } ;
@@ -78,6 +80,8 @@ pub fn run(options: Options) -> std::io::Result<()> {
7880
7981 let file_list = fs:: File :: open ( file_paths:: path_from_string ( & options. file_list ) ) ?;
8082
83+ let overlay_changed_files: Option < HashSet < PathBuf > > = get_overlay_changed_files ( ) ;
84+
8185 let language: Language = tree_sitter_ruby:: LANGUAGE . into ( ) ;
8286 let erb: Language = tree_sitter_embedded_template:: LANGUAGE . into ( ) ;
8387 // Look up tree-sitter kind ids now, to avoid string comparisons when scanning ERB files.
@@ -94,6 +98,13 @@ pub fn run(options: Options) -> std::io::Result<()> {
9498 . try_for_each ( |line| {
9599 let mut diagnostics_writer = diagnostics. logger ( ) ;
96100 let path = PathBuf :: from ( line) . canonicalize ( ) ?;
101+ match & overlay_changed_files {
102+ Some ( changed_files) if !changed_files. contains ( & path) => {
103+ // We are extracting an overlay and this file is not in the list of changed files, so we should skip it.
104+ return Result :: Ok ( ( ) ) ;
105+ }
106+ _ => { } ,
107+ }
97108 let src_archive_file = file_paths:: path_for ( & src_archive_dir, & path, "" ) ;
98109 let mut source = std:: fs:: read ( & path) ?;
99110 let mut needs_conversion = false ;
@@ -212,6 +223,12 @@ pub fn run(options: Options) -> std::io::Result<()> {
212223 let mut trap_writer = trap:: Writer :: new ( ) ;
213224 extractor:: populate_empty_location ( & mut trap_writer) ;
214225 let res = write_trap ( & trap_dir, path, & trap_writer, trap_compression) ;
226+ if let Ok ( output_path) = std:: env:: var ( "CODEQL_EXTRACTOR_RUBY_OVERLAY_BASE_METADATA_OUT" ) {
227+ // We're extracting an overlay base. For now, we don't have any metadata we need to store
228+ // that would get read when extracting the overlay, but the CLI expects us to write
229+ // *something*. An empty file will do.
230+ std:: fs:: write ( output_path, b"" ) ?;
231+ }
215232 tracing:: info!( "Extraction complete" ) ;
216233 res
217234}
@@ -302,6 +319,41 @@ fn skip_space(content: &[u8], index: usize) -> usize {
302319 }
303320 index
304321}
322+
323+ /**
324+ * If the relevant environment variable has been set by the CLI, indicating that we are extracting
325+ * an overlay, this function reads the JSON file at the path given by its value, and returns a set
326+ * of canonicalized paths of source files that have changed and should therefore be extracted.
327+ *
328+ * If the environment variable is not set (i.e. we're not extracting an overlay), or if the file
329+ * cannot be read, this function returns `None`. In that case, all files should be extracted.
330+ */
331+ fn get_overlay_changed_files ( ) -> Option < HashSet < PathBuf > > {
332+ let path = std:: env:: var ( "CODEQL_EXTRACTOR_RUBY_OVERLAY_CHANGES" ) . ok ( ) ?;
333+ let file_content = fs:: read_to_string ( path) . ok ( ) ?;
334+ let json_value: serde_json:: Value = serde_json:: from_str ( & file_content) . ok ( ) ?;
335+
336+ // The JSON file is expected to have the following structure:
337+ // {
338+ // "changes": [
339+ // "relative/path/to/changed/file1.rb",
340+ // "relative/path/to/changed/file2.rb",
341+ // ...
342+ // ]
343+ // }
344+ json_value
345+ . get ( "changes" ) ?
346+ . as_array ( ) ?
347+ . iter ( )
348+ . map ( |change| {
349+ change
350+ . as_str ( )
351+ . map ( |s| PathBuf :: from ( s) . canonicalize ( ) . ok ( ) )
352+ . flatten ( )
353+ } )
354+ . collect ( )
355+ }
356+
305357fn scan_coding_comment ( content : & [ u8 ] ) -> std:: option:: Option < Cow < str > > {
306358 let mut index = 0 ;
307359 // skip UTF-8 BOM marker if there is one
0 commit comments