11use crate :: trap;
2+ use globset:: { GlobBuilder , GlobSetBuilder } ;
23use rayon:: prelude:: * ;
3- use std:: collections:: HashMap ;
4- use std:: ffi:: { OsStr , OsString } ;
54use std:: fs:: File ;
65use std:: io:: BufRead ;
76use std:: path:: { Path , PathBuf } ;
@@ -13,7 +12,7 @@ pub struct LanguageSpec {
1312 pub prefix : & ' static str ,
1413 pub ts_language : tree_sitter:: Language ,
1514 pub node_types : & ' static str ,
16- pub file_extensions : Vec < OsString > ,
15+ pub file_globs : Vec < String > ,
1716}
1817
1918pub struct Extractor {
@@ -83,16 +82,26 @@ impl Extractor {
8382 schemas. push ( schema) ;
8483 }
8584
86- // Construct a map from file extension -> LanguageSpec
87- let mut file_extension_language_mapping: HashMap < & OsStr , Vec < usize > > = HashMap :: new ( ) ;
88- for ( i, lang) in self . languages . iter ( ) . enumerate ( ) {
89- for ( j, _ext) in lang. file_extensions . iter ( ) . enumerate ( ) {
90- let indexes = file_extension_language_mapping
91- . entry ( & lang. file_extensions [ j] )
92- . or_default ( ) ;
93- indexes. push ( i) ;
85+ // Construct a single globset containing all language globs,
86+ // and a mapping from glob index to language index.
87+ let ( globset, glob_language_mapping) = {
88+ let mut builder = GlobSetBuilder :: new ( ) ;
89+ let mut glob_lang_mapping = vec ! [ ] ;
90+ for ( i, lang) in self . languages . iter ( ) . enumerate ( ) {
91+ for glob_str in & lang. file_globs {
92+ let glob = GlobBuilder :: new ( glob_str)
93+ . literal_separator ( true )
94+ . build ( )
95+ . expect ( "invalid glob" ) ;
96+ builder. add ( glob) ;
97+ glob_lang_mapping. push ( i) ;
98+ }
9499 }
95- }
100+ (
101+ builder. build ( ) . expect ( "failed to build globset" ) ,
102+ glob_lang_mapping,
103+ )
104+ } ;
96105
97106 let lines: std:: io:: Result < Vec < String > > =
98107 std:: io:: BufReader :: new ( file_list) . lines ( ) . collect ( ) ;
@@ -108,18 +117,29 @@ impl Extractor {
108117 let source = std:: fs:: read ( & path) ?;
109118 let mut trap_writer = trap:: Writer :: new ( ) ;
110119
111- match path. extension ( ) {
120+ match path. file_name ( ) {
112121 None => {
113- tracing:: error!( ?path, "No extension found, skipping file." ) ;
122+ tracing:: error!( ?path, "No file name found, skipping file." ) ;
114123 }
115- Some ( ext) => {
116- if let Some ( indexes) = file_extension_language_mapping. get ( ext) {
117- for i in indexes {
118- let lang = & self . languages [ * i] ;
124+ Some ( filename) => {
125+ let matches = globset. matches ( filename) ;
126+ if matches. is_empty ( ) {
127+ tracing:: error!( ?path, "No matching language found, skipping file." ) ;
128+ } else {
129+ let mut languages_processed = vec ! [ false ; self . languages. len( ) ] ;
130+
131+ for m in matches {
132+ let i = glob_language_mapping[ m] ;
133+ if languages_processed[ i] {
134+ continue ;
135+ }
136+ languages_processed[ i] = true ;
137+ let lang = & self . languages [ i] ;
138+
119139 crate :: extractor:: extract (
120140 lang. ts_language ,
121141 lang. prefix ,
122- & schemas[ * i] ,
142+ & schemas[ i] ,
123143 & mut diagnostics_writer,
124144 & mut trap_writer,
125145 & path,
@@ -130,11 +150,9 @@ impl Extractor {
130150 std:: fs:: copy ( & path, & src_archive_file) ?;
131151 write_trap ( & self . trap_dir , & path, & trap_writer, trap_compression) ?;
132152 }
133- } else {
134- tracing:: warn!( ?path, "No language matches path, skipping file." ) ;
135153 }
136154 }
137- } ;
155+ }
138156 Ok ( ( ) ) as std:: io:: Result < ( ) >
139157 } )
140158 . expect ( "failed to extract files" ) ;
0 commit comments