11use std:: collections:: HashMap ;
22
/// Leading markers that `License::remove_copyright_prefixes` strips from the
/// start of every copyright string.
const COPYRIGHT_PREFIXES: &[&str] = &[
    "SPDX-FileCopyrightText:",
    "Copyright",
    "(c)",
    "(C)",
    "©",
];
4+
35pub ( crate ) struct LicensesInterner {
46 by_id : Vec < License > ,
57 by_struct : HashMap < License , usize > ,
@@ -10,7 +12,8 @@ impl LicensesInterner {
1012 LicensesInterner { by_id : Vec :: new ( ) , by_struct : HashMap :: new ( ) }
1113 }
1214
13- pub ( crate ) fn intern ( & mut self , license : License ) -> LicenseId {
15+ pub ( crate ) fn intern ( & mut self , mut license : License ) -> LicenseId {
16+ license. simplify ( ) ;
1417 if let Some ( id) = self . by_struct . get ( & license) {
1518 LicenseId ( * id)
1619 } else {
@@ -35,3 +38,28 @@ pub(crate) struct License {
3538 pub ( crate ) spdx : String ,
3639 pub ( crate ) copyright : Vec < String > ,
3740}
41+
42+ impl License {
43+ fn simplify ( & mut self ) {
44+ self . remove_copyright_prefixes ( ) ;
45+ self . copyright . sort ( ) ;
46+ self . copyright . dedup ( ) ;
47+ }
48+
49+ fn remove_copyright_prefixes ( & mut self ) {
50+ for copyright in & mut self . copyright {
51+ let mut stripped = copyright. trim ( ) ;
52+ let mut previous_stripped;
53+ loop {
54+ previous_stripped = stripped;
55+ for pattern in COPYRIGHT_PREFIXES {
56+ stripped = stripped. trim_start_matches ( pattern) . trim_start ( ) ;
57+ }
58+ if stripped == previous_stripped {
59+ break ;
60+ }
61+ }
62+ * copyright = stripped. into ( ) ;
63+ }
64+ }
65+ }
0 commit comments