11#!/usr/bin/env python3
2+ # /// script
3+ # requires-python = ">=3.12"
4+ # dependencies = [
5+ # "click",
6+ # "fuzzywuzzy",
7+ # "python-levenshtein",
8+ # ]
9+ # ///
210"""Update and sort the creators list of the zenodo record."""
311
412import json
816import click
917from fuzzywuzzy import fuzz , process
1018
# Names (in 'Last, First' form) appended, in this order, at the end of the
# Zenodo ``creators`` list regardless of their contribution ranking.
CREATORS_LAST = ['Poldrack, Russell A.', 'Gorgolewski, Krzysztof J.']
# Names (in 'Last, First' form) appended at the end of the Zenodo
# ``contributors`` list regardless of their contribution ranking.
CONTRIBUTORS_LAST = ['Ghosh, Satrajit S.']
21+
1122
1223def read_md_table (md_text ):
1324 """
@@ -54,48 +65,29 @@ def read_md_table(md_text):
5465 return retval
5566
5667
def sort_contributors(entries, git_lines, exclude=None):
    """Order author names by their rank in the git contribution summary.

    Parameters
    ----------
    entries : iterable of str
        Author names in ``'Last, First'`` form.
    git_lines : iterable of str
        Committer names; authors are ranked by the order in which their
        names are first matched here.
    exclude : iterable of str, optional
        ``'Last, First'`` names that should not be reported as unmatched
        when they appear in *git_lines* without a corresponding entry.

    Returns
    -------
    matches : dict
        Keys are every name from *entries*: first those matched in
        *git_lines* (in match order), then the remaining names in sorted
        order. All values are ``None``; the dict is used as an ordered set.
    unmatched : set of str
        Committers that matched no entry and are not listed in *exclude*.

    """

    def _first_last(name):
        """Turn ``'Last, First'`` into ``'First Last'`` for fuzzy matching."""
        return ' '.join(name.split(',')[::-1]).strip()

    sorted_authors = sorted(entries)

    # Match on First Last
    first_last = [_first_last(name) for name in sorted_authors]
    first_last_excl = {_first_last(name) for name in exclude or []}

    # Map each 'First Last' key back to its 'Last, First' name. When two
    # names collapse to the same key the first one wins, which is what
    # ``first_last.index(...)`` would have selected.
    by_first_last = {}
    for key, name in zip(first_last, sorted_authors):
        by_first_last.setdefault(key, name)

    matches = {}
    unmatched = set()
    for committer in git_lines:
        # With no candidate names there is nothing to match against; skip
        # the fuzzy search so that an empty result is never indexed.
        best = (
            process.extract(committer, first_last, scorer=fuzz.token_sort_ratio, limit=1)
            if first_last
            else []
        )
        if best and best[0][1] > 80:
            # Return Last, First; setdefault keeps the first-seen position
            matches.setdefault(by_first_last[best[0][0]], None)
        elif committer not in first_last_excl:
            unmatched.add(committer)

    # Add any remaining authors not matched in git_lines
    for name in sorted_authors:
        matches.setdefault(name, None)

    return matches, unmatched
9991
10092
10193def get_git_lines (fname = 'line-contributors.txt' ):
@@ -111,31 +103,42 @@ def get_git_lines(fname='line-contributors.txt'):
111103 lines = contrib_file .read_text ().splitlines ()
112104
113105 git_line_summary_path = shutil .which ('git-line-summary' )
106+ if not git_line_summary_path :
107+ git_line_summary_path = 'git summary --dedup-by-email' .split (' ' )
108+ else :
109+ git_line_summary_path = [git_line_summary_path ]
110+
114111 if not lines and git_line_summary_path :
115112 print ('Running git-line-summary on repo' )
116- lines = sp .check_output ([ git_line_summary_path ] ).decode ().splitlines ()
113+ lines = sp .check_output (git_line_summary_path ).decode ().splitlines ()
117114 lines = [line for line in lines if 'Not Committed Yet' not in line ]
118115 contrib_file .write_text ('\n ' .join (lines ))
119116
120117 if not lines :
121- raise RuntimeError (
122- f"""\
123- Could not find line-contributors from git repository.{
124- ' git-line-summary not found, please install git-extras.'
125- * (git_line_summary_path is None )
126- } """
118+ _msg = ': git-line-summary not found, please install git-extras ' * (
119+ git_line_summary_path is None
127120 )
121+ raise RuntimeError (f'Could not find line-contributors from git repository{ _msg } .' )
128122 return [' ' .join (line .strip ().split ()[1 :- 1 ]) for line in lines if '%' in line ]
129123
130124
131125def _namelast (inlist ):
132126 retval = []
133127 for i in inlist :
134- i ['name' ] = (f'{ i .pop ("name" , "" )} { i .pop ("lastname" , "" )} ' ).strip ()
128+ i ['name' ] = (f'{ i .pop ("lastname" , "" )} , { i .pop ("name" , "" )} ' ).strip ()
129+ if not i ['name' ]:
130+ i ['name' ] = i .get ('handle' , '<Unknown Name>' )
135131 retval .append (i )
136132 return retval
137133
138134
def load(path):
    """Read the markdown table at *path* and index its rows by name.

    The file text is parsed with ``read_md_table`` and passed through
    ``_namelast``. The result maps each entry's ``name`` to a copy of the
    entry whose keys are in sorted order. If two rows share a name, the
    later row replaces the earlier one.
    """
    table = read_md_table(Path(path).read_text())
    people = {}
    for person in _namelast(table):
        people[person['name']] = dict(sorted(person.items()))
    return people
140+
141+
139142@click .group ()
140143def cli ():
141144 """Generate authorship boilerplates."""
@@ -158,32 +161,31 @@ def zenodo(
158161 former_file ,
159162):
160163 """Generate a new Zenodo payload file."""
161- data = get_git_lines ()
162-
163164 zenodo = json .loads (Path (zenodo_file ).read_text ())
164165
165- former = _namelast (read_md_table (Path (former_file ).read_text ()))
166- zen_creators , miss_creators = sort_contributors (
167- _namelast (read_md_table (Path (maintainers ).read_text ())),
168- data ,
166+ maint = load (maintainers )
167+ contrib = load (contributors )
168+ pis = load (pi )
169+ former = load (former_file )
170+
171+ total_order , misses = sort_contributors (
172+ maint .keys () | contrib .keys () | pis .keys (),
173+ get_git_lines (),
169174 exclude = former ,
170175 )
171176
172- zen_contributors , miss_contributors = sort_contributors (
173- _namelast ( read_md_table ( Path ( contributors ). read_text ())), data , exclude = former
174- )
177+ # Sort
178+ creator_names = maint . keys () - set ( CREATORS_LAST )
179+ creator_names = [ name for name in total_order if name in creator_names ] + CREATORS_LAST
175180
176- zen_pi = _namelast (
177- sorted (
178- read_md_table (Path (pi ).read_text ()),
179- key = lambda v : (int (v .get ('position' , - 1 )), v .get ('lastname' )),
180- )
181- )
181+ skip = set (creator_names ) | set (CONTRIBUTORS_LAST )
182+ contrib_names = [name for name in total_order if name not in skip ] + CONTRIBUTORS_LAST
182183
183- zenodo ['creators' ] = zen_creators
184- zenodo ['contributors' ] = zen_contributors + zen_pi
184+ entries = contrib | maint | pis
185+
186+ zenodo ['creators' ] = [entries [name ] for name in creator_names ]
187+ zenodo ['contributors' ] = [entries [name ] for name in contrib_names ]
185188
186- misses = set (miss_creators ).intersection (miss_contributors )
187189 if misses :
188190 print (
189191 f'Some people made commits, but are missing in .maint/ files: { ", " .join (misses )} ' ,
@@ -194,18 +196,22 @@ def zenodo(
194196 for creator in zenodo ['creators' ]:
195197 creator .pop ('position' , None )
196198 creator .pop ('handle' , None )
197- if isinstance (creator ['affiliation' ], list ):
199+ if 'affiliation' not in creator :
200+ creator ['affiliation' ] = 'Unknown affiliation'
201+ elif isinstance (creator ['affiliation' ], list ):
198202 creator ['affiliation' ] = creator ['affiliation' ][0 ]
199203
200204 for creator in zenodo ['contributors' ]:
201205 creator .pop ('handle' , None )
202206 creator ['type' ] = 'Researcher'
203207 creator .pop ('position' , None )
204208
205- if isinstance (creator ['affiliation' ], list ):
209+ if 'affiliation' not in creator :
210+ creator ['affiliation' ] = 'Unknown affiliation'
211+ elif isinstance (creator ['affiliation' ], list ):
206212 creator ['affiliation' ] = creator ['affiliation' ][0 ]
207213
208- Path (zenodo_file ).write_text (f'{ json .dumps (zenodo , indent = 2 )} \n ' )
214+ Path (zenodo_file ).write_text (f'{ json .dumps (zenodo , indent = 2 , ensure_ascii = False )} \n ' )
209215
210216
211217@cli .command ()
@@ -222,34 +228,30 @@ def publication(
222228 former_file ,
223229):
224230 """Generate the list of authors and affiliations for papers."""
225- members = _namelast ( read_md_table ( Path ( maintainers ). read_text ())) + _namelast (
226- read_md_table ( Path ( contributors ). read_text () )
227- )
231+ maint = load ( maintainers )
232+ contrib = load ( contributors )
233+ former = load ( former_file )
228234
229235 hits , misses = sort_contributors (
230- members ,
236+ maint . keys () | contrib . keys () ,
231237 get_git_lines (),
232- exclude = _namelast ( read_md_table ( Path ( former_file ). read_text ())) ,
238+ exclude = former ,
233239 )
234240
235- pi_hits = _namelast (
236- sorted (
237- read_md_table (Path (pi ).read_text ()),
238- key = lambda v : (int (v .get ('position' , - 1 )), v .get ('lastname' )),
239- )
240- )
241+ pis = load (pi )
242+ entries = contrib | maint
241243
242- pi_names = [pi [ ' name' ] for pi in pi_hits ]
243- hits = [ hit for hit in hits if hit [ 'name' ] not in pi_names ] + pi_hits
244+ authors = [entries [ name ] for name in hits . keys () if name not in pis ]
245+ authors += pis . values ()
244246
245247 def _aslist (value ):
246- if isinstance (value , list | tuple ):
248+ if isinstance (value , ( list , tuple ) ):
247249 return value
248250 return [value ]
249251
250252 # Remove position
251253 affiliations = []
252- for item in hits :
254+ for item in authors :
253255 item .pop ('position' , None )
254256 for a in _aslist (item .get ('affiliation' , 'Unaffiliated' )):
255257 if a not in affiliations :
@@ -258,11 +260,11 @@ def _aslist(value):
258260 aff_indexes = [
259261 ', ' .join (
260262 [
261- str (affiliations .index (a ) + 1 )
263+ '%d' % (affiliations .index (a ) + 1 )
262264 for a in _aslist (author .get ('affiliation' , 'Unaffiliated' ))
263265 ]
264266 )
265- for author in hits
267+ for author in authors
266268 ]
267269
268270 if misses :
@@ -271,12 +273,16 @@ def _aslist(value):
271273 file = sys .stderr ,
272274 )
273275
274- print (f'Authors ({ len (hits )} ):' )
275- authors = f'{ "; " .join (rf"{ i ['name' ]} \ :sup:`{ idx } `\ " for i , idx in zip (hits , aff_indexes , strict = False ))} .'
276- print (f'{ authors } .' )
276+ print (f'Authors ({ len (authors )} ):' )
277+ print (
278+ '; ' .join (
279+ f'{ i ["name" ]} \\ :sup:`{ idx } `\\ ' for i , idx in zip (authors , aff_indexes , strict = False )
280+ )
281+ + '.'
282+ )
277283
278- lines = '\n ' . join ( f' { i + 1 : >2 } . { a } ' for i , a in enumerate ( affiliations ) )
279- print (f '\n \n Affiliations: \n { lines } ' )
284+ print ( '\n \n Affiliations:' )
285+ print ('\n ' . join ( f' { i + 1 : >2 } . { a } ' for i , a in enumerate ( affiliations )) )
280286
281287
282288if __name__ == '__main__' :
0 commit comments