Skip to content

Commit 12be7a2

Browse files
committed
chore: Rework update_authors
1 parent 7190263 commit 12be7a2

File tree

1 file changed

+93
-87
lines changed

1 file changed

+93
-87
lines changed

.maint/update_authors.py

Lines changed: 93 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
#!/usr/bin/env python3
2+
# /// script
3+
# requires-python = ">=3.12"
4+
# dependencies = [
5+
# "click",
6+
# "fuzzywuzzy",
7+
# "python-levenshtein",
8+
# ]
9+
# ///
210
"""Update and sort the creators list of the zenodo record."""
311

412
import json
@@ -8,6 +16,9 @@
816
import click
917
from fuzzywuzzy import fuzz, process
1018

19+
CREATORS_LAST = ['Poldrack, Russell A.', 'Gorgolewski, Krzysztof J.']
20+
CONTRIBUTORS_LAST = ['Ghosh, Satrajit S.']
21+
1122

1223
def read_md_table(md_text):
1324
"""
@@ -54,48 +65,29 @@ def read_md_table(md_text):
5465
return retval
5566

5667

57-
def sort_contributors(entries, git_lines, exclude=None):
    """Order known author names by their position in the git contribution list.

    Parameters
    ----------
    entries : iterable of str
        Known author names, formatted ``Last, First``.
    git_lines : iterable of str
        Committer names formatted ``First Last``. Earlier items rank higher
        (presumably sorted by contributed lines -- confirm against the
        producer of this list).
    exclude : iterable of str, optional
        ``Last, First`` names that must not be reported as unmatched.

    Returns
    -------
    matches : dict
        Keys are the names from ``entries`` and all values are ``None``; only
        the key order is meaningful. Names matched to a committer come first,
        in ``git_lines`` order, followed by the remaining names alphabetically.
    unmatched : set of str
        Committers that matched no entry and are not listed in ``exclude``.
    """

    def _first_last(name):
        # 'Last, First' -> 'First Last'
        return ' '.join(name.split(',')[::-1]).strip()

    sorted_authors = sorted(entries)

    # Match on First Last
    first_last = [_first_last(name) for name in sorted_authors]
    first_last_excl = {_first_last(name) for name in exclude or []}

    if not first_last:
        # Nothing to match against: every committer that is not excluded is
        # unmatched. Returning early also avoids indexing an empty result.
        return {}, {name for name in git_lines if name not in first_last_excl}

    ranked = []
    unmatched = set()
    for committer in git_lines:
        # Only the single best candidate is ever used.
        best = process.extract(committer, first_last, scorer=fuzz.token_sort_ratio, limit=1)
        # A score above 80 is treated as the same person.
        if best and best[0][1] > 80:
            ranked.append(sorted_authors[first_last.index(best[0][0])])
        elif committer not in first_last_excl:
            unmatched.add(committer)

    # Return Last, First; dict.fromkeys de-duplicates while keeping order
    matches = dict.fromkeys(ranked)
    # Add any remaining authors not matched in git_lines (existing keys keep
    # their position)
    matches.update(dict.fromkeys(sorted_authors))

    return matches, unmatched
9991

10092

10193
def get_git_lines(fname='line-contributors.txt'):
@@ -111,31 +103,42 @@ def get_git_lines(fname='line-contributors.txt'):
111103
lines = contrib_file.read_text().splitlines()
112104

113105
git_line_summary_path = shutil.which('git-line-summary')
106+
if not git_line_summary_path:
107+
git_line_summary_path = 'git summary --dedup-by-email'.split(' ')
108+
else:
109+
git_line_summary_path = [git_line_summary_path]
110+
114111
if not lines and git_line_summary_path:
115112
print('Running git-line-summary on repo')
116-
lines = sp.check_output([git_line_summary_path]).decode().splitlines()
113+
lines = sp.check_output(git_line_summary_path).decode().splitlines()
117114
lines = [line for line in lines if 'Not Committed Yet' not in line]
118115
contrib_file.write_text('\n'.join(lines))
119116

120117
if not lines:
121-
raise RuntimeError(
122-
f"""\
123-
Could not find line-contributors from git repository.{
124-
' git-line-summary not found, please install git-extras.'
125-
* (git_line_summary_path is None)
126-
}"""
118+
_msg = ': git-line-summary not found, please install git-extras ' * (
119+
git_line_summary_path is None
127120
)
121+
raise RuntimeError(f'Could not find line-contributors from git repository{_msg}.')
128122
return [' '.join(line.strip().split()[1:-1]) for line in lines if '%' in line]
129123

130124

131125
def _namelast(inlist):
132126
retval = []
133127
for i in inlist:
134-
i['name'] = (f'{i.pop("name", "")} {i.pop("lastname", "")}').strip()
128+
i['name'] = (f'{i.pop("lastname", "")}, {i.pop("name", "")}').strip()
129+
if not i['name']:
130+
i['name'] = i.get('handle', '<Unknown Name>')
135131
retval.append(i)
136132
return retval
137133

138134

135+
def load(path):
    """Read a Markdown table of people and index its rows by name.

    The file at ``path`` is parsed with ``read_md_table`` and each row is
    passed through ``_namelast``, so every row has a ``name`` key.

    Returns a dictionary mapping each row's ``name`` to a copy of that row
    with its keys in alphabetical order. If two rows share a name, the later
    row replaces the earlier one.
    """
    # Read as UTF-8 explicitly: names may contain non-ASCII characters and the
    # locale's default encoding is not guaranteed to be UTF-8.
    table = read_md_table(Path(path).read_text(encoding='utf-8'))
    return {entry['name']: dict(sorted(entry.items())) for entry in _namelast(table)}
140+
141+
139142
@click.group()
140143
def cli():
141144
"""Generate authorship boilerplates."""
@@ -158,32 +161,31 @@ def zenodo(
158161
former_file,
159162
):
160163
"""Generate a new Zenodo payload file."""
161-
data = get_git_lines()
162-
163164
zenodo = json.loads(Path(zenodo_file).read_text())
164165

165-
former = _namelast(read_md_table(Path(former_file).read_text()))
166-
zen_creators, miss_creators = sort_contributors(
167-
_namelast(read_md_table(Path(maintainers).read_text())),
168-
data,
166+
maint = load(maintainers)
167+
contrib = load(contributors)
168+
pis = load(pi)
169+
former = load(former_file)
170+
171+
total_order, misses = sort_contributors(
172+
maint.keys() | contrib.keys() | pis.keys(),
173+
get_git_lines(),
169174
exclude=former,
170175
)
171176

172-
zen_contributors, miss_contributors = sort_contributors(
173-
_namelast(read_md_table(Path(contributors).read_text())), data, exclude=former
174-
)
177+
# Sort
178+
creator_names = maint.keys() - set(CREATORS_LAST)
179+
creator_names = [name for name in total_order if name in creator_names] + CREATORS_LAST
175180

176-
zen_pi = _namelast(
177-
sorted(
178-
read_md_table(Path(pi).read_text()),
179-
key=lambda v: (int(v.get('position', -1)), v.get('lastname')),
180-
)
181-
)
181+
skip = set(creator_names) | set(CONTRIBUTORS_LAST)
182+
contrib_names = [name for name in total_order if name not in skip] + CONTRIBUTORS_LAST
182183

183-
zenodo['creators'] = zen_creators
184-
zenodo['contributors'] = zen_contributors + zen_pi
184+
entries = contrib | maint | pis
185+
186+
zenodo['creators'] = [entries[name] for name in creator_names]
187+
zenodo['contributors'] = [entries[name] for name in contrib_names]
185188

186-
misses = set(miss_creators).intersection(miss_contributors)
187189
if misses:
188190
print(
189191
f'Some people made commits, but are missing in .maint/ files: {", ".join(misses)}',
@@ -194,18 +196,22 @@ def zenodo(
194196
for creator in zenodo['creators']:
195197
creator.pop('position', None)
196198
creator.pop('handle', None)
197-
if isinstance(creator['affiliation'], list):
199+
if 'affiliation' not in creator:
200+
creator['affiliation'] = 'Unknown affiliation'
201+
elif isinstance(creator['affiliation'], list):
198202
creator['affiliation'] = creator['affiliation'][0]
199203

200204
for creator in zenodo['contributors']:
201205
creator.pop('handle', None)
202206
creator['type'] = 'Researcher'
203207
creator.pop('position', None)
204208

205-
if isinstance(creator['affiliation'], list):
209+
if 'affiliation' not in creator:
210+
creator['affiliation'] = 'Unknown affiliation'
211+
elif isinstance(creator['affiliation'], list):
206212
creator['affiliation'] = creator['affiliation'][0]
207213

208-
Path(zenodo_file).write_text(f'{json.dumps(zenodo, indent=2)}\n')
214+
Path(zenodo_file).write_text(f'{json.dumps(zenodo, indent=2, ensure_ascii=False)}\n')
209215

210216

211217
@cli.command()
@@ -222,34 +228,30 @@ def publication(
222228
former_file,
223229
):
224230
"""Generate the list of authors and affiliations for papers."""
225-
members = _namelast(read_md_table(Path(maintainers).read_text())) + _namelast(
226-
read_md_table(Path(contributors).read_text())
227-
)
231+
maint = load(maintainers)
232+
contrib = load(contributors)
233+
former = load(former_file)
228234

229235
hits, misses = sort_contributors(
230-
members,
236+
maint.keys() | contrib.keys(),
231237
get_git_lines(),
232-
exclude=_namelast(read_md_table(Path(former_file).read_text())),
238+
exclude=former,
233239
)
234240

235-
pi_hits = _namelast(
236-
sorted(
237-
read_md_table(Path(pi).read_text()),
238-
key=lambda v: (int(v.get('position', -1)), v.get('lastname')),
239-
)
240-
)
241+
pis = load(pi)
242+
entries = contrib | maint
241243

242-
pi_names = [pi['name'] for pi in pi_hits]
243-
hits = [hit for hit in hits if hit['name'] not in pi_names] + pi_hits
244+
authors = [entries[name] for name in hits.keys() if name not in pis]
245+
authors += pis.values()
244246

245247
def _aslist(value):
246-
if isinstance(value, list | tuple):
248+
if isinstance(value, (list, tuple)):
247249
return value
248250
return [value]
249251

250252
# Remove position
251253
affiliations = []
252-
for item in hits:
254+
for item in authors:
253255
item.pop('position', None)
254256
for a in _aslist(item.get('affiliation', 'Unaffiliated')):
255257
if a not in affiliations:
@@ -258,11 +260,11 @@ def _aslist(value):
258260
aff_indexes = [
259261
', '.join(
260262
[
261-
str(affiliations.index(a) + 1)
263+
'%d' % (affiliations.index(a) + 1)
262264
for a in _aslist(author.get('affiliation', 'Unaffiliated'))
263265
]
264266
)
265-
for author in hits
267+
for author in authors
266268
]
267269

268270
if misses:
@@ -271,12 +273,16 @@ def _aslist(value):
271273
file=sys.stderr,
272274
)
273275

274-
print(f'Authors ({len(hits)}):')
275-
authors = f'{"; ".join(rf"{i['name']} \ :sup:`{idx}`\ " for i, idx in zip(hits, aff_indexes, strict=False))}.'
276-
print(f'{authors}.')
276+
print(f'Authors ({len(authors)}):')
277+
print(
278+
'; '.join(
279+
f'{i["name"]} \\ :sup:`{idx}`\\ ' for i, idx in zip(authors, aff_indexes, strict=False)
280+
)
281+
+ '.'
282+
)
277283

278-
lines = '\n'.join(f'{i + 1: >2}. {a}' for i, a in enumerate(affiliations))
279-
print(f'\n\nAffiliations:\n{lines}')
284+
print('\n\nAffiliations:')
285+
print('\n'.join(f'{i + 1: >2}. {a}' for i, a in enumerate(affiliations)))
280286

281287

282288
if __name__ == '__main__':

0 commit comments

Comments
 (0)