Skip to content

Commit d34c100

Browse files
committed
TL: who the hell wrote this script ...
1 parent 5034f37 commit d34c100

File tree

1 file changed

+9
-38
lines changed

1 file changed

+9
-38
lines changed

bin/arxiv_to_publications_correct.py

Lines changed: 9 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -36,46 +36,17 @@
36 36
id = data['author'][0]['family'] + 'EtAl' + str(data['issued']['date-parts'][0][0])
37 37
else:
38 38
id = data['author'][0]['family'] + str(data['issued']['date-parts'][0][0])
39+
assert id == id_db, f"ID generated with new DOI ({id}) is different than the original in database ({id_db})"
39 40

40-
d = db.get_entry_dict()
41-
id_orig = id
42-
letters = 'bcdefghijklmnopqrstuvwxyz'
43-
i = 0
44-
duplicate = False
45-
while id in d:
46-
for author in data["author"]:
47-
if 'given' not in author:
48-
author["given"] = ''
49-
authors = " and ".join([author['given'] + ' ' + author['family'] for author in data["author"]])
50-
candidate_title = re.sub('[^A-Za-z0-9]+', '', data['title'])
51-
existing_title = re.sub('[^A-Za-z0-9]+', '', d[id].get('title', ""))
52-
if authors == d[id].get('author', "") and candidate_title == existing_title:
53-
print(f'I detected a duplicate based on the key {id}, the list of authors and the title for {url}. '
54-
f'I will ignore this entry. If this is wrong, sorry for that..\n\n')
55-
duplicate = True
56-
break
57-
else:
58-
print(f'I detected a duplicate based on the key {id}. '
59-
f'I will augment it with a letter and try again. '
60-
f'Please double-check, if this is correct.. '
61-
f'my duplicate detection algorithm is pretty bad.\n\n')
62-
id = id_orig + letters[i]
63-
i += 1
41+
entries = db.get_entry_dict()
42+
assert entries[id]["ENTRYTYPE"] == 'unpublished', "original entry in bib file was NOT unpublished !"
43+
db.entries.remove(entries[id])
64 44

65-
if not duplicate:
66-
67-
for item in db.get_entry_list():
68-
if item['ID'] == id_db and item['ENTRYTYPE'] == 'unpublished':
69-
# print(f"removing {item['ID']}")
70-
db.entries.remove(item)
71-
72-
bType, *rest1 = bib.split("{")
73-
oldID, *rest2 = rest1[0].split(",")
74-
bib = "{".join([bType] + [','.join([id]+rest2)] + rest1[1:])
75-
bib_db = bibtexparser.loads(bib)
76-
db.entries.extend(bib_db.get_entry_list())
77-
else:
78-
bib_db = None
45+
bType, *rest1 = bib.split("{")
46+
oldID, *rest2 = rest1[0].split(",")
47+
bib = "{".join([bType] + [','.join([id]+rest2)] + rest1[1:])
48+
bib_db = bibtexparser.loads(bib)
49+
db.entries.extend(bib_db.get_entry_list())
79 50

80 51
if id_list:
81 52
writer = BibTexWriter()

0 commit comments

Comments
 (0)