Skip to content

Commit 6878fe6

Browse files
committed
Start working on the gbcompo21 importer (#106)
1 parent e863411 commit 6878fe6

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

scrapers/gbcompo21/import.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import csv, os
2+
from slugify import slugify
3+
from strsimpy.jaro_winkler import JaroWinkler
4+
5+
# Work-in-progress importer for the gbcompo21 entries: reads the exported
# "gbcompo21.csv", builds one game record per row and prints, for each title,
# the name in "gbcompo21/entries" that is closest to it.

jarowinkler = JaroWinkler()

# Friday 1 October 2021 02:00:00 UTC, expressed as a Unix timestamp (seconds).
date = 1633053600

with open("gbcompo21.csv", newline="") as csvfile:
    spamreader = csv.DictReader(csvfile)

    # The listing does not depend on the current row, so read it once up
    # front instead of hitting the filesystem again for every CSV row.
    entry_dirs = os.listdir("gbcompo21/entries")

    for row in spamreader:
        gameObj = {
            "title": row["title"],
            "slug": slugify(row["title"]),
            "developer": row["user"],
            "typetag": "game",
            "tags": ["gbcompo21"],
            "website": row["game_url"],
            "date": date,
        }

        # Values of length 0 or 1 are treated as "no repository given".
        if len(row["Open Source repository"]) > 1:
            gameObj["repository"] = row["Open Source repository"]
            gameObj["tags"].append("Open Source")

        # TODO: tag shortlisted entries once `shortlist` is defined:
        # if row["title"] in shortlist:
        #     gameObj["tags"].append("gbcompo21-top20")

        # Pick the entry name with the smallest Jaro-Winkler distance to the
        # title; on ties the first one listed wins. `matched` starts as None
        # so an empty entries directory no longer raises NameError below.
        matched = None
        # Sentinel, assumed to be larger than any distance the metric returns.
        d = 1000000
        for directory in entry_dirs:
            d2 = jarowinkler.distance(directory, row["title"])
            if d2 < d:
                d = d2
                matched = directory

        print(gameObj["title"])
        print(f"{row['title']} || {matched} || {d} || ")

        # print(gameObj)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
python-slugify==5.0.2
2+
strsimpy==0.2.1
3+
text-unidecode==1.3

0 commit comments

Comments
 (0)