
Commit ba3aef1

1 parent 910b5b0


google_scholar_crawler/main.py

Lines changed: 23 additions & 6 deletions
@@ -3,14 +3,31 @@
 import json
 from datetime import datetime
 import os
+import time
+
+
+max_attempts = 100
+wait_seconds = 600  # 10 minutes
+
+for attempt in range(1, max_attempts + 1):
+    try:
+        print(f"Attempt {attempt}:")
+        # Setup proxy
+        pg = ProxyGenerator()
+        pg.FreeProxies()  # Use free rotating proxies
+        scholarly.use_proxy(pg)
+
+        author: dict = scholarly.search_author_id(os.environ['GOOGLE_SCHOLAR_ID'])
+        scholarly.fill(author, sections=['basics', 'indices', 'counts', 'publications'])
+        print(f"Attempt {attempt} success")
+        break  # Exit loop on first success
+    except Exception as e:
+        print(f"Attempt {attempt} failed with error: {e}")
+        time.sleep(wait_seconds)
+else:
+    print("All 100 attempts failed.")
 
-# Setup proxy
-pg = ProxyGenerator()
-pg.FreeProxies()  # Use free rotating proxies
-scholarly.use_proxy(pg)
 
-author: dict = scholarly.search_author_id(os.environ['GOOGLE_SCHOLAR_ID'])
-scholarly.fill(author, sections=['basics', 'indices', 'counts', 'publications'])
 name = author['name']
 author['updated'] = str(datetime.now())
 author['publications'] = {v['author_pub_id']:v for v in author['publications']}
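For readers unfamiliar with the pattern, the added retry loop relies on Python's for/else construct: the else branch runs only when the loop finishes without hitting break, i.e. when every attempt failed. Below is a minimal, self-contained sketch of the same fetch-with-retries idea, factored into a function; the helper name, its parameters, and the __main__ block are illustrative and not part of this commit.

import os
import time

from scholarly import scholarly, ProxyGenerator


def fetch_author(scholar_id, max_attempts=100, wait_seconds=600):
    """Fetch a Google Scholar profile, retrying behind fresh free proxies."""
    for attempt in range(1, max_attempts + 1):
        try:
            # A new ProxyGenerator per attempt draws a fresh set of free proxies.
            pg = ProxyGenerator()
            pg.FreeProxies()
            scholarly.use_proxy(pg)

            author = scholarly.search_author_id(scholar_id)
            scholarly.fill(author, sections=['basics', 'indices', 'counts', 'publications'])
            return author
        except Exception as e:
            print(f"Attempt {attempt} failed with error: {e}")
            time.sleep(wait_seconds)
    raise RuntimeError(f"All {max_attempts} attempts failed.")


if __name__ == "__main__":
    author = fetch_author(os.environ['GOOGLE_SCHOLAR_ID'])
    print(author['name'])

Rebuilding the ProxyGenerator inside the loop, as the commit does, means each retry picks up a new set of free proxies instead of reusing one that Google Scholar may have already blocked.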
