# Patch summary (leading commentary; patch tools skip text that precedes the
# first `diff --git` header).
#
# google_scholar_crawler/main.py
#   * Imports ProxyGenerator alongside scholarly.
#   * Creates a ProxyGenerator, calls FreeProxies() on it and registers it via
#     scholarly.use_proxy() at module level, before the
#     scholarly.search_author_id(...) lookup runs.
#   * Rewrites the final json.dump(...) line with identical text; the only
#     difference is that the new file no longer ends with a newline.
#
# google_scholar_crawler/requirements.txt
#   * jsonpickle 1.4.2 -> 4.0.5
#   * scholarly  1.5.1 -> 1.7.11
#   * Adds httpx==0.23.3 (new file ends without a trailing newline).
#
# NOTE(review): the httpx pin is presumably there for compatibility with the
#   free-proxy code path in scholarly -- confirm before changing it.
# NOTE(review): leading indentation inside the second main.py hunk was
#   reconstructed as 4 spaces; verify against the target file before applying.
diff --git a/google_scholar_crawler/main.py b/google_scholar_crawler/main.py
index c11810d93b8..fc8248652a1 100644
--- a/google_scholar_crawler/main.py
+++ b/google_scholar_crawler/main.py
@@ -1,9 +1,14 @@
-from scholarly import scholarly
+from scholarly import scholarly, ProxyGenerator
 import jsonpickle
 import json
 from datetime import datetime
 import os
 
+# Setup proxy
+pg = ProxyGenerator()
+pg.FreeProxies() # Use free rotating proxies
+scholarly.use_proxy(pg)
+
 author: dict = scholarly.search_author_id(os.environ['GOOGLE_SCHOLAR_ID'])
 scholarly.fill(author, sections=['basics', 'indices', 'counts', 'publications'])
 name = author['name']
@@ -20,4 +25,4 @@
     "message": f"{author['citedby']}",
 }
 with open(f'results/gs_data_shieldsio.json', 'w') as outfile:
-    json.dump(shieldio_data, outfile, ensure_ascii=False)
+    json.dump(shieldio_data, outfile, ensure_ascii=False)
\ No newline at end of file
diff --git a/google_scholar_crawler/requirements.txt b/google_scholar_crawler/requirements.txt
index 78ff257fa81..7f60cb35288 100644
--- a/google_scholar_crawler/requirements.txt
+++ b/google_scholar_crawler/requirements.txt
@@ -1,2 +1,3 @@
-jsonpickle==1.4.2
-scholarly==1.5.1
+jsonpickle==4.0.5
+scholarly==1.7.11
+httpx==0.23.3
\ No newline at end of file