Skip to content

Commit 82b16be

Browse files
committed
Merge branch 'hotfix/2.6.2'
2 parents 5ce330f + cca8c02 commit 82b16be

File tree

147 files changed

+389
-390
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

147 files changed

+389
-390
lines changed

share/harvesters/edu_ageconsearch.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ def do_harvest(self, start_date: pendulum.Pendulum, end_date: pendulum.Pendulum)
5353

5454
# Fetch the list of work urls on a single result page and return results within date range
5555
def fetch_records(self, start_date, end_date):
56-
logger.info('Harvesting % - %s', start_date, end_date)
57-
logger.debug('Fetching page %s', self.base_url)
56+
logger.info('Harvesting %s - %s', start_date, end_date)
57+
logger.debug('Fetching page %s', self.config.base_url)
5858

5959
url = furl(self.config.base_url)
6060
url.args['starts_with'] = start_date

share/harvesters/edu_gwu.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def do_harvest(self, start_date, end_date):
1717
start_date = start_date.date()
1818

1919
# There is no apparent way to filter by date range, just sort by date.
20-
url = furl(self.config.base_url)
20+
url = furl(self.config.base_url + '/catalog')
2121
url.args['per_page'] = 10 # If it gets more active, consider upping to 50 or 100
2222
url.args['sort'] = 'system_modified_dtsi+desc'
2323

@@ -30,7 +30,7 @@ def fetch_records(self, url, start_date, end_date):
3030
resp = self.requests.get(furl(url).set(query_params={'page': page}))
3131
soup = BeautifulSoup(resp.content, 'lxml')
3232
try:
33-
total = int(soup.select('#sortAndPerPage .page_entries strong')[-1].text)
33+
total = int(soup.select('#sortAndPerPage .page_entries strong')[-1].text.replace(',', ''))
3434
except IndexError:
3535
total = 0
3636

@@ -43,9 +43,8 @@ def fetch_records(self, url, start_date, end_date):
4343
break
4444

4545
logger.info('On document %d of %d (%d%%)', count, total, (count / total) * 100)
46-
4746
for link in links:
48-
item_response = self.requests.get(self.config.home_page + link)
47+
item_response = self.requests.get(self.config.base_url + link)
4948
if item_response.status_code // 100 != 2:
5049
logger.warning('Got non-200 status %s from %s', item_response, link)
5150
continue

share/models/logs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ class HarvestLog(AbstractBaseLog):
215215
class Meta:
216216
unique_together = ('source_config', 'start_date', 'end_date', 'harvester_version', 'source_config_version', )
217217

218-
def spawn_task(self, ingest=False, force=False, limit=None, superfluous=False, ignore_disabled=False, async=True):
218+
def spawn_task(self, ingest=True, force=False, limit=None, superfluous=False, ignore_disabled=False, async=True):
219219
from share.tasks import HarvesterTask
220220
# TODO Move most if not all of the logic for task argument massaging here.
221221
# It's bad to have two places already but this is required to backharvest a source without timing out on uwsgi

share/sources/au.uow/source.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ configs:
55
harvester: oai
66
harvester_kwargs: {metadata_prefix: oai_dc}
77
label: au.uow
8-
rate_limit_allowance: 5
9-
rate_limit_period: 1
8+
rate_limit_allowance: 1
9+
rate_limit_period: 2
1010
transformer: oai_dc
1111
transformer_kwargs:
1212
approved_sets: null

share/sources/be.ghent/source.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ configs:
55
harvester: oai
66
harvester_kwargs: {metadata_prefix: oai_dc}
77
label: be.ghent
8-
rate_limit_allowance: 5
9-
rate_limit_period: 1
8+
rate_limit_allowance: 1
9+
rate_limit_period: 2
1010
transformer: oai_dc
1111
transformer_kwargs:
1212
approved_sets: null
@@ -19,8 +19,8 @@ configs:
1919
harvester: oai
2020
harvester_kwargs: {metadata_prefix: mods}
2121
label: be.ghent.mods
22-
rate_limit_allowance: 5
23-
rate_limit_period: 1
22+
rate_limit_allowance: 1
23+
rate_limit_period: 2
2424
transformer: mods
2525
transformer_kwargs:
2626
approved_sets: null

share/sources/br.pcurio/source.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ configs:
55
harvester: oai
66
harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
77
label: br.pcurio
8-
rate_limit_allowance: 5
9-
rate_limit_period: 1
8+
rate_limit_allowance: 1
9+
rate_limit_period: 2
1010
transformer: oai_dc
1111
transformer_kwargs:
1212
approved_sets: null

share/sources/ca.lwbin/source.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ configs:
55
harvester: ca.lwbin
66
harvester_kwargs: {}
77
label: ca.lwbin
8-
rate_limit_allowance: 5
9-
rate_limit_period: 1
8+
rate_limit_allowance: 1
9+
rate_limit_period: 2
1010
transformer: ca.lwbin
1111
transformer_kwargs: {}
1212
home_page: http://130.179.67.140

share/sources/ca.umontreal/source.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ configs:
55
harvester: oai
66
harvester_kwargs: {metadata_prefix: mods}
77
label: ca.umontreal.mods
8-
rate_limit_allowance: 5
9-
rate_limit_period: 1
8+
rate_limit_allowance: 1
9+
rate_limit_period: 2
1010
transformer: mods
1111
transformer_kwargs:
1212
approved_sets: null
@@ -19,8 +19,8 @@ configs:
1919
harvester: oai
2020
harvester_kwargs: {metadata_prefix: oai_dc}
2121
label: ca.umontreal
22-
rate_limit_allowance: 5
23-
rate_limit_period: 1
22+
rate_limit_allowance: 1
23+
rate_limit_period: 2
2424
transformer: oai_dc
2525
transformer_kwargs:
2626
approved_sets: null

share/sources/ca.uwo/source.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ configs:
55
harvester: oai
66
harvester_kwargs: {metadata_prefix: oai_dc}
77
label: ca.uwo
8-
rate_limit_allowance: 5
9-
rate_limit_period: 1
8+
rate_limit_allowance: 1
9+
rate_limit_period: 2
1010
transformer: oai_dc
1111
transformer_kwargs:
1212
approved_sets: null

share/sources/ch.cern/source.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ configs:
55
harvester: oai
66
harvester_kwargs: {metadata_prefix: oai_dc}
77
label: ch.cern
8-
rate_limit_allowance: 5
9-
rate_limit_period: 1
8+
rate_limit_allowance: 1
9+
rate_limit_period: 2
1010
transformer: oai_dc
1111
transformer_kwargs:
1212
approved_sets: null

0 commit comments

Comments
 (0)