Skip to content

Commit 3f8d6b1

Browse files
authored
Merge pull request #43 from sypht-team/chore/bump-rec-limit
chore: bump rec_limit and allow override
2 parents 090eae6 + d0808a7 commit 3f8d6b1

File tree

5 files changed

+153
-11
lines changed

5 files changed

+153
-11
lines changed

.github/workflows/build.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,20 @@ jobs:
2626
- name: Install dependencies
2727
run: |
2828
python -m pip install --upgrade pip
29-
pip install pytest coverage codecov
29+
pip install pytest coverage codecov httpretty
3030
pip install .
31+
type python
32+
type pip
33+
type pytest
34+
pip freeze
3135
- name: Test with pytest
3236
env:
3337
SYPHT_API_BASE_ENDPOINT: ${{ vars.SYPHT_API_BASE_ENDPOINT }}
3438
SYPHT_API_KEY: ${{ secrets.SYPHT_API_KEY }}
3539
SYPHT_AUTH_ENDPOINT: ${{ vars.SYPHT_AUTH_ENDPOINT }}
3640
run: |
37-
pytest -s tests/*.py
41+
type python
42+
type pip
43+
type pytest
44+
pip freeze
45+
pytest -s tests/test*.py

sypht/client.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
from urllib.parse import quote_plus, urlencode, urljoin
77

88
import requests
9+
from requests.adapters import HTTPAdapter
10+
from urllib3.util import Retry
911

10-
from .util import fetch_all_pages
12+
from sypht.util import fetch_all_pages
1113

1214
SYPHT_API_BASE_ENDPOINT = "https://api.sypht.com"
1315
SYPHT_AUTH_ENDPOINT = "https://auth.sypht.com/oauth2/token"
@@ -75,7 +77,23 @@ def __init__(
7577

7678
@property
7779
def _create_session(self):
78-
return requests.Session()
80+
session = requests.Session()
81+
retries = Retry(
82+
total=None, # set connect, read, redirect, status, other instead
83+
connect=3,
84+
read=3,
85+
redirect=0,
86+
status=3,
87+
status_forcelist=[429, 502, 503, 504],
88+
other=0, # catch-all for other errors
89+
allowed_methods=["GET"],
90+
respect_retry_after_header=False,
91+
backoff_factor=0.5, # 0.0, 0.5, 1.0, 2.0, 4.0
92+
# Support manual status handling in _parse_response.
93+
raise_on_status=False,
94+
)
95+
session.mount(self.base_endpoint, HTTPAdapter(max_retries=retries))
96+
return session
7997

8098
def _authenticate_v2(self, endpoint, client_id, client_secret, audience):
8199
basic_auth_slug = b64encode(
@@ -418,11 +436,14 @@ def get_annotations(
418436
from_date=None,
419437
to_date=None,
420438
endpoint=None,
439+
rec_limit=None,
440+
company_id=None,
421441
):
422442
page_iter = fetch_all_pages(
423443
name="get_annotations",
424444
fetch_page=self._get_annotations,
425445
get_page=lambda response: response["annotations"],
446+
rec_limit=rec_limit,
426447
)
427448
annotations = []
428449
for response in page_iter(
@@ -433,6 +454,7 @@ def get_annotations(
433454
from_date=from_date,
434455
to_date=to_date,
435456
endpoint=endpoint,
457+
company_id=company_id,
436458
):
437459
annotations.extend(response["annotations"])
438460
return {"annotations": annotations}
@@ -446,6 +468,7 @@ def _get_annotations(
446468
from_date=None,
447469
to_date=None,
448470
endpoint=None,
471+
company_id=None,
449472
offset=0,
450473
):
451474
"""Fetch a single page of annotations skipping the given offset number of pages first. Use get_annotations to fetch all pages."""
@@ -462,6 +485,8 @@ def _get_annotations(
462485
filters.append("fromDate=" + from_date)
463486
if to_date is not None:
464487
filters.append("toDate=" + to_date)
488+
if company_id is not None:
489+
filters.append("companyId=" + company_id)
465490

466491
endpoint = urljoin(
467492
endpoint or self.base_endpoint, ("/app/annotations?" + "&".join(filters))
@@ -471,11 +496,12 @@ def _get_annotations(
471496
headers["Content-Type"] = "application/json"
472497
return self._parse_response(self.requests.get(endpoint, headers=headers))
473498

474-
def get_annotations_for_docs(self, doc_ids, endpoint=None):
499+
def get_annotations_for_docs(self, doc_ids, endpoint=None, rec_limit=None):
475500
page_iter = fetch_all_pages(
476501
name="get_annotations_for_docs",
477502
fetch_page=self._get_annotations_for_docs,
478503
get_page=lambda response: response["annotations"],
504+
rec_limit=rec_limit,
479505
)
480506
annotations = []
481507
for response in page_iter(

sypht/util.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
1+
import logging
12
from typing import Any, Callable, Iterator, List
23

4+
DEFAULT_REC_LIMIT = 100_000
5+
36

47
def fetch_all_pages(
58
name: str,
69
fetch_page: Callable[..., Any],
710
get_page: Callable[..., List[Any]] = lambda x: x,
8-
rec_limit=20000,
11+
rec_limit=DEFAULT_REC_LIMIT,
912
) -> Callable[..., Iterator[Any]]:
1013
"""Returns an iterator that calls fetch_page with an offset that we increment by the number of pages fetched. Stop if page returns empty list.
1114
1215
:param fetch_page: a function that makes an api call to fetch a page of results (using zero-based offset)
1316
:param get_page: a function that extracts the page from the response which should be a list
1417
"""
1518

19+
# Enforce a default so that the loop will stop.
20+
if rec_limit is None:
21+
rec_limit = DEFAULT_REC_LIMIT
22+
1623
def fetch_all_pages(*args, **kwargs) -> Iterator[Any]:
1724
page_count = 0
1825
recs = 0
@@ -31,17 +38,20 @@ def fetch_all_pages(*args, **kwargs) -> Iterator[Any]:
3138
)
3239
except Exception as err:
3340
raise Exception(
34-
f"Failed fetching for {name} for offset={page_count - 1} (records fetched so far:{recs})"
41+
f"Failed fetching for {name} for offset={page_count - 1} (page={page_count}) (records fetched so far:{recs}). Cause: {err}"
3542
) from err
3643
try:
3744
page = get_page(response)
3845
except Exception as err:
3946
raise Exception(
40-
f"get_page failed to extract page from response for {name} for offset={page_count - 1} (records fetched so far:{recs})"
47+
f"get_page failed to extract page from response for {name} for offset={page_count - 1} (page={page_count}) (records fetched so far:{recs}). Cause: {err}"
4148
) from err
4249
if len(page) == 0:
4350
break
4451
recs += len(page)
52+
logging.info(
53+
f"fetch_all_pages({name}): fetched page {page_count} (records={recs})"
54+
)
4555
yield response
4656

4757
return fetch_all_pages

tests/tests_client.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1-
import os
1+
import json
22
import unittest
33
import warnings
44
from datetime import datetime, timedelta
5+
from unittest.mock import Mock, patch
56
from uuid import UUID, uuid4
67

8+
import httpretty
9+
import pytest
10+
711
from sypht.client import SyphtClient
812

913

@@ -93,5 +97,75 @@ def test_reauthentication(self):
9397
self.assertFalse(self.sypht_client._is_token_expired())
9498

9599

100+
class RetryTest(unittest.TestCase):
101+
"""Test the global retry logic works as we expect it to."""
102+
103+
@patch.object(SyphtClient, "_authenticate_v2", return_value=("access_token", 100))
104+
@patch.object(SyphtClient, "_authenticate_v1", return_value=("access_token2", 100))
105+
@httpretty.activate(verbose=True, allow_net_connect=False)
106+
def test_it_should_retry_n_times(self, auth_v1: Mock, auth_v2: Mock):
107+
# arrange
108+
self.count = 0
109+
110+
def get_annotations(request, uri, response_headers):
111+
self.count += 1
112+
# 1 req + 3 retries = 4
113+
if self.count == 4:
114+
return [200, response_headers, json.dumps({"annotations": []})]
115+
return [502, response_headers, json.dumps({})]
116+
117+
httpretty.register_uri(
118+
httpretty.GET,
119+
"https://api.sypht.com/app/annotations?offset=0&fromDate=2021-01-01&toDate=2021-01-01",
120+
body=get_annotations,
121+
)
122+
123+
sypht_client = SyphtClient(base_endpoint="https://api.sypht.com")
124+
125+
# act / assert
126+
response = sypht_client.get_annotations(
127+
from_date=datetime(
128+
year=2021, month=1, day=1, hour=0, minute=0, second=0
129+
).strftime("%Y-%m-%d"),
130+
to_date=datetime(
131+
year=2021, month=1, day=1, hour=0, minute=0, second=0
132+
).strftime("%Y-%m-%d"),
133+
)
134+
135+
assert response == {"annotations": []}
136+
137+
@patch.object(SyphtClient, "_authenticate_v2", return_value=("access_token", 100))
138+
@patch.object(SyphtClient, "_authenticate_v1", return_value=("access_token2", 100))
139+
@httpretty.activate(verbose=True, allow_net_connect=False)
140+
def test_retry_should_eventually_fail_for_50x(self, auth_v1: Mock, auth_v2: Mock):
141+
# arrange
142+
self.count = 0
143+
144+
def get_annotations(request, uri, response_headers):
145+
self.count += 1
146+
return [502, response_headers, json.dumps({})]
147+
148+
httpretty.register_uri(
149+
httpretty.GET,
150+
"https://api.sypht.com/app/annotations?offset=0&fromDate=2021-01-01&toDate=2021-01-01",
151+
body=get_annotations,
152+
)
153+
154+
sypht_client = SyphtClient(base_endpoint="https://api.sypht.com")
155+
156+
# act / assert
157+
with self.assertRaisesRegex(Exception, ".") as e:
158+
sypht_client.get_annotations(
159+
from_date=datetime(
160+
year=2021, month=1, day=1, hour=0, minute=0, second=0
161+
).strftime("%Y-%m-%d"),
162+
to_date=datetime(
163+
year=2021, month=1, day=1, hour=0, minute=0, second=0
164+
).strftime("%Y-%m-%d"),
165+
)
166+
167+
assert self.count == 4, "should be 1 req + 3 retries"
168+
169+
96170
if __name__ == "__main__":
97171
unittest.main()

tests/tests_util.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from sypht.util import fetch_all_pages
3+
from sypht.util import DEFAULT_REC_LIMIT, fetch_all_pages
44

55

66
def test_fetch_all_pages_can_fetch_one_page():
@@ -73,6 +73,7 @@ def fetch_something(offset, pages=1):
7373

7474
def test_fetch_all_pages_never_ending():
7575
"""Fail if fetch more than n pages."""
76+
7677
# arrange
7778
def never_ending(*args, **kwargs):
7879
return [0, 1, 2]
@@ -85,7 +86,30 @@ def never_ending(*args, **kwargs):
8586
results += page
8687

8788
# assert
88-
assert "more than the limit: 20000" in str(exc_info)
89+
assert f"more than the limit: {DEFAULT_REC_LIMIT}" in str(exc_info)
90+
91+
92+
def test_fetch_with_rec_limit():
93+
# arrange
94+
page_size = 5
95+
96+
def fetch_something(offset, pages=1):
97+
pages0 = pages - 1
98+
if offset > pages0:
99+
return []
100+
start = offset * page_size
101+
page = range(start, start + page_size)
102+
return list(page)
103+
104+
# act
105+
page_iter = fetch_all_pages(name="test1", fetch_page=fetch_something, rec_limit=2)
106+
results = []
107+
with pytest.raises(Exception) as exc_info:
108+
for page in page_iter():
109+
results += page
110+
111+
# assert
112+
assert f"fetched 5 records which is more than the limit: 2" in str(exc_info)
89113

90114

91115
def test_fetch_all_pages_handle_error():

0 commit comments

Comments
 (0)