11import json
22import os
3- from typing import List , Optional
43from base64 import b64encode
54from datetime import datetime , timedelta
5+ from typing import List , Optional
66from urllib .parse import quote_plus , urlencode , urljoin
77
88import requests
99
10+ from .util import fetch_all_pages
11+
1012SYPHT_API_BASE_ENDPOINT = "https://api.sypht.com"
1113SYPHT_AUTH_ENDPOINT = "https://auth.sypht.com/oauth2/token"
1214SYPHT_LEGACY_AUTH_ENDPOINT = "https://login.sypht.com/oauth/token"
@@ -379,7 +381,9 @@ def get_file_data(self, file_id, endpoint=None, headers=None):
379381
380382 return response .content
381383
382- def fetch_results (self , file_id , timeout = None , endpoint = None , verbose = False , headers = None ):
384+ def fetch_results (
385+ self , file_id , timeout = None , endpoint = None , verbose = False , headers = None
386+ ):
383387 """
384388 :param file_id: the id of the document that was uploaded and extracted
385389 :param timeout: a timeout in milliseconds to wait for the results
@@ -415,7 +419,37 @@ def get_annotations(
415419 to_date = None ,
416420 endpoint = None ,
417421 ):
418- filters = []
422+ page_iter = fetch_all_pages (
423+ name = "get_annotations" ,
424+ fetch_page = self ._get_annotations ,
425+ get_page = lambda response : response ["annotations" ],
426+ )
427+ annotations = []
428+ for response in page_iter (
429+ doc_id = doc_id ,
430+ task_id = task_id ,
431+ user_id = user_id ,
432+ specification = specification ,
433+ from_date = from_date ,
434+ to_date = to_date ,
435+ endpoint = endpoint ,
436+ ):
437+ annotations .extend (response ["annotations" ])
438+ return {"annotations" : annotations }
439+
440+ def _get_annotations (
441+ self ,
442+ doc_id = None ,
443+ task_id = None ,
444+ user_id = None ,
445+ specification = None ,
446+ from_date = None ,
447+ to_date = None ,
448+ endpoint = None ,
449+ offset = 0 ,
450+ ):
451+ """Fetch a single page of annotations, skipping the first ``offset`` pages. Use get_annotations to fetch all pages."""
452+ filters = ["offset=" + str (offset )]
419453 if doc_id is not None :
420454 filters .append ("docId=" + doc_id )
421455 if task_id is not None :
@@ -438,7 +472,22 @@ def get_annotations(
438472 return self ._parse_response (self .requests .get (endpoint , headers = headers ))
439473
440474 def get_annotations_for_docs (self , doc_ids , endpoint = None ):
441- body = json .dumps ({"docIds" : doc_ids })
475+ page_iter = fetch_all_pages (
476+ name = "get_annotations_for_docs" ,
477+ fetch_page = self ._get_annotations_for_docs ,
478+ get_page = lambda response : response ["annotations" ],
479+ )
480+ annotations = []
481+ for response in page_iter (
482+ doc_ids = doc_ids ,
483+ endpoint = endpoint ,
484+ ):
485+ annotations .extend (response ["annotations" ])
486+ return {"annotations" : annotations }
487+
488+ def _get_annotations_for_docs (self , doc_ids , endpoint = None , offset = 0 ):
489+ """Fetch a single page of annotations, skipping the first ``offset`` pages. Use get_annotations_for_docs to fetch all pages."""
490+ body = json .dumps ({"docIds" : doc_ids , "offset" : offset })
442491 endpoint = urljoin (endpoint or self .base_endpoint , ("/app/annotations/search" ))
443492 headers = self ._get_headers ()
444493 headers ["Accept" ] = "application/json"
@@ -814,7 +863,13 @@ def submit_task(
814863 self .requests .post (endpoint , data = json .dumps (task ), headers = headers )
815864 )
816865
817- def add_tags_to_tasks (self , task_ids : List [str ], tags : List [str ], company_id : Optional [str ]= None , endpoint : Optional [str ]= None ):
866+ def add_tags_to_tasks (
867+ self ,
868+ task_ids : List [str ],
869+ tags : List [str ],
870+ company_id : Optional [str ] = None ,
871+ endpoint : Optional [str ] = None ,
872+ ):
818873 company_id = company_id or self .company_id
819874 endpoint = urljoin (
820875 endpoint or self .base_endpoint ,
@@ -825,12 +880,15 @@ def add_tags_to_tasks(self, task_ids: List[str], tags: List[str], company_id: Op
825880 headers ["Content-Type" ] = "application/json"
826881 data = {"taskIds" : task_ids , "add" : tags , "remove" : []}
827882 return self ._parse_response (
828- self .requests .post (
829- endpoint , data = json .dumps (data ), headers = headers
830- )
883+ self .requests .post (endpoint , data = json .dumps (data ), headers = headers )
831884 )
832885
833- def get_tags_for_task (self , task_id : str , company_id : Optional [str ]= None , endpoint : Optional [str ]= None ):
886+ def get_tags_for_task (
887+ self ,
888+ task_id : str ,
889+ company_id : Optional [str ] = None ,
890+ endpoint : Optional [str ] = None ,
891+ ):
834892 company_id = company_id or self .company_id
835893 endpoint = urljoin (
836894 endpoint or self .base_endpoint ,
0 commit comments