Skip to content

Commit 5d05b5c

Browse files
authored
Merge pull request #976 from Labelbox/mno/al-5192
2 parents 600f95d + c1f8af0 commit 5d05b5c

File tree

3 files changed

+176
-22
lines changed

3 files changed

+176
-22
lines changed

labelbox/schema/export_filters.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import sys
2+
3+
from typing import Optional
4+
if sys.version_info >= (3, 8):
5+
from typing import TypedDict
6+
else:
7+
from typing_extensions import TypedDict
8+
9+
from typing import Tuple
10+
11+
12+
class ProjectExportFilters(TypedDict):
    """Date-range filters accepted by a project export.

    Each filter is either ``None`` (filter not applied) or a
    ``(start, end)`` pair of date strings. Either bound of the pair may be
    ``None`` to leave that side of the range open.
    """

    # Each bound is Optional[str] because the examples below (and the export
    # code that consumes these filters) allow a one-sided range.
    label_created_at: Optional[Tuple[Optional[str], Optional[str]]]
    """ Date range for labels created at
    Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
    Examples:
    >>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
    >>> [None, "2050-01-01 00:00:00"]
    >>> ["2000-01-01 00:00:00", None]
    """
    last_activity_at: Optional[Tuple[Optional[str], Optional[str]]]
    """ Date range for last activity at
    Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
    Examples:
    >>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
    >>> [None, "2050-01-01 00:00:00"]
    >>> ["2000-01-01 00:00:00", None]
    """

labelbox/schema/project.py

Lines changed: 131 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from collections import namedtuple
55
from datetime import datetime, timezone
66
from pathlib import Path
7-
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
7+
from typing import TYPE_CHECKING, Any, Collection, Dict, Iterable, List, Optional, Union
88
from urllib.parse import urlparse
99

1010
import ndjson
@@ -20,6 +20,7 @@
2020
from labelbox.pagination import PaginatedCollection
2121
from labelbox.schema.consensus_settings import ConsensusSettings
2222
from labelbox.schema.data_row import DataRow
23+
from labelbox.schema.export_filters import ProjectExportFilters
2324
from labelbox.schema.export_params import ProjectExportParams
2425
from labelbox.schema.media_type import MediaType
2526
from labelbox.schema.queue_mode import QueueMode
@@ -46,6 +47,20 @@
4647
logger = logging.getLogger(__name__)
4748

4849

50+
def _validate_datetime(string_date: str) -> bool:
    """Check that an export date filter value uses a supported format.

    The accepted formats are "YYYY-MM-DD" and "YYYY-MM-DD hh:mm:ss".

    Args:
        string_date: The value to check. A falsy value (``None``, ``""``)
            is accepted without being parsed.

    Returns:
        True when the value is falsy or matches one of the accepted formats.

    Raises:
        ValueError: If the value is truthy and matches neither format,
            including when it is not a string at all.
    """
    if not string_date:
        return True

    for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"):
        try:
            datetime.strptime(string_date, fmt)
        except (ValueError, TypeError):
            # ValueError: wrong format, try the next one.
            # TypeError: strptime was handed a non-str (e.g. a datetime
            # object); report it below with the same actionable message
            # instead of leaking an unrelated-looking TypeError.
            continue
        return True

    raise ValueError(f"Incorrect format for: {string_date}.\n"
                     'Format must be "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"')
62+
63+
4964
class Project(DbObject, Updateable, Deletable):
5065
""" A Project is a container that includes a labeling frontend, an ontology,
5166
datasets and labels.
@@ -337,19 +352,6 @@ def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str:
337352
if dictionary.get(c)
338353
])
339354

340-
def _validate_datetime(string_date: str) -> bool:
341-
"""helper function validate that datetime is as follows: YYYY-MM-DD for the export"""
342-
if string_date:
343-
for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"):
344-
try:
345-
datetime.strptime(string_date, fmt)
346-
return True
347-
except ValueError:
348-
pass
349-
raise ValueError(f"""Incorrect format for: {string_date}.
350-
Format must be \"YYYY-MM-DD\" or \"YYYY-MM-DD hh:mm:ss\"""")
351-
return True
352-
353355
sleep_time = 2
354356
id_param = "projectId"
355357
filter_param = ""
@@ -400,16 +402,27 @@ def _validate_datetime(string_date: str) -> bool:
400402
self.uid)
401403
time.sleep(sleep_time)
402404

403-
"""
404-
Creates a project run export task with the given params and returns the task.
405-
406-
>>> export_task = export_v2("my_export_task", filter={"media_attributes": True})
407-
408-
"""
409-
410405
def export_v2(self,
411406
task_name: Optional[str] = None,
407+
filters: Optional[ProjectExportFilters] = None,
412408
params: Optional[ProjectExportParams] = None) -> Task:
409+
"""
410+
Creates a project run export task with the given params and returns the task.
411+
412+
For more information visit: https://docs.labelbox.com/docs/exports-v2#export-from-a-project-python-sdk
413+
414+
>>> task = project.export_v2(
415+
>>> filters={
416+
>>> "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
417+
>>> "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
418+
>>> },
419+
>>> params={
420+
>>> "include_performance_details": False,
421+
>>> "include_labels": True
422+
>>> })
423+
>>> task.wait_till_done()
424+
>>> task.result
425+
"""
413426

414427
_params = params or ProjectExportParams({
415428
"attachments": False,
@@ -420,15 +433,33 @@ def export_v2(self,
420433
"label_details": False
421434
})
422435

436+
_filters = filters or ProjectExportFilters({
437+
"last_activity_at": None,
438+
"label_created_at": None
439+
})
440+
441+
def _get_timezone() -> str:
442+
timezone_query_str = """query CurrentUserPyApi { user { timezone } }"""
443+
tz_res = self.client.execute(timezone_query_str)
444+
return tz_res["user"]["timezone"] or "UTC"
445+
446+
timezone: Optional[str] = None
447+
423448
mutation_name = "exportDataRowsInProject"
424449
create_task_query_str = """mutation exportDataRowsInProjectPyApi($input: ExportDataRowsInProjectInput!){
425450
%s(input: $input) {taskId} }
426451
""" % (mutation_name)
452+
453+
search_query: List[Dict[str, Collection[str]]] = []
427454
query_params = {
428455
"input": {
429456
"taskName": task_name,
430457
"filters": {
431-
"projectId": self.uid
458+
"projectId": self.uid,
459+
"searchQuery": {
460+
"scope": None,
461+
"query": search_query
462+
}
432463
},
433464
"params": {
434465
"includeAttachments":
@@ -446,6 +477,84 @@ def export_v2(self,
446477
},
447478
}
448479
}
480+
481+
if "last_activity_at" in _filters and _filters[
482+
'last_activity_at'] is not None:
483+
if timezone is None:
484+
timezone = _get_timezone()
485+
values = _filters['last_activity_at']
486+
start, end = values
487+
if (start is not None and end is not None):
488+
[_validate_datetime(date) for date in values]
489+
search_query.append({
490+
"type": "data_row_last_activity_at",
491+
"value": {
492+
"operator": "BETWEEN",
493+
"timezone": timezone,
494+
"value": {
495+
"min": start,
496+
"max": end
497+
}
498+
}
499+
})
500+
elif (start is not None):
501+
_validate_datetime(start)
502+
search_query.append({
503+
"type": "data_row_last_activity_at",
504+
"value": {
505+
"operator": "GREATER_THAN_OR_EQUAL",
506+
"timezone": timezone,
507+
"value": start
508+
}
509+
})
510+
elif (end is not None):
511+
_validate_datetime(end)
512+
search_query.append({
513+
"type": "data_row_last_activity_at",
514+
"value": {
515+
"operator": "LESS_THAN_OR_EQUAL",
516+
"timezone": timezone,
517+
"value": end
518+
}
519+
})
520+
521+
if "label_created_at" in _filters and _filters[
522+
"label_created_at"] is not None:
523+
if timezone is None:
524+
timezone = _get_timezone()
525+
values = _filters['label_created_at']
526+
start, end = values
527+
if (start is not None and end is not None):
528+
[_validate_datetime(date) for date in values]
529+
search_query.append({
530+
"type": "labeled_at",
531+
"value": {
532+
"operator": "BETWEEN",
533+
"value": {
534+
"min": start,
535+
"max": end
536+
}
537+
}
538+
})
539+
elif (start is not None):
540+
_validate_datetime(start)
541+
search_query.append({
542+
"type": "labeled_at",
543+
"value": {
544+
"operator": "GREATER_THAN_OR_EQUAL",
545+
"value": start
546+
}
547+
})
548+
elif (end is not None):
549+
_validate_datetime(end)
550+
search_query.append({
551+
"type": "labeled_at",
552+
"value": {
553+
"operator": "LESS_THAN_OR_EQUAL",
554+
"value": end
555+
}
556+
})
557+
449558
res = self.client.execute(
450559
create_task_query_str,
451560
query_params,

tests/integration/test_project.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,32 @@ def test_project_export_v2(configured_project_with_label):
5353
include_performance_details = True
5454
task = project.export_v2(
5555
task_name,
56+
filters={
57+
"last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
58+
"label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
59+
},
5660
params={
5761
"include_performance_details": include_performance_details,
5862
"include_labels": True
5963
})
64+
65+
task_to = project.export_v2(
66+
filters={"last_activity_at": [None, "2050-01-01 00:00:00"]})
67+
68+
task_from = project.export_v2(
69+
filters={"label_created_at": ["2000-01-01 00:00:00", None]})
70+
6071
assert task.name == task_name
6172
task.wait_till_done()
6273
assert task.status == "COMPLETE"
6374
assert task.errors is None
6475

76+
task_to.wait_till_done()
77+
assert task_to.status == "COMPLETE"
78+
79+
task_from.wait_till_done()
80+
assert task_from.status == "COMPLETE"
81+
6582
for task_result in task.result:
6683
task_project = task_result['projects'][project.uid]
6784
task_project_label_ids_set = set(

0 commit comments

Comments
 (0)