Skip to content

Commit 70a6863

Browse files
authored
[SDK-427] Add unit-tests for ExportTask (#1289)
1 parent ed84fdd commit 70a6863

File tree

5 files changed

+451
-2
lines changed

5 files changed

+451
-2
lines changed

tests/unit/conftest.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import requests
2-
1+
import json
32
import pytest
43

54

@@ -97,3 +96,36 @@ def ndjson_content_with_nonascii_and_line_breaks():
9796
'created_at': '2015-01-01T15:00:10Z'
9897
}]
9998
return line, expected_objects
99+
100+
101+
@pytest.fixture
def generate_random_ndjson(rand_gen):
    """Provide a factory that builds a list of NDJSON record strings.

    The returned callable takes ``lines`` (default 10) and returns that many
    JSON-encoded strings, each shaped like ``{"data_row": {"id": <value>}}``
    where ``<value>`` comes from ``rand_gen(str)``.
    """

    def _generate_random_ndjson(lines: int = 10):
        records = []
        for _ in range(lines):
            # rand_gen is a fixture defined outside this view; presumably it
            # returns a random string when called with ``str``.
            payload = {"data_row": {"id": rand_gen(str)}}
            records.append(json.dumps(payload))
        return records

    return _generate_random_ndjson
112+
113+
114+
@pytest.fixture
def mock_response():
    """Provide the ``MockResponse`` class, a minimal HTTP response double.

    Tests instantiate it with the body text to serve and, optionally, an
    exception that ``raise_for_status`` should raise.
    """
    # Local import keeps this fixture self-contained; it is only needed for
    # the explicit Optional annotation below.
    from typing import Optional

    class MockResponse:
        """Response stand-in exposing only ``text`` and ``raise_for_status``."""

        def __init__(self,
                     text: str,
                     exception: Optional[Exception] = None) -> None:
            """Store the body text and the optional error to raise later.

            Args:
                text: Value returned by the ``text`` property.
                exception: If truthy, raised by ``raise_for_status``.
            """
            self._text = text
            self._exception = exception

        @property
        def text(self) -> str:
            """Return the body text given at construction time."""
            return self._text

        def raise_for_status(self) -> None:
            """Raise the configured exception, if any; otherwise do nothing."""
            if self._exception:
                raise self._exception

    return MockResponse
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from unittest.mock import MagicMock
2+
3+
from labelbox.schema.export_task import (
4+
Converter,
5+
FileConverter,
6+
Range,
7+
StreamType,
8+
_MetadataFileInfo,
9+
_MetadataHeader,
10+
_TaskContext,
11+
)
12+
13+
14+
class TestFileConverter:
    """Unit tests for ``FileConverter`` converting one raw NDJSON chunk."""

    @staticmethod
    def _assert_single_chunk_converted(tmp_path, file_content, line_count):
        """Convert ``file_content`` as one chunk and verify the reported output.

        Args:
            tmp_path: pytest temporary directory; the output file is created
                under ``tmp_path / "file-converter"``.
            file_content: Raw NDJSON text passed to the converter as
                ``raw_data``.
            line_count: Number of NDJSON records in ``file_content``.
        """
        directory = tmp_path / "file-converter"
        directory.mkdir()
        input_args = Converter.ConverterInputArgs(
            ctx=_TaskContext(
                client=MagicMock(),
                task_id="task-id",
                stream_type=StreamType.RESULT,
                metadata_header=_MetadataHeader(total_size=len(file_content),
                                                total_lines=line_count),
            ),
            file_info=_MetadataFileInfo(
                offsets=Range(start=0, end=len(file_content) - 1),
                lines=Range(start=0, end=line_count - 1),
                file="file.ndjson",
            ),
            raw_data=file_content,
        )
        path = directory / "output.ndjson"
        produced = 0
        with FileConverter(file_path=path) as converter:
            for output in converter.convert(input_args):
                produced += 1
                assert output.current_line == 0
                assert output.current_offset == 0
                assert output.file_path == path
                assert output.total_lines == line_count
                assert output.total_size == len(file_content)
                assert output.bytes_written == len(file_content)
        # Without this guard the assertions above would be skipped silently
        # if the converter yielded nothing, and the test would pass vacuously.
        assert produced > 0, "converter.convert() yielded no output"

    def test_with_correct_ndjson(self, tmp_path, generate_random_ndjson):
        """Content whose last record ends with a newline is converted."""
        line_count = 10
        ndjson = generate_random_ndjson(line_count)
        file_content = "\n".join(ndjson) + "\n"
        self._assert_single_chunk_converted(tmp_path, file_content, line_count)

    def test_with_no_newline_at_end(self, tmp_path, generate_random_ndjson):
        """Content whose last record lacks a trailing newline is converted."""
        line_count = 10
        ndjson = generate_random_ndjson(line_count)
        file_content = "\n".join(ndjson)
        self._assert_single_chunk_converted(tmp_path, file_content, line_count)
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
from unittest.mock import MagicMock, patch
2+
from labelbox.schema.export_task import (
3+
FileRetrieverByLine,
4+
_TaskContext,
5+
_MetadataHeader,
6+
StreamType,
7+
)
8+
9+
10+
class TestFileRetrieverByLine:
    """Unit tests for ``FileRetrieverByLine.get_next_chunk``.

    The API client is a ``MagicMock`` and ``requests.get`` is patched, so no
    network access takes place.
    """

    @staticmethod
    def _build_context(file_content, line_count):
        """Return a ``_TaskContext`` whose client reports one whole-file chunk.

        Args:
            file_content: Full NDJSON text; its length sets the offsets and
                the header's ``total_size``.
            line_count: Number of records; sets the line range and the
                header's ``total_lines``.
        """
        mock_client = MagicMock()
        # NOTE(review): "offsets.end" is an int while the other range values
        # are strings -- kept as in the original; confirm which form the real
        # API returns.
        mock_client.execute = MagicMock(
            return_value={
                "task": {
                    "exportFileFromLine": {
                        "offsets": {
                            "start": "0",
                            "end": len(file_content) - 1
                        },
                        "lines": {
                            "start": "0",
                            "end": str(line_count - 1)
                        },
                        "file": "http://some-url.com/file.ndjson",
                    }
                }
            })

        return _TaskContext(
            client=mock_client,
            task_id="task-id",
            stream_type=StreamType.RESULT,
            metadata_header=_MetadataHeader(total_size=len(file_content),
                                            total_lines=line_count),
        )

    def _assert_chunk_from_line(self, generate_random_ndjson, mock_response,
                                line_start):
        """Fetch the chunk starting at ``line_start`` and verify its metadata.

        The expected start offset is the position of record ``line_start``
        inside the generated content (0 for the first record), and the
        expected content is everything from that offset to the end.
        """
        line_count = 10
        ndjson = generate_random_ndjson(line_count)
        file_content = "\n".join(ndjson) + "\n"
        mock_ctx = self._build_context(file_content, line_count)
        current_offset = file_content.find(ndjson[line_start])

        with patch("requests.get", return_value=mock_response(file_content)):
            retriever = FileRetrieverByLine(mock_ctx, line_start)
            info, content = retriever.get_next_chunk()
            assert info.offsets.start == current_offset
            assert info.offsets.end == len(file_content) - 1
            assert info.lines.start == line_start
            assert info.lines.end == line_count - 1
            assert info.file == "http://some-url.com/file.ndjson"
            assert content == file_content[current_offset:]

    def test_by_line_from_start(self, generate_random_ndjson, mock_response):
        """Starting at line 0 yields the whole file content."""
        self._assert_chunk_from_line(generate_random_ndjson, mock_response, 0)

    def test_by_line_from_middle(self, generate_random_ndjson, mock_response):
        """Starting at line 5 yields the content from that record onward."""
        self._assert_chunk_from_line(generate_random_ndjson, mock_response, 5)

    def test_by_line_from_last(self, generate_random_ndjson, mock_response):
        """Starting at the final line (9) yields only the last record."""
        self._assert_chunk_from_line(generate_random_ndjson, mock_response, 9)
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
from unittest.mock import MagicMock, patch
2+
from labelbox.schema.export_task import (
3+
FileRetrieverByOffset,
4+
_TaskContext,
5+
_MetadataHeader,
6+
StreamType,
7+
)
8+
9+
10+
class TestFileRetrieverByOffset:
    """Unit tests for ``FileRetrieverByOffset.get_next_chunk``.

    The API client is a ``MagicMock`` and ``requests.get`` is patched, so no
    network access takes place.
    """

    @staticmethod
    def _build_context(file_content, line_count):
        """Return a ``_TaskContext`` whose client reports one whole-file chunk.

        Args:
            file_content: Full NDJSON text; its length sets the offsets and
                the header's ``total_size``.
            line_count: Number of records; sets the line range and the
                header's ``total_lines``.
        """
        mock_client = MagicMock()
        # NOTE(review): "offsets.end" is an int while the other range values
        # are strings -- kept as in the original; confirm which form the real
        # API returns.
        mock_client.execute = MagicMock(
            return_value={
                "task": {
                    "exportFileFromOffset": {
                        "offsets": {
                            "start": "0",
                            "end": len(file_content) - 1
                        },
                        "lines": {
                            "start": "0",
                            "end": str(line_count - 1)
                        },
                        "file": "http://some-url.com/file.ndjson",
                    }
                }
            })

        return _TaskContext(
            client=mock_client,
            task_id="task-id",
            stream_type=StreamType.RESULT,
            metadata_header=_MetadataHeader(total_size=len(file_content),
                                            total_lines=line_count),
        )

    def _assert_chunk_from_offset(self, mock_response, file_content,
                                  line_count, current_offset, line_start):
        """Fetch the chunk at ``current_offset`` and verify its metadata.

        Args:
            mock_response: Fixture class used to fake the ``requests.get``
                result.
            file_content: Full NDJSON text served by the fake response.
            line_count: Number of records in ``file_content``.
            current_offset: Offset handed to the retriever; also the expected
                ``offsets.start`` and the start of the expected content.
            line_start: Expected ``lines.start`` of the returned chunk.
        """
        mock_ctx = self._build_context(file_content, line_count)

        with patch("requests.get", return_value=mock_response(file_content)):
            retriever = FileRetrieverByOffset(mock_ctx, current_offset)
            info, content = retriever.get_next_chunk()
            assert info.offsets.start == current_offset
            assert info.offsets.end == len(file_content) - 1
            assert info.lines.start == line_start
            assert info.lines.end == line_count - 1
            assert info.file == "http://some-url.com/file.ndjson"
            assert content == file_content[current_offset:]

    def test_by_offset_from_start(self, generate_random_ndjson, mock_response):
        """Starting at offset 0 yields the whole file content."""
        line_count = 10
        ndjson = generate_random_ndjson(line_count)
        file_content = "\n".join(ndjson) + "\n"

        self._assert_chunk_from_offset(mock_response, file_content, line_count,
                                       0, 0)

    def test_by_offset_from_middle(self, generate_random_ndjson, mock_response):
        """An offset inside record 5 yields the rest of the file from there."""
        line_count = 10
        ndjson = generate_random_ndjson(line_count)
        file_content = "\n".join(ndjson) + "\n"

        line_start = 5
        # Land 15 characters into the record rather than on its first
        # character, so the offset does not coincide with a line boundary.
        skipped_bytes = 15
        current_offset = file_content.find(ndjson[line_start]) + skipped_bytes

        self._assert_chunk_from_offset(mock_response, file_content, line_count,
                                       current_offset, line_start)

0 commit comments

Comments
 (0)