Skip to content

Commit 3508aae

Browse files
authored
TST: add test to read binary jsonlines in chunks (#63125)
1 parent 27a3271 commit 3508aae

File tree

1 file changed

+15 -6 lines changed

1 file changed

+15 -6 lines changed

pandas/tests/io/json/test_readlines.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
from collections.abc import Iterator
2-
from io import StringIO
2+
from io import (
3+
BytesIO,
4+
StringIO,
5+
)
36
from pathlib import Path
47

58
import numpy as np
@@ -121,10 +124,15 @@ def test_to_jsonl_count_new_lines():
121124

122125

123126
@pytest.mark.parametrize("chunksize", [1, 1.0])
124-
def test_readjson_chunks(request, lines_json_df, chunksize, engine):
127+
@pytest.mark.parametrize("buffer", [BytesIO, StringIO])
128+
def test_readjson_chunks(request, lines_json_df, chunksize, buffer, engine):
125129
# Basic test that read_json(chunks=True) gives the same result as
126130
# read_json(chunks=False)
127131
# GH17048: memory usage when lines=True
132+
# GH#28906: read binary json lines in chunks
133+
134+
if buffer == BytesIO:
135+
lines_json_df = lines_json_df.encode()
128136

129137
if engine == "pyarrow":
130138
# GH 48893
@@ -134,10 +142,11 @@ def test_readjson_chunks(request, lines_json_df, chunksize, engine):
134142
)
135143
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))
136144

137-
unchunked = read_json(StringIO(lines_json_df), lines=True)
138-
with read_json(
139-
StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine
140-
) as reader:
145+
unchunked = read_json(buffer(lines_json_df), lines=True)
146+
with (
147+
buffer(lines_json_df) as buf,
148+
read_json(buf, lines=True, chunksize=chunksize, engine=engine) as reader,
149+
):
141150
chunked = pd.concat(reader)
142151

143152
tm.assert_frame_equal(chunked, unchunked)

0 commit comments

Comments
 (0)