@@ -1,5 +1,8 @@
 from collections.abc import Iterator
-from io import StringIO
+from io import (
+    BytesIO,
+    StringIO,
+)
 from pathlib import Path
 
 import numpy as np
@@ -121,10 +124,15 @@ def test_to_jsonl_count_new_lines():
 
 
 @pytest.mark.parametrize("chunksize", [1, 1.0])
-def test_readjson_chunks(request, lines_json_df, chunksize, engine):
+@pytest.mark.parametrize("buffer", [BytesIO, StringIO])
+def test_readjson_chunks(request, lines_json_df, chunksize, buffer, engine):
     # Basic test that read_json(chunks=True) gives the same result as
     # read_json(chunks=False)
     # GH17048: memory usage when lines=True
+    # GH#28906: read binary json lines in chunks
+
+    if buffer == BytesIO:
+        lines_json_df = lines_json_df.encode()
 
     if engine == "pyarrow":
         # GH 48893
@@ -134,10 +142,11 @@ def test_readjson_chunks(request, lines_json_df, chunksize, engine):
         )
         request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))
 
-    unchunked = read_json(StringIO(lines_json_df), lines=True)
-    with read_json(
-        StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine
-    ) as reader:
+    unchunked = read_json(buffer(lines_json_df), lines=True)
+    with (
+        buffer(lines_json_df) as buf,
+        read_json(buf, lines=True, chunksize=chunksize, engine=engine) as reader,
+    ):
         chunked = pd.concat(reader)
 
     tm.assert_frame_equal(chunked, unchunked)
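
For reference, a minimal standalone sketch of the behavior this test exercises: reading line-delimited JSON from a binary buffer (BytesIO) in chunks should produce the same frame as a single unchunked read. The sample data here is an illustrative assumption, not taken from the test fixtures.

    # Hypothetical usage sketch of chunked reads from a binary buffer (GH#28906).
    from io import BytesIO

    import pandas as pd

    # Two JSON lines; byte input stands in for a binary file handle.
    data = b'{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n'

    # Single read of the whole buffer.
    unchunked = pd.read_json(BytesIO(data), lines=True)

    # Chunked read: read_json returns a JsonReader context manager
    # when chunksize is given; concatenating the chunks should match.
    with pd.read_json(BytesIO(data), lines=True, chunksize=1) as reader:
        chunked = pd.concat(reader)

    assert chunked.equals(unchunked)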