@@ -463,7 +463,10 @@ def __init__(
463463 self .__files = root_collection .files
464464 self .__file_id = file_id
465465 self .__buffer = EMPTY
466+ # Start position within the current buffered chunk.
467+ self .__buffer_pos = 0
466468 self .__chunk_iter = None
469+ # Position within the total file.
467470 self .__position = 0
468471 self ._file = file_document
469472 self ._session = session
@@ -510,12 +513,12 @@ def readchunk(self) -> bytes:
510513 """Reads a chunk at a time. If the current position is within a
511514 chunk the remainder of the chunk is returned.
512515 """
513- received = len (self .__buffer )
516+ received = len (self .__buffer ) - self . __buffer_pos
514517 chunk_data = EMPTY
515518 chunk_size = int (self .chunk_size )
516519
517520 if received > 0 :
518- chunk_data = self .__buffer
521+ chunk_data = self .__buffer [ self . __buffer_pos :]
519522 elif self .__position < int (self .length ):
520523 chunk_number = int ((received + self .__position ) / chunk_size )
521524 if self .__chunk_iter is None :
@@ -531,25 +534,12 @@ def readchunk(self) -> bytes:
531534
532535 self .__position += len (chunk_data )
533536 self .__buffer = EMPTY
537+ self .__buffer_pos = 0
534538 return chunk_data
535539
536- def read (self , size : int = - 1 ) -> bytes :
537- """Read at most `size` bytes from the file (less if there
538- isn't enough data).
539-
540- The bytes are returned as an instance of :class:`str` (:class:`bytes`
541- in python 3). If `size` is negative or omitted all data is read.
542-
543- :Parameters:
544- - `size` (optional): the number of bytes to read
545-
546- .. versionchanged:: 3.8
547- This method now only checks for extra chunks after reading the
548- entire file. Previously, this method would check for extra chunks
549- on every call.
550- """
540+ def _read_size_or_line (self , size : int = - 1 , line : bool = False ) -> bytes :
541+ """Internal read() and readline() helper."""
551542 self ._ensure_file ()
552-
553543 remainder = int (self .length ) - self .__position
554544 if size < 0 or size > remainder :
555545 size = remainder
@@ -558,11 +548,36 @@ def read(self, size: int = -1) -> bytes:
558548 return EMPTY
559549
560550 received = 0
561- data = io . BytesIO ()
551+ data = []
562552 while received < size :
563- chunk_data = self .readchunk ()
553+ needed = size - received
554+ if self .__buffer :
555+ # Optimization: Read the buffer with zero byte copies.
556+ buf = self .__buffer
557+ chunk_start = self .__buffer_pos
558+ chunk_data = memoryview (buf )[self .__buffer_pos :]
559+ self .__buffer = EMPTY
560+ self .__buffer_pos = 0
561+ self .__position += len (chunk_data )
562+ else :
563+ buf = self .readchunk ()
564+ chunk_start = 0
565+ chunk_data = memoryview (buf )
566+ if line :
567+ pos = buf .find (NEWLN , chunk_start , chunk_start + needed ) - chunk_start
568+ if pos >= 0 :
569+ # Decrease size to exit the loop.
570+ size = received + pos + 1
571+ needed = pos + 1
572+ if len (chunk_data ) > needed :
573+ data .append (chunk_data [:needed ])
574+ # Optimization: Save the buffer with zero byte copies.
575+ self .__buffer = buf
576+ self .__buffer_pos = chunk_start + needed
577+ self .__position -= len (self .__buffer ) - self .__buffer_pos
578+ else :
579+ data .append (chunk_data )
564580 received += len (chunk_data )
565- data .write (chunk_data )
566581
567582 # Detect extra chunks after reading the entire file.
568583 if size == remainder and self .__chunk_iter :
@@ -571,47 +586,32 @@ def read(self, size: int = -1) -> bytes:
571586 except StopIteration :
572587 pass
573588
574- self .__position -= received - size
589+ return b"" .join (data )
590+
591+ def read (self , size : int = - 1 ) -> bytes :
592+ """Read at most `size` bytes from the file (less if there
593+ isn't enough data).
594+
595+ The bytes are returned as an instance of :class:`str` (:class:`bytes`
596+ in python 3). If `size` is negative or omitted all data is read.
597+
598+ :Parameters:
599+ - `size` (optional): the number of bytes to read
575600
576- # Return 'size' bytes and store the rest.
577- data .seek (size )
578- self .__buffer = data .read ()
579- data .seek (0 )
580- return data .read (size )
601+ .. versionchanged:: 3.8
602+ This method now only checks for extra chunks after reading the
603+ entire file. Previously, this method would check for extra chunks
604+ on every call.
605+ """
606+ return self ._read_size_or_line (size = size )
581607
582608 def readline (self , size : int = - 1 ) -> bytes : # type: ignore[override]
583609 """Read one line or up to `size` bytes from the file.
584610
585611 :Parameters:
586612 - `size` (optional): the maximum number of bytes to read
587613 """
588- remainder = int (self .length ) - self .__position
589- if size < 0 or size > remainder :
590- size = remainder
591-
592- if size == 0 :
593- return EMPTY
594-
595- received = 0
596- data = io .BytesIO ()
597- while received < size :
598- chunk_data = self .readchunk ()
599- pos = chunk_data .find (NEWLN , 0 , size )
600- if pos != - 1 :
601- size = received + pos + 1
602-
603- received += len (chunk_data )
604- data .write (chunk_data )
605- if pos != - 1 :
606- break
607-
608- self .__position -= received - size
609-
610- # Return 'size' bytes and store the rest.
611- data .seek (size )
612- self .__buffer = data .read ()
613- data .seek (0 )
614- return data .read (size )
614+ return self ._read_size_or_line (size = size , line = True )
615615
616616 def tell (self ) -> int :
617617 """Return the current position of this file."""
@@ -651,6 +651,7 @@ def seek(self, pos: int, whence: int = _SEEK_SET) -> int:
651651
652652 self .__position = new_pos
653653 self .__buffer = EMPTY
654+ self .__buffer_pos = 0
654655 if self .__chunk_iter :
655656 self .__chunk_iter .close ()
656657 self .__chunk_iter = None
0 commit comments