@@ -385,14 +385,9 @@ def _read(cls, fileobj, header, buffer_size=4):
         Streamline points
         """
         dtype = header["_dtype"]
-        coordinate_size = 3 * dtype.itemsize
-        # Make buffer_size an integer and a multiple of coordinate_size.
-        buffer_size = int(buffer_size * MEGABYTE)
-        buffer_size += coordinate_size - (buffer_size % coordinate_size)
 
-        # Markers for streamline end and file end
-        fiber_marker = cls.FIBER_DELIMITER.astype(dtype).tostring()
-        eof_marker = cls.EOF_DELIMITER.astype(dtype).tostring()
+        # Align batch_size to a multiple of 3 (one point = x, y, z) within
+        # the specified buffer size.
+        batch_size = int(buffer_size * MEGABYTE / dtype.itemsize / 3) * 3
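+        # E.g. with the default buffer_size=4 and float32 data:
+        # int(4 * 2**20 / 4 / 3) * 3 == 1048575 values, i.e. 349525 points.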
 
         with Opener(fileobj) as f:
             start_position = f.tell()
@@ -401,46 +396,36 @@ def _read(cls, fileobj, header, buffer_size=4):
             f.seek(header["_offset_data"], os.SEEK_SET)
 
             eof = False
-            buffs = []
             n_streams = 0
-
+            leftover = np.empty([0, 3])
             while not eof:
-                buff = bytearray(buffer_size)
-                n_read = f.readinto(buff)
-                eof = n_read != buffer_size
-                if eof:
-                    buff = buff[:n_read]
-
-                buffs.append(buff)
-
-                # Make sure we've read enough to find a streamline delimiter.
-                if fiber_marker not in buff:
-                    # If we've read the whole file, then fail.
-                    if eof:
-                        # Could have minimal buffering, and have read only the
-                        # EOF delimiter
-                        buffs = [bytearray().join(buffs)]
-                        if not buffs[0] == eof_marker:
-                            raise DataError(
-                                "Cannot find a streamline delimiter. This file"
-                                " might be corrupted.")
-                    else:
-                        # Otherwise read a bit more.
-                        continue
-
-                all_parts = bytearray().join(buffs).split(fiber_marker)
-                point_parts, buffs = all_parts[:-1], all_parts[-1:]
-                point_parts = [p for p in point_parts if p != b'']
-
-                for point_part in point_parts:
-                    # Read floats.
-                    pts = np.frombuffer(point_part, dtype=dtype)
-                    # Convert data to little-endian if needed.
-                    yield pts.astype('<f4', copy=False).reshape([-1, 3])
-
-                n_streams += len(point_parts)
-
-            if not buffs[-1] == eof_marker:
+
+                # Read raw values from the file.
+                raw_values = np.fromfile(f.fobj, dtype, batch_size)
+                if len(raw_values) < batch_size:
+                    eof = True
+
+                # Convert raw values to little-endian (x, y, z) coordinate triples.
+                coords = raw_values.astype('<f4', copy=False).reshape([-1, 3])
+
+                # Find streamline delimiter locations (rows of all NaNs).
+                delims = np.where(np.all(np.isnan(coords), axis=1))[0]
+
+                # Yield one new streamline per delimiter.
+                begin = 0
+                for i in range(len(delims)):
+                    end = delims[i]
+                    stream = np.append(leftover, coords[begin:end], axis=0)
+                    leftover = np.empty([0, 3])
+                    yield stream
+                    n_streams += 1
+
+                    begin = end + 1  # Skip the delimiter.
+
+                # Any remaining points are carried over to the next batch.
+                leftover = np.append(leftover, coords[begin:], axis=0)
+
+            if len(leftover) != 1 or not np.all(np.isinf(leftover)):
                 raise DataError("Expecting end-of-file marker 'inf inf inf'")
 
             # In case the 'count' field was not provided.
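
The new code reads the raw float stream in fixed-size batches and splits on all-NaN rows, rather than searching the byte stream for delimiter markers. Below is a minimal, self-contained sketch of that strategy; the function name `read_streamlines` and its parameters are illustrative, not part of nibabel's API, and it assumes little-endian float32 data starting at `offset` and terminated by an 'inf inf inf' marker, as the TCK format specifies.

```python
import numpy as np

MEGABYTE = 1024 * 1024


def read_streamlines(path, offset, dtype=np.dtype('<f4'), buffer_size=4):
    """Yield one (N, 3) float32 array per streamline (illustrative sketch)."""
    # Round the batch down to whole (x, y, z) triples so a delimiter
    # row can never be split across two reads.
    batch_size = int(buffer_size * MEGABYTE / dtype.itemsize / 3) * 3

    leftover = np.empty((0, 3), dtype='<f4')
    with open(path, 'rb') as f:
        f.seek(offset)
        eof = False
        while not eof:
            raw_values = np.fromfile(f, dtype, batch_size)
            eof = len(raw_values) < batch_size

            # Assumes the file holds whole triples; reshape fails otherwise.
            coords = raw_values.astype('<f4', copy=False).reshape(-1, 3)

            # All-NaN rows separate consecutive streamlines.
            delims = np.where(np.all(np.isnan(coords), axis=1))[0]

            begin = 0
            for end in delims:
                yield np.append(leftover, coords[begin:end], axis=0)
                leftover = np.empty((0, 3), dtype='<f4')
                begin = end + 1  # skip the delimiter row

            # Points after the last delimiter start the next streamline.
            leftover = np.append(leftover, coords[begin:], axis=0)

    # What remains should be exactly the 'inf inf inf' end-of-file row.
    if len(leftover) != 1 or not np.all(np.isinf(leftover)):
        raise ValueError("Expecting end-of-file marker 'inf inf inf'")
```

Because the batch is rounded down to a whole number of triples, a delimiter row never straddles two reads, so each batch can be scanned independently and only whole points are ever carried over in `leftover`.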