@@ -1066,6 +1066,7 @@ def _rd_segment(
10661066 ignore_skew ,
10671067 no_file = False ,
10681068 sig_data = None ,
1069+ sig_stream = None ,
10691070 return_res = 64 ,
10701071):
10711072 """
@@ -1211,6 +1212,7 @@ def _rd_segment(
12111212 sampto = sampto ,
12121213 no_file = no_file ,
12131214 sig_data = sig_data ,
1215+ sig_stream = sig_stream ,
12141216 )
12151217
12161218 # Copy over the wanted signals
@@ -1235,6 +1237,7 @@ def _rd_dat_signals(
12351237 sampto ,
12361238 no_file = False ,
12371239 sig_data = None ,
1240+ sig_stream = None ,
12381241):
12391242 """
12401243 Read all signals from a WFDB dat file.
@@ -1324,20 +1327,31 @@ def _rd_dat_signals(
13241327 if no_file :
13251328 data_to_read = sig_data
13261329 elif fmt in COMPRESSED_FMTS :
1327- data_to_read = _rd_compressed_file (
1328- file_name = file_name ,
1329- dir_name = dir_name ,
1330- pn_dir = pn_dir ,
1331- fmt = fmt ,
1332- sample_offset = byte_offset ,
1333- n_sig = n_sig ,
1334- samps_per_frame = samps_per_frame ,
1335- start_frame = sampfrom ,
1336- end_frame = sampto ,
1337- )
1330+ if sig_stream is not None :
1331+ data_to_read = _rd_compressed_stream (
1332+ fp = sig_stream ,
1333+ fmt = fmt ,
1334+ sample_offset = byte_offset ,
1335+ n_sig = n_sig ,
1336+ samps_per_frame = samps_per_frame ,
1337+ start_frame = sampfrom ,
1338+ end_frame = sampto ,
1339+ )
1340+ else :
1341+ data_to_read = _rd_compressed_file (
1342+ file_name = file_name ,
1343+ dir_name = dir_name ,
1344+ pn_dir = pn_dir ,
1345+ fmt = fmt ,
1346+ sample_offset = byte_offset ,
1347+ n_sig = n_sig ,
1348+ samps_per_frame = samps_per_frame ,
1349+ start_frame = sampfrom ,
1350+ end_frame = sampto ,
1351+ )
13381352 else :
13391353 data_to_read = _rd_dat_file (
1340- file_name , dir_name , pn_dir , fmt , start_byte , n_read_samples
1354+ file_name , dir_name , pn_dir , fmt , start_byte , n_read_samples , sig_stream
13411355 )
13421356
13431357 if extra_flat_samples :
@@ -1577,7 +1591,7 @@ def _required_byte_num(mode, fmt, n_samp):
15771591 return int (n_bytes )
15781592
15791593
1580- def _rd_dat_file (file_name , dir_name , pn_dir , fmt , start_byte , n_samp ):
1594+ def _rd_dat_file (file_name , dir_name , pn_dir , fmt , start_byte , n_samp , sig_stream ):
15811595 """
15821596 Read data from a dat file, either local or remote, into a 1d numpy
15831597 array.
@@ -1635,8 +1649,14 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16351649 element_count = n_samp
16361650 byte_count = n_samp * BYTES_PER_SAMPLE [fmt ]
16371651
1652+ # Memory Stream
1653+ if sig_stream is not None :
1654+ sig_stream .seek (start_byte )
1655+ sig_data = np .frombuffer (
1656+ sig_stream .read (), dtype = np .dtype (DATA_LOAD_TYPES [fmt ]), count = element_count
1657+ )
16381658 # Local dat file
1639- if pn_dir is None :
1659+ elif pn_dir is None :
16401660 with open (os .path .join (dir_name , file_name ), "rb" ) as fp :
16411661 fp .seek (start_byte )
16421662 sig_data = np .fromfile (
@@ -1651,7 +1671,6 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16511671
16521672 return sig_data
16531673
1654-
16551674def _blocks_to_samples (sig_data , n_samp , fmt ):
16561675 """
16571676 Convert uint8 blocks into signal samples for unaligned dat formats.
@@ -1770,6 +1789,123 @@ def _blocks_to_samples(sig_data, n_samp, fmt):
17701789 return sig
17711790
17721791
def _rd_compressed_stream(
    fp,
    fmt,
    sample_offset,
    n_sig,
    samps_per_frame,
    start_frame,
    end_frame,
):
    """
    Read samples from an open FLAC stream into a 1d numpy array laid
    out in dat file order.

    Parameters
    ----------
    fp : file-like object
        Seekable binary stream containing FLAC data. It is rewound to
        the beginning before being checked and decoded. It may be a
        real file or an in-memory stream; a `name` attribute is used
        in error messages when present but is not required.
    fmt : str
        The compressed WFDB format. `int(fmt) - 500` is taken as the
        maximum sample resolution (in bits) allowed in the stream.
    sample_offset : int
        Number of samples added to the starting position when seeking
        within the stream.
    n_sig : int
        Expected number of channels in the stream.
    samps_per_frame : list
        Samples per frame; only the first element is used.
    start_frame : int
        First frame to read.
    end_frame : int
        Frame at which to stop reading (exclusive).

    Returns
    -------
    ndarray
        1d array of samples: int8 for 8-bit streams, int16 for 16-bit
        streams and int32 for 24-bit streams.

    Raises
    ------
    ValueError
        If the stream does not start with the FLAC signature, has an
        unknown subtype, has a higher resolution than `fmt` allows, or
        has a channel count different from `n_sig`.

    """
    # In-memory streams (e.g. io.BytesIO) have no `name` attribute, so
    # fall back to a placeholder rather than letting error reporting
    # fail with AttributeError.
    stream_name = getattr(fp, "name", "<stream>")

    # Always check the signature at the start of the stream, even if
    # the caller has already consumed part of it.
    fp.seek(0)
    signature = fp.read(4)
    if signature != b"fLaC":
        raise ValueError(f"{stream_name} is not a FLAC file")
    fp.seek(0)

    with soundfile.SoundFile(fp) as sf:
        # Determine the actual resolution of the FLAC stream and the
        # data type we will use when reading it.  Note that soundfile
        # doesn't support int8.
        if sf.subtype == "PCM_S8":
            format_bits = 8
            read_dtype = "int16"
        elif sf.subtype == "PCM_16":
            format_bits = 16
            read_dtype = "int16"
        elif sf.subtype == "PCM_24":
            format_bits = 24
            read_dtype = "int32"
        else:
            raise ValueError(
                f"unknown subtype in {stream_name} ({sf.subtype})"
            )

        max_bits = int(fmt) - 500
        if format_bits > max_bits:
            raise ValueError(
                f"wrong resolution in {stream_name} "
                f"({format_bits}, expected <= {max_bits})"
            )

        if sf.channels != n_sig:
            raise ValueError(
                f"wrong number of channels in {stream_name} "
                f"({sf.channels}, expected {n_sig})"
            )

        # Read the samples.
        start_samp = start_frame * samps_per_frame[0]
        end_samp = end_frame * samps_per_frame[0]
        sf.seek(start_samp + sample_offset)

        # We could do this:
        #  sig_data = sf.read(end_samp - start_samp, dtype=read_dtype)
        # However, sf.read fails for huge blocks (over 2**24 total
        # samples) due to a bug in libsndfile:
        # https://github.com/libsndfile/libsndfile/issues/431
        # So read the data in chunks instead.
        n_samp = end_samp - start_samp
        sig_data = np.empty((n_samp, n_sig), dtype=read_dtype)
        CHUNK_SIZE = 1024 * 1024
        for chunk_start in range(0, n_samp, CHUNK_SIZE):
            chunk_end = chunk_start + CHUNK_SIZE
            chunk_data = sf.read(out=sig_data[chunk_start:chunk_end])
            samples_read = chunk_data.shape[0]
            # A short read means this was the last chunk (or the
            # stream ended early); keep only what was actually read.
            if samples_read != CHUNK_SIZE:
                sig_data = sig_data[: chunk_start + samples_read]
                break

        # If we read an 8-bit stream as int16 or a 24-bit stream as
        # int32, soundfile shifts each sample left by 8 bits.  We
        # want to undo this shift (and, in the case of 8-bit data,
        # convert to an int8 array.)
        if format_bits == 8:
            # np.right_shift(sig_data, 8, dtype='int8') doesn't work.
            # This seems wrong, but the numpy documentation is unclear.
            sig_data2 = np.empty(sig_data.shape, dtype="int8")
            sig_data = np.right_shift(sig_data, 8, out=sig_data2)
        elif format_bits == 24:
            # Shift 32-bit array in-place.
            np.right_shift(sig_data, 8, out=sig_data)

    # Suppose we have 3 channels and 2 samples per frame.  The array
    # returned by sf.read looks like this:
    #
    #          channel 0   channel 1   channel 2
    # time 0   [0,0]       [0,1]       [0,2]
    # time 1   [1,0]       [1,1]       [1,2]
    # time 2   [2,0]       [2,1]       [2,2]
    # time 3   [3,0]       [3,1]       [3,2]
    #
    # We reshape this first into the following:
    #
    #          channel 0   channel 1   channel 2
    # time 0   [0,0,0]     [0,0,1]     [0,0,2]
    # time 1   [0,1,0]     [0,1,1]     [0,1,2]
    # time 2   [1,0,0]     [1,0,1]     [1,0,2]
    # time 3   [1,1,0]     [1,1,1]     [1,1,2]
    #
    # Then we transpose axes 1 and 2:
    #
    #          channel 0   channel 1   channel 2
    # time 0   [0,0,0]     [0,1,0]     [0,2,0]
    # time 1   [0,0,1]     [0,1,1]     [0,2,1]
    # time 2   [1,0,0]     [1,1,0]     [1,2,0]
    # time 3   [1,0,1]     [1,1,1]     [1,2,1]
    #
    # Then when we reshape the array to 1D, the result is in dat file
    # order:
    #
    #          channel 0   channel 1   channel 2
    # time 0   [0]         [2]         [4]
    # time 1   [1]         [3]         [5]
    # time 2   [6]         [8]         [10]
    # time 3   [7]         [9]         [11]

    sig_data = sig_data.reshape(-1, samps_per_frame[0], n_sig)
    sig_data = sig_data.transpose(0, 2, 1)
    return sig_data.reshape(-1)
1907+
1908+
17731909def _rd_compressed_file (
17741910 file_name ,
17751911 dir_name ,
@@ -1834,112 +1970,7 @@ def _rd_compressed_file(
18341970 file_name = os .path .join (dir_name , file_name )
18351971
18361972 with _coreio ._open_file (pn_dir , file_name , "rb" ) as fp :
1837- signature = fp .read (4 )
1838- if signature != b"fLaC" :
1839- raise ValueError (f"{ fp .name } is not a FLAC file" )
1840- fp .seek (0 )
1841-
1842- with soundfile .SoundFile (fp ) as sf :
1843- # Determine the actual resolution of the FLAC stream and the
1844- # data type will use when reading it. Note that soundfile
1845- # doesn't support int8.
1846- if sf .subtype == "PCM_S8" :
1847- format_bits = 8
1848- read_dtype = "int16"
1849- elif sf .subtype == "PCM_16" :
1850- format_bits = 16
1851- read_dtype = "int16"
1852- elif sf .subtype == "PCM_24" :
1853- format_bits = 24
1854- read_dtype = "int32"
1855- else :
1856- raise ValueError (f"unknown subtype in { fp .name } ({ sf .subtype } )" )
1857-
1858- max_bits = int (fmt ) - 500
1859- if format_bits > max_bits :
1860- raise ValueError (
1861- f"wrong resolution in { fp .name } "
1862- f"({ format_bits } , expected <= { max_bits } )"
1863- )
1864-
1865- if sf .channels != n_sig :
1866- raise ValueError (
1867- f"wrong number of channels in { fp .name } "
1868- f"({ sf .channels } , expected { n_sig } )"
1869- )
1870-
1871- # Read the samples.
1872- start_samp = start_frame * samps_per_frame [0 ]
1873- end_samp = end_frame * samps_per_frame [0 ]
1874- sf .seek (start_samp + sample_offset )
1875-
1876- # We could do this:
1877- # sig_data = sf.read(end_samp - start_samp, dtype=read_dtype)
1878- # However, sf.read fails for huge blocks (over 2**24 total
1879- # samples) due to a bug in libsndfile:
1880- # https://github.com/libsndfile/libsndfile/issues/431
1881- # So read the data in chunks instead.
1882- n_samp = end_samp - start_samp
1883- sig_data = np .empty ((n_samp , n_sig ), dtype = read_dtype )
1884- CHUNK_SIZE = 1024 * 1024
1885- for chunk_start in range (0 , n_samp , CHUNK_SIZE ):
1886- chunk_end = chunk_start + CHUNK_SIZE
1887- chunk_data = sf .read (out = sig_data [chunk_start :chunk_end ])
1888- samples_read = chunk_data .shape [0 ]
1889- if samples_read != CHUNK_SIZE :
1890- sig_data = sig_data [: chunk_start + samples_read ]
1891- break
1892-
1893- # If we read an 8-bit stream as int16 or a 24-bit stream as
1894- # int32, soundfile shifts each sample left by 8 bits. We
1895- # want to undo this shift (and, in the case of 8-bit data,
1896- # convert to an int8 array.)
1897- if format_bits == 8 :
1898- # np.right_shift(sig_data, 8, dtype='int8') doesn't work.
1899- # This seems wrong, but the numpy documentation is unclear.
1900- sig_data2 = np .empty (sig_data .shape , dtype = "int8" )
1901- sig_data = np .right_shift (sig_data , 8 , out = sig_data2 )
1902- elif format_bits == 24 :
1903- # Shift 32-bit array in-place.
1904- np .right_shift (sig_data , 8 , out = sig_data )
1905-
1906- # Suppose we have 3 channels and 2 samples per frame. The array
1907- # returned by sf.read looks like this:
1908- #
1909- # channel 0 channel 1 channel 2
1910- # time 0 [0,0] [0,1] [0,2]
1911- # time 1 [1,0] [1,1] [1,2]
1912- # time 2 [2,0] [2,1] [2,2]
1913- # time 3 [3,0] [3,1] [3,2]
1914- #
1915- # We reshape this first into the following:
1916- #
1917- # channel 0 channel 1 channel 2
1918- # time 0 [0,0,0] [0,0,1] [0,0,2]
1919- # time 1 [0,1,0] [0,1,1] [0,1,2]
1920- # time 2 [1,0,0] [1,0,1] [1,0,2]
1921- # time 3 [1,1,0] [1,1,1] [1,1,2]
1922- #
1923- # Then we transpose axes 1 and 2:
1924- #
1925- # channel 0 channel 1 channel 2
1926- # time 0 [0,0,0] [0,1,0] [0,2,0]
1927- # time 1 [0,0,1] [0,1,1] [0,2,1]
1928- # time 2 [1,0,0] [1,1,0] [1,2,0]
1929- # time 3 [1,0,1] [1,1,1] [1,2,1]
1930- #
1931- # Then when we reshape the array to 1D, the result is in dat file
1932- # order:
1933- #
1934- # channel 0 channel 1 channel 2
1935- # time 0 [0] [2] [4]
1936- # time 1 [1] [3] [5]
1937- # time 2 [6] [8] [10]
1938- # time 3 [7] [9] [11]
1939-
1940- sig_data = sig_data .reshape (- 1 , samps_per_frame [0 ], n_sig )
1941- sig_data = sig_data .transpose (0 , 2 , 1 )
1942- return sig_data .reshape (- 1 )
1973+ return _rd_compressed_stream (fp , fmt , sample_offset , n_sig , samps_per_frame , start_frame , end_frame )
19431974
19441975
19451976def _skew_sig (
0 commit comments