Skip to content

Commit b13e9f8

Browse files
committed
add fsspec to rdrecord
1 parent 2edca28 commit b13e9f8

File tree

3 files changed

+31
-15
lines changed

3 files changed

+31
-15
lines changed

wfdb/io/_coreio.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import posixpath
22

3+
import fsspec
4+
35
from wfdb.io import _url
46
from wfdb.io.download import config
57

@@ -28,8 +30,9 @@ def _open_file(
2830
The PhysioNet database directory where the file is stored, or None
2931
if file_name is a local path.
3032
file_name : str
31-
The name of the file, either as a local filesystem path (if
32-
`pn_dir` is None) or a URL path (if `pn_dir` is a string.)
33+
The name of the file, either as a local filesystem path or cloud
34+
URL (if `pn_dir` is None) or a PhysioNet URL path
35+
(if `pn_dir` is a string).
3336
mode : str, optional
3437
The standard I/O mode for the file ("r" by default). If `pn_dir`
3538
is not None, this must be "r", "rt", or "rb".
@@ -47,7 +50,7 @@ def _open_file(
4750
4851
"""
4952
if pn_dir is None:
50-
return open(
53+
return fsspec.open(
5154
file_name,
5255
mode,
5356
buffering=buffering,

wfdb/io/_signal.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import math
22
import os
3+
import posixpath
34
import sys
45

6+
import fsspec
57
import numpy as np
68

79
from wfdb.io import download, _coreio, util
@@ -1643,10 +1645,10 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16431645
The name of the dat file.
16441646
dir_name : str
16451647
The full directory where the dat file(s) are located, if the dat
1646-
file(s) are local.
1648+
file(s) are local or in the cloud.
16471649
pn_dir : str
16481650
The PhysioNet directory where the dat file(s) are located, if
1649-
the dat file(s) are remote.
1651+
the dat file(s) are on a PhysioNet server.
16501652
fmt : str
16511653
The format of the dat file.
16521654
start_byte : int
@@ -1688,7 +1690,7 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16881690

16891691
# Local dat file
16901692
if pn_dir is None:
1691-
with open(os.path.join(dir_name, file_name), "rb") as fp:
1693+
with fsspec.open(os.path.join(dir_name, file_name), "rb") as fp:
16921694
fp.seek(start_byte)
16931695
sig_data = np.fromfile(
16941696
fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
@@ -1840,8 +1842,9 @@ def _rd_compressed_file(
18401842
file_name : str
18411843
The name of the signal file.
18421844
dir_name : str
1843-
The full directory where the signal file is located, if local.
1844-
This argument is ignored if `pn_dir` is not None.
1845+
The full directory where the signal file is located, if this
1846+
is a local or cloud path. This argument is ignored if `pn_dir`
1847+
is not None.
18451848
pn_dir : str or None
18461849
The PhysioNet database directory where the signal file is located.
18471850
fmt : str
@@ -2585,10 +2588,10 @@ def _infer_sig_len(
25852588
The byte offset of the dat file. None is equivalent to zero.
25862589
dir_name : str
25872590
The full directory where the dat file(s) are located, if the dat
2588-
file(s) are local.
2591+
file(s) are local or in the cloud.
25892592
pn_dir : str, optional
25902593
The PhysioNet directory where the dat file(s) are located, if
2591-
the dat file(s) are remote.
2594+
the dat file(s) are on a PhysioNet server.
25922595
25932596
Returns
25942597
-------
@@ -2600,13 +2603,23 @@ def _infer_sig_len(
26002603
sig_len * tsamps_per_frame * bytes_per_sample == file_size
26012604
26022605
"""
2603-
if pn_dir is None:
2604-
file_size = os.path.getsize(os.path.join(dir_name, file_name))
2605-
else:
2606+
from wfdb.io.record import CLOUD_PROTOCOLS
2607+
2608+
# If this is a cloud path, use posixpath to construct the path and fsspec to open the file
2609+
if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS):
2610+
with fsspec.open(posixpath.join(dir_name, file_name), mode="rb") as f:
2611+
file_size = f.seek(0, os.SEEK_END)
2612+
2613+
# If the PhysioNet database path is provided, get the size of the remote file on the PhysioNet server
2614+
elif pn_dir is not None:
26062615
file_size = download._remote_file_size(
26072616
file_name=file_name, pn_dir=pn_dir
26082617
)
26092618

2619+
# If it isn't a cloud path or a PhysioNet path, treat it as a local file
2620+
else:
2621+
file_size = os.path.getsize(os.path.join(dir_name, file_name))
2622+
26102623
if byte_offset is None:
26112624
byte_offset = 0
26122625
data_size = file_size - byte_offset

wfdb/io/record.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,12 +1830,12 @@ def rdheader(record_name, pn_dir=None, rd_segments=False):
18301830
dir_name, base_record_name = os.path.split(record_name)
18311831
file_name = f"{base_record_name}.hea"
18321832

1833-
# If this is a cloud path, use posixpath to construct the path
1833+
# If this is a cloud path, use posixpath to construct the path and fsspec to open the file
18341834
if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS):
18351835
with fsspec.open(posixpath.join(dir_name, file_name), mode="r") as f:
18361836
header_content = f.read()
18371837

1838-
# If it isn't a cloud path, construct the download path using the database version
1838+
# If the PhysioNet database path is provided, construct the download path using the database version
18391839
elif pn_dir is not None:
18401840
if "." not in pn_dir:
18411841
dir_list = pn_dir.split("/")

0 commit comments

Comments
 (0)