Skip to content

Commit 35a8e4f

Browse files
committed
Add support for reading .wfdb archive files.
1 parent 1eb04bd commit 35a8e4f

File tree

3 files changed

+134
-22
lines changed

3 files changed

+134
-22
lines changed

wfdb/io/_signal.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,6 +1120,7 @@ def _rd_segment(
11201120
no_file=False,
11211121
sig_data=None,
11221122
return_res=64,
1123+
wfdb_archive=None,
11231124
):
11241125
"""
11251126
Read the digital samples from a single segment record's associated
@@ -1264,6 +1265,7 @@ def _rd_segment(
12641265
sampto=sampto,
12651266
no_file=no_file,
12661267
sig_data=sig_data,
1268+
wfdb_archive=wfdb_archive,
12671269
)
12681270

12691271
# Copy over the wanted signals
@@ -1288,6 +1290,7 @@ def _rd_dat_signals(
12881290
sampto,
12891291
no_file=False,
12901292
sig_data=None,
1293+
wfdb_archive=None,
12911294
):
12921295
"""
12931296
Read all signals from a WFDB dat file.
@@ -1390,7 +1393,8 @@ def _rd_dat_signals(
13901393
)
13911394
else:
13921395
data_to_read = _rd_dat_file(
1393-
file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples
1396+
file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples,
1397+
wfdb_archive=wfdb_archive
13941398
)
13951399

13961400
if extra_flat_samples:
@@ -1630,7 +1634,8 @@ def _required_byte_num(mode, fmt, n_samp):
16301634
return int(n_bytes)
16311635

16321636

1633-
def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
1637+
def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp,
1638+
wfdb_archive=None):
16341639
"""
16351640
Read data from a dat file, either local or remote, into a 1d numpy
16361641
array.
@@ -1688,14 +1693,19 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16881693
element_count = n_samp
16891694
byte_count = n_samp * BYTES_PER_SAMPLE[fmt]
16901695

1691-
# Local or cloud dat file
1692-
if pn_dir is None:
1696+
# Local file or .wfdb archive
1697+
if wfdb_archive is not None:
1698+
with wfdb_archive.open(file_name, "rb") as fp:
1699+
fp.seek(start_byte)
1700+
sig_data = util.fromfile(
1701+
fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
1702+
)
1703+
elif pn_dir is None:
16931704
with fsspec.open(os.path.join(dir_name, file_name), "rb") as fp:
16941705
fp.seek(start_byte)
16951706
sig_data = util.fromfile(
16961707
fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
16971708
)
1698-
16991709
# Stream dat file from PhysioNet
17001710
else:
17011711
# check to make sure a cloud path isn't being passed under pn_dir

wfdb/io/archive.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import os
2+
import zipfile
3+
from contextlib import contextmanager
4+
5+
_archive_cache = {}
6+
7+
8+
class WFDBArchive:
    """
    Helper class for working with WFDB .wfdb ZIP archives.

    Used only if:
    - .wfdb is included in the record_name explicitly, or
    - .wfdb is passed directly to the file loading function.

    The archive is opened read-only when the object is constructed and
    stays open until `close` is called.

    Attributes
    ----------
    record_name : str
        The base name the archive was created with (no extension).
    archive_path : str
        `record_name` with the ".wfdb" extension appended.
    zipfile : zipfile.ZipFile
        The read-only handle on `archive_path`.

    """

    def __init__(self, record_name):
        """
        Open the archive `<record_name>.wfdb` for reading.

        Parameters
        ----------
        record_name : str
            The base name of the archive, without the .wfdb extension.

        Raises
        ------
        FileNotFoundError
            If `<record_name>.wfdb` does not exist.
        ValueError
            If the file exists but is not a valid ZIP file.

        """
        self.record_name = record_name
        self.archive_path = f"{record_name}.wfdb"

        if not os.path.exists(self.archive_path):
            raise FileNotFoundError(f"Archive not found: {self.archive_path}")
        if not zipfile.is_zipfile(self.archive_path):
            raise ValueError(f"Invalid WFDB archive: {self.archive_path}")
        self.zipfile = zipfile.ZipFile(self.archive_path, mode="r")

    def _member(self, filename):
        """
        Return the ZipInfo entry for `filename`, or None if the archive
        has no member with that name.
        """
        if self.zipfile is None:
            return None
        try:
            # getinfo() is a direct lookup; building namelist() and
            # scanning it on every call is linear in the member count.
            return self.zipfile.getinfo(filename)
        except KeyError:
            return None

    def exists(self, filename):
        """
        Check if a file exists in the archive.

        Parameters
        ----------
        filename : str
            Member name, as stored in the archive.

        Returns
        -------
        bool
            True if the archive contains a member called `filename`.

        """
        return self._member(filename) is not None

    @contextmanager
    def open(self, filename, mode="r"):
        """
        Open a member of the archive for reading.

        Only members of the archive are considered; files lying next to
        the archive on disk are not looked up.

        Parameters
        ----------
        filename : str
            Member name, as stored in the archive.
        mode : str, optional
            If it contains "b" the raw binary stream is yielded,
            otherwise a text stream decoded as UTF-8.

        Yields
        ------
        file-like
            The opened member. It is closed when the `with` block exits.

        Raises
        ------
        FileNotFoundError
            If the archive has no member called `filename`.

        """
        member = self._member(filename)
        if member is None:
            raise FileNotFoundError(
                f"Could not find '{filename}' inside '{self.archive_path}'."
            )

        with self.zipfile.open(member, "r") as raw:
            if "b" in mode:
                yield raw
            else:
                import io

                # An explicit encoding keeps header decoding identical on
                # every platform instead of following the locale default.
                with io.TextIOWrapper(raw, encoding="utf-8") as text:
                    yield text

    def close(self):
        """
        Close the archive if open.
        """
        if self.zipfile:
            self.zipfile.close()

    def create_archive(self, file_list, output_path=None):
        """
        Create a .wfdb archive containing the specified list of files.

        Header files are stored uncompressed; every other file is
        deflated. Each file is stored under its base name only.

        Parameters
        ----------
        file_list : iterable of str
            Paths of the files to add.
        output_path : str, optional
            Where to write the archive. If not specified, uses
            `self.archive_path`.

        Returns
        -------
        N/A

        """
        output_path = output_path or self.archive_path

        def _canonical(path):
            return os.path.normcase(os.path.realpath(path))

        # Overwriting the file this object is reading from would leave
        # `self.zipfile` with a stale directory, so release the handle
        # first and reopen it once the new archive is complete.
        rewrites_self = _canonical(output_path) == _canonical(
            self.archive_path
        )
        if rewrites_self:
            self.close()

        header_suffixes = (".hea", ".hea.json", ".hea.yml")
        with zipfile.ZipFile(output_path, mode="w") as zf:
            for path in file_list:
                if path.endswith(header_suffixes):
                    compress = zipfile.ZIP_STORED
                else:
                    compress = zipfile.ZIP_DEFLATED
                zf.write(
                    path,
                    arcname=os.path.basename(path),
                    compress_type=compress,
                )

        if rewrites_self:
            self.zipfile = zipfile.ZipFile(self.archive_path, mode="r")
77+
78+
79+
def get_archive(record_base_name):
80+
"""
81+
Get or create a WFDBArchive for the given record base name.
82+
"""
83+
if record_base_name not in _archive_cache:
84+
_archive_cache[record_base_name] = WFDBArchive(record_base_name)
85+
return _archive_cache[record_base_name]

wfdb/io/record.py

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from wfdb.io import _header
1212
from wfdb.io import _signal
1313
from wfdb.io import _url
14+
from wfdb.io.archive import get_archive
1415
from wfdb.io import download
1516
from wfdb.io import header
1617
from wfdb.io import util
@@ -2030,25 +2031,41 @@ def rdrecord(
20302031
channels=[1, 3])
20312032
20322033
"""
2033-
dir_name, base_record_name = os.path.split(record_name)
2034-
# Update the dir_name using abspath unless it is a cloud path
2035-
if not any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS):
2036-
dir_name = os.path.abspath(dir_name)
2034+
is_wfdb_archive = record_name.endswith(".wfdb")
20372035

2038-
# Read the header fields
2039-
if pn_dir is not None:
2040-
# check to make sure a cloud path isn't being passed under pn_dir
2041-
if any(pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS):
2042-
raise ValueError(
2043-
"Cloud paths should be passed under record_name, not under pn_dir"
2044-
)
2045-
if "." not in pn_dir:
2046-
dir_list = pn_dir.split("/")
2047-
pn_dir = posixpath.join(
2048-
dir_list[0], download.get_version(dir_list[0]), *dir_list[1:]
2049-
)
2036+
if is_wfdb_archive:
2037+
record_base = record_name[:-5] # remove ".wfdb"
2038+
archive = get_archive(record_base)
2039+
hea_file = os.path.basename(record_base) + ".hea"
2040+
2041+
with archive.open(hea_file, "r") as f:
2042+
record = Record()
2043+
record.wfdb_archive = archive
2044+
record._read_header(f.read())
2045+
2046+
# Set dir_name to the archive base (needed for _rd_segment)
2047+
dir_name = os.path.dirname(record_base)
2048+
2049+
else:
2050+
dir_name, base_record_name = os.path.split(record_name)
2051+
# Update the dir_name using abspath unless it is a cloud path
2052+
if not any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS):
2053+
dir_name = os.path.abspath(dir_name)
2054+
2055+
# Read the header fields
2056+
if pn_dir is not None:
2057+
# check to make sure a cloud path isn't being passed under pn_dir
2058+
if any(pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS):
2059+
raise ValueError(
2060+
"Cloud paths should be passed under record_name, not under pn_dir"
2061+
)
2062+
if "." not in pn_dir:
2063+
dir_list = pn_dir.split("/")
2064+
pn_dir = posixpath.join(
2065+
dir_list[0], download.get_version(dir_list[0]), *dir_list[1:]
2066+
)
20502067

2051-
record = rdheader(record_name, pn_dir=pn_dir, rd_segments=False)
2068+
record = rdheader(record_name, pn_dir=pn_dir, rd_segments=False)
20522069

20532070
# Set defaults for sampto and channels input variables
20542071
if sampto is None:

0 commit comments

Comments
 (0)