Add support for writing .wfdb files.

tompollard · tompollard · commit 7814876944ec · 2025-05-03T15:08:51.000-04:00
diff --git a/wfdb/io/_header.py b/wfdb/io/_header.py
@@ -1,4 +1,5 @@
 import datetime
+import os
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 
 import numpy as np
@@ -278,7 +279,7 @@ def set_defaults(self):
         for f in sfields:
             self.set_default(f)
 
-    def wrheader(self, write_dir="", expanded=True):
+    def wrheader(self, write_dir="", expanded=True, wfdb_archive=None):
         """
         Write a WFDB header file. The signals are not used. Before
         writing:
@@ -325,7 +326,8 @@ def wrheader(self, write_dir="", expanded=True):
         self.check_field_cohesion(rec_write_fields, list(sig_write_fields))
 
         # Write the header file using the specified fields
-        self.wr_header_file(rec_write_fields, sig_write_fields, write_dir)
+        self.wr_header_file(rec_write_fields, sig_write_fields, write_dir,
+                            wfdb_archive=wfdb_archive)
 
     def get_write_fields(self):
         """
@@ -508,7 +510,8 @@ def check_field_cohesion(self, rec_write_fields, sig_write_fields):
                                 "Each file_name (dat file) specified must have the same byte offset"
                             )
 
-    def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir):
+    def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir,
+                       wfdb_archive=None):
         """
         Write a header file using the specified fields. Converts Record
         attributes into appropriate WFDB format strings.
@@ -522,6 +525,8 @@ def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir):
             being equal to a list of channels to write for each field.
         write_dir : str
             The directory in which to write the header file.
+        wfdb_archive : WFDBArchive, optional
+            If provided, write the header into this archive instead of to disk.
 
         Returns
         -------
@@ -583,7 +588,13 @@ def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir):
             comment_lines = ["# " + comment for comment in self.comments]
             header_lines += comment_lines
 
-        util.lines_to_file(self.record_name + ".hea", write_dir, header_lines)
+        header_str = "\n".join(header_lines) + "\n"
+        hea_filename = os.path.basename(self.record_name) + ".hea"
+
+        if wfdb_archive:
+            wfdb_archive.write(hea_filename, header_str.encode("utf-8"))
+        else:
+            util.lines_to_file(hea_filename, write_dir, header_lines)
 
 
 class MultiHeaderMixin(BaseHeaderMixin):
@@ -621,7 +632,7 @@ def set_defaults(self):
         for field in self.get_write_fields():
             self.set_default(field)
 
-    def wrheader(self, write_dir=""):
+    def wrheader(self, write_dir="", wfdb_archive=None):
         """
         Write a multi-segment WFDB header file. The signals or segments are
         not used. Before writing:
@@ -655,7 +666,7 @@ def wrheader(self, write_dir=""):
         self.check_field_cohesion()
 
         # Write the header file using the specified fields
-        self.wr_header_file(write_fields, write_dir)
+        self.wr_header_file(write_fields, write_dir, wfdb_archive=wfdb_archive)
 
     def get_write_fields(self):
         """
@@ -733,7 +744,7 @@ def check_field_cohesion(self):
                 "The sum of the 'seg_len' fields do not match the 'sig_len' field"
             )
 
-    def wr_header_file(self, write_fields, write_dir):
+    def wr_header_file(self, write_fields, write_dir, wfdb_archive=None):
         """
         Write a header file using the specified fields.
 
@@ -744,6 +755,8 @@ def wr_header_file(self, write_fields, write_dir):
             and their dependencies.
         write_dir : str
             The output directory in which the header is written.
+        wfdb_archive : WFDBArchive, optional
+            If provided, write the header into this archive instead of to disk.
 
         Returns
         -------
@@ -779,7 +792,13 @@ def wr_header_file(self, write_fields, write_dir):
             comment_lines = ["# " + comment for comment in self.comments]
             header_lines += comment_lines
 
-        util.lines_to_file(self.record_name + ".hea", write_dir, header_lines)
+        header_str = "\n".join(header_lines) + "\n"
+        hea_filename = os.path.basename(self.record_name) + ".hea"
+
+        if wfdb_archive:
+            wfdb_archive.write(hea_filename, header_str.encode("utf-8"))
+        else:
+            util.lines_to_file(hea_filename, write_dir, header_lines)
 
     def get_sig_segments(self, sig_name=None):
         """
diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py
@@ -2574,7 +2574,7 @@ def wr_dat_file(
     # Write the bytes to the file
     if wfdb_archive:
         with io.BytesIO() as f:
-            b_write.tofile(f)
+            f.write(b_write.tobytes())
             wfdb_archive.write(os.path.basename(file_name), f.getvalue())
     else:
         with open(file_path, "wb") as f:
diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py
@@ -10,25 +10,42 @@ class WFDBArchive:
     """
     Helper class for working with WFDB .wfdb ZIP archives.
 
+    If used for reading, the archive must already exist.
+    If used for writing, use mode='w' and call `write(...)` or `create_archive(...)`.
+
     Used only if:
       - .wfdb is included in the record_name explicitly, or
       - .wfdb is passed directly to the file loading function.
     """
-    def __init__(self, record_name):
+    def __init__(self, record_name, mode="r"):
         """
         Initialize a WFDBArchive for a given record name (without extension).
 
+        Parameters
+        ----------
         record_name : str
-          The base name of the archive, without the .wfdb extension.
+            The base name of the archive, without the .wfdb extension.
+        mode : str
+            'r' to read an existing archive (default); 'w' to create it if missing and open it for appending.
         """
         self.record_name = record_name
         self.archive_path = f"{record_name}.wfdb"
+        self.zipfile = None
+        self.mode = mode
+
+        if mode == "r":
+            if not os.path.exists(self.archive_path):
+                raise FileNotFoundError(f"Archive not found: {self.archive_path}")
+            if not zipfile.is_zipfile(self.archive_path):
+                raise ValueError(f"Invalid WFDB archive: {self.archive_path}")
+            self.zipfile = zipfile.ZipFile(self.archive_path, mode="r")
 
-        if not os.path.exists(self.archive_path):
-            raise FileNotFoundError(f"Archive not found: {self.archive_path}")
-        if not zipfile.is_zipfile(self.archive_path):
-            raise ValueError(f"Invalid WFDB archive: {self.archive_path}")
-        self.zipfile = zipfile.ZipFile(self.archive_path, mode="r")
+        elif mode == "w":
+            # Initialize an empty archive on disk
+            if not os.path.exists(self.archive_path):
+                with zipfile.ZipFile(self.archive_path, mode="w"):
+                    pass  # Just create the file
+            self.zipfile = zipfile.ZipFile(self.archive_path, mode="a")
 
     def exists(self, filename):
         """
@@ -65,16 +82,19 @@ def write(self, filename, data):
         """
         Write binary data to the archive (replaces if already exists).
         """
-        # Write to a new temporary archive
+        if self.zipfile is None:
+            self.zipfile = zipfile.ZipFile(self.archive_path, mode="w")
+            self.zipfile.writestr(filename, data)
+            return
+
+        # A handle already exists: rebuild the archive in a temp file, swapping in the new entry, then move it into place
         tmp_path = self.archive_path + ".tmp"
         with zipfile.ZipFile(self.archive_path, mode="r") as zin:
             with zipfile.ZipFile(tmp_path, mode="w") as zout:
                 for item in zin.infolist():
                     if item.filename != filename:
                         zout.writestr(item, zin.read(item.filename))
                 zout.writestr(filename, data)
-
-        # Replace the original archive
         shutil.move(tmp_path, self.archive_path)
         self.zipfile = zipfile.ZipFile(self.archive_path, mode="a")
 
@@ -94,10 +114,11 @@ def create_archive(self, file_list, output_path=None):
                 zf.write(file, arcname=os.path.basename(file), compress_type=compress)
 
 
-def get_archive(record_base_name):
+def get_archive(record_base_name, mode="r"):
     """
     Get or create a WFDBArchive for the given record base name.
     """
     if record_base_name not in _archive_cache:
-        _archive_cache[record_base_name] = WFDBArchive(record_base_name)
+        _archive_cache[record_base_name] = WFDBArchive(record_base_name,
+                                                       mode=mode)
     return _archive_cache[record_base_name]
diff --git a/wfdb/io/record.py b/wfdb/io/record.py
@@ -935,11 +935,12 @@ def wrsamp(self, expanded=False, write_dir="", wfdb_archive=None):
 
         # Perform field validity and cohesion checks, and write the
         # header file.
-        self.wrheader(write_dir=write_dir, expanded=expanded)
+        self.wrheader(write_dir=write_dir, expanded=expanded,
+                      wfdb_archive=wfdb_archive)
         if self.n_sig > 0:
             # Perform signal validity and cohesion checks, and write the
             # associated dat files.
-            self.wr_dats(expanded=expanded, write_dir=write_dir, 
+            self.wr_dats(expanded=expanded, write_dir=write_dir,
                          wfdb_archive=wfdb_archive)
 
     def _arrange_fields(self, channels, sampfrom, smooth_frames):
@@ -1162,7 +1163,7 @@ def __init__(
             if not seg_len:
                 self.seg_len = [segment.sig_len for segment in segments]
 
-    def wrsamp(self, write_dir=""):
+    def wrsamp(self, write_dir="", wfdb_archive=None):
         """
         Write a multi-segment header, along with headers and dat files
         for all segments, from this object.
@@ -1179,11 +1180,11 @@ def wrsamp(self, write_dir=""):
         """
         # Perform field validity and cohesion checks, and write the
         # header file.
-        self.wrheader(write_dir=write_dir)
+        self.wrheader(write_dir=write_dir, wfdb_archive=wfdb_archive)
         # Perform record validity and cohesion checks, and write the
         # associated segments.
         for seg in self.segments:
-            seg.wrsamp(write_dir=write_dir)
+            seg.wrsamp(write_dir=write_dir, wfdb_archive=wfdb_archive)
 
     def _check_segment_cohesion(self):
         """
@@ -1828,7 +1829,11 @@ def rdheader(record_name, pn_dir=None, rd_segments=False):
 
     """
     dir_name, base_record_name = os.path.split(record_name)
-    file_name = f"{base_record_name}.hea"
+
+    if not base_record_name.endswith(".hea"):
+        file_name = f"{base_record_name}.hea"
+    else:
+        file_name = base_record_name
 
     # If this is a cloud path, use posixpath to construct the path and fsspec to open file
     if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS):
@@ -2032,17 +2037,23 @@ def rdrecord(
                                channels=[1, 3])
 
     """
+    wfdb_archive = None
     is_wfdb_archive = record_name.endswith(".wfdb")
 
     if is_wfdb_archive:
         record_base = record_name[:-5]  # remove ".wfdb"
-        archive = get_archive(record_base)
+        wfdb_archive = get_archive(record_base)
         hea_file = os.path.basename(record_base) + ".hea"
 
-        with archive.open(hea_file, "r") as f:
-            record = Record()
-            record.wfdb_archive = archive
-            record._read_header(f.read())
+        import tempfile
+        with wfdb_archive.open(hea_file, "r") as f:
+            header_str = f.read()
+
+        with tempfile.NamedTemporaryFile("w+", suffix=".hea", delete=False) as tmpf:
+            tmpf.write(header_str)
+            tmpf.flush()
+            record = rdheader(tmpf.name)
+            record.wfdb_archive = wfdb_archive
 
         # Set dir_name to the archive base (needed for _rd_segment)
         dir_name = os.path.dirname(record_base)
@@ -2168,6 +2179,7 @@ def rdrecord(
             no_file=no_file,
             sig_data=sig_data,
             return_res=return_res,
+            wfdb_archive=wfdb_archive,
         )
 
         # Only 1 sample/frame, or frames are smoothed. Return uniform numpy array
@@ -2879,7 +2891,7 @@ def wrsamp(
     base_date=None,
     base_datetime=None,
     write_dir="",
-    archive=False,
+    wfdb_archive=None,
 ):
     """
     Write a single segment WFDB record, creating a WFDB header file and any
@@ -3067,9 +3079,9 @@ def wrsamp(
     else:
         expanded = False
 
-    wfdb_archive = None
-    if archive:
-        wfdb_archive = get_archive(os.path.join(write_dir, record_name))
+    if wfdb_archive:
+        wfdb_archive = get_archive(os.path.join(write_dir, record_name),
+                                   mode="w")
 
     # Write the record files - header and associated dat
     record.wrsamp(write_dir=write_dir, expanded=expanded, wfdb_archive=wfdb_archive)