11import math
22import os
3+ import posixpath
34import sys
45
6+ import fsspec
57import numpy as np
68
79from wfdb .io import download , _coreio , util
8-
10+ from wfdb . io . _coreio import CLOUD_PROTOCOLS
911
1012MAX_I32 = 2147483647
1113MIN_I32 = - 2147483648
@@ -1643,10 +1645,10 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16431645 The name of the dat file.
16441646 dir_name : str
16451647 The full directory where the dat file(s) are located, if the dat
1646- file(s) are local.
1648+ file(s) are local or in the cloud .
16471649 pn_dir : str
16481650 The PhysioNet directory where the dat file(s) are located, if
1649- the dat file(s) are remote .
1651+ the dat file(s) are on a PhysioNet server .
16501652 fmt : str
16511653 The format of the dat file.
16521654 start_byte : int
@@ -1686,15 +1688,22 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16861688 element_count = n_samp
16871689 byte_count = n_samp * BYTES_PER_SAMPLE [fmt ]
16881690
1689- # Local dat file
1691+ # Local or cloud dat file
16901692 if pn_dir is None :
1691- with open (os .path .join (dir_name , file_name ), "rb" ) as fp :
1693+ with fsspec . open (os .path .join (dir_name , file_name ), "rb" ) as fp :
16921694 fp .seek (start_byte )
1693- sig_data = np .fromfile (
1695+ sig_data = util .fromfile (
16941696 fp , dtype = np .dtype (DATA_LOAD_TYPES [fmt ]), count = element_count
16951697 )
1696- # Stream dat file from Physionet
1698+
1699+ # Stream dat file from PhysioNet
16971700 else :
1701+ # check to make sure a cloud path isn't being passed under pn_dir
1702+ if any (pn_dir .startswith (proto ) for proto in CLOUD_PROTOCOLS ):
1703+ raise ValueError (
1704+ "Cloud paths should be passed under record_name, not under pn_dir"
1705+ )
1706+
16981707 dtype_in = np .dtype (DATA_LOAD_TYPES [fmt ])
16991708 sig_data = download ._stream_dat (
17001709 file_name , pn_dir , byte_count , start_byte , dtype_in
@@ -1840,8 +1849,9 @@ def _rd_compressed_file(
18401849 file_name : str
18411850 The name of the signal file.
18421851 dir_name : str
1843- The full directory where the signal file is located, if local.
1844- This argument is ignored if `pn_dir` is not None.
1852+ The full directory where the signal file is located, if this
1853+ is a local or cloud path. This argument is ignored if `pn_dir`
1854+ is not None.
18451855 pn_dir : str or None
18461856 The PhysioNet database directory where the signal file is located.
18471857 fmt : str
@@ -2585,10 +2595,10 @@ def _infer_sig_len(
25852595 The byte offset of the dat file. None is equivalent to zero.
25862596 dir_name : str
25872597 The full directory where the dat file(s) are located, if the dat
2588- file(s) are local.
2598+ file(s) are local or in the cloud .
25892599 pn_dir : str, optional
25902600 The PhysioNet directory where the dat file(s) are located, if
2591- the dat file(s) are remote .
2601+ the dat file(s) are on a PhysioNet server .
25922602
25932603 Returns
25942604 -------
@@ -2600,13 +2610,29 @@ def _infer_sig_len(
26002610 sig_len * tsamps_per_frame * bytes_per_sample == file_size
26012611
26022612 """
2603- if pn_dir is None :
2604- file_size = os .path .getsize (os .path .join (dir_name , file_name ))
2605- else :
2613+ from wfdb .io .record import CLOUD_PROTOCOLS
2614+
2615+ # If this is a cloud path, use posixpath to construct the path and fsspec to open file
2616+ if any (dir_name .startswith (proto ) for proto in CLOUD_PROTOCOLS ):
2617+ with fsspec .open (posixpath .join (dir_name , file_name ), mode = "rb" ) as f :
2618+ file_size = f .seek (0 , os .SEEK_END )
2619+
2620+ # If a PhysioNet database directory is provided, get the file size from the PhysioNet server
2621+ elif pn_dir is not None :
2622+ # check to make sure a cloud path isn't being passed under pn_dir
2623+ if any (pn_dir .startswith (proto ) for proto in CLOUD_PROTOCOLS ):
2624+ raise ValueError (
2625+ "Cloud paths should be passed under record_name, not under pn_dir"
2626+ )
2627+
26062628 file_size = download ._remote_file_size (
26072629 file_name = file_name , pn_dir = pn_dir
26082630 )
26092631
2632+ # If it isn't a cloud path or a PhysioNet path, we treat as a local file
2633+ else :
2634+ file_size = os .path .getsize (os .path .join (dir_name , file_name ))
2635+
26102636 if byte_offset is None :
26112637 byte_offset = 0
26122638 data_size = file_size - byte_offset
0 commit comments