33import os
44import sys
55import requests
6- from configparser import ConfigParser
7- # from distutils.sysconfig import get_python_lib
8-
def _localrecordcomplete(localdir, baserecname):
    """Return True if every file needed to read a record is present in 'localdir'.

    Input arguments:
    - localdir: The directory to look in. An empty string means the current
      working directory.
    - baserecname: The record name without directory or file extension.

    The header 'baserecname.hea' must exist. For a single-segment record
    (fields["nseg"] == 1) every entry of fields["filename"] must exist. Otherwise
    each entry of fields["filename"] is treated as a segment name whose header
    and listed files must all exist. Entries equal to '~' are skipped
    (presumably placeholders for empty segments - confirm against readheader).
    """
    recordpath = os.path.join(localdir, baserecname)

    # Without the header there is nothing to parse, so the record is incomplete.
    if not os.path.isfile(recordpath + ".hea"):
        return False

    fields = readheader(recordpath)

    # Single segment. Check for all the required dat files.
    if fields["nseg"] == 1:
        return all(os.path.isfile(os.path.join(localdir, f))
                   for f in fields["filename"])

    # Multi segment. Check for all segment headers and their dat files.
    for segment in fields["filename"]:
        if segment == '~':
            continue
        segmentpath = os.path.join(localdir, segment)
        if not os.path.isfile(segmentpath + ".hea"):  # Missing a segment header
            return False
        segfields = readheader(segmentpath)
        for f in segfields["filename"]:
            # Missing a segment's dat file
            if f != '~' and not os.path.isfile(os.path.join(localdir, f)):
                return False

    return True


def checkrecordfiles(recordname, filedirectory):
    """Check a local directory for all necessary files required to read a WFDB
    record, and download the record into the database cache directory specified
    in the 'wfdb.config' configuration file if any file is missing.

    Calls 'dlrecordfiles' to download missing files into the database cache
    directory. Returns the record name to read from, and a list of files downloaded.

    *If you wish to directly download files for a record, it is highly recommended
    to call 'dlrecordfiles' directly. This is a helper function for rdsamp which
    tries to parse the 'recordname' input to deduce whether it contains a local
    directory, physiobank database, or both. Its usage format is different and more
    complex than that of 'dlrecordfiles'.

    Usage: readrecordname, downloadedfiles = checkrecordfiles(recordname, filedirectory)

    Input arguments:
    - recordname (required): The name of the WFDB record to be read
      (without any file extensions). Can be prepended with a local directory, or a
      physiobank subdirectory (or both if the relative local directory exists and
      takes the same name as the physiobank subdirectory). eg: recordname=mitdb/100
    - filedirectory (required): Accepted for interface compatibility. NOTE: the
      function body never reads this argument; files are looked up relative to the
      directory part of 'recordname' or the current working directory.

    Output arguments:
    - readrecordname: The record name prepended with the path the files are to be
      read from.
    - downloadedfiles: The list of files downloaded from PhysioBank (empty if
      nothing was downloaded).
    """

    # Base directory to store downloaded physiobank files
    config = loadconfig('wfdb.config')
    dbcachedir = config.get('pbdownload', 'dbcachedir')
    basedir, baserecname = os.path.split(recordname)

    # At this point we do not know whether basedir is a local directory, a
    # physiobank directory, or both.

    # If basedir is not defined, then there is no physiobank database specified.
    # If files are missing we cannot download them anyway.
    if not basedir:
        return recordname, []

    if os.path.isdir(basedir):
        # A directory called 'basedir' exists, so check it for files. It may
        # also be a physiobank database, so a download is still attempted below
        # if anything is missing.
        localdir, localrecordname = basedir, recordname
    else:
        # There is no 'basedir' directory in the relative path, therefore
        # basedir must be a physiobank database directory. Check the current
        # working directory for the files. (The original multi-segment check
        # here used an undefined name, 'targetdir', and raised NameError.)
        localdir, localrecordname = '', baserecname

    # All files were already present locally: nothing to download.
    if _localrecordcomplete(localdir, baserecname):
        return localrecordname, []

    # Something is missing. Download the record into the cache directory,
    # treating basedir as the physiobank database directory.
    downloaddir = os.path.join(dbcachedir, basedir)
    dledfiles = dlrecordfiles(recordname, downloaddir)
    return os.path.join(downloaddir, baserecname), dledfiles
155-
1566
1577def dlrecordfiles (pbrecname , targetdir ):
1588 """Check a specified local directory for all necessary files required to read a Physiobank
@@ -163,7 +13,7 @@ def dlrecordfiles(pbrecname, targetdir):
16313
16414 Input arguments:
16515 - pbrecname (required): The name of the MIT format Physiobank record to be read, prepended
166- with the Physiobank subdirectory the file is contain in (without any file extensions).
16+ with the Physiobank subdirectory the file is contained in (without any file extensions).
16717 eg. pbrecname=prcp/12726 to download files http://physionet.org/physiobank/database/prcp/12726.hea
16818 and 12727.dat
16919 - targetdir (required): The local directory to check for files required to read the record,
@@ -849,19 +699,6 @@ def processwfdbbytes(fp, fmt, siglen, nsig, sampsperframe, floorsamp=0):
849699 "initvalue" ,
850700 "signame" ]
851701
def loadconfig(fn):
    """
    Search for a configuration file. Load the first version found.

    The file name 'fn' is looked up, in order, in the current directory, the
    user's home directory, and the directory containing this module. Only the
    first existing file is parsed.

    Input arguments:
    - fn: The file name of the configuration file, eg. 'wfdb.config'.

    Output arguments:
    - config: A ConfigParser holding the parsed settings. It is empty (no
      sections) if the file was not found in any of the locations.
    """
    config = ConfigParser()
    for loc in [os.curdir, os.path.expanduser("~"), os.path.dirname(__file__)]:
        configfn = os.path.join(loc, fn)
        if os.path.isfile(configfn):
            with open(configfn) as source:
                # read_file replaces readfp, which was deprecated and then
                # removed from ConfigParser in Python 3.12.
                config.read_file(source)
            break
    return config
864-
865702
866703def processsegment (fields , dirname , baserecordname , sampfrom , sampto , channels , physical ):
867704 if (len (set (fields ["filename" ])) ==
@@ -1137,27 +974,66 @@ def expandfields(segmentfields, segnum, startseg, readsegs, channels, returninds
1137974 # Keep fields['nsig'] as the number of returned channels from the segments.
1138975 return segmentfields
1139976
977+
def checkrecordfiles(recordname, pbdl, dldir, keepfiles):
    """Work out where the record files are to be read from, downloading them from
    physiobank first if requested. This is a helper function for rdsamp. *To
    download the files of a record directly, call 'dlrecordfiles' instead.

    Input arguments:
    - recordname: name of the record. Read as a physiobank record name when
      pbdl == 1, otherwise as a (possibly path-prefixed) local record name.
    - pbdl: flag specifying whether a physiobank record should be downloaded
    - dldir: directory in which to download physiobank files
    - keepfiles: flag specifying whether to keep downloaded files

    Output arguments:
    - dirname: the directory name from where the data files will be read
    - baserecordname: the base name of the WFDB record without any file paths
    - filestoremove: a list of downloaded files that are to be removed
    """

    # No download requested: the record is read from the path given in recordname.
    if pbdl != 1:
        localdir, recordbase = os.path.split(recordname)
        return localdir, recordbase, []

    # Download requested: the files are fetched into, and then read from, dldir.
    fetched = dlrecordfiles(recordname, dldir)
    recordbase = os.path.basename(recordname)

    # Downloaded files are scheduled for removal unless the caller keeps them.
    toremove = fetched if keepfiles == 0 else []

    return dldir, recordbase, toremove
1009+
1010+
1011+
11411012def rdsamp (
11421013 recordname ,
11431014 sampfrom = 0 ,
11441015 sampto = [],
11451016 channels = [],
11461017 physical = 1 ,
1147- stacksegments = 1 ):
1018+ stacksegments = 1 ,
1019+ pbdl = 0 ,
1020+ dldir = os .getcwd (),
1021+ keepfiles = 0 ):
11481022 """Read a WFDB record and return the signal as a numpy array and the metadata as a dictionary.
11491023
11501024 Usage:
11511025 sig, fields = rdsamp(recordname, sampfrom, sampto, channels, physical, stacksegments)
11521026
11531027 Input arguments:
1154- - recordname (required): The name of the WFDB record to be read (without any file extensions).
1028+ - recordname (required): The name of the WFDB record to be read (without any file extensions). If the argument contains any path delimiter characters, the argument will be interpreted as PATH/baserecord and the data files will be searched for in the local path. If the pbdownload flag is set to 1, recordname will be interpreted as a physiobank record name including the database subdirectory.
11551029 - sampfrom (default=0): The starting sample number to read for each channel.
11561030 - sampto (default=length of entire signal): The final sample number to read for each channel.
11571031 - channels (default=all channels): Indices specifying the channel to be returned.
11581032 - physical (default=1): Flag that specifies whether to return signals in physical (1) or digital (0) units.
11591033 - stacksegments (default=1): Flag used only for multi-segment files. Specifies whether to return the signal as a single stacked/concatenated numpy array (1) or as a list of one numpy array for each segment (0).
1160-
1034+ - pbdl (default=0): If this argument is set, the function will assume that the user is trying to download a physiobank file. Therefore the 'recordname' argument will be interpreted as a physiobank record name including the database subdirectory, rather than a local directory.
1035+ - dldir (default=os.getcwd()): The directory to download physiobank files to.
1036+ - keepfiles (default=0): Flag specifying whether to keep physiobank files newly downloaded through the function call.
11611037
11621038 Output variables:
11631039 - sig: An nxm numpy array where n is the signal length and m is the number of channels.
@@ -1173,31 +1049,27 @@ def rdsamp(
11731049 : The last list element will be a list of dictionaries of metadata for each segment.
11741050 For empty segments, the dictionary will be replaced by a single string: 'Empty Segment'
11751051 """
1176-
1177- filestoremove = []
1178- config = loadconfig ('wfdb.config' )
1179-
1180- if int (config .get ('pbdownload' ,'getpbfiles' )) == 1 : # Flag specifying whether to allow downloading from physiobank
1181- recordname , dledfiles = checkrecordfiles (recordname , os .getcwd ())
1182- if int (config .get ('pbdownload' ,'keepdledfiles' )) == 0 : # Flag specifying whether to keep downloaded physiobank files
1183- filestoremove = dledfiles
1184-
1185- fields = readheader (recordname ) # Get the info from the header file
1186-
1187- if fields ["nsig" ] == 0 :
1188- sys .exit ("This record has no signals. Use rdann to read annotations" )
1052+
11891053 if sampfrom < 0 :
11901054 sys .exit ("sampfrom must be non-negative" )
1191- dirname , baserecordname = os .path .split (recordname )
1055+ if channels and min (channels ) < 0 :
1056+ sys .exit ("input channels must be non-negative" )
1057+
1058+ dirname , baserecordname , filestoremove = checkrecordfiles (recordname , pbdl , dldir , keepfiles )
1059+
1060+ fields = readheader (os .path .join (dirname , baserecordname ))
11921061
1062+ if fields ["nsig" ] == 0 :
1063+ sys .exit ("This record has no signals. Use rdann to read annotations" )
11931064
1194- if fields ["nseg" ] == 1 : # single segment file
1065+ # Begin processing the data files.
1066+
1067+ # Single segment file
1068+ if fields ["nseg" ] == 1 :
11951069 sig , fields = processsegment (fields , dirname , baserecordname , sampfrom , sampto , channels , physical )
11961070
1197- # Multi-segment file. Preprocess and recursively call rdsamp on single
1198- # segments.
1071+ # Multi-segment file. Preprocess and recursively call rdsamp on segments
11991072 else :
1200-
12011073 # Determine if the record is fixed or variable layout.
12021074 # startseg is the first signal segment, 1 or 0.
12031075 startseg , layoutfields = fixedorvariable (fields , dirname )
@@ -1278,8 +1150,7 @@ def rdsamp(
12781150 else : # Fixed layout format.
12791151 fields = [fields , segmentfields ]
12801152
1281- if filestoremove :
1282- for fr in filestoremove :
1153+ for fr in filestoremove :
12831154 os .remove (fr )
12841155
12851156 return (sig , fields )
0 commit comments