@@ -20,6 +20,7 @@ def read_edf(
2020 header_only = False ,
2121 verbose = False ,
2222 rdedfann_flag = False ,
23+ encoding = "iso8859-1" ,
2324):
2425 """
2526 Read an EDF format file into a WFDB Record.
@@ -61,6 +62,9 @@ def read_edf(
6162 is being called by the user and the file has annotations, then warn
6263 them that the EDF file has annotations and that they should use
6364 `rdedfann` instead.
65+ encoding : str, optional
66+ The encoding to use for strings in the header. Although the EDF
67+ specification requires ASCII strings, some files do not adhere to it.
6468
6569 Returns
6670 -------
@@ -139,7 +143,7 @@ def read_edf(
139143 edf_file = open (record_name , mode = "rb" )
140144
141145 # Version of this data format (8 bytes)
142- version = struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ()
146+ version = struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding )
143147
144148 # Check to see that the input is an EDF file. (This check will detect
145149 # most but not all other types of files.)
@@ -152,7 +156,7 @@ def read_edf(
152156 print ("EDF version number: {}" .format (version .strip ()))
153157
154158 # Local patient identification (80 bytes)
155- patient_id = struct .unpack ("<80s" , edf_file .read (80 ))[0 ].decode ()
159+ patient_id = struct .unpack ("<80s" , edf_file .read (80 ))[0 ].decode (encoding )
156160 if verbose :
157161 print ("Patient ID: {}" .format (patient_id ))
158162
@@ -161,12 +165,12 @@ def read_edf(
161165 # including an abbreviated month name in English and a full (4-digit)
162166 # year, as is done here if this information is available in the input
163167 # record. EDF+ requires this.
164- record_id = struct .unpack ("<80s" , edf_file .read (80 ))[0 ].decode ()
168+ record_id = struct .unpack ("<80s" , edf_file .read (80 ))[0 ].decode (encoding )
165169 if verbose :
166170 print ("Recording ID: {}" .format (record_id ))
167171
168172 # Start date of recording (dd.mm.yy) (8 bytes)
169- start_date = struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ()
173+ start_date = struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding )
170174 if verbose :
171175 print ("Recording Date: {}" .format (start_date ))
172176 start_day , start_month , start_year = [int (i ) for i in start_date .split ("." )]
@@ -177,21 +181,21 @@ def read_edf(
177181 start_year += 100
178182
179183 # Start time of recording (hh.mm.ss) (8 bytes)
180- start_time = struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ()
184+ start_time = struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding )
181185 if verbose :
182186 print ("Recording Time: {}" .format (start_time ))
183187 start_hour , start_minute , start_second = [
184188 int (i ) for i in start_time .split ("." )
185189 ]
186190
187191 # Number of bytes in header (8 bytes)
188- header_bytes = int (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ())
192+ header_bytes = int (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding ))
189193 if verbose :
190194 print ("Number of bytes in header record: {}" .format (header_bytes ))
191195
192196 # Reserved (44 bytes)
193197 reserved_notes = (
194- struct .unpack ("<44s" , edf_file .read (44 ))[0 ].decode ().strip ()
198+ struct .unpack ("<44s" , edf_file .read (44 ))[0 ].decode (encoding ).strip ()
195199 )
196200 if reserved_notes [:5 ] == "EDF+C" :
197201 # The file is EDF compatible and will work without issue
@@ -209,7 +213,7 @@ def read_edf(
209213 print ("Free Space: {}" .format (reserved_notes ))
210214
211215 # Number of blocks (-1 if unknown) (8 bytes)
212- num_blocks = int (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ())
216+ num_blocks = int (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding ))
213217 if verbose :
214218 print ("Number of data records: {}" .format (num_blocks ))
215219 if num_blocks == - 1 :
@@ -218,7 +222,7 @@ def read_edf(
218222 )
219223
220224 # Duration of a block, in seconds (8 bytes)
221- block_duration = float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ())
225+ block_duration = float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding ))
222226 if verbose :
223227 print (
224228 "Duration of each data record in seconds: {}" .format (block_duration )
@@ -227,7 +231,7 @@ def read_edf(
227231 block_duration = 1.0
228232
229233 # Number of signals (4 bytes)
230- n_sig = int (struct .unpack ("<4s" , edf_file .read (4 ))[0 ].decode ())
234+ n_sig = int (struct .unpack ("<4s" , edf_file .read (4 ))[0 ].decode (encoding ))
231235 if verbose :
232236 print ("Number of signals: {}" .format (n_sig ))
233237 if n_sig < 1 :
@@ -236,7 +240,7 @@ def read_edf(
236240 # Label (e.g., EEG FpzCz or Body temp) (16 bytes each)
237241 sig_name = []
238242 for _ in range (n_sig ):
239- temp_sig = struct .unpack ("<16s" , edf_file .read (16 ))[0 ].decode ().strip ()
243+ temp_sig = struct .unpack ("<16s" , edf_file .read (16 ))[0 ].decode (encoding ).strip ()
240244 if temp_sig == "EDF Annotations" and not rdedfann_flag :
241245 print (
242246 "*** This may be an EDF+ Annotation file instead, please see "
@@ -250,7 +254,7 @@ def read_edf(
250254 transducer_types = []
251255 for _ in range (n_sig ):
252256 transducer_types .append (
253- struct .unpack ("<80s" , edf_file .read (80 ))[0 ].decode ().strip ()
257+ struct .unpack ("<80s" , edf_file .read (80 ))[0 ].decode (encoding ).strip ()
254258 )
255259 if verbose :
256260 print ("Transducer Types: {}" .format (transducer_types ))
@@ -259,7 +263,7 @@ def read_edf(
259263 physical_dims = []
260264 for _ in range (n_sig ):
261265 physical_dims .append (
262- struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ().strip ()
266+ struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding ).strip ()
263267 )
264268 if verbose :
265269 print ("Physical Dimensions: {}" .format (physical_dims ))
@@ -269,7 +273,7 @@ def read_edf(
269273 for _ in range (n_sig ):
270274 physical_min = np .append (
271275 physical_min ,
272- float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ()),
276+ float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding )),
273277 )
274278 if verbose :
275279 print ("Physical Minimums: {}" .format (physical_min ))
@@ -279,7 +283,7 @@ def read_edf(
279283 for _ in range (n_sig ):
280284 physical_max = np .append (
281285 physical_max ,
282- float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ()),
286+ float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding )),
283287 )
284288 if verbose :
285289 print ("Physical Maximums: {}" .format (physical_max ))
@@ -289,7 +293,7 @@ def read_edf(
289293 for _ in range (n_sig ):
290294 digital_min = np .append (
291295 digital_min ,
292- float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ()),
296+ float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding )),
293297 )
294298 if verbose :
295299 print ("Digital Minimums: {}" .format (digital_min ))
@@ -299,7 +303,7 @@ def read_edf(
299303 for _ in range (n_sig ):
300304 digital_max = np .append (
301305 digital_max ,
302- float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ()),
306+ float (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding )),
303307 )
304308 if verbose :
305309 print ("Digital Maximums: {}" .format (digital_max ))
@@ -308,7 +312,7 @@ def read_edf(
308312 prefilter_info = []
309313 for _ in range (n_sig ):
310314 prefilter_info .append (
311- struct .unpack ("<80s" , edf_file .read (80 ))[0 ].decode ().strip ()
315+ struct .unpack ("<80s" , edf_file .read (80 ))[0 ].decode (encoding ).strip ()
312316 )
313317 if verbose :
314318 print ("Prefiltering Information: {}" .format (prefilter_info ))
@@ -317,14 +321,14 @@ def read_edf(
317321 samps_per_block = []
318322 for _ in range (n_sig ):
319323 samps_per_block .append (
320- int (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode ())
324+ int (struct .unpack ("<8s" , edf_file .read (8 ))[0 ].decode (encoding ))
321325 )
322326 if verbose :
323327 print ("Number of Samples per Record: {}" .format (samps_per_block ))
324328
325329 # The last 32*nsig bytes in the header are unused
326330 for _ in range (n_sig ):
327- struct .unpack ("<32s" , edf_file .read (32 ))[0 ].decode ()
331+ struct .unpack ("<32s" , edf_file .read (32 ))[0 ].decode (encoding )
328332
329333 # Pre-process the acquired data before creating the record
330334 record_name_out = (
@@ -997,6 +1001,7 @@ def rdedfann(
9971001 info_only = True ,
9981002 record_only = False ,
9991003 verbose = False ,
1004+ encoding = "iso8859-1" ,
10001005):
10011006 """
10021007 This program returns the annotation information from an EDF+ file
@@ -1038,6 +1043,9 @@ def rdedfann(
10381043 verbose : bool, optional
10391044 Whether to print all the information read about the file (True) or
10401045 not (False).
1046+ encoding : str, optional
1047+ The encoding to use for strings in the header. Although the EDF
1048+ specification requires ASCII strings, some files do not adhere to it.
10411049
10421050 Returns
10431051 -------
@@ -1110,7 +1118,7 @@ def rdedfann(
11101118 adjusted_hex = hex (
11111119 struct .unpack ("<H" , struct .pack (">H" , chunk + 1 ))[0 ]
11121120 )
1113- annotation_string += bytes .fromhex (adjusted_hex [2 :]).decode ("ascii" )
1121+ annotation_string += bytes .fromhex (adjusted_hex [2 :]).decode (encoding )
11141122 # Remove all of the whitespace
11151123 for rep in ["\x00 " , "\x14 " , "\x15 " ]:
11161124 annotation_string = annotation_string .replace (rep , " " )
0 commit comments