Skip to content

Commit c93f315

Browse files
author
Benjamin Moody
committed
Parse header lines starting at the beginning of the line.
When reading a header file, each line is parsed by matching it against a regular expression. Previously, this was done by searching for the pattern *anywhere* in the input line (using findall()) rather than at the start of the line (using match()). In particular, if the first token on the line (e.g., a signal file name or segment name) contains a disallowed character, this must be treated as an error, rather than silently ignoring everything up to and including the disallowed character. Note, however, that horizontal whitespace is permitted at the start of the line, so the expressions must begin with "[ \t]*".
1 parent 7bd34a1 commit c93f315

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

wfdb/io/_header.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,8 @@
101101
# Regexp objects for reading headers
102102
# Record line
103103
_rx_record = re.compile(''.join(
104-
["(?P<record_name>[-\w]+)/?(?P<n_seg>\d*)[ \t]+",
104+
["[ \t]*",
105+
"(?P<record_name>[-\w]+)/?(?P<n_seg>\d*)[ \t]+",
105106
"(?P<n_sig>\d+)[ \t]*(?P<fs>\d*\.?\d*)/*(?P<counter_freq>-?\d*\.?\d*)",
106107
"\(?(?P<base_counter>-?\d*\.?\d*)\)?[ \t]*(?P<sig_len>\d*)[ \t]*",
107108
"(?P<base_time>\d{,2}:?\d{,2}:?\d{,2}\.?\d{,6})[ \t]*",
@@ -110,7 +111,8 @@
110111

111112
# Signal line
112113
_rx_signal = re.compile(''.join(
113-
["(?P<file_name>~?[-\w]*\.?[\w]*)[ \t]+(?P<fmt>\d+)x?"
114+
["[ \t]*",
115+
"(?P<file_name>~?[-\w]*\.?[\w]*)[ \t]+(?P<fmt>\d+)x?"
114116
"(?P<samps_per_frame>\d*):?(?P<skew>\d*)\+?(?P<byte_offset>\d*)[ \t]*",
115117
"(?P<adc_gain>-?\d*\.?\d*e?[\+-]?\d*)\(?(?P<baseline>-?\d*)\)?",
116118
"/?(?P<units>[\w\^\-\?%\/]*)[ \t]*(?P<adc_res>\d*)[ \t]*",
@@ -119,7 +121,7 @@
119121
)
120122

121123
# Segment line
122-
_rx_segment = re.compile('(?P<seg_name>[-\w]*~?)[ \t]+(?P<seg_len>\d+)')
124+
_rx_segment = re.compile('[ \t]*(?P<seg_name>[-\w]*~?)[ \t]+(?P<seg_len>\d+)')
123125

124126

125127
class BaseHeaderMixin(object):
@@ -879,11 +881,12 @@ def _parse_record_line(record_line):
879881
record_fields = {}
880882

881883
# Read string fields from record line
884+
match = _rx_record.match(record_line)
882885
(record_fields['record_name'], record_fields['n_seg'],
883886
record_fields['n_sig'], record_fields['fs'],
884887
record_fields['counter_freq'], record_fields['base_counter'],
885888
record_fields['sig_len'], record_fields['base_time'],
886-
record_fields['base_date']) = re.findall(_rx_record, record_line)[0]
889+
record_fields['base_date']) = match.groups()
887890

888891
for field in RECORD_SPECS.index:
889892
# Replace empty strings with their read defaults (which are
@@ -942,14 +945,15 @@ def _parse_signal_lines(signal_lines):
942945

943946
# Read string fields from signal line
944947
for ch in range(n_sig):
948+
match = _rx_signal.match(signal_lines[ch])
945949
(signal_fields['file_name'][ch], signal_fields['fmt'][ch],
946950
signal_fields['samps_per_frame'][ch], signal_fields['skew'][ch],
947951
signal_fields['byte_offset'][ch], signal_fields['adc_gain'][ch],
948952
signal_fields['baseline'][ch], signal_fields['units'][ch],
949953
signal_fields['adc_res'][ch], signal_fields['adc_zero'][ch],
950954
signal_fields['init_value'][ch], signal_fields['checksum'][ch],
951955
signal_fields['block_size'][ch],
952-
signal_fields['sig_name'][ch]) = _rx_signal.findall(signal_lines[ch])[0]
956+
signal_fields['sig_name'][ch]) = match.groups()
953957

954958
for field in SIGNAL_SPECS.index:
955959
# Replace empty strings with their read defaults (which are mostly None)
@@ -998,7 +1002,9 @@ def _read_segment_lines(segment_lines):
9981002

9991003
# Read string fields from segment line
10001004
for i in range(len(segment_lines)):
1001-
(segment_fields['seg_name'][i], segment_fields['seg_len'][i]) = _rx_segment.findall(segment_lines[i])[0]
1005+
match = _rx_segment.match(segment_lines[i])
1006+
(segment_fields['seg_name'][i],
1007+
segment_fields['seg_len'][i]) = match.groups()
10021008

10031009
# Typecast strings for numerical field
10041010
if field == 'seg_len':

0 commit comments

Comments
 (0)