|
10 | 10 | import mne |
11 | 11 | import math |
12 | 12 | import functools |
| 13 | +import struct |
13 | 14 | import pdb |
14 | 15 |
|
15 | 16 | from wfdb.io import _header |
@@ -1497,6 +1498,168 @@ def edf2mit(record_name, pn_dir=None, delete_file=True, record_only=False): |
1497 | 1498 | pass |
1498 | 1499 |
|
1499 | 1500 |
|
def _read_wav_field(wave_file, fmt):
    """
    Read and unpack one fixed-size field from an open WAV file.

    Parameters
    ----------
    wave_file : file object
        The WAV file, opened in binary mode and positioned at the field.
    fmt : str
        A `struct` format string describing exactly one value.

    Returns
    -------
    value : bytes or int
        The single unpacked value.

    Raises
    ------
    Exception
        If the file ends before the whole field could be read.

    """
    size = struct.calcsize(fmt)
    raw = wave_file.read(size)
    # A short read means the header is truncated; report that instead of
    # letting `struct.error` leak out to the caller.
    if len(raw) != size:
        raise Exception('Unexpected end of file while reading the .wav header')
    return struct.unpack(fmt, raw)[0]


def wav2mit(record_name, pn_dir=None, delete_file=True, record_only=False):
    """
    Convert WAV formatted files to MIT format. See here for more details about
    the formatting of a WAV file: http://soundfile.sapp.org/doc/WaveFormat/.

    Only the canonical 44-byte layout is accepted: a 'RIFF'/'WAVE' header, a
    16-byte uncompressed PCM 'fmt ' sub-chunk, immediately followed by a
    single 'data' sub-chunk that runs to the end of the file.

    Parameters
    ----------
    record_name : str
        The name of the input WAV record to be read. Must end in '.wav'.
    pn_dir : str, optional
        Option used to stream data from Physionet. The Physionet
        database directory from which to find the required record files.
        eg. For record '100' in 'http://physionet.org/content/mitdb'
        pn_dir='mitdb'.
    delete_file : bool, optional
        Whether to delete the downloaded WAV file (True) or keep it (False)
        after it has been imported. Only applies when `pn_dir` is set.
    record_only : bool, optional
        Whether to only return the record information (True) or not (False).
        If False, this function is meant to generate both a .dat and .hea
        file, which is not implemented yet.

    Returns
    -------
    record : Record, optional
        All of the record information needed to generate MIT formatted files.
        Only returned if `record_only` is True; otherwise None is returned.
        This record will not match the `rdrecord` output since it only
        carries the digital signal for now.

    Raises
    ------
    Exception
        If `record_name` does not end in '.wav', if the header is truncated,
        malformed or inconsistent with the file size, or if the audio format
        or sample resolution is not supported.

    Examples
    --------
    >>> wav_record = wfdb.wav2mit('SC4001E0_PSG.wav', record_only=True)

    """
    if not record_name.endswith('.wav'):
        raise Exception('Name of the input file must end in .wav')

    if pn_dir is not None:

        if '.' not in pn_dir:
            dir_list = pn_dir.split(os.sep)
            pn_dir = posixpath.join(dir_list[0], get_version(dir_list[0]),
                                    *dir_list[1:])

        file_url = posixpath.join(download.PN_INDEX_URL, pn_dir, record_name)
        # Currently must download file to read it though can give the
        # user the option to delete it immediately afterwards
        r = requests.get(file_url, allow_redirects=False)
        with open(record_name, 'wb') as local_file:
            local_file.write(r.content)

    # Strip only the trailing extension (guaranteed present by the check
    # above) so that a '.wav' inside the base name is preserved.
    record_name_out = os.path.basename(record_name)[:-len('.wav')]
    record_name_out = record_name_out.replace('-', '_')

    file_size = os.path.getsize(record_name)

    # The context manager guarantees the handle is closed on every error
    # path and before the file is (optionally) removed below.
    with open(record_name, mode='rb') as wave_file:
        # Compare raw bytes: decoding arbitrary file content could fail on
        # non-UTF-8 data before the format could even be rejected.
        if _read_wav_field(wave_file, '4s') != b'RIFF':
            raise Exception('{} is not a .wav-format file'.format(record_name))

        # The RIFF chunk size excludes the 8-byte chunk ID / size prefix.
        correct_chunk_size = file_size - 8
        chunk_size = _read_wav_field(wave_file, '<I')
        if chunk_size != correct_chunk_size:
            raise Exception('Header chunk has incorrect length (is {} should be {})'.format(chunk_size,correct_chunk_size))

        if _read_wav_field(wave_file, '4s') != b'WAVE':
            raise Exception('{} is not a .wav-format file'.format(record_name))

        if _read_wav_field(wave_file, '4s') != b'fmt ':
            raise Exception('Format chunk missing or corrupt')

        subchunk1_size = _read_wav_field(wave_file, '<I')
        audio_format = _read_wav_field(wave_file, '<H')
        if audio_format > 1:
            print('PCM has compression of {}'.format(audio_format))

        # Only the plain 16-byte, uncompressed PCM format chunk is handled.
        if (subchunk1_size != 16) or (audio_format != 1):
            raise Exception('Unsupported format {}'.format(audio_format))

        num_channels = _read_wav_field(wave_file, '<H')
        if num_channels == 1:
            print('Reading Mono formatted .wav file...')
        elif num_channels == 2:
            print('Reading Stereo formatted .wav file...')
        else:
            print('Reading {}-channel formatted .wav file...'.format(num_channels))

        sample_rate = _read_wav_field(wave_file, '<I')
        print('Sample rate: {}'.format(sample_rate))
        byte_rate = _read_wav_field(wave_file, '<I')
        print('Byte rate: {}'.format(byte_rate))
        block_align = _read_wav_field(wave_file, '<H')
        print('Block align: {}'.format(block_align))
        bits_per_sample = _read_wav_field(wave_file, '<H')
        print('Bits per sample: {}'.format(bits_per_sample))
        # I wish this were more precise but unfortunately some information
        # is lost in .wav files which is needed for these calculations
        if bits_per_sample <= 8:
            adc_res = 8
            adc_gain = 12.5
        elif bits_per_sample <= 16:
            adc_res = 16
            adc_gain = 6400
        else:
            raise Exception('Unsupported resolution ({} bits/sample)'.format(bits_per_sample))

        # One frame holds one sample of `adc_res // 8` bytes per channel.
        if block_align != (num_channels * (adc_res // 8)):
            raise Exception('Format chunk of {} has incorrect frame length'.format(block_align))

        if _read_wav_field(wave_file, '4s') != b'data':
            raise Exception('Data chunk missing or corrupt')

        # 44 bytes is the size of the canonical header read above.
        correct_subchunk2_size = file_size - 44
        subchunk2_size = _read_wav_field(wave_file, '<I')
        if subchunk2_size != correct_subchunk2_size:
            raise Exception('Data chunk has incorrect length.. (is {} should be {})'.format(subchunk2_size, correct_subchunk2_size))
        sig_len = subchunk2_size // block_align

        # WAV samples are little-endian, so the byte order is stated
        # explicitly rather than relying on the host's native order.
        # NOTE(review): samples are always read as 16-bit integers, even
        # when the header reports <= 8 bits/sample, and are then divided
        # by 2 * adc_res; both are kept from the original implementation
        # and should be confirmed against the intended MIT-format output.
        raw_samples = np.fromfile(wave_file, dtype=np.dtype('<i2'))
        sig_data = (raw_samples.reshape((-1, num_channels))
                    / (2 * adc_res)).astype(np.int16)

    # One entry per channel: first sample and the 16-bit wrapped sum.
    init_value = [int(channel[0]) for channel in sig_data.T]
    checksum = [int(np.sum(channel) % 65536) for channel in sig_data.T]

    if pn_dir is not None and delete_file:
        os.remove(record_name)

    is_16_bit = (adc_res == 16)
    record = Record(
        record_name = record_name_out,
        n_sig = num_channels,
        fs = num_channels * [sample_rate],
        samps_per_frame = num_channels * [1],
        counter_freq = None,
        base_counter = None,
        sig_len = sig_len,
        base_time = None,
        base_date = None,
        comments = [],
        sig_name = num_channels * [None],
        p_signal = None,
        d_signal = sig_data,
        e_p_signal = None,
        e_d_signal = None,
        file_name = num_channels * [record_name_out + '.dat'],
        fmt = num_channels * ['16' if is_16_bit else '80'],
        skew = num_channels * [None],
        byte_offset = num_channels * [None],
        adc_gain = num_channels * [adc_gain],
        baseline = num_channels * [0 if is_16_bit else 128],
        units = num_channels * [None],
        adc_res = num_channels * [adc_res],
        adc_zero = num_channels * [0 if is_16_bit else 128],
        init_value = init_value,
        checksum = checksum,
        block_size = num_channels * [0]
    )

    if record_only:
        return record
    else:
        # TODO: Generate the .dat and .hea files
        pass
| 1661 | + |
| 1662 | + |
1500 | 1663 | #------------------------- Reading Records --------------------------- # |
1501 | 1664 |
|
1502 | 1665 |
|
@@ -1626,6 +1789,8 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, |
1626 | 1789 | parameter is set, this parameter should contain just the base |
1627 | 1790 | record name, and the files fill be searched for remotely. |
1628 | 1791 | Otherwise, the data files will be searched for in the local path. |
| 1792 | + Can also handle .edf and .wav files as long as the extension is |
| 1793 | + provided in the `record_name`. |
1629 | 1794 | sampfrom : int, optional |
1630 | 1795 | The starting sample number to read for all channels. |
1631 | 1796 | sampto : int, 'end', optional |
@@ -1713,6 +1878,8 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, |
1713 | 1878 |
|
1714 | 1879 | if record_name.endswith('.edf'): |
1715 | 1880 | record = edf2mit(record_name, pn_dir=pn_dir, record_only=True) |
| 1881 | + elif record_name.endswith('.wav'): |
| 1882 | + record = wav2mit(record_name, pn_dir=pn_dir, record_only=True) |
1716 | 1883 | else: |
1717 | 1884 | record = rdheader(record_name, pn_dir=pn_dir, rd_segments=False) |
1718 | 1885 |
|
@@ -1785,7 +1952,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, |
1785 | 1952 | if smooth_frames or max([record.samps_per_frame[c] for c in channels]) == 1: |
1786 | 1953 | # Read signals from the associated dat files that contain |
1787 | 1954 | # wanted channels |
1788 | | - if record_name.endswith('.edf'): |
| 1955 | + if record_name.endswith('.edf') or record_name.endswith('.wav'): |
1789 | 1956 | record.d_signal = _signal._rd_segment(record.file_name, |
1790 | 1957 | dir_name, pn_dir, |
1791 | 1958 | record.fmt, |
@@ -1825,7 +1992,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, |
1825 | 1992 |
|
1826 | 1993 | # Return each sample of the signals with multiple samples per frame |
1827 | 1994 | else: |
1828 | | - if record_name.endswith('.edf'): |
| 1995 | + if record_name.endswith('.edf') or record_name.endswith('.wav'): |
1829 | 1996 | record.e_d_signal = _signal._rd_segment(record.file_name, |
1830 | 1997 | dir_name, pn_dir, |
1831 | 1998 | record.fmt, |
|
0 commit comments