more compatibility tweaks: move channel parsing into _parse_otb_plus_metadata [ci skip]

drammock · drammock · commit 05a02224ab8e · 2025-08-15T16:47:11.000-05:00
diff --git a/mne/io/otb/otb.py b/mne/io/otb/otb.py
@@ -14,6 +14,9 @@
 from ...utils import _check_fname, fill_doc, logger, verbose, warn
 from ..base import BaseRaw
 
+# the only non-data channel IDs (besides "AUX*", matched by case-insensitive prefix)
+_NON_DATA_CHS = ("Quaternion", "BufferChannel", "RampChannel", "LoadCellChannel")
+
 
 def _parse_date(dt):
     return datetime.fromisoformat(dt).date()
@@ -23,8 +26,8 @@ def _parse_patient_xml(tree):
     """Convert an ElementTree to a dict."""
 
     def _parse_sex(sex):
-        # TODO For devices that generate `.otb+` files, the recording GUI only has M or
-        # F options and choosing one is mandatory. For `.otb4` the field is optional.
+        # For devices that generate `.otb+` files, the recording GUI only has M or F
+        # options and choosing one is mandatory. For `.otb4` the field is optional.
         return dict(m=1, f=2)[sex.lower()[0]] if sex else 0  # 0 means "unknown"
 
     subj_info_mapping = (
@@ -45,17 +48,71 @@ def _parse_sex(sex):
 
 def _parse_otb_plus_metadata(metadata, extras_metadata):
     assert metadata.tag == "Device"
+    # device-level metadata
     sfreq = float(metadata.attrib["SampleFrequency"])
     n_chan = int(metadata.attrib["DeviceTotalChannels"])
     bit_depth = int(metadata.attrib["ad_bits"])
-    model = metadata.attrib["Name"]
-    adc_range = 3.3
+    device_name = metadata.attrib["Name"]
+    adc_range = 3.3  # TODO is this V or mV ??
+    # containers
+    gains = np.full(n_chan, np.nan)
+    ch_names = list()
+    ch_types = list()
+    highpass = list()
+    lowpass = list()
+    # check in advance where we'll need to append indices to uniquify ch_names
+    n_ch_by_type = Counter([ch.get("ID") for ch in metadata.iter("Channel")])
+    dupl_ids = [k for k, v in n_ch_by_type.items() if v > 1]
+    # iterate over adapters & channels to extract gain, filters, names, etc
+    for adapter in metadata.iter("Adapter"):
+        adapter_id = adapter.get("ID")
+        adapter_gain = float(adapter.get("Gain"))
+        ch_offset = int(adapter.get("ChannelStartIndex"))
+        # we only really care about lowpass/highpass on the data channels
+        if adapter_id not in ("AdapterQuaternions", "AdapterControl"):
+            highpass.append(float(adapter.get("HighPassFilter")))
+            lowpass.append(float(adapter.get("LowPassFilter")))
+        for ch in adapter.iter("Channel"):
+            ix = int(ch.get("Index"))
+            ch_id = ch.get("ID")
+            # # see if we can parse the adapter name to get row,col info
+            # pattern = re.compile(
+            #     # connector type   inter-elec dist    grid rows    grid cols
+            #     r"(?:[a-zA-Z]+)(?:(?P<ied>\d+)MM)(?P<row>\d{2})(?P<col>\d{2})"
+            # )
+            # if match := pattern.match(ch_id):
+            #     col = ix % int(match["col"])
+            #     row = ix // int(match["row"])
+            #     ch_name = f"EMG_{adapter_ix}({row:02},{col:02})"
+            # elif ch_id
+            # else:
+            #     ch_name = f"EMG_{ix + adapter_ch_offset:03}"
+            # ch_names.append(ch_name)
+            ch_names.append(f"{ch_id}_{ix}" if ch_id in dupl_ids else ch_id)
+            # store gains
+            gain_ix = ix + ch_offset
+            gains[gain_ix] = float(ch.get("Gain")) * adapter_gain
+            # TODO verify ch_type for quats, buffer channel, and ramp channel
+            ch_types.append(
+                "misc"
+                if ch_id in _NON_DATA_CHS or ch_id.lower().startswith("aux")
+                else "emg"
+            )
+    # parse subject info
+    subject_info = _parse_patient_xml(extras_metadata)
+
     return dict(
         sfreq=sfreq,
         n_chan=n_chan,
         bit_depth=bit_depth,
-        model=model,
+        device_name=device_name,
         adc_range=adc_range,
+        subject_info=subject_info,
+        gains=gains,
+        ch_names=ch_names,
+        ch_types=ch_types,
+        highpass=highpass,
+        lowpass=lowpass,
     )
 
 
@@ -90,14 +147,6 @@ def __init__(self, fname, *, verbose=None):
 
         self.preload = True  # lazy loading not supported
 
-        highpass = list()
-        lowpass = list()
-        ch_names = list()
-        ch_types = list()
-
-        # these are the only non-data channel IDs (besides "AUX*", handled via glob)
-        NON_DATA_CHS = ("Quaternion", "BufferChannel", "RampChannel", "LoadCellChannel")
-
         with tarfile.open(fname, "r") as fid:
             fnames = fid.getnames()
             # the .sig file(s) are the binary channel data.
@@ -132,8 +181,14 @@ def __init__(self, fname, *, verbose=None):
         sfreq = metadata["sfreq"]
         n_chan = metadata["n_chan"]
         bit_depth = metadata["bit_depth"]
-        model = metadata["model"]
+        device_name = metadata["device_name"]
         adc_range = metadata["adc_range"]
+        subject_info = metadata["subject_info"]
+        ch_names = metadata["ch_names"]
+        ch_types = metadata["ch_types"]
+        gains = metadata["gains"]
+        highpass = metadata["highpass"]
+        lowpass = metadata["lowpass"]
 
         if bit_depth == 16:
             _dtype = np.int16
@@ -150,49 +205,8 @@ def __init__(self, fname, *, verbose=None):
                 "If this file can be successfully read with other software (i.e. it is "
                 "not corrupted), please open an issue at "
                 "https://github.com/mne-tools/mne-emg/issues so we can add support for "
-                "your use case."
+                "your file."
             )
-        gains = np.full(n_chan, np.nan)
-        # check in advance where we'll need to append indices to uniquify ch_names
-        n_ch_by_type = Counter([ch.get("ID") for ch in metadata_tree.iter("Channel")])
-        dupl_ids = [k for k, v in n_ch_by_type.items() if v > 1]
-        # iterate over adapters & channels to extract gain, filters, names, etc
-        for adapter_ix, adapter in enumerate(metadata_tree.iter("Adapter")):
-            adapter_ch_offset = int(adapter.get("ChannelStartIndex"))
-            adapter_gain = float(adapter.get("Gain"))
-            # we only care about lowpass/highpass on the data channels
-            # TODO verify these two are the only non-data adapter types
-            if adapter.get("ID") not in ("AdapterQuaternions", "AdapterControl"):
-                highpass.append(float(adapter.get("HighPassFilter")))
-                lowpass.append(float(adapter.get("LowPassFilter")))
-
-            for ch in adapter.iter("Channel"):
-                ix = int(ch.get("Index"))
-                ch_id = ch.get("ID")
-                # # see if we can parse the adapter name to get row,col info
-                # pattern = re.compile(
-                #     # connector type   inter-elec dist    grid rows    grid cols
-                #     r"(?:[a-zA-Z]+)(?:(?P<ied>\d+)MM)(?P<row>\d{2})(?P<col>\d{2})"
-                # )
-                # if match := pattern.match(ch_id):
-                #     col = ix % int(match["col"])
-                #     row = ix // int(match["row"])
-                #     ch_name = f"EMG_{adapter_ix}({row:02},{col:02})"
-                # elif ch_id
-                # else:
-                #     ch_name = f"EMG_{ix + adapter_ch_offset:03}"
-                # ch_names.append(ch_name)
-                ch_names.append(f"{ch_id}_{ix}" if ch_id in dupl_ids else ch_id)
-                # store gains
-                gains[ix + adapter_ch_offset] = float(ch.get("Gain")) * adapter_gain
-                # TODO verify ch_type for quats, buffer channel, and ramp channel
-                ch_types.append(
-                    "misc"
-                    if ch_id in NON_DATA_CHS or ch_id.lower().startswith("aux")
-                    else "emg"
-                )
-        assert np.isfinite(gains).all()
-
         # compute number of samples
         n_samples, extra = divmod(data_size_bytes, (bit_depth // 8) * n_chan)
         if extra != 0:
@@ -202,6 +216,9 @@ def __init__(self, fname, *, verbose=None):
             )
         n_samples = int(n_samples)
 
+        # validate gains
+        assert np.isfinite(gains).all()
+
         # check filter freqs. Can vary by adapter, so in theory we might get different
         # filters for different *data* channels (not just different between data and
         # misc/aux/whatever).
@@ -220,8 +237,7 @@ def __init__(self, fname, *, verbose=None):
 
         # create info
         info = create_info(ch_names=ch_names, ch_types=ch_types, sfreq=sfreq)
-        subject_info = _parse_patient_xml(extras_tree)
-        device_info = dict(type="OTB", model=model)  # other allowed keys: serial
+        device_info = dict(type="OTB", model=device_name)  # other allowed keys: serial
         meas_date = extras_tree.find("time")
         site = extras_tree.find("place")
         if site is not None:
@@ -230,8 +246,8 @@ def __init__(self, fname, *, verbose=None):
         with info._unlock():
             info["highpass"] = highpass
             info["lowpass"] = lowpass
-            for _ch in info["chs"]:
-                cal = 1 / 2**bit_depth / gains[ix + adapter_ch_offset]
+            for ix, _ch in enumerate(info["chs"]):
+                cal = 1 / 2**bit_depth / gains[ix]
                 _ch.update(cal=cal, range=adc_range)
             if meas_date is not None:
                 info["meas_date"] = datetime.fromisoformat(meas_date.text).astimezone(
@@ -245,7 +261,7 @@ def __init__(self, fname, *, verbose=None):
                 float(dur.text), n_samples / sfreq, decimal=3
             )
 
-        # TODO other fields in extras_tree:
+        # TODO other fields in extras_tree for otb+ format:
         # protocol_code, pathology, commentsPatient, comments
 
         # TODO parse files markers_0.xml, markers_1.xml as annotations?
@@ -266,7 +282,7 @@ def __init__(self, fname, *, verbose=None):
             last_samps=(n_samples - 1,),
             filenames=[fname],
             orig_format=orig_format,
-            # orig_units="V",  # TODO maybe not needed
+            # orig_units=dict(...),  # TODO needed?
             raw_extras=[raw_extras],
             verbose=verbose,
         )
@@ -292,11 +308,12 @@ def _preload_data(self, preload):
             else:
                 _data = np.concatenate(_data, axis=0)
 
+        # TODO: without this fudge factor the scale of the signals seems way too high
+        # (sample data channels show a dynamic range of 0.2 - 3.3 V). Puzzlingly, the
+        # MATLAB code uses a fudge factor of 1e3 (not 1e-3).
+        fudge_factor = 1e-3
         cals = np.array(
-            [
-                _ch["cal"] * _ch["range"] * _ch.get("scale", 1.0)
-                for _ch in self.info["chs"]
-            ]
+            [_ch["cal"] * _ch["range"] * fudge_factor for _ch in self.info["chs"]]
         )
         self._data = _data * cals[:, np.newaxis]