ENH: Add aliases for set/get_data_dtype on NIfTI images

effigies · effigies · commit c9b2d294efad · 2022-06-03T12:19:33.000-04:00
diff --git a/nibabel/nifti1.py b/nibabel/nifti1.py
@@ -1799,6 +1799,10 @@ class Nifti1Pair(analyze.AnalyzeImage):
     _meta_sniff_len = header_class.sizeof_hdr
     rw = True
 
+    # Dynamic dtype alias ('compat' or 'smallest') awaiting resolution, or None.
+    # If a _dtype_alias has been set, it can only be resolved by inspecting
+    # the data at serialization time (see get_data_dtype(finalize=True))
+    _dtype_alias = None
+
     def __init__(self, dataobj, affine, header=None,
                  extra=None, file_map=None, dtype=None):
         # Special carve-out for 64 bit integers
@@ -2043,6 +2047,127 @@ def set_sform(self, affine, code=None, **kwargs):
             else:
                 self._affine[:] = self._header.get_best_affine()
 
+    def set_data_dtype(self, datatype):
+        """ Set numpy dtype for data from code, dtype, type or alias
+
+        Using :py:class:`int` or ``"int"`` is disallowed, as these types
+        will be interpreted as ``np.int64``, which is almost never desired.
+        ``np.int64`` is permitted for those intent on making poor choices.
+
+        The following aliases are defined to allow for flexible specification:
+
+          * ``'mask'`` - Alias for ``uint8``
+          * ``'compat'`` - The smallest Analyze-compatible datatype
+            (``uint8``, ``int16``, ``int32``, ``float32``)
+          * ``'smallest'`` - The smallest Analyze-compatible integer
+            (``uint8``, ``int16``, ``int32``)
+
+        Dynamic aliases are resolved when ``get_data_dtype()`` is called
+        with a ``finalize=True`` flag. Until then, these aliases are not
+        written to the header and will not persist to new images.
+
+        If the requested data type is rejected, the image is left as it was:
+        a dynamic alias set by an earlier call remains pending.
+
+        Parameters
+        ----------
+        datatype : code, dtype, type or str
+            Data type specifier, or one of the aliases listed above.
+
+        Raises
+        ------
+        HeaderDataError
+            If ``datatype`` is not recognized, or is known but not supported.
+        ValueError
+            If ``datatype`` is :py:class:`int` or ``"int"``.
+
+        Examples
+        --------
+        >>> ints = np.arange(24, dtype='i4').reshape((2,3,4))
+
+        >>> img = Nifti1Image(ints, np.eye(4))
+        >>> img.set_data_dtype(np.uint8)
+        >>> img.get_data_dtype()
+        dtype('uint8')
+        >>> img.set_data_dtype('mask')
+        >>> img.get_data_dtype()
+        dtype('uint8')
+        >>> img.set_data_dtype('compat')
+        >>> img.get_data_dtype()
+        'compat'
+        >>> img.get_data_dtype(finalize=True)
+        dtype('uint8')
+        >>> img.get_data_dtype()
+        dtype('uint8')
+        >>> img.set_data_dtype('smallest')
+        >>> img.get_data_dtype()
+        'smallest'
+        >>> img.get_data_dtype(finalize=True)
+        dtype('uint8')
+        >>> img.get_data_dtype()
+        dtype('uint8')
+
+        Note that floating point values will not be coerced to ``int``
+
+        >>> floats = np.arange(24, dtype='f4').reshape((2,3,4))
+        >>> img = Nifti1Image(floats, np.eye(4))
+        >>> img.set_data_dtype('smallest')
+        >>> img.get_data_dtype(finalize=True) #doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+           ...
+        ValueError: Cannot automatically cast array (of type float32) to an integer
+        type with fewer than 64 bits. Please set_data_dtype() to an explicit data type.
+
+        A rejected data type leaves a previously set alias in place
+
+        >>> arr = np.arange(1000, 1024, dtype='i4').reshape((2,3,4))
+        >>> img = Nifti1Image(arr, np.eye(4))
+        >>> img.set_data_dtype('smallest')
+        >>> img.set_data_dtype('implausible') #doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+           ...
+        HeaderDataError: data dtype "implausible" not recognized
+        >>> img.get_data_dtype()
+        'smallest'
+        >>> img.set_data_dtype('none') #doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+           ...
+        HeaderDataError: data dtype "none" known but not supported
+        >>> img.set_data_dtype(np.void) #doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+           ...
+        HeaderDataError: data dtype "<type 'numpy.void'>" known but not supported
+        >>> img.set_data_dtype('int') #doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+           ...
+        ValueError: Invalid data type 'int'. Specify a sized integer, e.g., 'uint8' or numpy.int16.
+        >>> img.set_data_dtype(int) #doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+           ...
+        ValueError: Invalid data type 'int'. Specify a sized integer, e.g., 'uint8' or numpy.int16.
+        >>> img.set_data_dtype('int64')
+        >>> img.get_data_dtype() == np.dtype('int64')
+        True
+        """
+        # Numpy dtype comparison can fail in odd ways, check for aliases only if str
+        if isinstance(datatype, str):
+            # Static aliases
+            if datatype == 'mask':
+                datatype = 'u1'
+            # Dynamic aliases
+            elif datatype in ('compat', 'smallest'):
+                self._dtype_alias = datatype
+                return
+
+        # An explicit dtype supersedes any pending alias. Clear the alias before
+        # delegating, but put it back if the parent class rejects ``datatype``,
+        # so that a failed call does not silently discard the alias.
+        pending_alias = self._dtype_alias
+        self._dtype_alias = None
+        try:
+            super().set_data_dtype(datatype)
+        except Exception:
+            self._dtype_alias = pending_alias
+            raise
+
+    def get_data_dtype(self, finalize=False):
+        """ Get numpy dtype for data, or the pending dtype alias
+
+        Parameters
+        ----------
+        finalize : bool, optional
+            Only relevant when ``set_data_dtype()`` has been called with a
+            dynamic alias. If ``False`` (the default), return the alias
+            itself. If ``True``, determine the appropriate dtype from the
+            image data object and set the final dtype in the header
+            (which clears the alias) before returning it.
+
+        Returns
+        -------
+        datatype : dtype or str
+            The data dtype reported by the parent class, or the alias
+            string while an alias is pending and ``finalize`` is ``False``.
+
+        Raises
+        ------
+        ValueError
+            If ``finalize`` is ``True`` and either the pending alias is not
+            a known one or no candidate dtype can hold the image data.
+        """
+        alias = self._dtype_alias
+        if alias is None:
+            # Nothing pending, defer to the parent class
+            return super().get_data_dtype()
+        if not finalize:
+            return alias
+
+        # Per alias: extra arguments for the dtype search, and the wording
+        # used to describe the target in the error message below
+        if alias == 'compat':
+            search_kwargs, descrip = {}, "an Analyze-compatible dtype"
+        elif alias == 'smallest':
+            search_kwargs, descrip = {'ftypes': ()}, "an integer type with fewer than 64 bits"
+        else:
+            raise ValueError(f"Unknown dtype alias {self._dtype_alias}.")
+
+        resolved = _get_smallest_dtype(self._dataobj, **search_kwargs)
+        if resolved is None:
+            dt = get_obj_dtype(self._dataobj)
+            raise ValueError(f"Cannot automatically cast array (of type {dt}) to {descrip}."
+                             " Please set_data_dtype() to an explicit data type.")
+
+        self.set_data_dtype(resolved)  # Clears the alias
+        return super().get_data_dtype()
+
     def as_reoriented(self, ornt):
         """Apply an orientation change and return a new image
 
@@ -2136,3 +2261,52 @@ def save(img, filename):
         Nifti1Image.instance_to_filename(img, filename)
     except ImageFileError:
         Nifti1Pair.instance_to_filename(img, filename)
+
+
+def _get_smallest_dtype(
+        arr,
+        itypes=(np.uint8, np.int16, np.int32),
+        ftypes=(np.float32,),
+        ):
+    """ Return the smallest "sensible" dtype that will hold the array data
+
+    The purpose of this function is to support automatic type selection
+    for serialization, so "sensible" here means well-supported in the NIfTI-1 world.
+
+    For integer data, select among ``itypes`` (by default uint8, int16 and int32).
+    For floating point data, select among ``ftypes`` (by default only float32).
+
+    The test is for min/max range only; precision is not considered.
+    NaN and infinite values are ignored when computing the range, as every
+    floating point type can represent them. If no values constrain the range
+    (empty or entirely non-finite data), the first candidate type is returned.
+
+    Parameters
+    ----------
+    arr : array-like
+        Data to find a dtype for.
+    itypes : sequence of dtype specifiers, optional
+        Candidate types for integer data, tried in the order given.
+    ftypes : sequence of dtype specifiers, optional
+        Candidate types for floating point data, tried in the order given.
+        Pass an empty sequence to reject floating point data.
+
+    Returns
+    -------
+    dtype : numpy dtype or None
+        The first candidate whose range covers the data, or ``None`` if the
+        candidates do not suffice or the data are neither integer nor
+        floating point.
+
+    Examples
+    --------
+    >>> _get_smallest_dtype(np.array([0, 1]))
+    dtype('uint8')
+    >>> _get_smallest_dtype(np.array([-1, 1]))
+    dtype('int16')
+    >>> _get_smallest_dtype(np.array([0, 256]))
+    dtype('int16')
+    >>> _get_smallest_dtype(np.array([-65536, 65536]))
+    dtype('int32')
+    >>> _get_smallest_dtype(np.array([-2147483648, 2147483648]))
+    >>> _get_smallest_dtype(np.array([], dtype='i8'))
+    dtype('uint8')
+    >>> _get_smallest_dtype(np.array([1.]))
+    dtype('float32')
+    >>> _get_smallest_dtype(np.array([np.nan, np.inf, 1.]))
+    dtype('float32')
+    >>> _get_smallest_dtype(np.array([1.]), ftypes=())
+    >>> _get_smallest_dtype(np.array([2. ** 1000]))
+    >>> _get_smallest_dtype(np.array([2. ** 1000], dtype=np.longdouble))
+    >>> _get_smallest_dtype(np.array([1+0j]))
+    """
+    arr = np.asanyarray(arr)
+    if np.issubdtype(arr.dtype, np.floating):
+        test_dts = ftypes
+        info = np.finfo
+        # NaN compares false against any bound and +/-inf exceeds every finite
+        # maximum, yet all floating point types can store them, so only the
+        # finite values are allowed to constrain the range.
+        finite = np.isfinite(arr)
+        if not finite.all():
+            arr = arr[finite]
+    elif np.issubdtype(arr.dtype, np.integer):
+        test_dts = itypes
+        info = np.iinfo
+    else:
+        return None
+
+    if arr.size == 0:
+        # min/max are undefined without values; any candidate can hold the data
+        first = next(iter(test_dts), None)
+        return None if first is None else np.dtype(first)
+
+    mn, mx = np.min(arr), np.max(arr)
+    for dt in test_dts:
+        dtinfo = info(dt)
+        if dtinfo.min <= mn and mx <= dtinfo.max:
+            return np.dtype(dt)
+    return None