ENH: limit the number of unique values reported by --counts to 1000 and add --all-counts to override

yarikoptic · yarikoptic · commit 5d1b19ae50a1 · 2016-04-21T15:14:58.000-04:00
diff --git a/bin/nib-ls b/bin/nib-ls
@@ -19,9 +19,11 @@ import numpy as np
 import nibabel as nib
 
 from math import ceil
+from collections import defaultdict
 from optparse import OptionParser, Option
 from io import StringIO
 from nibabel.py3k import asunicode
+from nibabel.externals.six.moves import xrange
 
 __author__ = 'Yaroslav Halchenko'
 __copyright__ = 'Copyright (c) 2011-2016 Yaroslav Halchenko ' \
@@ -31,7 +33,13 @@ __license__ = 'MIT'
 
 # global verbosity switch
 verbose_level = 0
+MAX_UNIQUE = 1000  # max unique values reported for --counts; NOTE(review): proc_file compares against a literal 1000, keep in sync
 
+def _err(msg=None):
+    """Return `msg` (default: 'error') prefixed with '!' to flag an error in the output table"""
+    # only None triggers the generic text; any other value (even '') is used as given
+    text = 'error' if msg is None else msg
+    return '!' + text
 
 def verbose(l, msg):
     """Print `s` if `l` is less than the `verbose_level`
@@ -159,6 +167,10 @@ def get_opt_parser():
                help="Output counts - number of entries for each numeric value "
                     "(useful for int ROI maps)"),
 
+        Option("--all-counts",
+               action="store_true", dest='all_counts', default=False,
+               help="Output all counts, even if number of unique values > %d" % MAX_UNIQUE),
+
         Option("-z", "--zeros",
                action="store_true", dest='stats_zeros', default=False,
                help="Include zeros into output basic data statistics (--stats, --counts)"),
@@ -210,7 +222,7 @@ def proc_file(f, opts):
             try:
                 row += [str(h[f])]
             except (KeyError, ValueError):
-                row += ['error']
+                row += [_err()]
 
     try:
         if (hasattr(h, 'get_qform') and hasattr(h, 'get_sform') and
@@ -223,26 +235,34 @@ def proc_file(f, opts):
         if isinstance(h, nib.AnalyzeHeader):
             row += ['']
         else:
-            row += ['error']
+            row += [_err()]
 
     if opts.stats or opts.counts:
         # We are doomed to load data
         try:
             d = vol.get_data()
             if not opts.stats_zeros:
                 d = d[np.nonzero(d)]
+            else:
+                # at least flatten it -- functionality below doesn't
+                # depend on the original shape, so let's use a flat view
+                d = d.reshape(-1)
             if opts.stats:
                 # just # of elements
                 row += ["@l[%d]" % np.prod(d.shape)]
                 # stats
                 row += [len(d) and '@l[%.2g, %.2g]' % (np.min(d), np.max(d)) or '-']
             if opts.counts:
                 items, inv = np.unique(d, return_inverse=True)
-                freq = np.bincount(inv)
-                row += ["@l" + " ".join("%g:%d" % (i, f) for i, f in zip(items, freq))]
+                if len(items) > 1000 and not opts.all_counts:
+                    counts = _err("%d uniques. Use --all-counts" % len(items))
+                else:
+                    freq = np.bincount(inv)
+                    counts = " ".join("%g:%d" % (i, f) for i, f in zip(items, freq))
+                row += ["@l" + counts]
         except IOError as e:
             verbose(2, "Failed to obtain stats/counts -- %s" % str(e))
-            row += ['error']
+            row += [_err()]
     return row
 
 
diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py
@@ -69,13 +69,14 @@ def test_nib_ls():
     yield check_nib_ls_example4d
     yield check_nib_ls_example4d, \
         ['-H', 'dim,bitpix'], " \[  4 128  96  24   2   1   1   1\] 16"
-    yield check_nib_ls_example4d, ['-c'], "", " 2:3 3:2 4:1 5:1.*"
+    yield check_nib_ls_example4d, ['-c'], "", " !1030 uniques. Use --all-counts"
+    yield check_nib_ls_example4d, ['-c', '--all-counts'], "", " 2:3 3:2 4:1 5:1.*"
     # both stats and counts
     yield check_nib_ls_example4d, \
-        ['-c', '-s'], "", " \[229725\] \[2, 1.2e\+03\] 2:3 3:2 4:1 5:1.*"
+        ['-c', '-s', '--all-counts'], "", " \[229725\] \[2, 1.2e\+03\] 2:3 3:2 4:1 5:1.*"
     # and must not error out if we allow for zeros
     yield check_nib_ls_example4d, \
-        ['-c', '-s', '-z'], "", " \[589824\] \[0, 1.2e\+03\] 0:360099 2:3 3:2 4:1 5:1.*"
+        ['-c', '-s', '-z', '--all-counts'], "", " \[589824\] \[0, 1.2e\+03\] 0:360099 2:3 3:2 4:1 5:1.*"
 
 
 @script_test
@@ -120,7 +121,7 @@ def test_nib_ls_multiple():
             '[128,  96,  24,   2] 2.00x2.00x2.20x2000.00  #exts: 2 sform [229725] [2, 1.2e+03]',
             '[ 32,  20,  12,   2] 2.00x2.00x2.20x2000.00  #exts: 2 sform [15360]  [46, 7.6e+02]',
             '[ 18,  28,  29]      9.00x8.00x7.00                         [14616]  [0.12, 93]',
-            '[ 91, 109,  91]      2.00x2.00x2.00                           error'
+            '[ 91, 109,  91]      2.00x2.00x2.00                          !error'
         ]
     )