@@ -39,31 +39,78 @@ def get_opt_parser():
3939 Option ("-H" , "--header-fields" ,
4040 dest = "header_fields" , default = 'all' ,
4141 help = "Header fields (comma separated) to be printed as well (if present)" ),
42+
43+ Option ("--ma" , "--data-max-abs-diff" ,
44+ dest = "data_max_abs_diff" ,
45+ type = float ,
46+ default = 0.0 ,
47+ help = "Maximal absolute difference in data between files to tolerate." ),
48+
49+ Option ("--mr" , "--data-max-rel-diff" ,
50+ dest = "data_max_rel_diff" ,
51+ type = float ,
52+ default = 0.0 ,
53+ help = "Maximal relative difference in data between files to tolerate."
54+ " If --data-max-abs-diff is also specified, only the data points "
55+ " with absolute difference greater than that value would be "
56+ " considered for relative difference check." ),
57+ Option ("--dt" , "--datatype" ,
58+ dest = "dtype" ,
59+ default = np .float64 ,
60+ help = "Enter a numpy datatype such as 'float32'." )
4261 ])
4362
4463 return p
4564
4665
4766def are_values_different (* values ):
48- """Generically compares values, returns true if different"""
49- value0 = values [0 ]
50- values = values [1 :] # to ensure that the first value isn't compared with itself
51-
52- for value in values :
53- try : # we sometimes don't want NaN values
54- if np .any (np .isnan (value0 )) and np .any (np .isnan (value )): # if they're both NaN
55- break
56- elif np .any (np .isnan (value0 )) or np .any (np .isnan (value )): # if only 1 is NaN
57- return True
67+ """Generically compare values, return True if different
5868
59- except TypeError :
60- pass
69+ Note that this comparison targets the reporting of header differences,
70+ so it has the following specifics:
71+ - even a difference in data types is considered a difference, i.e. 1 != 1.0
72+ - nans are considered to be the "same", although generally nan != nan
73+ """
74+ value0 = values [0 ]
6175
76+ # to not recompute over again
77+ if isinstance (value0 , np .ndarray ):
78+ try :
79+ # np.asarray for elderly numpys, e.g. 1.7.1 where for
80+ # degenerate arrays (shape ()) it would return a pure scalar
81+ value0_nans = np .asanyarray (np .isnan (value0 ))
82+ value0_nonnans = np .asanyarray (np .logical_not (value0_nans ))
83+ # if value0_nans.size == 1:
84+ # import pdb; pdb.set_trace()
85+ if not np .any (value0_nans ):
86+ value0_nans = None
87+ except TypeError as exc :
88+ str_exc = str (exc )
89+ # Not implemented in numpy 1.7.1
90+ if "not supported" in str_exc or "ot implemented" in str_exc :
91+ value0_nans = None
92+ else :
93+ raise
94+
95+ for value in values [1 :]:
6296 if type (value0 ) != type (value ): # if types are different, then we consider them different
6397 return True
6498 elif isinstance (value0 , np .ndarray ):
65- return np .any (value0 != value )
66-
99+ if value0 .dtype != value .dtype or \
100+ value0 .shape != value .shape :
101+ return True
102+ # there might be nans and they need special treatment
103+ if value0_nans is not None :
104+ value_nans = np .isnan (value )
105+ if np .any (value0_nans != value_nans ):
106+ return True
107+ if np .any (value0 [value0_nonnans ] != value [value0_nonnans ]):
108+ return True
109+ elif np .any (value0 != value ):
110+ return True
111+ elif value0 is np .nan :
112+ if value is not np .nan :
113+ return True
67114 elif value0 != value :
68115 return True
69116
@@ -101,8 +148,8 @@ def get_headers_diff(file_headers, names=None):
101148 return difference
102149
103150
104- def get_data_diff (files ):
105- """Get difference between md5 values
151+ def get_data_hash_diff (files , dtype = np . float64 ):
152+ """Get difference between md5 values of data
106153
107154 Parameters
108155 ----------
@@ -115,7 +162,7 @@ def get_data_diff(files):
115162 """
116163
117164 md5sums = [
118- hashlib .md5 (np .ascontiguousarray (nib .load (f ).get_data (), dtype = np . float32 )).hexdigest ()
165+ hashlib .md5 (np .ascontiguousarray (nib .load (f ).get_fdata ( dtype = dtype ) )).hexdigest ()
119166 for f in files
120167 ]
121168
@@ -125,6 +172,86 @@ def get_data_diff(files):
125172 return md5sums
126173
127174
def _get_pair_data_diff(d1, d2, max_abs, max_rel, dtype):
    """Return the difference record for one pair of arrays, or None if none.

    Returns ``{'CMP': "incompat"}`` when the shapes differ, an OrderedDict
    with keys 'abs' and 'rel' when some data point differs beyond the
    tolerances, and None otherwise.
    """
    if d1.shape != d2.shape:
        return {'CMP': "incompat"}

    abs_diff = np.abs(d1 - d2)
    # Only data points which actually differ can constitute a difference.
    # This also guarantees a non-zero denominator for the relative
    # difference (two values which differ cannot both be 0), and makes
    # identical or zero-size arrays come out as "no difference".
    candidates = abs_diff != 0
    if max_abs:
        candidates[abs_diff <= max_abs] = False

    if not np.any(candidates):
        return None

    max_abs_diff = np.max(abs_diff)
    mean_abs = (np.abs(d1[candidates]) + np.abs(d2[candidates])) * 0.5
    rel_diff = abs_diff[candidates] / mean_abs
    max_rel_diff = np.max(rel_diff)

    if max_rel:
        # rel_diff was computed on the sub-selected values only, so the
        # verdicts are scattered back into a full-size mask.
        # NOTE: `not (x <= thr)` rather than `x > thr` so that NaNs keep
        # counting as differences.
        exceeding = np.zeros_like(candidates)
        exceeding[candidates] = np.logical_not(rel_diff <= max_rel)
        if not np.any(exceeding):
            return None

    diff_rec = OrderedDict()  # so that abs goes before relative
    diff_rec['abs'] = max_abs_diff.astype(dtype)
    diff_rec['rel'] = max_rel_diff.astype(dtype)
    return diff_rec


def get_data_diff(files, max_abs=0, max_rel=0, dtype=np.float64):
    """Get difference between data

    Parameters
    ----------
    files: list of (str or ndarray)
      If list of strings is provided -- they must be existing file names
    max_abs: float, optional
      Maximal absolute difference to tolerate.
    max_rel: float, optional
      Maximal relative difference to tolerate, computed per data point as
      `abs(d1 - d2) / (0.5 * (abs(d1) + abs(d2)))`.
      If `max_abs` is specified, then those data points with an absolute
      difference not exceeding it are not considered for relative
      difference testing
    dtype: numpy dtype, optional
      Datatype to be used when extracting data from files; the reported
      maximal differences are cast to it as well

    Returns
    -------
    diffs: OrderedDict
      An ordered dict with a record per each file which has differences
      with subsequent files. Keys are 'DATA(diff N:)' where N is
      the 1-based index of the file. Each record is a list with one entry
      per file: None for the files not compared (the file itself and the
      preceding ones) and for the files without a difference beyond the
      tolerances, otherwise a difference record.
      Each difference record is an OrderedDict with keys 'abs' and 'rel'
      holding maximal absolute and relative differences between the pair,
      or the record {'CMP': 'incompat'} if data shapes are incompatible.

    Notes
    -----
    Data points whose difference is NaN never compare as being within a
    tolerance, so they are always reported as differences.
    """
    # all data has to stay in RAM since every pair of files gets compared
    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=dtype)
            for f in files]

    diffs = OrderedDict()
    for i, d1 in enumerate(data[:-1]):
        # placeholders for the non-compared files (this one and preceding)
        diffs1 = [None] * (i + 1)
        diffs1.extend(
            _get_pair_data_diff(d1, d2, max_abs, max_rel, dtype)
            for d2 in data[i + 1:]
        )

        if any(diffs1):
            diffs['DATA(diff %d:)' % (i + 1)] = diffs1

    return diffs
253+
254+
128255def display_diff (files , diff ):
129256 """Format header differences into a nice string
130257
@@ -140,21 +267,27 @@ def display_diff(files, diff):
140267 """
141268 output = ""
142269 field_width = "{:<15}"
270+ filename_width = "{:<53}"
143271 value_width = "{:<55}"
144272
145273 output += "These files are different.\n "
146- output += field_width .format ('Field' )
274+ output += field_width .format ('Field/File ' )
147275
148- for f in files :
149- output += value_width .format (os .path .basename (f ))
276+ for i , f in enumerate ( files , 1 ) :
277+ output += "%d:%s" % ( i , filename_width .format (os .path .basename (f ) ))
150278
151279 output += "\n "
152280
153281 for key , value in diff .items ():
154282 output += field_width .format (key )
155283
156284 for item in value :
157- item_str = str (item )
285+ if isinstance (item , dict ):
286+ item_str = ', ' .join ('%s: %s' % i for i in item .items ())
287+ elif item is None :
288+ item_str = '-'
289+ else :
290+ item_str = str (item )
158291 # Value might start/end with some invisible spacing characters so we
159292 # would "condition" it on both ends a bit
160293 item_str = re .sub ('^[ \t ]+' , '<' , item_str )
@@ -169,8 +302,39 @@ def display_diff(files, diff):
169302 return output
170303
171304
def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None,
         dtype=np.float64):
    """Collect header and data differences between files

    Parameters
    ----------
    files: list of str
      Names of at least two files, each loaded with `nib.load`
    header_fields: str, optional
      Comma separated names of the header fields to compare, or 'all' to
      compare all the fields found in the header of the first file
    data_max_abs_diff: float, optional
      Passed as `max_abs` into `get_data_diff`
    data_max_rel_diff: float, optional
      Passed as `max_rel` into `get_data_diff`
    dtype: numpy dtype, optional
      Passed into `get_data_hash_diff` and `get_data_diff`

    Returns
    -------
    The result of `get_headers_diff`, extended with a 'DATA(md5)' entry and
    the entries of `get_data_diff` whenever the data hashes differ and
    `get_data_diff` reports differences as well.
    """
    assert len(files) >= 2, "Please enter at least two files"

    headers = [nib.load(fname).header for fname in files]

    # 'all' signals "all fields"
    # TODO: header fields might vary across file types, thus prior sensing would be needed
    fields = headers[0].keys() if header_fields == 'all' else header_fields.split(',')

    differences = get_headers_diff(headers, fields)

    hash_diffs = get_data_hash_diff(files, dtype)
    if not hash_diffs:
        return differences

    # Hashes differ, so obtain the details. Those could come back empty
    # (e.g. all differences are within the tolerances), in which case the
    # difference in hashes is not reported either.
    detailed_diffs = get_data_diff(
        files,
        max_abs=data_max_abs_diff,
        max_rel=data_max_rel_diff,
        dtype=dtype,
    )
    if detailed_diffs:
        differences['DATA(md5)'] = hash_diffs
        differences.update(detailed_diffs)

    return differences
333+
334+
172335def main (args = None , out = None ):
173336 """Getting the show on the road"""
337+
174338 out = out or sys .stdout
175339 parser = get_opt_parser ()
176340 (opts , files ) = parser .parse_args (args )
@@ -181,27 +345,17 @@ def main(args=None, out=None):
181345 # suppress nibabel format-compliance warnings
182346 nib .imageglobals .logger .level = 50
183347
184- assert len (files ) >= 2 , "Please enter at least two files"
185-
186- file_headers = [nib .load (f ).header for f in files ]
187-
188- # signals "all fields"
189- if opts .header_fields == 'all' :
190- # TODO: header fields might vary across file types, thus prior sensing would be needed
191- header_fields = file_headers [0 ].keys ()
192- else :
193- header_fields = opts .header_fields .split (',' )
194-
195- diff = get_headers_diff (file_headers , header_fields )
196- data_diff = get_data_diff (files )
197-
198- if data_diff :
199- diff ['DATA(md5)' ] = data_diff
348+ files_diff = diff (
349+ files ,
350+ header_fields = opts .header_fields ,
351+ data_max_abs_diff = opts .data_max_abs_diff ,
352+ data_max_rel_diff = opts .data_max_rel_diff ,
353+ dtype = opts .dtype
354+ )
200355
201- if diff :
202- out .write (display_diff (files , diff ))
356+ if files_diff :
357+ out .write (display_diff (files , files_diff ))
203358 raise SystemExit (1 )
204-
205359 else :
206360 out .write ("These files are identical.\n " )
207361 raise SystemExit (0 )
0 commit comments