@@ -38,32 +38,82 @@ def get_opt_parser():
3838
3939 Option ("-H" , "--header-fields" ,
4040 dest = "header_fields" , default = 'all' ,
41- help = "Header fields (comma separated) to be printed as well (if present)" ),
41+ help = "Header fields (comma separated) to be printed as well"
42+ " (if present)" ),
43+
44+ Option ("--ma" , "--data-max-abs-diff" ,
45+ dest = "data_max_abs_diff" ,
46+ type = float ,
47+ default = 0.0 ,
48+ help = "Maximal absolute difference in data between files"
49+ " to tolerate." ),
50+
51+ Option ("--mr" , "--data-max-rel-diff" ,
52+ dest = "data_max_rel_diff" ,
53+ type = float ,
54+ default = 0.0 ,
55+ help = "Maximal relative difference in data between files to"
56+ " tolerate. If --data-max-abs-diff is also specified,"
57+ " only the data points with absolute difference greater"
58+ " than that value would be considered for relative"
59+ " difference check." ),
60+ Option ("--dt" , "--datatype" ,
61+ dest = "dtype" ,
62+ default = np .float64 ,
63+ help = "Enter a numpy datatype such as 'float32'." )
4264 ])
4365
4466 return p
4567
4668
4769def are_values_different (* values ):
48- """Generically compares values, returns true if different"""
49- value0 = values [0 ]
50- values = values [1 :] # to ensure that the first value isn't compared with itself
51-
52- for value in values :
53- try : # we sometimes don't want NaN values
54- if np .any (np .isnan (value0 )) and np .any (np .isnan (value )): # if they're both NaN
55- break
56- elif np .any (np .isnan (value0 )) or np .any (np .isnan (value )): # if only 1 is NaN
57- return True
70+ """Generically compare values, return True if different
5871
59- except TypeError :
60- pass
72+ Note that comparison is targeting reporting of comparison of the headers
73+ so has following specifics:
74+ - even a difference in data types is considered a difference, i.e. 1 != 1.0
75+ - nans are considered to be the "same", although generally nan != nan
76+ """
77+ value0 = values [0 ]
6178
79+ # to not recompute over again
80+ if isinstance (value0 , np .ndarray ):
81+ try :
82+ # np.asarray for elderly numpys, e.g. 1.7.1 where for
83+ # degenerate arrays (shape ()) it would return a pure scalar
84+ value0_nans = np .asanyarray (np .isnan (value0 ))
85+ value0_nonnans = np .asanyarray (np .logical_not (value0_nans ))
86+ # if value0_nans.size == 1:
87+ # import pdb; pdb.set_trace()
88+ if not np .any (value0_nans ):
89+ value0_nans = None
90+ except TypeError as exc :
91+ str_exc = str (exc )
92+ # Not implemented in numpy 1.7.1
93+ if "not supported" in str_exc or "ot implemented" in str_exc :
94+ value0_nans = None
95+ else :
96+ raise
97+
98+ for value in values [1 :]:
6299 if type (value0 ) != type (value ): # if types are different, then we consider them different
63100 return True
64101 elif isinstance (value0 , np .ndarray ):
65- return np .any (value0 != value )
66-
102+ if value0 .dtype != value .dtype or \
103+ value0 .shape != value .shape :
104+ return True
105+ # there might be nans and they need special treatment
106+ if value0_nans is not None :
107+ value_nans = np .isnan (value )
108+ if np .any (value0_nans != value_nans ):
109+ return True
110+ if np .any (value0 [value0_nonnans ] != value [value0_nonnans ]):
111+ return True
112+ elif np .any (value0 != value ):
113+ return True
114+ elif value0 is np .nan :
115+ if value is not np .nan :
116+ return True
67117 elif value0 != value :
68118 return True
69119
@@ -101,8 +151,8 @@ def get_headers_diff(file_headers, names=None):
101151 return difference
102152
103153
104- def get_data_diff (files ):
105- """Get difference between md5 values
154+ def get_data_hash_diff (files , dtype = np . float64 ):
155+ """Get difference between md5 values of data
106156
107157 Parameters
108158 ----------
@@ -115,7 +165,7 @@ def get_data_diff(files):
115165 """
116166
117167 md5sums = [
118- hashlib .md5 (np .ascontiguousarray (nib .load (f ).get_data (), dtype = np . float32 )).hexdigest ()
168+ hashlib .md5 (np .ascontiguousarray (nib .load (f ).get_fdata ( dtype = dtype ) )).hexdigest ()
119169 for f in files
120170 ]
121171
@@ -125,6 +175,86 @@ def get_data_diff(files):
125175 return md5sums
126176
127177
def get_data_diff(files, max_abs=0, max_rel=0, dtype=np.float64):
    """Get difference between data

    Every item of `files` is compared against every item that follows it.

    Parameters
    ----------
    files: list of (str or ndarray)
        If list of strings is provided -- they must be existing file names
    max_abs: float, optional
        Maximal absolute difference to tolerate.  A falsy value (0, None)
        disables the check.
    max_rel: float, optional
        Maximal relative difference to tolerate, where the relative
        difference of two values is `abs(d1 - d2) / mean(abs(d1), abs(d2))`.
        A falsy value (0, None) disables the check.
        If `max_abs` is specified, then those data points with an absolute
        difference not exceeding it are not considered for relative
        difference testing
    dtype: numpy dtype, optional
        Datatype to be used when extracting data from files, and for the
        reported 'abs' and 'rel' values

    Returns
    -------
    diffs: OrderedDict
        An ordered dict with a record (keyed ``'DATA(diff <N>:)'``, N being
        the 1-based index of the file) per each file which has differences
        with any of the subsequent files.  Each record is a list with one
        entry per file: None for files which were not compared (the file
        itself and the ones preceding it) or which show no difference beyond
        the tolerated thresholds, otherwise a difference record.
        Each difference record is an OrderedDict with keys 'abs' and 'rel'
        holding the maximal absolute and relative differences, or the
        dict ``{'CMP': 'incompat'}`` if the shapes are incompatible.
    """

    # we are doomed to keep them in RAM now
    data = [
        f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=dtype)
        for f in files
    ]
    diffs = OrderedDict()
    for i, d1 in enumerate(data[:-1]):
        # populate empty entries for non-compared
        diffs1 = [None] * (i + 1)

        for d2 in data[i + 1:]:
            if d1.shape != d2.shape:
                diffs1.append({'CMP': "incompat"})
                continue

            abs_diff = np.abs(d1 - d2)
            mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5
            # Only the points which actually differ can constitute a
            # difference.  Seeding with non-zero but identical points as well
            # would report identical data as different whenever no threshold
            # is specified.
            candidates = abs_diff != 0

            if max_abs:
                candidates[abs_diff <= max_abs] = False

            if not np.any(candidates):
                # nothing (left) to report; also covers empty arrays for
                # which a maximum could not be computed
                diffs1.append(None)
                continue

            rel_diff = abs_diff[candidates] / mean_abs[candidates]
            if max_rel:
                sub_thr = rel_diff <= max_rel
                # Since we operated on sub-selected values already, we need
                # to plug them back in
                candidates[
                    tuple(indexes[sub_thr] for indexes in np.where(candidates))
                ] = False
                if not np.any(candidates):
                    diffs1.append(None)
                    continue

            diff_rec = OrderedDict()  # so that abs goes before relative
            diff_rec['abs'] = np.max(abs_diff).astype(dtype)
            diff_rec['rel'] = np.max(rel_diff).astype(dtype)
            diffs1.append(diff_rec)

        if any(diffs1):
            diffs['DATA(diff %d:)' % (i + 1)] = diffs1

    return diffs
256+
257+
128258def display_diff (files , diff ):
129259 """Format header differences into a nice string
130260
@@ -140,21 +270,27 @@ def display_diff(files, diff):
140270 """
141271 output = ""
142272 field_width = "{:<15}"
273+ filename_width = "{:<53}"
143274 value_width = "{:<55}"
144275
145276 output += "These files are different.\n "
146- output += field_width .format ('Field' )
277+ output += field_width .format ('Field/File ' )
147278
148- for f in files :
149- output += value_width .format (os .path .basename (f ))
279+ for i , f in enumerate ( files , 1 ) :
280+ output += "%d:%s" % ( i , filename_width .format (os .path .basename (f ) ))
150281
151282 output += "\n "
152283
153284 for key , value in diff .items ():
154285 output += field_width .format (key )
155286
156287 for item in value :
157- item_str = str (item )
288+ if isinstance (item , dict ):
289+ item_str = ', ' .join ('%s: %s' % i for i in item .items ())
290+ elif item is None :
291+ item_str = '-'
292+ else :
293+ item_str = str (item )
158294 # Value might start/end with some invisible spacing characters so we
159295 # would "condition" it on both ends a bit
160296 item_str = re .sub ('^[ \t ]+' , '<' , item_str )
@@ -169,8 +305,40 @@ def display_diff(files, diff):
169305 return output
170306
171307
def diff(files, header_fields='all', data_max_abs_diff=None,
         data_max_rel_diff=None, dtype=np.float64):
    """Collect header and data differences between files

    Parameters
    ----------
    files: list of str
        Names of the files to compare; at least two are required
    header_fields: str, optional
        Comma separated names of the header fields to compare, or 'all' to
        use the fields of the first file's header
    data_max_abs_diff: float, optional
        Passed as `max_abs` to `get_data_diff`
    data_max_rel_diff: float, optional
        Passed as `max_rel` to `get_data_diff`
    dtype: numpy dtype, optional
        Passed to `get_data_hash_diff` and `get_data_diff`

    Returns
    -------
    The result of `get_headers_diff`, extended with a 'DATA(md5)' entry and
    the entries from `get_data_diff` when the data hashes differ and
    `get_data_diff` reports differences.
    """
    assert len(files) >= 2, "Please enter at least two files"

    headers = [nib.load(fname).header for fname in files]

    # 'all' signals "all fields"
    # TODO: header fields might vary across file types,
    #       thus prior sensing would be needed
    if header_fields == 'all':
        fields = headers[0].keys()
    else:
        fields = header_fields.split(',')

    differences = get_headers_diff(headers, fields)

    hash_diffs = get_data_hash_diff(files, dtype)
    if not hash_diffs:
        return differences

    # provide details, possibly triggering the ignore of the difference
    # in data
    detailed_diffs = get_data_diff(
        files,
        max_abs=data_max_abs_diff,
        max_rel=data_max_rel_diff,
        dtype=dtype,
    )
    if detailed_diffs:
        differences['DATA(md5)'] = hash_diffs
        differences.update(detailed_diffs)

    return differences
337+
338+
172339def main (args = None , out = None ):
173340 """Getting the show on the road"""
341+
174342 out = out or sys .stdout
175343 parser = get_opt_parser ()
176344 (opts , files ) = parser .parse_args (args )
@@ -181,27 +349,17 @@ def main(args=None, out=None):
181349 # suppress nibabel format-compliance warnings
182350 nib .imageglobals .logger .level = 50
183351
184- assert len (files ) >= 2 , "Please enter at least two files"
352+ files_diff = diff (
353+ files ,
354+ header_fields = opts .header_fields ,
355+ data_max_abs_diff = opts .data_max_abs_diff ,
356+ data_max_rel_diff = opts .data_max_rel_diff ,
357+ dtype = opts .dtype
358+ )
185359
186- file_headers = [nib .load (f ).header for f in files ]
187-
188- # signals "all fields"
189- if opts .header_fields == 'all' :
190- # TODO: header fields might vary across file types, thus prior sensing would be needed
191- header_fields = file_headers [0 ].keys ()
192- else :
193- header_fields = opts .header_fields .split (',' )
194-
195- diff = get_headers_diff (file_headers , header_fields )
196- data_diff = get_data_diff (files )
197-
198- if data_diff :
199- diff ['DATA(md5)' ] = data_diff
200-
201- if diff :
202- out .write (display_diff (files , diff ))
360+ if files_diff :
361+ out .write (display_diff (files , files_diff ))
203362 raise SystemExit (1 )
204-
205363 else :
206364 out .write ("These files are identical.\n " )
207365 raise SystemExit (0 )
0 commit comments