@@ -39,6 +39,21 @@ def get_opt_parser():
3939 Option ("-H" , "--header-fields" ,
4040 dest = "header_fields" , default = 'all' ,
4141 help = "Header fields (comma separated) to be printed as well (if present)" ),
42+
43+ Option ("--ma" , "--data-max-abs-diff" ,
44+ dest = "data_max_abs_diff" ,
45+ type = float ,
46+ default = 0.0 ,
47+ help = "Maximal absolute difference in data between files to tolerate." ),
48+
49+ Option ("--mr" , "--data-max-rel-diff" ,
50+ dest = "data_max_rel_diff" ,
51+ type = float ,
52+ default = 0.0 ,
53+ help = "Maximal relative difference in data between files to tolerate."
54+ " If also --data-max-abs-diff specified, only the data points "
55+ " with absolute difference greater than that value would be "
56+ " considered for relative difference check." ),
4257 ])
4358
4459 return p
@@ -101,8 +116,8 @@ def get_headers_diff(file_headers, names=None):
101116 return difference
102117
103118
104- def get_data_diff (files ):
105- """Get difference between md5 values
119+ def get_data_md5_diff (files ):
120+ """Get difference between md5 values of data
106121
107122 Parameters
108123 ----------
@@ -125,6 +140,65 @@ def get_data_diff(files):
125140 return md5sums
126141
127142
def get_data_diff(files, max_abs=0, max_rel=0):
    """Get difference between data

    Every file is compared against every later file in `files`.

    Parameters
    ----------
    files: list of (str or ndarray)
        Files to compare.  File names are loaded with `nib.load`; arrays
        which are already loaded are used as is.
    max_abs: float, optional
        Maximal absolute difference to tolerate.
    max_rel: float, optional
        Maximal relative difference to tolerate, where the relative
        difference of two values is `abs(d1 - d2) / mean(abs(d1), abs(d2))`.
        If `max_abs` is specified, then those data points with absolute
        difference not exceeding it are not considered for relative
        difference testing.

    Returns
    -------
    diffs: OrderedDict
        Maps ``'DATA(diff N:)'`` (N is the 1-based index of the reference
        file) to a list with one entry per file.  An entry is `None` for
        files which were not compared against the reference (itself and the
        preceding ones) or which do not differ beyond the tolerances; an
        ``OrderedDict`` with the maximal ``'abs'`` and ``'rel'`` differences
        otherwise; or ``{'CMP': 'incompat'}`` if array shapes differ.
        References without any reported difference get no key at all.
    """
    # we are doomed to keep them in RAM now
    data = []
    for f in files:
        d = np.asanyarray(
            f if isinstance(f, np.ndarray) else nib.load(f).get_data())
        # Integer/boolean arithmetic would wrap around (e.g. uint8 1 - 2
        # gives 255) or fail, so compare such data in floating point
        if d.dtype.kind in 'biu':
            d = d.astype(np.float64)
        data.append(d)

    diffs = OrderedDict()
    for i, d1 in enumerate(data[:-1]):
        # populate empty entries for non-compared
        diffs1 = [None] * (i + 1)

        for d2 in data[i + 1:]:
            if d1.shape != d2.shape:
                # element-wise comparison is meaningless (numpy would either
                # fail or silently broadcast), so just flag the pair
                diffs1.append({'CMP': 'incompat'})
                continue

            abs_diff = np.abs(d1 - d2)
            if not abs_diff.size:
                # no data points -- nothing could differ (and np.max
                # would fail on an empty array)
                diffs1.append(None)
                continue

            mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5
            candidates = np.logical_or(mean_abs != 0, abs_diff != 0)

            if max_abs:
                candidates[abs_diff <= max_abs] = False

            max_abs_diff = np.max(abs_diff)
            if np.any(candidates):
                rel_diff = abs_diff[candidates] / mean_abs[candidates]
                if max_rel:
                    sub_thr = rel_diff <= max_rel
                    # Since we operated on sub-selected values already, we
                    # need to plug them back in
                    candidates[
                        tuple(indexes[sub_thr]
                              for indexes in np.where(candidates))
                    ] = False
                max_rel_diff = np.max(rel_diff)
            else:
                max_rel_diff = 0

            if np.any(candidates):
                diff_rec = OrderedDict()  # so that abs goes before relative
                diff_rec['abs'] = max_abs_diff
                diff_rec['rel'] = max_rel_diff
                diffs1.append(diff_rec)
            else:
                diffs1.append(None)

        if any(diffs1):
            diffs['DATA(diff %d:)' % (i + 1)] = diffs1

    return diffs
200+
201+
128202def display_diff (files , diff ):
129203 """Format header differences into a nice string
130204
@@ -143,18 +217,23 @@ def display_diff(files, diff):
143217 value_width = "{:<55}"
144218
145219 output += "These files are different.\n "
146- output += field_width .format ('Field' )
220+ output += field_width .format ('Field/File ' )
147221
148- for f in files :
149- output += value_width .format (os .path .basename (f ))
222+ for i , f in enumerate ( files , 1 ) :
223+ output += "%d:%s" % ( i , value_width .format (os .path .basename (f ) ))
150224
151225 output += "\n "
152226
153227 for key , value in diff .items ():
154228 output += field_width .format (key )
155229
156230 for item in value :
157- item_str = str (item )
231+ if isinstance (item , dict ):
232+ item_str = ', ' .join ('%s: %s' % i for i in item .items ())
233+ elif item is None :
234+ item_str = '-'
235+ else :
236+ item_str = str (item )
158237 # Value might start/end with some invisible spacing characters so we
159238 # would "condition" it on both ends a bit
160239 item_str = re .sub ('^[ \t ]+' , '<' , item_str )
@@ -169,8 +248,37 @@ def display_diff(files, diff):
169248 return output
170249
171250
def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None):
    """Collect header and data differences between files

    Parameters
    ----------
    files: list of str
        Files to compare (at least two); each is opened with `nib.load`.
    header_fields: str, optional
        Comma separated names of the header fields to compare, or 'all'
        to compare every field present in the header of the first file.
    data_max_abs_diff: float, optional
        Passed to `get_data_diff` as `max_abs`.
    data_max_rel_diff: float, optional
        Passed to `get_data_diff` as `max_rel`.

    Returns
    -------
    The result of `get_headers_diff`, extended with a 'DATA(md5)' entry and
    the entries of `get_data_diff` when the data differs as well.
    """
    assert len(files) >= 2, "Please enter at least two files"

    headers = [nib.load(fname).header for fname in files]

    # 'all' is a special value requesting every field of the first header
    # TODO: header fields might vary across file types, thus prior sensing would be needed
    fields = headers[0].keys() if header_fields == 'all' else header_fields.split(',')

    result = get_headers_diff(headers, fields)

    md5_diffs = get_data_md5_diff(files)
    if not md5_diffs:
        # checksums agree, so there is no need to look at the data itself
        return result

    # provide details, possibly triggering the ignore of the difference
    # in data (when all of it stays within the tolerances)
    detailed = get_data_diff(files,
                             max_abs=data_max_abs_diff,
                             max_rel=data_max_rel_diff)
    if detailed:
        result['DATA(md5)'] = md5_diffs
        result.update(detailed)

    return result
277+
278+
172279def main (args = None , out = None ):
173280 """Getting the show on the road"""
281+
174282 out = out or sys .stdout
175283 parser = get_opt_parser ()
176284 (opts , files ) = parser .parse_args (args )
@@ -181,27 +289,16 @@ def main(args=None, out=None):
181289 # suppress nibabel format-compliance warnings
182290 nib .imageglobals .logger .level = 50
183291
184- assert len (files ) >= 2 , "Please enter at least two files"
185-
186- file_headers = [nib .load (f ).header for f in files ]
187-
188- # signals "all fields"
189- if opts .header_fields == 'all' :
190- # TODO: header fields might vary across file types, thus prior sensing would be needed
191- header_fields = file_headers [0 ].keys ()
192- else :
193- header_fields = opts .header_fields .split (',' )
292+ files_diff = diff (
293+ files ,
294+ header_fields = opts .header_fields ,
295+ data_max_abs_diff = opts .data_max_abs_diff ,
296+ data_max_rel_diff = opts .data_max_rel_diff
297+ )
194298
195- diff = get_headers_diff (file_headers , header_fields )
196- data_diff = get_data_diff (files )
197-
198- if data_diff :
199- diff ['DATA(md5)' ] = data_diff
200-
201- if diff :
202- out .write (display_diff (files , diff ))
299+ if files_diff :
300+ out .write (display_diff (files , files_diff ))
203301 raise SystemExit (1 )
204-
205302 else :
206303 out .write ("These files are identical.\n " )
207304 raise SystemExit (0 )
0 commit comments