@@ -123,6 +123,76 @@ def compare(previous, current, show_unchanged=False, fields=None, ignorefields=N
123123 return result
124124
125125
def streaming_compare_csv(
    prev_path,
    curr_path,
    key,
    compare_columns=None,
    ignorefields=None,
    encoding='utf-8',
    dialect='excel',
):
    """
    Compare two CSV files sorted by ``key`` in a single streaming pass.

    Both files are read row by row with ``csv.DictReader`` and merged on the
    ``key`` column. Key values are compared as strings, so both files must be
    sorted in the lexicographic (string) order of that column; rows that are
    out of order will be reported as added/removed rather than matched.

    Args:
        prev_path: Path of the "previous" CSV file.
        curr_path: Path of the "current" CSV file.
        key: Name of the column that identifies a row.
        compare_columns: Optional iterable of column names to compare. When
            given (and non-empty) it takes precedence over ``ignorefields``.
        ignorefields: Optional iterable of column names to exclude from the
            comparison. Only used when ``compare_columns`` is not given.
        encoding: Text encoding used to open both files.
        dialect: ``csv`` dialect passed to both readers.

    Returns:
        A dict with the keys:

        * ``"added"``: rows (dicts) whose key only appears in the current file.
        * ``"removed"``: rows (dicts) whose key only appears in the previous
          file.
        * ``"changed"``: ``{"key": <key>, "changes": {col: [old, new]}}`` for
          rows present in both files whose compared columns differ. A column
          missing from one of the files yields ``None`` on that side.
        * ``"columns_added"`` / ``"columns_removed"``: compared columns that
          exist in only one of the two headers, in header order.

    Raises:
        KeyError: If both files contain data rows and ``key`` is missing from
            either header.
    """
    import csv

    result = {
        "added": [],
        "removed": [],
        "changed": [],
        "columns_added": [],
        "columns_removed": [],
    }

    with open(prev_path, newline='', encoding=encoding) as f1, \
            open(curr_path, newline='', encoding=encoding) as f2:
        reader1 = csv.DictReader(f1, dialect=dialect)
        reader2 = csv.DictReader(f2, dialect=dialect)
        prev_row = next(reader1, None)
        curr_row = next(reader2, None)

        # ``fieldnames`` is None for a completely empty file; treat that as
        # "no columns" instead of crashing on set(None). dict.fromkeys keeps
        # header order while dropping duplicate column names.
        prev_fields = list(dict.fromkeys(reader1.fieldnames or ()))
        curr_fields = list(dict.fromkeys(reader2.fieldnames or ()))
        prev_columns = set(prev_fields)
        curr_columns = set(curr_fields)

        # Determine columns to compare, as an ordered list so that the
        # output does not depend on set iteration order.
        if compare_columns:
            columns = list(dict.fromkeys(compare_columns))
        elif ignorefields:
            ignored = set(ignorefields)
            columns = [
                c for c in dict.fromkeys(prev_fields + curr_fields)
                if c not in ignored
            ]
        else:
            columns = list(dict.fromkeys(prev_fields + curr_fields))
        wanted = set(columns)

        result["columns_added"] = [
            c for c in curr_fields if c not in prev_columns and c in wanted
        ]
        result["columns_removed"] = [
            c for c in prev_fields if c not in curr_columns and c in wanted
        ]

        # DictReader rows always carry every header column, so the key only
        # needs to be validated once against the headers. As before, the
        # check applies only when both files have at least one data row.
        if prev_row is not None and curr_row is not None:
            if key not in prev_columns or key not in curr_columns:
                raise KeyError(f"Key column '{key}' missing in one of the rows.")

        # Merge step: advance whichever side has the smaller key.
        while prev_row is not None and curr_row is not None:
            prev_key = str(prev_row[key])
            curr_key = str(curr_row[key])
            if prev_key == curr_key:
                # Same row in both files: record per-column differences.
                changed_fields = {}
                for col in columns:
                    old_value = prev_row.get(col)
                    new_value = curr_row.get(col)
                    if old_value != new_value:
                        changed_fields[col] = [old_value, new_value]
                if changed_fields:
                    result["changed"].append({
                        "key": prev_key,
                        "changes": changed_fields,
                    })
                prev_row = next(reader1, None)
                curr_row = next(reader2, None)
            elif prev_key < curr_key:
                # Key only present in the previous file: row removed.
                result["removed"].append(prev_row)
                prev_row = next(reader1, None)
            else:
                # Key only present in the current file: row added.
                result["added"].append(curr_row)
                curr_row = next(reader2, None)

        # At most one side still has rows; everything left on it is
        # unmatched by definition.
        if prev_row is not None:
            result["removed"].append(prev_row)
            result["removed"].extend(reader1)
        if curr_row is not None:
            result["added"].append(curr_row)
            result["added"].extend(reader2)
    return result
194+
195+
126196def human_text (result , key = None , current = None , extras = None ):
127197 title = []
128198 summary = []
0 commit comments