@@ -321,3 +321,189 @@ def test_table_segment(self):
321321 self .assertRaises (ValueError , self .table .replace , min_update = late , max_update = early )
322322
323323 self .assertRaises (ValueError , self .table .replace , min_key = 10 , max_key = 0 )
324+
325+
326+ class TestTableUUID (TestWithConnection ):
327+ def setUp (self ):
328+ super ().setUp ()
329+
330+ queries = [
331+ f"DROP TABLE IF EXISTS { self .table_src } " ,
332+ f"DROP TABLE IF EXISTS { self .table_dst } " ,
333+ f"CREATE TABLE { self .table_src } (id varchar(100), comment varchar(1000))" ,
334+ ]
335+ for i in range (10 ):
336+ uuid_value = uuid .uuid1 (i )
337+ queries .append (f"INSERT INTO { self .table_src } VALUES ('{ uuid_value } ', '{ uuid_value } ')" )
338+
339+ self .null_uuid = uuid .uuid1 (32132131 )
340+ queries += [
341+ f"CREATE TABLE { self .table_dst } AS SELECT * FROM { self .table_src } " ,
342+
343+ f"INSERT INTO { self .table_src } VALUES ('{ self .null_uuid } ', NULL)" ,
344+
345+ "COMMIT"
346+ ]
347+
348+ for query in queries :
349+ self .connection .query (query , None )
350+
351+ self .a = TableSegment (self .connection , (self .table_src ,), "id" , "comment" )
352+ self .b = TableSegment (self .connection , (self .table_dst ,), "id" , "comment" )
353+
354+ def test_uuid_column_with_nulls (self ):
355+ differ = TableDiffer ()
356+ diff = list (differ .diff_tables (self .a , self .b ))
357+ self .assertEqual (diff , [("-" , (str (self .null_uuid ), None ))])
358+
359+
360+ class TestTableNullRowChecksum (TestWithConnection ):
361+ def setUp (self ):
362+ super ().setUp ()
363+
364+ self .null_uuid = uuid .uuid1 (1 )
365+ queries = [
366+ f"DROP TABLE IF EXISTS { self .table_src } " ,
367+ f"DROP TABLE IF EXISTS { self .table_dst } " ,
368+ f"CREATE TABLE { self .table_src } (id varchar(100), comment varchar(1000))" ,
369+
370+ f"INSERT INTO { self .table_src } VALUES ('{ uuid .uuid1 (1 )} ', '1')" ,
371+
372+ f"CREATE TABLE { self .table_dst } AS SELECT * FROM { self .table_src } " ,
373+
374+ # Add a row where a column has NULL value
375+ f"INSERT INTO { self .table_src } VALUES ('{ self .null_uuid } ', NULL)" ,
376+
377+ "COMMIT"
378+ ]
379+
380+ for query in queries :
381+ self .connection .query (query , None )
382+
383+ self .a = TableSegment (self .connection , (self .table_src ,), "id" , "comment" )
384+ self .b = TableSegment (self .connection , (self .table_dst ,), "id" , "comment" )
385+
386+ def test_uuid_columns_with_nulls (self ):
387+ """
388+ Here we test a case when in one segment one or more columns has only null values. For example,
389+ Table A:
390+ | id | value |
391+ |------|-----------|
392+ | pk_1 | 'value_1' |
393+ | pk_2 | NULL |
394+
395+ Table B:
396+ | id | value |
397+ |------|-----------|
398+ | pk_1 | 'value_1' |
399+
400+ We can choose some bisection factor and bisection threshold (2 and 3 for our example, respectively)
401+ that one segment will look like ('pk_2', NULL). Some databases, when we do a cast these values to string and
402+ try to concatenate, some databases return NULL when concatenating (for example, MySQL). As the result, all next
403+ operations like substring, sum etc return nulls that leads incorrect diff results: ('pk_2', null) should be in
404+ diff results, but it's not. This test helps to detect such cases.
405+ """
406+
407+ differ = TableDiffer (bisection_factor = 2 , bisection_threshold = 3 )
408+ diff = list (differ .diff_tables (self .a , self .b ))
409+ self .assertEqual (diff , [("-" , (str (self .null_uuid ), None ))])
410+
411+
412+ class TestConcatMultipleColumnWithNulls (TestWithConnection ):
413+ def setUp (self ):
414+ super ().setUp ()
415+
416+ queries = [
417+ f"DROP TABLE IF EXISTS { self .table_src } " ,
418+ f"DROP TABLE IF EXISTS { self .table_dst } " ,
419+ f"CREATE TABLE { self .table_src } (id varchar(100), c1 varchar(100), c2 varchar(100))" ,
420+ f"CREATE TABLE { self .table_dst } (id varchar(100), c1 varchar(100), c2 varchar(100))" ,
421+ ]
422+
423+ self .diffs = []
424+ for i in range (0 , 8 ):
425+ pk = uuid .uuid1 (i )
426+ table_src_c1_val = str (i )
427+ table_dst_c1_val = str (i ) + "-different"
428+
429+ queries .append (f"INSERT INTO { self .table_src } VALUES ('{ pk } ', '{ table_src_c1_val } ', NULL)" )
430+ queries .append (f"INSERT INTO { self .table_dst } VALUES ('{ pk } ', '{ table_dst_c1_val } ', NULL)" )
431+
432+ self .diffs .append (("-" , (str (pk ), table_src_c1_val , None )))
433+ self .diffs .append (("+" , (str (pk ), table_dst_c1_val , None )))
434+
435+ queries .append ("COMMIT" )
436+
437+ for query in queries :
438+ self .connection .query (query , None )
439+
440+ self .a = TableSegment (self .connection , (self .table_src ,), "id" , extra_columns = ("c1" , "c2" ))
441+ self .b = TableSegment (self .connection , (self .table_dst ,), "id" , extra_columns = ("c1" , "c2" ))
442+
443+ def test_tables_are_different (self ):
444+ """
445+ Here we test a case when in one segment one or more columns has only null values. For example,
446+ Table A:
447+ | id | c1 | c2 |
448+ |------|----|------|
449+ | pk_1 | 1 | NULL |
450+ | pk_2 | 2 | NULL |
451+ ...
452+ | pk_n | n | NULL |
453+
454+ Table B:
455+ | id | c1 | c2 |
456+ |------|--------|------|
457+ | pk_1 | 1-diff | NULL |
458+ | pk_2 | 2-diff | NULL |
459+ ...
460+ | pk_n | n-diff | NULL |
461+
462+ To calculate a checksum, we need to concatenate string values by rows. If both tables have columns with NULL
463+ value, it may lead that concat(pk_i, i, NULL) == concat(pk_i, i-diff, NULL). This test handle such cases.
464+ """
465+
466+ differ = TableDiffer (bisection_factor = 2 , bisection_threshold = 4 )
467+ diff = list (differ .diff_tables (self .a , self .b ))
468+ self .assertEqual (diff , self .diffs )
469+
470+
471+ class TestTableTableEmpty (TestWithConnection ):
472+ def setUp (self ):
473+ super ().setUp ()
474+
475+ self .null_uuid = uuid .uuid1 (1 )
476+ queries = [
477+ f"DROP TABLE IF EXISTS { self .table_src } " ,
478+ f"DROP TABLE IF EXISTS { self .table_dst } " ,
479+ f"CREATE TABLE { self .table_src } (id varchar(100), comment varchar(1000))" ,
480+ f"CREATE TABLE { self .table_dst } (id varchar(100), comment varchar(1000))" ,
481+ ]
482+
483+ self .diffs = [(uuid .uuid1 (i ), i ) for i in range (100 )]
484+ for pk , value in self .diffs :
485+ queries .append (f"INSERT INTO { self .table_src } VALUES ('{ pk } ', '{ value } ')" )
486+
487+ queries .append ("COMMIT" )
488+
489+ for query in queries :
490+ self .connection .query (query , None )
491+
492+ self .a = TableSegment (self .connection , (self .table_src ,), "id" , "comment" )
493+ self .b = TableSegment (self .connection , (self .table_dst ,), "id" , "comment" )
494+
495+ def test_right_table_empty (self ):
496+ differ = TableDiffer ()
497+ self .assertRaises (ValueError , differ .diff_tables , self .a , self .b )
498+
499+ def test_left_table_empty (self ):
500+ queries = [
501+ f"INSERT INTO { self .table_dst } SELECT id, comment FROM { self .table_src } " ,
502+ f"TRUNCATE { self .table_src } " ,
503+ "COMMIT"
504+ ]
505+ for query in queries :
506+ self .connection .query (query , None )
507+
508+ differ = TableDiffer ()
509+ self .assertRaises (ValueError , differ .diff_tables , self .a , self .b )
0 commit comments