|
28 | 28 | from_sssom_json, |
29 | 29 | from_sssom_rdf, |
30 | 30 | parse_sssom_table, |
| 31 | + split_dataframe_by_prefix, |
31 | 32 | ) |
32 | 33 | from sssom.util import MappingSetDataFrame, sort_df_rows_columns |
33 | 34 | from sssom.writers import WRITER_FUNCTIONS, write_table |
@@ -513,3 +514,60 @@ def test_check_irregular_metadata(self): |
513 | 514 | self.assertTrue(is_irregular_metadata_fail_missing_property_case) |
514 | 515 | self.assertTrue(is_valid_extension) |
515 | 516 | self.assertFalse(is_irregular_metadata_ok_case) |
| 517 | + |
| 518 | + |
class TestSplit(unittest.TestCase):
    """Tests for ``split_dataframe_by_prefix``."""

    def test_split_df(self) -> None:
        """Split a small mapping set by subject prefix, object prefix, and predicate.

        The fixture contains mappings between six example prefixes (``p1`` to
        ``p6``) using ``skos:exactMatch`` and ``skos:broadMatch``. The test checks
        that:

        * an empty list for any of the three filters gives a falsy result,
        * a prefix or predicate that matches no row gives a falsy result,
        * a matching combination is returned under a key such as
          ``p1_exactmatch_p2`` and holds exactly the matching rows.
        """
        converter = Converter.from_prefix_map(
            {
                "p1": "https://example.org/p1/",
                "p2": "https://example.org/p2/",
                "p3": "https://example.org/p3/",
                "p4": "https://example.org/p4/",
                "p5": "https://example.org/p5/",
                "p6": "https://example.org/p6/",
                "skos": "http://www.w3.org/2004/02/skos/core#",
                "semapv": "https://w3id.org/semapv/vocab/",
            }
        )
        columns = ["subject_id", "predicate_id", "object_id", "mapping_justification"]

        # Rows expected in the p1 -> p2 exact-match partition.
        p1_p2_exact_rows = [
            ("p1:1", "skos:exactMatch", "p2:1", "semapv:ManualMappingCuration"),
            ("p1:2", "skos:exactMatch", "p2:2", "semapv:ManualMappingCuration"),
        ]
        # Row expected in the p1 -> p3 exact-match partition.
        p1_p3_exact_rows = [
            ("p1:2", "skos:exactMatch", "p3:2", "semapv:ManualMappingCuration"),
        ]
        rows = [
            *p1_p2_exact_rows,
            *p1_p3_exact_rows,
            # Distractors: p1 only as the object, unrelated prefixes, and a
            # p1 -> p2 mapping with a different predicate.
            ("p4:1", "skos:exactMatch", "p1:1", "semapv:ManualMappingCuration"),
            ("p5:1", "skos:broadMatch", "p6:1", "semapv:ManualMappingCuration"),
            ("p1:7", "skos:broadMatch", "p2:7", "semapv:ManualMappingCuration"),
        ]
        msdf = from_sssom_dataframe(pd.DataFrame(rows, columns=columns), converter)

        # An empty list for any filter must produce an empty (falsy) result.
        empty_filter_cases = [
            ([], ["p2"], ["skos:exactMatch"]),
            (["p1"], ["p2"], []),
            (["p1"], [], ["skos:exactMatch"]),
        ]
        # Prefixes/predicates that match no row must not produce any partition.
        unmatched_filter_cases = [
            (["nope"], ["p2"], ["skos:exactMatch"]),
            (["p1"], ["nope"], ["skos:exactMatch"]),
            (["p1"], ["p2"], ["nope:nope"]),
        ]
        for subjects, objects, relations in [*empty_filter_cases, *unmatched_filter_cases]:
            # subTest reports every failing combination instead of stopping at the first.
            with self.subTest(subjects=subjects, objects=objects, relations=relations):
                self.assertFalse(split_dataframe_by_prefix(msdf, subjects, objects, relations))

        # Compare cell values only, as nested lists, so the index is ignored.
        expected_p1_p2 = pd.DataFrame(p1_p2_exact_rows, columns=columns).values.tolist()
        expected_p1_p3 = pd.DataFrame(p1_p3_exact_rows, columns=columns).values.tolist()

        # A single subject/object/predicate combination yields one partition.
        rv = split_dataframe_by_prefix(msdf, ["p1"], ["p2"], ["skos:exactMatch"])
        self.assertEqual(1, len(rv), msg="expected exactly one partition")
        self.assertIn("p1_exactmatch_p2", rv)
        self.assertEqual(expected_p1_p2, rv["p1_exactmatch_p2"].df.values.tolist())

        # Two object prefixes yield two partitions, each with only its own rows.
        rv = split_dataframe_by_prefix(msdf, ["p1"], ["p2", "p3"], ["skos:exactMatch"])
        self.assertEqual(2, len(rv), msg="expected exactly two partitions")
        self.assertIn("p1_exactmatch_p2", rv)
        self.assertIn("p1_exactmatch_p3", rv)
        self.assertEqual(expected_p1_p2, rv["p1_exactmatch_p2"].df.values.tolist())
        self.assertEqual(expected_p1_p3, rv["p1_exactmatch_p3"].df.values.tolist())
0 commit comments