Skip to content

Commit 3721439

Browse files
authored
Add reusable test for split dataframe function (#610)
1 parent cf75d07 commit 3721439

File tree

2 files changed

+64
-1
lines changed

2 files changed

+64
-1
lines changed

src/sssom/parsers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,11 @@ def split_dataframe(
977977
)
978978

979979

980+
def _get_split_key(subject_prefix: str, relation_luid: str, object_prefix: str) -> str:
981+
split = f"{subject_prefix.lower()}_{relation_luid.lower()}_{object_prefix.lower()}"
982+
return split
983+
984+
980985
def split_dataframe_by_prefix(
981986
msdf: MappingSetDataFrame,
982987
subject_prefixes: Iterable[str],
@@ -998,7 +1003,7 @@ def split_dataframe_by_prefix(
9981003
subject_prefixes, object_prefixes, relations
9991004
):
10001005
relation_prefix, relation_id = relation.split(":")
1001-
split = f"{subject_prefix.lower()}_{relation_id.lower()}_{object_prefix.lower()}"
1006+
split = _get_split_key(subject_prefix, relation_id, object_prefix)
10021007
if subject_prefix not in msdf.converter.bimap:
10031008
logging.warning(f"{split} - missing subject prefix - {subject_prefix}")
10041009
continue

tests/test_parsers.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from_sssom_json,
2929
from_sssom_rdf,
3030
parse_sssom_table,
31+
split_dataframe_by_prefix,
3132
)
3233
from sssom.util import MappingSetDataFrame, sort_df_rows_columns
3334
from sssom.writers import WRITER_FUNCTIONS, write_table
@@ -513,3 +514,60 @@ def test_check_irregular_metadata(self):
513514
self.assertTrue(is_irregular_metadata_fail_missing_property_case)
514515
self.assertTrue(is_valid_extension)
515516
self.assertFalse(is_irregular_metadata_ok_case)
517+
518+
519+
class TestSplit(unittest.TestCase):
    """A test case for splitting a mapping set dataframe by prefix."""

    def test_split_df(self) -> None:
        """Test that ``split_dataframe_by_prefix`` indexes subsets by split key."""
        converter = Converter.from_prefix_map(
            {
                "p1": "https://example.org/p1/",
                "p2": "https://example.org/p2/",
                "p3": "https://example.org/p3/",
                "p4": "https://example.org/p4/",
                "p5": "https://example.org/p5/",
                "p6": "https://example.org/p6/",
                "skos": "http://www.w3.org/2004/02/skos/core#",
                "semapv": "https://w3id.org/semapv/vocab/",
            }
        )
        columns = ["subject_id", "predicate_id", "object_id", "mapping_justification"]
        # the only rows that map p1 -> p2 via skos:exactMatch
        p1_p2_rows = [
            ("p1:1", "skos:exactMatch", "p2:1", "semapv:ManualMappingCuration"),
            ("p1:2", "skos:exactMatch", "p2:2", "semapv:ManualMappingCuration"),
        ]
        # the only row that maps p1 -> p3 via skos:exactMatch
        p1_p3_rows = [
            ("p1:2", "skos:exactMatch", "p3:2", "semapv:ManualMappingCuration"),
        ]
        rows = [
            *p1_p2_rows,
            *p1_p3_rows,
            # distractors: other prefixes, reversed direction, or another predicate
            ("p4:1", "skos:exactMatch", "p1:1", "semapv:ManualMappingCuration"),
            ("p5:1", "skos:broadMatch", "p6:1", "semapv:ManualMappingCuration"),
            ("p1:7", "skos:broadMatch", "p2:7", "semapv:ManualMappingCuration"),
        ]
        df = pd.DataFrame(rows, columns=columns)
        msdf = from_sssom_dataframe(df, converter)

        # test that if there's ever an empty list, then it returns an empty dict
        self.assertFalse(split_dataframe_by_prefix(msdf, [], ["p2"], ["skos:exactMatch"]))
        self.assertFalse(split_dataframe_by_prefix(msdf, ["p1"], ["p2"], []))
        self.assertFalse(split_dataframe_by_prefix(msdf, ["p1"], [], ["skos:exactMatch"]))

        # test that missing prefixes don't result in anything
        self.assertFalse(split_dataframe_by_prefix(msdf, ["nope"], ["p2"], ["skos:exactMatch"]))
        self.assertFalse(split_dataframe_by_prefix(msdf, ["p1"], ["nope"], ["skos:exactMatch"]))
        self.assertFalse(split_dataframe_by_prefix(msdf, ["p1"], ["p2"], ["nope:nope"]))

        expected_p1_p2 = pd.DataFrame(p1_p2_rows, columns=columns).values.tolist()
        expected_p1_p3 = pd.DataFrame(p1_p3_rows, columns=columns).values.tolist()

        # test an explicit return with only single entries
        rv = split_dataframe_by_prefix(msdf, ["p1"], ["p2"], ["skos:exactMatch"])
        self.assertEqual(1, len(rv), msg="nothing was indexed")
        self.assertIn("p1_exactmatch_p2", rv)
        self.assertEqual(expected_p1_p2, rv["p1_exactmatch_p2"].df.values.tolist())

        # test an explicit return with multiple entries
        rv = split_dataframe_by_prefix(msdf, ["p1"], ["p2", "p3"], ["skos:exactMatch"])
        self.assertEqual(2, len(rv), msg="expected exactly one split per object prefix")
        self.assertIn("p1_exactmatch_p2", rv)
        self.assertIn("p1_exactmatch_p3", rv)
        self.assertEqual(expected_p1_p2, rv["p1_exactmatch_p2"].df.values.tolist())
        # the second split must hold only the p1 -> p3 row, not leak p2 rows
        self.assertEqual(expected_p1_p3, rv["p1_exactmatch_p3"].df.values.tolist())

0 commit comments

Comments
 (0)