From 47015771d033a33c834c510e96d3b705cbde74b1 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 7 Nov 2025 15:39:19 -0500 Subject: [PATCH 01/26] move a lot of files and functions; remove concept of explict js type; edit test --- setup.cfg | 2 + .../extensions/curator/schema_generation.py | 201 +- .../unit/synapseclient/extensions/conftest.py | 88 + .../example.model.column_type_component.csv | 83 + .../extensions/data/example.model.csv | 72 + .../extensions/data/example.model.jsonld | 2244 +++++++++++++++++ .../expected.Biospecimen.schema.json | 50 + ...BulkRNA-seqAssay.display_names_schema.json | 168 ++ .../expected.BulkRNA-seqAssay.schema.json | 168 ++ ...NSchemaComponent.display_names_schema.json | 211 ++ .../expected.JSONSchemaComponent.schema.json | 203 ++ .../expected.MockComponent.schema.json | 305 +++ .../expected.MockFilename.schema.json | 27 + .../expected.MockRDB.schema.json | 35 + ...expected.Patient.display_names_schema.json | 151 ++ .../expected.Patient.schema.json | 151 ++ ..._rna_missing_conditional_dependencies.json | 7 + ...ient_missing_conditional_dependencies.json | 6 + .../json_instances/valid_biospecimen1.json | 6 + .../data/json_instances/valid_bulk_rna1.json | 7 + .../data/json_instances/valid_bulk_rna2.json | 8 + .../data/json_instances/valid_patient1.json | 6 + .../data/json_instances/valid_patient2.json | 8 + .../unit_test_create_json_schema.py | 1277 ++++++++++ 24 files changed, 5452 insertions(+), 32 deletions(-) create mode 100644 tests/unit/synapseclient/extensions/conftest.py create mode 100644 tests/unit/synapseclient/extensions/data/example.model.column_type_component.csv create mode 100644 tests/unit/synapseclient/extensions/data/example.model.csv create mode 100644 tests/unit/synapseclient/extensions/data/example.model.jsonld create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Biospecimen.schema.json create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.schema.json create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockComponent.schema.json create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockFilename.schema.json create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockRDB.schema.json create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.display_names_schema.json create mode 100644 tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.schema.json create mode 100644 tests/unit/synapseclient/extensions/data/json_instances/bulk_rna_missing_conditional_dependencies.json create mode 100644 tests/unit/synapseclient/extensions/data/json_instances/patient_missing_conditional_dependencies.json create mode 100644 tests/unit/synapseclient/extensions/data/json_instances/valid_biospecimen1.json create mode 100644 tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna1.json create mode 100644 tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna2.json create mode 100644 tests/unit/synapseclient/extensions/data/json_instances/valid_patient1.json create mode 100644 tests/unit/synapseclient/extensions/data/json_instances/valid_patient2.json create mode 100644 tests/unit/synapseclient/extensions/unit_test_create_json_schema.py diff --git a/setup.cfg b/setup.cfg index 8f6074311..7a7993534 100644 --- a/setup.cfg +++ b/setup.cfg @@ -112,6 +112,8 @@ curator = networkx>=2.2.8 dataclasses-json>=0.6.1 rdflib>=6.0.0 + jsonschema>=4.23.0 + pysftp = pysftp>=0.2.8,<0.3 diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 2f81600f0..f47335e3a 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -27,6 +27,8 @@ Union, ) +from deprecated import deprecated + try: from dataclasses_json import config, dataclass_json except ImportError: @@ -3998,6 +4000,10 @@ def _generate_jsonschema( return generator.component_json_schema, str(generator.output_path) +@deprecated( + version="4.11.0", + reason="This function is going to be deprecated. Please use columnType to define types.", +) def filter_unused_inputted_rules( inputted_rules: list[str], logger: Logger ) -> list[str]: @@ -4029,6 +4035,10 @@ def filter_unused_inputted_rules( ] +@deprecated( + version="4.11.0", + reason="This function is going to be deprecated. Please use columnType to define types.", +) def check_for_duplicate_inputted_rules(inputted_rules: list[str]) -> None: """Checks that there are no rules with duplicate names @@ -4043,6 +4053,10 @@ def check_for_duplicate_inputted_rules(inputted_rules: list[str]) -> None: raise ValueError(f"Validation Rules contains duplicates: {inputted_rules}") +@deprecated( + version="4.11.0", + reason="This function is going to be deprecated. Please use columnType to define types.", +) def check_for_conflicting_inputted_rules(inputted_rules: list[str]) -> None: """Checks that each rule has no conflicts with any other rule @@ -4185,6 +4199,10 @@ def get_regex_parameters_from_inputted_rule( return pattern +@deprecated( + version="4.11.0", + reason="This function is going to be deprecated. Please use columnType to define types.", +) def get_validation_rule_names_from_inputted_rules( inputted_rules: list[str], ) -> list[ValidationRuleName]: @@ -4201,6 +4219,10 @@ def get_validation_rule_names_from_inputted_rules( return [rule.name for rule in rules] +@deprecated( + version="4.11.0", + reason="This function is going to be deprecated. Please use columnType to define types.", +) def get_names_from_inputted_rules(inputted_rules: list[str]) -> list[str]: """Gets the names from a list of inputted rules @@ -4268,7 +4290,7 @@ def _get_rules_by_names(names: list[str]) -> list[ValidationRule]: def _get_validation_rule_based_fields( validation_rules: list[str], - explicit_js_type: Optional[JSONSchemaType], + js_type: Optional[JSONSchemaType], name: str, logger: Logger, ) -> tuple[ @@ -4292,7 +4314,7 @@ def _get_validation_rule_based_fields( Arguments: validation_rules: A list of input validation rules - explicit_js_type: A JSONSchemaType if set explicitly in the data model, otherwise None + js_type: A JSONSchemaType if set explicitly in the data model, otherwise None name: The name of the node the validation rules belong to Raises: @@ -4310,7 +4332,7 @@ def _get_validation_rule_based_fields( - js_pattern: If the type is string the JSON Schema pattern """ js_is_array = False - js_type = explicit_js_type + js_type = js_type js_format = None js_minimum = None js_maximum = None @@ -4329,34 +4351,14 @@ def _get_validation_rule_based_fields( js_is_array = ValidationRuleName.LIST in validation_rule_names - # The explicit JSON Schema type is the one set in the data model # The implicit JSON Schema type is the one implied by the presence # of certain validation rules - # Schematic will use the implicit type if the explicit type isn't specified for now, - # but this behavior is deprecated and will be removed in the future by SCHEMATIC-326 + implicit_js_type = get_js_type_from_inputted_rules(validation_rules) - # If there is an explicit and implicit type set and the implicit type conflicts with the - # explicit type, then an exception is raised - if ( - explicit_js_type - and implicit_js_type - and explicit_js_type != implicit_js_type - ): - msg = ( - f"Property: '{name}', has explicit type: '{explicit_js_type}' " - f"that conflicts with the implicit type: '{implicit_js_type}' " - f"derived from its validation rules: {validation_rules}" + if implicit_js_type: + logger.warning( + f"Detected implicit type: '{implicit_js_type}' for property: '{name}'. Please note that type can only be set explicitly via the columnType column in the data model going forward." ) - logger.warning(msg) - if not explicit_js_type and implicit_js_type: - js_type = implicit_js_type - msg = ( - f"No explicit type set for property: '{name}', " - "using validation rules to set the type. " - "Using validation rules to set type is deprecated. " - "You should set the columnType for this property in your data model." - ) - logger.warning(msg) if ValidationRuleName.URL in validation_rule_names: js_format = JSONSchemaFormat.URI @@ -4404,7 +4406,7 @@ class TraversalNode: # pylint: disable=too-many-instance-attributes dependencies: This nodes dependencies description: This nodes description, gotten from the comment in the data model is_array: Whether or not the property is an array (inferred from validation_rules) - type: The type of the property (inferred from validation_rules) + type: The type of the property (set by ColumnType in the data model) format: The format of the property (inferred from validation_rules) minimum: The minimum value of the property (if numeric) (inferred from validation_rules) maximum: The maximum value of the property (if numeric) (inferred from validation_rules) @@ -4453,9 +4455,7 @@ def __post_init__(self) -> None: self.description = self.dmge.get_node_comment( node_display_name=self.display_name ) - explicit_js_type = self.dmge.get_node_column_type( - node_display_name=self.display_name - ) + js_type = self.dmge.get_node_column_type(node_display_name=self.display_name) ( self.is_array, @@ -4466,7 +4466,7 @@ def __post_init__(self) -> None: self.pattern, ) = _get_validation_rule_based_fields( validation_rules=validation_rules, - explicit_js_type=explicit_js_type, + js_type=js_type, name=self.name, logger=self.logger, ) @@ -5133,6 +5133,9 @@ def create_json_schema( # pylint: disable=too-many-arguments datatype: str, schema_name: str, logger: Logger, + write_schema: bool = True, + schema_path: Optional[str] = None, + jsonld_path: Optional[str] = None, use_property_display_names: bool = True, use_valid_value_display_names: bool = True, ) -> dict[str, Any]: @@ -5156,8 +5159,11 @@ def create_json_schema( # pylint: disable=too-many-arguments datatype: the datatype to create the schema for. Its node is where we can start recursive dependency traversal (as mentioned above). + write_schema: whether or not to write the schema as a json file schema_name: Name assigned to JSON-LD schema (to uniquely identify it via URI when it is hosted on the Internet). + schema_path: Where to save the JSON Schema file + jsonld_path: Used to name the file if the path isn't supplied use_property_display_names: If True, the properties in the JSONSchema will be written using node display names use_valid_value_display_names: If True, the valid_values in the JSONSchema @@ -5189,9 +5195,59 @@ def create_json_schema( # pylint: disable=too-many-arguments json_schema_dict = json_schema.as_json_schema_dict() + if write_schema: + _write_data_model( + json_schema_dict=json_schema_dict, + schema_path=schema_path, + jsonld_path=jsonld_path, + logger=logger, + ) + return json_schema_dict +def _write_data_model( + json_schema_dict: dict[str, Any], + logger: Logger, + schema_path: Optional[str] = None, + name: Optional[str] = None, + jsonld_path: Optional[str] = None, +) -> None: + """ + Creates the JSON Schema file + + Arguments: + json_schema_dict: The JSON schema in dict form + schema_path: Where to save the JSON Schema file + jsonld_path: + The path to the JSONLD model, used to create the path + Used if schema_path is None + name: + The name of the datatype(source node) the schema is being created for + Used if schema_path is None + """ + if schema_path: + json_schema_path = schema_path + elif name and jsonld_path: + json_schema_path = get_json_schema_log_file_path( + data_model_path=jsonld_path, source_node=name + ) + json_schema_dirname = os.path.dirname(json_schema_path) + if json_schema_dirname != "": + os.makedirs(json_schema_dirname, exist_ok=True) + + logger.info( + "The JSON schema file can be inspected by setting the following " + "nested key in the configuration: (model > location)." + ) + else: + raise ValueError( + "Either schema_path or both name and jsonld_path must be provided." + ) + export_json(json_doc=json_schema_dict, file_path=json_schema_path, indent=2) + logger.info("The JSON schema has been saved at %s", json_schema_path) + + class JsonSchemaComponentGenerator: """ Responsible for generating the JSON schema for a specific component and writing it to a file. @@ -5622,3 +5678,84 @@ class labels. Use cautiously as this can affect downstream compatibility. ) ) return jsonld_data_model + + +@dataclass +class Node2: # pylint: disable=too-many-instance-attributes + """ + A Dataclass representing data about a node in a data model in graph form + A DataModelGraphExplorer is used to infer most of the fields from the name of the node + + Attributes: + name: The name of the node + source_node: The name of the node where the graph traversal started + dmge: A DataModelGraphExplorer with the data model loaded + display_name: The display name of the node + valid_values: The valid values of the node if any + valid_value_display_names: The display names of the valid values of the node if any + is_required: Whether or not this node is required + dependencies: This nodes dependencies + description: This nodes description, gotten from the comment in the data model + is_array: Whether or not the property is an array (inferred from validation_rules) + type: The type of the property (inferred from validation_rules) + format: The format of the property (inferred from validation_rules) + minimum: The minimum value of the property (if numeric) (inferred from validation_rules) + maximum: The maximum value of the property (if numeric) (inferred from validation_rules) + pattern: The regex pattern of the property (inferred from validation_rules) + """ + + name: str + source_node: str + dmge: DataModelGraphExplorer + display_name: str = field(init=False) + valid_values: list[str] = field(init=False) + valid_value_display_names: list[str] = field(init=False) + is_required: bool = field(init=False) + dependencies: list[str] = field(init=False) + description: str = field(init=False) + is_array: bool = field(init=False) + type: Optional[JSONSchemaType] = field(init=False) + format: Optional[JSONSchemaFormat] = field(init=False) + minimum: Optional[float] = field(init=False) + maximum: Optional[float] = field(init=False) + pattern: Optional[str] = field(init=False) + + def __post_init__(self) -> None: + """ + Uses the dmge to fill in most of the fields of the dataclass + """ + self.display_name = self.dmge.get_nodes_display_names([self.name])[0] + self.valid_values = sorted(self.dmge.get_node_range(node_label=self.name)) + self.valid_value_display_names = sorted( + self.dmge.get_node_range(node_label=self.name, display_names=True) + ) + validation_rules = self.dmge.get_component_node_validation_rules( + manifest_component=self.source_node, node_display_name=self.display_name + ) + self.is_required = self.dmge.get_component_node_required( + manifest_component=self.source_node, + node_validation_rules=validation_rules, + node_display_name=self.display_name, + ) + self.dependencies = sorted( + self.dmge.get_node_dependencies( + self.name, display_names=False, schema_ordered=False + ) + ) + self.description = self.dmge.get_node_comment( + node_display_name=self.display_name + ) + explicit_js_type = self.dmge.get_node_column_type( + node_display_name=self.display_name + ) + + ( + self.is_array, + self.type, + self.format, + self.minimum, + self.maximum, + self.pattern, + ) = _get_validation_rule_based_fields( + validation_rules, explicit_js_type, self.name, logger=self.dmge.logger + ) diff --git a/tests/unit/synapseclient/extensions/conftest.py b/tests/unit/synapseclient/extensions/conftest.py new file mode 100644 index 000000000..0f4abc738 --- /dev/null +++ b/tests/unit/synapseclient/extensions/conftest.py @@ -0,0 +1,88 @@ +import os +import sys +from unittest.mock import Mock + +import pytest + +from synapseclient.extensions.curator.df_utils import load_df +from synapseclient.extensions.curator.schema_generation import ( + DataModelGraph, + DataModelGraphExplorer, + DataModelParser, +) + +TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) +DATA_DIR = os.path.join(TESTS_DIR, "data") + + +class Helpers: + @staticmethod + def get_data_path(path, *paths): + return os.path.join(DATA_DIR, path, *paths) + + @staticmethod + def get_data_file(path, *paths, **kwargs): + fullpath = os.path.join(DATA_DIR, path, *paths) + return open(fullpath, **kwargs) + + @staticmethod + def get_data_frame(path, *paths, **kwargs): + fullpath = os.path.join(DATA_DIR, path, *paths) + return load_df(fullpath, **kwargs) + + @staticmethod + def get_data_model_graph_explorer( + path=None, data_model_labels: str = "class_label", *paths + ): + # commenting this now bc we dont want to have multiple instances + if path is None: + return + + fullpath = Helpers.get_data_path(path, *paths) + + # Instantiate DataModelParser + data_model_parser = DataModelParser(path_to_data_model=fullpath, logger=Mock()) + + # Parse Model + parsed_data_model = data_model_parser.parse_model() + + # Instantiate DataModelGraph + data_model_grapher = DataModelGraph( + parsed_data_model, data_model_labels=data_model_labels, logger=Mock() + ) + + # Generate graph + graph_data_model = data_model_grapher.graph + + # Instantiate DataModelGraphExplorer + DMGE = DataModelGraphExplorer(graph_data_model, logger=Mock()) + + return DMGE + + @staticmethod + def get_python_version(): + version = sys.version + base_version = ".".join(version.split(".")[0:2]) + + return base_version + + +@pytest.fixture(scope="function") +def helpers(): + yield Helpers + + +@pytest.fixture(name="dmge", scope="function") +def DMGE(helpers: Helpers) -> DataModelGraphExplorer: + """Fixture to instantiate a DataModelGraphExplorer object.""" + dmge = helpers.get_data_model_graph_explorer(path="example.model.jsonld") + return dmge + + +@pytest.fixture(name="dmge_column_type", scope="function") +def DMGE_column_type(helpers: Helpers) -> DataModelGraphExplorer: + """Fixture to instantiate a DataModelGraphExplorer object using the data model with column types""" + dmge = helpers.get_data_model_graph_explorer( + path="example.model.column_type_component.csv" + ) + return dmge diff --git a/tests/unit/synapseclient/extensions/data/example.model.column_type_component.csv b/tests/unit/synapseclient/extensions/data/example.model.column_type_component.csv new file mode 100644 index 000000000..bb7f10fa4 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/example.model.column_type_component.csv @@ -0,0 +1,83 @@ +Attribute,Description,Valid Values,DependsOn,Properties,Required,Parent,DependsOn Component,Source,Validation Rules,columnType +Component,,,,,TRUE,,,,, +Patient,,,"Patient ID, Sex, Year of Birth, Diagnosis, Component",,FALSE,DataType,,,, +Patient ID,,,,,TRUE,DataProperty,,,#Patient unique warning^^#Biospecimen unique error, +Sex,,"Female, Male, Other",,,TRUE,DataProperty,,,, +Year of Birth,,,,,FALSE,DataProperty,,,, +Diagnosis,,"Healthy, Cancer",,,TRUE,DataProperty,,,, +Cancer,,,"Cancer Type, Family History",,FALSE,ValidValue,,,, +Cancer Type,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,, +Family History,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,list strict, +Biospecimen,,,"Sample ID, Patient ID, Tissue Status, Component",,FALSE,DataType,Patient,,, +Sample ID,,,,,TRUE,DataProperty,,,, +Tissue Status,,"Healthy, Malignant, None",,,TRUE,DataProperty,,,, +Bulk RNA-seq Assay,,,"Filename, Sample ID, File Format, Component",,FALSE,DataType,Biospecimen,,, +Filename,,,,,TRUE,DataProperty,,,#MockFilename filenameExists^^, +File Format,,"FASTQ, BAM, CRAM, CSV/TSV",,,TRUE,DataProperty,,,, +BAM,,,Genome Build,,FALSE,ValidValue,,,, +CRAM,,,"Genome Build, Genome FASTA",,FALSE,ValidValue,,,, +CSV/TSV,,,Genome Build,,FALSE,ValidValue,,,, +Genome Build,,"GRCh37, GRCh38, GRCm38, GRCm39",,,TRUE,DataProperty,,,, +Genome FASTA,,,,,TRUE,DataProperty,,,, +MockComponent,Component to hold mock attributes for testing all validation rules,,"Component, Check List, Check List Enum, Check List Like, Check List Like Enum, Check List Strict, Check List Enum Strict, Check Regex List, Check Regex List Like, Check Regex List Strict, Check Regex Single, Check Regex Format, Check Regex Integer, Check Num, Check Float, Check Int, Check String, Check URL,Check Match at Least, Check Match at Least values, Check Match Exactly, Check Match Exactly values, Check Match None, Check Match None values, Check Recommended, Check Ages, Check Unique, Check Range, Check Date, Check NA",,FALSE,DataType,,,, +Check List,,,,,TRUE,DataProperty,,,list, +Check List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list, +Check List Like,,,,,TRUE,DataProperty,,,list like, +Check List Like Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list like, +Check List Strict,,,,,TRUE,DataProperty,,,list strict, +Check List Enum Strict,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict, +Check Regex List,,,,,TRUE,DataProperty,,,list::regex match [a-f], +Check Regex List Strict,,,,,TRUE,DataProperty,,,list strict::regex match [a-f], +Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f], +Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f], +Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f], +Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$, +Check Num,,,,,TRUE,DataProperty,,,num error, +Check Float,,,,,TRUE,DataProperty,,,float error, +Check Int,,,,,TRUE,DataProperty,,,int error, +Check String,,,,,TRUE,DataProperty,,,str error, +Check URL,,,,,TRUE,DataProperty,,,url, +Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set, +Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set, +Check Match None,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNone set error, +Check Match at Least values,,,,,TRUE,DataProperty,,,matchAtLeastOne MockComponent.checkMatchatLeastvalues value, +Check Match Exactly values,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactlyvalues value, +Check Match None values,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNonevalues value error, +Check Recommended,,,,,FALSE,DataProperty,,,recommended, +Check Ages,,,,,TRUE,DataProperty,,,protectAges, +Check Unique,,,,,TRUE,DataProperty,,,unique error, +Check Range,,,,,TRUE,DataProperty,,,inRange 50 100 error, +Check Date,,,,,TRUE,DataProperty,,,date, +Check NA,,,,,TRUE,DataProperty,,,int::IsNA, +MockRDB,,,"Component, MockRDB_id, SourceManifest",,FALSE,DataType,,,, +MockRDB_id,,,,,TRUE,DataProperty,,,int, +SourceManifest,,,,,TRUE,DataProperty,,,, +MockFilename,,,"Component, Filename",,FALSE,DataType,,,, +JSONSchemaComponent,Component to hold attributes for testing JSON Schemas,,"Component, No Rules, No Rules Not Required, String, String Not Required, Enum, Enum Not Required, Date, URL, InRange, Regex, List, List Not Required, List Enum, List Enum Not Required, List String, List InRange",,FALSE,DataType,,,, +No Rules,,,,,TRUE,DataProperty,,,, +No Rules Not Required,,,,,FALSE,DataProperty,,,, +String,,,,,TRUE,DataProperty,,,str error,string +String Not Required,,,,,FALSE,DataProperty,,,str error,string +Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,, +Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,, +Date,,,,,TRUE,DataProperty,,,date,string +URL,,,,,TRUE,DataProperty,,,url,string +InRange,,,,,TRUE,DataProperty,,,inRange 50 100,number +Regex,,,,,TRUE,DataProperty,,,regex search [a-f],string +List,,,,,TRUE,DataProperty,,,list, +List Not Required,,,,,FALSE,DataProperty,,,list, +List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list, +List Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,list, +List String,,,,,TRUE,DataProperty,,,list::str,string +List InRange,,,,,TRUE,DataProperty,,,list::inRange 50 100,number +TypeDefinitionComponent,Component to check type specification,,"Component, String type, String type caps, Int type, Int type caps, Num type, Num type caps, Nan type, Missing type, Boolean type, Boolean type caps",,FALSE,DataType,,,, +String type,,,,,TRUE,DataProperty,,,,string +String type caps,,,,,TRUE,DataProperty,,,,STRING +Int type,,,,,TRUE,DataProperty,,,,integer +Int type caps,,,,,TRUE,DataProperty,,,,INTEGER +Num type,,,,,TRUE,DataProperty,,,,number +Num type caps,,,,,TRUE,DataProperty,,,,NUMBER +Nan type,,,,,TRUE,DataProperty,,,,nan +Missing type,,,,,TRUE,DataProperty,,,, +Boolean type,,,,,TRUE,DataProperty,,,,boolean +Boolean type caps,,,,,TRUE,DataProperty,,,,BOOLEAN diff --git a/tests/unit/synapseclient/extensions/data/example.model.csv b/tests/unit/synapseclient/extensions/data/example.model.csv new file mode 100644 index 000000000..fb65ffa8c --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/example.model.csv @@ -0,0 +1,72 @@ +Attribute,Description,Valid Values,DependsOn,Properties,Required,Parent,DependsOn Component,Source,Validation Rules +Component,,,,,TRUE,,,, +Patient,,,"Patient ID, Sex, Year of Birth, Diagnosis, Component",,FALSE,DataType,,, +Patient ID,,,,,TRUE,DataProperty,,,#Patient unique warning^^#Biospecimen unique error +Sex,,"Female, Male, Other",,,TRUE,DataProperty,,, +Year of Birth,,,,,FALSE,DataProperty,,, +Diagnosis,,"Healthy, Cancer",,,TRUE,DataProperty,,, +Cancer,,,"Cancer Type, Family History",,FALSE,ValidValue,,, +Cancer Type,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,, +Family History,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,list strict +Biospecimen,,,"Sample ID, Patient ID, Tissue Status, Component",,FALSE,DataType,Patient,, +Sample ID,,,,,TRUE,DataProperty,,, +Tissue Status,,"Healthy, Malignant, None",,,TRUE,DataProperty,,, +Bulk RNA-seq Assay,,,"Filename, Sample ID, File Format, Component",,FALSE,DataType,Biospecimen,, +Filename,,,,,TRUE,DataProperty,,,#MockFilename filenameExists^^ +File Format,,"FASTQ, BAM, CRAM, CSV/TSV",,,TRUE,DataProperty,,, +BAM,,,Genome Build,,FALSE,ValidValue,,, +CRAM,,,"Genome Build, Genome FASTA",,FALSE,ValidValue,,, +CSV/TSV,,,Genome Build,,FALSE,ValidValue,,, +Genome Build,,"GRCh37, GRCh38, GRCm38, GRCm39",,,TRUE,DataProperty,,, +Genome FASTA,,,,,TRUE,DataProperty,,, +MockComponent,Component to hold mock attributes for testing all validation rules,,"Component, Check List, Check List Enum, Check List Like, Check List Like Enum, Check List Strict, Check List Enum Strict, Check Regex List, Check Regex List Like, Check Regex List Strict, Check Regex Single, Check Regex Format, Check Regex Integer, Check Num, Check Float, Check Int, Check String, Check URL,Check Match at Least, Check Match at Least values, Check Match Exactly, Check Match Exactly values, Check Match None, Check Match None values, Check Recommended, Check Ages, Check Unique, Check Range, Check Date, Check NA",,FALSE,DataType,,, +Check List,,,,,TRUE,DataProperty,,,list +Check List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list +Check List Like,,,,,TRUE,DataProperty,,,list like +Check List Like Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list like +Check List Strict,,,,,TRUE,DataProperty,,,list strict +Check List Enum Strict,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict +Check Regex List,,,,,TRUE,DataProperty,,,list::regex match [a-f] +Check Regex List Strict,,,,,TRUE,DataProperty,,,list strict::regex match [a-f] +Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f] +Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f] +Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f] +Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$ +Check Num,,,,,TRUE,DataProperty,,,num error +Check Float,,,,,TRUE,DataProperty,,,float error +Check Int,,,,,TRUE,DataProperty,,,int error +Check String,,,,,TRUE,DataProperty,,,str error +Check URL,,,,,TRUE,DataProperty,,,url +Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set +Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set +Check Match None,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNone set error +Check Match at Least values,,,,,TRUE,DataProperty,,,matchAtLeastOne MockComponent.checkMatchatLeastvalues value +Check Match Exactly values,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactlyvalues value +Check Match None values,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNonevalues value error +Check Recommended,,,,,FALSE,DataProperty,,,recommended +Check Ages,,,,,TRUE,DataProperty,,,protectAges +Check Unique,,,,,TRUE,DataProperty,,,unique error +Check Range,,,,,TRUE,DataProperty,,,inRange 50 100 error +Check Date,,,,,TRUE,DataProperty,,,date +Check NA,,,,,TRUE,DataProperty,,,int::IsNA +MockRDB,,,"Component, MockRDB_id, SourceManifest",,FALSE,DataType,,, +MockRDB_id,,,,,TRUE,DataProperty,,,int +SourceManifest,,,,,TRUE,DataProperty,,, +MockFilename,,,"Component, Filename",,FALSE,DataType,,, +JSONSchemaComponent,Component to hold attributes for testing JSON Schemas,,"Component, No Rules, No Rules Not Required, String, String Not Required, Enum, Enum Not Required, Date, URL, InRange, Regex, List, List Not Required, List Enum, List Enum Not Required, List String, List InRange",,FALSE,DataType,,, +No Rules,,,,,TRUE,DataProperty,,, +No Rules Not Required,,,,,FALSE,DataProperty,,, +String,,,,,TRUE,DataProperty,,,str error +String Not Required,,,,,FALSE,DataProperty,,,str error +Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,, +Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,, +Date,,,,,TRUE,DataProperty,,,date +URL,,,,,TRUE,DataProperty,,,url +InRange,,,,,TRUE,DataProperty,,,inRange 50 100 +Regex,,,,,TRUE,DataProperty,,,regex search [a-f] +List,,,,,TRUE,DataProperty,,,list +List Not Required,,,,,FALSE,DataProperty,,,list +List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list +List Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,list +List String,,,,,TRUE,DataProperty,,,list::str +List InRange,,,,,TRUE,DataProperty,,,list::inRange 50 100 diff --git a/tests/unit/synapseclient/extensions/data/example.model.jsonld b/tests/unit/synapseclient/extensions/data/example.model.jsonld new file mode 100644 index 000000000..b3c3e0da4 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/example.model.jsonld @@ -0,0 +1,2244 @@ +{ + "@context": { + "bts": "http://schema.biothings.io/", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "schema": "http://schema.org/", + "xsd": "http://www.w3.org/2001/XMLSchema#" + }, + "@graph": [ + { + "@id": "bts:Component", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Component", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Component", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Patient", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Patient", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Patient", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:PatientID" + }, + { + "@id": "bts:Sex" + }, + { + "@id": "bts:YearofBirth" + }, + { + "@id": "bts:Diagnosis" + }, + { + "@id": "bts:Component" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:PatientID", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "PatientID", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Patient ID", + "sms:required": "sms:true", + "sms:validationRules": { + "Biospecimen": "unique error", + "Patient": "unique warning" + } + }, + { + "@id": "bts:Sex", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Sex", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Female" + }, + { + "@id": "bts:Male" + }, + { + "@id": "bts:Other" + } + ], + "sms:displayName": "Sex", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:YearofBirth", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "YearofBirth", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Year of Birth", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Diagnosis", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Diagnosis", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Healthy" + }, + { + "@id": "bts:Cancer" + } + ], + "sms:displayName": "Diagnosis", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:DataType", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "DataType", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "DataType", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:DataProperty", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "DataProperty", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "DataProperty", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Female", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Female", + "rdfs:subClassOf": [ + { + "@id": "bts:Sex" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Female", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Male", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Male", + "rdfs:subClassOf": [ + { + "@id": "bts:Sex" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Male", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Other", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Other", + "rdfs:subClassOf": [ + { + "@id": "bts:Sex" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Other", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Healthy", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Healthy", + "rdfs:subClassOf": [ + { + "@id": "bts:Diagnosis" + }, + { + "@id": "bts:TissueStatus" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Healthy", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Cancer", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Cancer", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:Diagnosis" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Cancer", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CancerType", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CancerType", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Breast" + }, + { + "@id": "bts:Colorectal" + }, + { + "@id": "bts:Lung" + }, + { + "@id": "bts:Prostate" + }, + { + "@id": "bts:Skin" + } + ], + "sms:displayName": "Cancer Type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:FamilyHistory", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "FamilyHistory", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Breast" + }, + { + "@id": "bts:Colorectal" + }, + { + "@id": "bts:Lung" + }, + { + "@id": "bts:Prostate" + }, + { + "@id": "bts:Skin" + } + ], + "sms:displayName": "Family History", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict" + ] + }, + { + "@id": "bts:ValidValue", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ValidValue", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ValidValue", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Breast", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Breast", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Breast", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Colorectal", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Colorectal", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Colorectal", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Lung", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Lung", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Lung", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Prostate", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Prostate", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Prostate", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Skin", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Skin", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Skin", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Biospecimen", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Biospecimen", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Biospecimen", + "sms:required": "sms:false", + "sms:requiresComponent": [ + { + "@id": "bts:Patient" + } + ], + "sms:requiresDependency": [ + { + "@id": "bts:SampleID" + }, + { + "@id": "bts:PatientID" + }, + { + "@id": "bts:TissueStatus" + }, + { + "@id": "bts:Component" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:SampleID", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "SampleID", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Sample ID", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:TissueStatus", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "TissueStatus", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Healthy" + }, + { + "@id": "bts:Malignant" + }, + { + "@id": "bts:None" + } + ], + "sms:displayName": "Tissue Status", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Malignant", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Malignant", + "rdfs:subClassOf": [ + { + "@id": "bts:TissueStatus" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Malignant", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:None", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "None", + "rdfs:subClassOf": [ + { + "@id": "bts:TissueStatus" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "None", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:BulkRNA-seqAssay", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "BulkRNA-seqAssay", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Bulk RNA-seq Assay", + "sms:required": "sms:false", + "sms:requiresComponent": [ + { + "@id": "bts:Biospecimen" + } + ], + "sms:requiresDependency": [ + { + "@id": "bts:Filename" + }, + { + "@id": "bts:SampleID" + }, + { + "@id": "bts:FileFormat" + }, + { + "@id": "bts:Component" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:Filename", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Filename", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Filename", + "sms:required": "sms:true", + "sms:validationRules": { + "MockFilename": "filenameExists" + } + }, + { + "@id": "bts:FileFormat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "FileFormat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:FASTQ" + }, + { + "@id": "bts:BAM" + }, + { + "@id": "bts:CRAM" + }, + { + "@id": "bts:CSV/TSV" + } + ], + "sms:displayName": "File Format", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:FASTQ", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "FASTQ", + "rdfs:subClassOf": [ + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "FASTQ", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:BAM", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "BAM", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "BAM", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:GenomeBuild" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CRAM", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CRAM", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "CRAM", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:GenomeBuild" + }, + { + "@id": "bts:GenomeFASTA" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CSV/TSV", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CSV/TSV", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "CSV/TSV", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:GenomeBuild" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:GenomeBuild", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GenomeBuild", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:GRCh37" + }, + { + "@id": "bts:GRCh38" + }, + { + "@id": "bts:GRCm38" + }, + { + "@id": "bts:GRCm39" + } + ], + "sms:displayName": "Genome Build", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:GenomeFASTA", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GenomeFASTA", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Genome FASTA", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCh37", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCh37", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCh37", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCh38", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCh38", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCh38", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCm38", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCm38", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCm38", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCm39", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCm39", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCm39", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:MockComponent", + "@type": "rdfs:Class", + "rdfs:comment": "Component to hold mock attributes for testing all validation rules", + "rdfs:label": "MockComponent", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockComponent", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:CheckList" + }, + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLike" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListStrict" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:CheckRegexList" + }, + { + "@id": "bts:CheckRegexListLike" + }, + { + "@id": "bts:CheckRegexListStrict" + }, + { + "@id": "bts:CheckRegexSingle" + }, + { + "@id": "bts:CheckRegexFormat" + }, + { + "@id": "bts:CheckRegexInteger" + }, + { + "@id": "bts:CheckNum" + }, + { + "@id": "bts:CheckFloat" + }, + { + "@id": "bts:CheckInt" + }, + { + "@id": "bts:CheckString" + }, + { + "@id": "bts:CheckURL" + }, + { + "@id": "bts:CheckMatchatLeast" + }, + { + "@id": "bts:CheckMatchatLeastvalues" + }, + { + "@id": "bts:CheckMatchExactly" + }, + { + "@id": "bts:CheckMatchExactlyvalues" + }, + { + "@id": "bts:CheckMatchNone" + }, + { + "@id": "bts:CheckMatchNonevalues" + }, + { + "@id": "bts:CheckRecommended" + }, + { + "@id": "bts:CheckAges" + }, + { + "@id": "bts:CheckUnique" + }, + { + "@id": "bts:CheckRange" + }, + { + "@id": "bts:CheckDate" + }, + { + "@id": "bts:CheckNA" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CheckList", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckList", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check List", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:CheckListEnum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListEnum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Check List Enum", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:CheckListLike", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListLike", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check List Like", + "sms:required": "sms:true", + "sms:validationRules": [ + "list like" + ] + }, + { + "@id": "bts:CheckListLikeEnum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListLikeEnum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Check List Like Enum", + "sms:required": "sms:true", + "sms:validationRules": [ + "list like" + ] + }, + { + "@id": "bts:CheckListStrict", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListStrict", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check List Strict", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict" + ] + }, + { + "@id": "bts:CheckListEnumStrict", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListEnumStrict", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Check List Enum Strict", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict" + ] + }, + { + "@id": "bts:CheckRegexList", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexList", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex List", + "sms:required": "sms:true", + "sms:validationRules": [ + "list", + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexListLike", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexListLike", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex List Like", + "sms:required": "sms:true", + "sms:validationRules": [ + "list like", + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexListStrict", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexListStrict", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex List Strict", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict", + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexSingle", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexSingle", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex Single", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex search [a-f]" + ] + }, + { + "@id": "bts:CheckRegexFormat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexFormat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex Format", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexInteger", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexInteger", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex Integer", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex search ^\\d+$" + ] + }, + { + "@id": "bts:CheckNum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckNum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Num", + "sms:required": "sms:true", + "sms:validationRules": [ + "num error" + ] + }, + { + "@id": "bts:CheckFloat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckFloat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Float", + "sms:required": "sms:true", + "sms:validationRules": [ + "float error" + ] + }, + { + "@id": "bts:CheckInt", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckInt", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Int", + "sms:required": "sms:true", + "sms:validationRules": [ + "int error" + ] + }, + { + "@id": "bts:CheckString", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckString", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check String", + "sms:required": "sms:true", + "sms:validationRules": [ + "str error" + ] + }, + { + "@id": "bts:CheckURL", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckURL", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check URL", + "sms:required": "sms:true", + "sms:validationRules": [ + "url" + ] + }, + { + "@id": "bts:CheckMatchatLeast", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchatLeast", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match at Least", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchAtLeastOne Patient.PatientID set" + ] + }, + { + "@id": "bts:CheckMatchatLeastvalues", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchatLeastvalues", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match at Least values", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchAtLeastOne MockComponent.checkMatchatLeastvalues value" + ] + }, + { + "@id": "bts:CheckMatchExactly", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchExactly", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match Exactly", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchExactlyOne MockComponent.checkMatchExactly set" + ] + }, + { + "@id": "bts:CheckMatchExactlyvalues", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchExactlyvalues", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match Exactly values", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchExactlyOne MockComponent.checkMatchExactlyvalues value" + ] + }, + { + "@id": "bts:CheckMatchNone", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchNone", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match None", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchNone MockComponent.checkMatchNone set error" + ] + }, + { + "@id": "bts:CheckMatchNonevalues", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchNonevalues", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match None values", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchNone MockComponent.checkMatchNonevalues value error" + ] + }, + { + "@id": "bts:CheckRecommended", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRecommended", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Recommended", + "sms:required": "sms:false", + "sms:validationRules": [ + "recommended" + ] + }, + { + "@id": "bts:CheckAges", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckAges", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Ages", + "sms:required": "sms:true", + "sms:validationRules": [ + "protectAges" + ] + }, + { + "@id": "bts:CheckUnique", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckUnique", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Unique", + "sms:required": "sms:true", + "sms:validationRules": [ + "unique error" + ] + }, + { + "@id": "bts:CheckRange", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRange", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Range", + "sms:required": "sms:true", + "sms:validationRules": [ + "inRange 50 100 error" + ] + }, + { + "@id": "bts:CheckDate", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckDate", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Date", + "sms:required": "sms:true", + "sms:validationRules": [ + "date" + ] + }, + { + "@id": "bts:CheckNA", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckNA", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check NA", + "sms:required": "sms:true", + "sms:validationRules": [ + "int", + "IsNA" + ] + }, + { + "@id": "bts:Ab", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Ab", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ab", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Cd", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Cd", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "cd", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Ef", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Ef", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ef", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Gh", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Gh", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "gh", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:MockRDB", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MockRDB", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockRDB", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:MockRDBId" + }, + { + "@id": "bts:SourceManifest" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:MockRDBId", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MockRDBId", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockRDB_id", + "sms:required": "sms:true", + "sms:validationRules": [ + "int" + ] + }, + { + "@id": "bts:SourceManifest", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "SourceManifest", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "SourceManifest", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:MockFilename", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MockFilename", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockFilename", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:Filename" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:JSONSchemaComponent", + "@type": "rdfs:Class", + "rdfs:comment": "Component to hold attributes for testing JSON Schemas", + "rdfs:label": "JSONSchemaComponent", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "JSONSchemaComponent", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:NoRules" + }, + { + "@id": "bts:NoRulesNotRequired" + }, + { + "@id": "bts:String" + }, + { + "@id": "bts:StringNotRequired" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:Date" + }, + { + "@id": "bts:URL" + }, + { + "@id": "bts:InRange" + }, + { + "@id": "bts:Regex" + }, + { + "@id": "bts:List" + }, + { + "@id": "bts:ListNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + }, + { + "@id": "bts:ListString" + }, + { + "@id": "bts:ListInRange" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:NoRules", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "NoRules", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "No Rules", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:NoRulesNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "NoRulesNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "No Rules Not Required", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:String", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "String", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "String", + "sms:required": "sms:true", + "sms:validationRules": [ + "str error" + ] + }, + { + "@id": "bts:StringNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "StringNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "String Not Required", + "sms:required": "sms:false", + "sms:validationRules": [ + "str error" + ] + }, + { + "@id": "bts:Enum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Enum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Enum", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:EnumNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "EnumNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Enum Not Required", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Date", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Date", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Date", + "sms:required": "sms:true", + "sms:validationRules": [ + "date" + ] + }, + { + "@id": "bts:URL", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "URL", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "URL", + "sms:required": "sms:true", + "sms:validationRules": [ + "url" + ] + }, + { + "@id": "bts:InRange", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "InRange", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "InRange", + "sms:required": "sms:true", + "sms:validationRules": [ + "inRange 50 100" + ] + }, + { + "@id": "bts:Regex", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Regex", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Regex", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex search [a-f]" + ] + }, + { + "@id": "bts:List", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "List", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "List", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:ListNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "List Not Required", + "sms:required": "sms:false", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:ListEnum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListEnum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "List Enum", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:ListEnumNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListEnumNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "List Enum Not Required", + "sms:required": "sms:false", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:ListString", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListString", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "List String", + "sms:required": "sms:true", + "sms:validationRules": [ + "list", + "str" + ] + }, + { + "@id": "bts:ListInRange", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListInRange", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "List InRange", + "sms:required": "sms:true", + "sms:validationRules": [ + "list", + "inRange 50 100" + ] + } + ], + "@id": "http://schema.biothings.io/#0.1" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Biospecimen.schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Biospecimen.schema.json new file mode 100644 index 000000000..41097740c --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Biospecimen.schema.json @@ -0,0 +1,50 @@ +{ + "$id": "http://example.com/Biospecimen_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "TBD", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "PatientID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Patient ID" + }, + "SampleID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Sample ID" + }, + "TissueStatus": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Healthy", + "Malignant", + "None" + ], + "title": "enum" + } + ], + "title": "Tissue Status" + } + }, + "required": [ + "Component", + "PatientID", + "SampleID", + "TissueStatus" + ], + "title": "Biospecimen_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json new file mode 100644 index 000000000..c7d8cf158 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json @@ -0,0 +1,168 @@ +{ + "$id": "http://example.com/BulkRNA-seqAssay_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "allOf": [ + { + "if": { + "properties": { + "File Format": { + "enum": [ + "BAM" + ] + } + } + }, + "then": { + "properties": { + "Genome Build": { + "not": { + "type": "null" + } + } + }, + "required": [ + "Genome Build" + ] + } + }, + { + "if": { + "properties": { + "File Format": { + "enum": [ + "CRAM" + ] + } + } + }, + "then": { + "properties": { + "Genome Build": { + "not": { + "type": "null" + } + } + }, + "required": [ + "Genome Build" + ] + } + }, + { + "if": { + "properties": { + "File Format": { + "enum": [ + "CSV/TSV" + ] + } + } + }, + "then": { + "properties": { + "Genome Build": { + "not": { + "type": "null" + } + } + }, + "required": [ + "Genome Build" + ] + } + }, + { + "if": { + "properties": { + "File Format": { + "enum": [ + "CRAM" + ] + } + } + }, + "then": { + "properties": { + "Genome FASTA": { + "not": { + "type": "null" + } + } + }, + "required": [ + "Genome FASTA" + ] + } + } + ], + "description": "TBD", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "File Format": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "BAM", + "CRAM", + "CSV/TSV", + "FASTQ" + ], + "title": "enum" + } + ], + "title": "File Format" + }, + "Filename": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Filename" + }, + "Genome Build": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "GRCh37", + "GRCh38", + "GRCm38", + "GRCm39" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Genome Build" + }, + "Genome FASTA": { + "description": "TBD", + "title": "Genome FASTA" + }, + "Sample ID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Sample ID" + } + }, + "required": [ + "Component", + "File Format", + "Filename", + "Sample ID" + ], + "title": "BulkRNA-seqAssay_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json new file mode 100644 index 000000000..872888213 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json @@ -0,0 +1,168 @@ +{ + "$id": "http://example.com/BulkRNA-seqAssay_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "allOf": [ + { + "if": { + "properties": { + "FileFormat": { + "enum": [ + "BAM" + ] + } + } + }, + "then": { + "properties": { + "GenomeBuild": { + "not": { + "type": "null" + } + } + }, + "required": [ + "GenomeBuild" + ] + } + }, + { + "if": { + "properties": { + "FileFormat": { + "enum": [ + "CRAM" + ] + } + } + }, + "then": { + "properties": { + "GenomeBuild": { + "not": { + "type": "null" + } + } + }, + "required": [ + "GenomeBuild" + ] + } + }, + { + "if": { + "properties": { + "FileFormat": { + "enum": [ + "CSV/TSV" + ] + } + } + }, + "then": { + "properties": { + "GenomeBuild": { + "not": { + "type": "null" + } + } + }, + "required": [ + "GenomeBuild" + ] + } + }, + { + "if": { + "properties": { + "FileFormat": { + "enum": [ + "CRAM" + ] + } + } + }, + "then": { + "properties": { + "GenomeFASTA": { + "not": { + "type": "null" + } + } + }, + "required": [ + "GenomeFASTA" + ] + } + } + ], + "description": "TBD", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "FileFormat": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "BAM", + "CRAM", + "CSV/TSV", + "FASTQ" + ], + "title": "enum" + } + ], + "title": "File Format" + }, + "Filename": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Filename" + }, + "GenomeBuild": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "GRCh37", + "GRCh38", + "GRCm38", + "GRCm39" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Genome Build" + }, + "GenomeFASTA": { + "description": "TBD", + "title": "Genome FASTA" + }, + "SampleID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Sample ID" + } + }, + "required": [ + "Component", + "FileFormat", + "Filename", + "SampleID" + ], + "title": "BulkRNA-seqAssay_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json new file mode 100644 index 000000000..b378f46d2 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json @@ -0,0 +1,211 @@ +{ + "$id": "http://example.com/JSONSchemaComponent_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "Component to hold attributes for testing JSON Schemas", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "Date": { + "description": "TBD", + "format": "date", + "title": "Date", + "type": "string" + }, + "Enum": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ], + "title": "enum" + } + ], + "title": "Enum" + }, + "EnumNotRequired": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Enum Not Required" + }, + "InRange": { + "description": "TBD", + "maximum": 100.0, + "minimum": 50.0, + "title": "InRange", + "type": "number" + }, + "List": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "List" + }, + "ListEnum": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "List Enum" + }, + "ListEnumNotRequired": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "List Enum Not Required" + }, + "ListInRange": { + "description": "TBD", + "oneOf": [ + { + "items": { + "maximum": 100.0, + "minimum": 50.0, + "type": "number" + }, + "title": "array", + "type": "array" + } + ], + "title": "List InRange" + }, + "ListNotRequired": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "List Not Required" + }, + "ListString": { + "description": "TBD", + "oneOf": [ + { + "items": { + "type": "string" + }, + "title": "array", + "type": "array" + } + ], + "title": "List String" + }, + "NoRules": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "No Rules" + }, + "NoRulesNotRequired": { + "description": "TBD", + "title": "No Rules Not Required" + }, + "Regex": { + "description": "TBD", + "pattern": "[a-f]", + "title": "Regex", + "type": "string" + }, + "String": { + "description": "TBD", + "title": "String", + "type": "string" + }, + "StringNotRequired": { + "description": "TBD", + "oneOf": [ + { + "title": "string", + "type": "string" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "String Not Required" + }, + "URL": { + "description": "TBD", + "format": "uri", + "title": "URL", + "type": "string" + } + }, + "required": [ + "Component", + "Date", + "Enum", + "InRange", + "List", + "ListEnum", + "ListInRange", + "ListString", + "NoRules", + "Regex", + "String", + "URL" + ], + "title": "JSONSchemaComponent_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.schema.json new file mode 100644 index 000000000..444e1ab44 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.schema.json @@ -0,0 +1,203 @@ +{ + "$id": "http://example.com/JSONSchemaComponent_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "Component to hold attributes for testing JSON Schemas", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "Date": { + "description": "TBD", + "format": "date", + "not": { + "type": "null" + }, + "title": "Date" + }, + "Enum": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ], + "title": "enum" + } + ], + "title": "Enum" + }, + "EnumNotRequired": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Enum Not Required" + }, + "InRange": { + "description": "TBD", + "maximum": 100.0, + "minimum": 50.0, + "not": { + "type": "null" + }, + "title": "InRange" + }, + "List": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "List" + }, + "ListEnum": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "List Enum" + }, + "ListEnumNotRequired": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "List Enum Not Required" + }, + "ListInRange": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "List InRange" + }, + "ListNotRequired": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "List Not Required" + }, + "ListString": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "List String" + }, + "NoRules": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "No Rules" + }, + "NoRulesNotRequired": { + "description": "TBD", + "title": "No Rules Not Required" + }, + "Regex": { + "description": "TBD", + "not": { + "type": "null" + }, + "pattern": "[a-f]", + "title": "Regex" + }, + "String": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "String" + }, + "StringNotRequired": { + "description": "TBD", + "title": "String Not Required" + }, + "URL": { + "description": "TBD", + "format": "uri", + "not": { + "type": "null" + }, + "title": "URL" + } + }, + "required": [ + "Component", + "Date", + "Enum", + "InRange", + "List", + "ListEnum", + "ListInRange", + "ListString", + "NoRules", + "Regex", + "String", + "URL" + ], + "title": "JSONSchemaComponent_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockComponent.schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockComponent.schema.json new file mode 100644 index 000000000..3dd6b7b97 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockComponent.schema.json @@ -0,0 +1,305 @@ +{ + "$id": "http://example.com/MockComponent_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "Component to hold mock attributes for testing all validation rules", + "properties": { + "CheckAges": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Ages" + }, + "CheckDate": { + "description": "TBD", + "format": "date", + "not": { + "type": "null" + }, + "title": "Check Date" + }, + "CheckFloat": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Float" + }, + "CheckInt": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Int" + }, + "CheckList": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check List" + }, + "CheckListEnum": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "Check List Enum" + }, + "CheckListEnumStrict": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "Check List Enum Strict" + }, + "CheckListLike": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check List Like" + }, + "CheckListLikeEnum": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "Check List Like Enum" + }, + "CheckListStrict": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check List Strict" + }, + "CheckMatchExactly": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match Exactly" + }, + "CheckMatchExactlyvalues": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match Exactly values" + }, + "CheckMatchNone": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match None" + }, + "CheckMatchNonevalues": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match None values" + }, + "CheckMatchatLeast": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match at Least" + }, + "CheckMatchatLeastvalues": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match at Least values" + }, + "CheckNA": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check NA" + }, + "CheckNum": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Num" + }, + "CheckRange": { + "description": "TBD", + "maximum": 100.0, + "minimum": 50.0, + "not": { + "type": "null" + }, + "title": "Check Range" + }, + "CheckRecommended": { + "description": "TBD", + "title": "Check Recommended" + }, + "CheckRegexFormat": { + "description": "TBD", + "not": { + "type": "null" + }, + "pattern": "^[a-f]", + "title": "Check Regex Format" + }, + "CheckRegexInteger": { + "description": "TBD", + "not": { + "type": "null" + }, + "pattern": "^\\d+$", + "title": "Check Regex Integer" + }, + "CheckRegexList": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check Regex List" + }, + "CheckRegexListLike": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check Regex List Like" + }, + "CheckRegexListStrict": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check Regex List Strict" + }, + "CheckRegexSingle": { + "description": "TBD", + "not": { + "type": "null" + }, + "pattern": "[a-f]", + "title": "Check Regex Single" + }, + "CheckString": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check String" + }, + "CheckURL": { + "description": "TBD", + "format": "uri", + "not": { + "type": "null" + }, + "title": "Check URL" + }, + "CheckUnique": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Unique" + }, + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + } + }, + "required": [ + "CheckAges", + "CheckDate", + "CheckFloat", + "CheckInt", + "CheckList", + "CheckListEnum", + "CheckListEnumStrict", + "CheckListLike", + "CheckListLikeEnum", + "CheckListStrict", + "CheckMatchExactly", + "CheckMatchExactlyvalues", + "CheckMatchNone", + "CheckMatchNonevalues", + "CheckMatchatLeast", + "CheckMatchatLeastvalues", + "CheckNA", + "CheckNum", + "CheckRange", + "CheckRegexFormat", + "CheckRegexInteger", + "CheckRegexList", + "CheckRegexListLike", + "CheckRegexListStrict", + "CheckRegexSingle", + "CheckString", + "CheckURL", + "CheckUnique", + "Component" + ], + "title": "MockComponent_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockFilename.schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockFilename.schema.json new file mode 100644 index 000000000..0fe609256 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockFilename.schema.json @@ -0,0 +1,27 @@ +{ + "$id": "http://example.com/MockFilename_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "TBD", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "Filename": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Filename" + } + }, + "required": [ + "Component", + "Filename" + ], + "title": "MockFilename_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockRDB.schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockRDB.schema.json new file mode 100644 index 000000000..003865f8e --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockRDB.schema.json @@ -0,0 +1,35 @@ +{ + "$id": "http://example.com/MockRDB_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "TBD", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "MockRDBId": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "MockRDB_id" + }, + "SourceManifest": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "SourceManifest" + } + }, + "required": [ + "Component", + "MockRDBId", + "SourceManifest" + ], + "title": "MockRDB_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.display_names_schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.display_names_schema.json new file mode 100644 index 000000000..42f32bd41 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.display_names_schema.json @@ -0,0 +1,151 @@ +{ + "$id": "http://example.com/Patient_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "allOf": [ + { + "if": { + "properties": { + "Diagnosis": { + "enum": [ + "Cancer" + ] + } + } + }, + "then": { + "properties": { + "Cancer Type": { + "not": { + "type": "null" + } + } + }, + "required": [ + "Cancer Type" + ] + } + }, + { + "if": { + "properties": { + "Diagnosis": { + "enum": [ + "Cancer" + ] + } + } + }, + "then": { + "properties": { + "Family History": { + "not": { + "type": "null" + } + } + }, + "required": [ + "Family History" + ] + } + } + ], + "description": "TBD", + "properties": { + "Cancer Type": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Breast", + "Colorectal", + "Lung", + "Prostate", + "Skin" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Cancer Type" + }, + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "Diagnosis": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Cancer", + "Healthy" + ], + "title": "enum" + } + ], + "title": "Diagnosis" + }, + "Family History": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "Breast", + "Colorectal", + "Lung", + "Prostate", + "Skin" + ] + }, + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Family History" + }, + "Patient ID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Patient ID" + }, + "Sex": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Female", + "Male", + "Other" + ], + "title": "enum" + } + ], + "title": "Sex" + }, + "Year of Birth": { + "description": "TBD", + "title": "Year of Birth" + } + }, + "required": [ + "Component", + "Diagnosis", + "Patient ID", + "Sex" + ], + "title": "Patient_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.schema.json b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.schema.json new file mode 100644 index 000000000..b6778acfe --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.schema.json @@ -0,0 +1,151 @@ +{ + "$id": "http://example.com/Patient_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "allOf": [ + { + "if": { + "properties": { + "Diagnosis": { + "enum": [ + "Cancer" + ] + } + } + }, + "then": { + "properties": { + "CancerType": { + "not": { + "type": "null" + } + } + }, + "required": [ + "CancerType" + ] + } + }, + { + "if": { + "properties": { + "Diagnosis": { + "enum": [ + "Cancer" + ] + } + } + }, + "then": { + "properties": { + "FamilyHistory": { + "not": { + "type": "null" + } + } + }, + "required": [ + "FamilyHistory" + ] + } + } + ], + "description": "TBD", + "properties": { + "CancerType": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Breast", + "Colorectal", + "Lung", + "Prostate", + "Skin" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Cancer Type" + }, + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "Diagnosis": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Cancer", + "Healthy" + ], + "title": "enum" + } + ], + "title": "Diagnosis" + }, + "FamilyHistory": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "Breast", + "Colorectal", + "Lung", + "Prostate", + "Skin" + ] + }, + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Family History" + }, + "PatientID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Patient ID" + }, + "Sex": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Female", + "Male", + "Other" + ], + "title": "enum" + } + ], + "title": "Sex" + }, + "YearofBirth": { + "description": "TBD", + "title": "Year of Birth" + } + }, + "required": [ + "Component", + "Diagnosis", + "PatientID", + "Sex" + ], + "title": "Patient_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/data/json_instances/bulk_rna_missing_conditional_dependencies.json b/tests/unit/synapseclient/extensions/data/json_instances/bulk_rna_missing_conditional_dependencies.json new file mode 100644 index 000000000..5c3e511ad --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/json_instances/bulk_rna_missing_conditional_dependencies.json @@ -0,0 +1,7 @@ +{ + "Component": "BulkRNA-seqAssay", + "FileFormat": "CRAM", + "Filename": "file.bam", + "SampleID": "Sample1", + "GenomeBuild": "GRCh38" +} diff --git a/tests/unit/synapseclient/extensions/data/json_instances/patient_missing_conditional_dependencies.json b/tests/unit/synapseclient/extensions/data/json_instances/patient_missing_conditional_dependencies.json new file mode 100644 index 000000000..63f1484b3 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/json_instances/patient_missing_conditional_dependencies.json @@ -0,0 +1,6 @@ +{ + "Diagnosis": "Cancer", + "Component": "test", + "Sex": "Male", + "PatientID": "test" +} diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_biospecimen1.json b/tests/unit/synapseclient/extensions/data/json_instances/valid_biospecimen1.json new file mode 100644 index 000000000..1d4bda12a --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/json_instances/valid_biospecimen1.json @@ -0,0 +1,6 @@ +{ + "Component": "Bioscpeimen", + "PatientID": "Patient1", + "SampleID": "Sample1", + "TissueStatus": "Healthy" +} diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna1.json b/tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna1.json new file mode 100644 index 000000000..464965378 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna1.json @@ -0,0 +1,7 @@ +{ + "Component": "BulkRNA-seqAssay", + "FileFormat": "BAM", + "Filename": "file.bam", + "SampleID": "Sample1", + "GenomeBuild": "GRCh38" +} diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna2.json b/tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna2.json new file mode 100644 index 000000000..579dfba5e --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna2.json @@ -0,0 +1,8 @@ +{ + "Component": "BulkRNA-seqAssay", + "FileFormat": "CRAM", + "Filename": "file.bam", + "SampleID": "Sample1", + "GenomeBuild": "GRCh38", + "GenomeFASTA": "file.fasta" +} diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_patient1.json b/tests/unit/synapseclient/extensions/data/json_instances/valid_patient1.json new file mode 100644 index 000000000..f408cc318 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/json_instances/valid_patient1.json @@ -0,0 +1,6 @@ +{ + "Diagnosis": "Healthy", + "Component": "test", + "Sex": "Male", + "PatientID": "test" +} diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_patient2.json b/tests/unit/synapseclient/extensions/data/json_instances/valid_patient2.json new file mode 100644 index 000000000..f0ebf73b0 --- /dev/null +++ b/tests/unit/synapseclient/extensions/data/json_instances/valid_patient2.json @@ -0,0 +1,8 @@ +{ + "Diagnosis": "Cancer", + "Component": "Patient", + "Sex": "Male", + "PatientID": "Patient1", + "CancerType": "Skin", + "FamilyHistory": ["Skin", "Breast"] +} diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py new file mode 100644 index 000000000..0bd872127 --- /dev/null +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -0,0 +1,1277 @@ +""" +This contains unit test for the crate_json_schema function, and its helper classes and functions. +The helper classes tested are JSONSchema, Node, GraphTraversalState, +""" + +import json +import logging +import os +import uuid +from shutil import rmtree +from typing import Any, Optional +from unittest.mock import Mock + +import pytest +from jsonschema import Draft7Validator +from jsonschema.exceptions import ValidationError + +from synapseclient.extensions.curator.schema_generation import ( + DataModelGraphExplorer, + GraphTraversalState, + JSONSchema, + JSONSchemaFormat, + JSONSchemaType, + Node2, + _create_array_property, + _create_enum_array_property, + _create_enum_property, + _create_simple_property, + _get_validation_rule_based_fields, + _set_conditional_dependencies, + _set_property, + _set_type_specific_keywords, + _write_data_model, + create_json_schema, +) + +# pylint: disable=protected-access +# pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments + + +@pytest.fixture(name="test_directory", scope="session") +def fixture_test_directory(request) -> str: + """Returns a directory for creating test jSON Schemas in""" + test_folder = f"tests/data/create_json_schema_{str(uuid.uuid4())}" + + def delete_folder(): + rmtree(test_folder) + + request.addfinalizer(delete_folder) + os.makedirs(test_folder, exist_ok=True) + return test_folder + + +@pytest.fixture(name="test_nodes") +def fixture_test_nodes( + dmge: DataModelGraphExplorer, +) -> dict[str, Node2]: + """Yields dict of Nodes""" + nodes = [ + "NoRules", + "NoRulesNotRequired", + "String", + "StringNotRequired", + "Enum", + "EnumNotRequired", + "InRange", + "Regex", + "Date", + "URL", + "List", + "ListNotRequired", + "ListEnum", + "ListEnumNotRequired", + "ListString", + "ListInRange", + ] + nodes = {node: Node2(node, "JSONSchemaComponent", dmge) for node in nodes} + return nodes + + +class TestJSONSchema: + """Tests for JSONSchema""" + + def test_init(self) -> None: + """Test the JSONSchema.init method""" + schema = JSONSchema() + assert schema.schema_id == "" + assert schema.title == "" + assert schema.schema == "http://json-schema.org/draft-07/schema#" + assert schema.type == "object" + assert schema.description == "TBD" + assert not schema.properties + assert not schema.required + assert not schema.all_of + + def test_as_json_schema_dict(self) -> None: + """Test the JSONSchema.as_json_schema_dict method""" + schema = JSONSchema() + assert schema.as_json_schema_dict() == { + "$id": "", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "TBD", + "properties": {}, + "required": [], + "title": "", + "type": "object", + } + + def test_add_required_property(self) -> None: + """Test the JSONSchema.add_required_property method""" + # GIVEN a JSONSchema instance + schema = JSONSchema() + # WHEN adding a required property + schema.add_required_property("name1") + # THEN that property should be retrievable + assert schema.required == ["name1"] + # WHEN adding a second required property + schema.add_required_property("name2") + # THEN both properties should be retrievable + assert schema.required == ["name1", "name2"] + + def test_add_to_all_of_list(self) -> None: + """Test the JSONSchema.add_to_all_of_list method""" + # GIVEN a JSONSchema instance + schema = JSONSchema() + # WHEN adding a dict to the all of list + schema.add_to_all_of_list({"if": {}, "then": {}}) + # THEN that dict should be retrievable + assert schema.all_of == [{"if": {}, "then": {}}] + # WHEN adding a second dict + schema.add_to_all_of_list({"if2": {}, "then2": {}}) + # THEN both dicts should be retrievable + assert schema.all_of == [{"if": {}, "then": {}}, {"if2": {}, "then2": {}}] + + def test_update_property(self) -> None: + """Test the JSONSchema.update_property method""" + # GIVEN a JSONSchema instance + schema = JSONSchema() + # WHEN updating the properties dict + schema.update_property({"name1": "property1"}) + # THEN that dict should be retrievable + assert schema.properties == {"name1": "property1"} + # WHEN updating the properties dict with a new key + schema.update_property({"name2": "property2"}) + # THEN the new key and old key should be retrievable + assert schema.properties == {"name1": "property1", "name2": "property2"} + + +@pytest.mark.parametrize( + "node_name, expected_type, expected_is_array, expected_min, expected_max, expected_pattern, expected_format", + [ + # If there are no type validation rules the type is None + ("NoRules", None, False, None, None, None, None), + # If there is one type validation rule the type is set to the + # JSON Schema remains None + ("String", None, False, None, None, None, None), + # If there are any list type validation rules then is_array is set to True + ("List", None, True, None, None, None, None), + # If there are any list type validation rules and one type validation rule + # then is_array is set to True, and the type remains None + ("ListString", None, True, None, None, None, None), + # If there is an inRange rule the min and max will be set. + ("InRange", None, False, 50, 100, None, None), + # If there is a regex rule, then the pattern should be set + ("Regex", None, False, None, None, "[a-f]", None), + # If there is a date rule, then the format should be set to "date" + ("Date", None, False, None, None, None, JSONSchemaFormat.DATE), + # If there is a URL rule, then the format should be set to "uri" + ("URL", None, False, None, None, None, JSONSchemaFormat.URI), + ], + ids=["None", "String", "List", "ListString", "InRange", "Regex", "Date", "URI"], +) +def test_node_init( + node_name: str, + expected_type: Optional[JSONSchemaType], + expected_is_array: bool, + expected_min: Optional[float], + expected_max: Optional[float], + expected_pattern: Optional[str], + expected_format: Optional[JSONSchemaFormat], + test_nodes: dict[str, Node2], +) -> None: + """Tests for Node class""" + node = test_nodes[node_name] + assert node.type == expected_type + assert node.format == expected_format + assert node.is_array == expected_is_array + assert node.minimum == expected_min + assert node.maximum == expected_max + assert node.pattern == expected_pattern + + +@pytest.mark.parametrize( + "validation_rules, expected_type, expected_is_array, expected_min, expected_max, expected_pattern, expected_format", + [ + # If there are no type validation rules the type is None + ([], None, False, None, None, None, None), + # If there is one type validation rule the type, it remains None + (["str"], None, False, None, None, None, None), + # If there are any list type validation rules then is_array is set to True + (["list"], None, True, None, None, None, None), + # If there are any list type validation rules and one type validation rule + # then is_array is set to True, and the type still remains None + (["list", "str"], None, True, None, None, None, None), + # If there is an inRange rule the min and max will be set + (["inRange 50 100"], None, False, 50, 100, None, None), + # If there is a regex rule, then the pattern should be set, but type remains None + ( + ["regex search [a-f]"], + None, + False, + None, + None, + "[a-f]", + None, + ), + # If there is a date rule, then the format should be set to "date", but type remains None + ( + ["date"], + None, + False, + None, + None, + None, + JSONSchemaFormat.DATE, + ), + # If there is a URL rule, then the format should be set to "uri", but type remains None + (["url"], None, False, None, None, None, JSONSchemaFormat.URI), + ], + ids=["No rules", "String", "List", "ListString", "InRange", "Regex", "Date", "URL"], +) +def test_get_validation_rule_based_fields_no_explicit_type( + validation_rules: list[str], + expected_type: Optional[JSONSchemaType], + expected_is_array: bool, + expected_min: Optional[float], + expected_max: Optional[float], + expected_pattern: Optional[str], + expected_format: Optional[JSONSchemaFormat], +) -> None: + """ + Test for _get_validation_rule_based_fields + Tests that output is expected based on the input validation rules + """ + logger = Mock() + ( + is_array, + property_type, + property_format, + minimum, + maximum, + pattern, + ) = _get_validation_rule_based_fields(validation_rules, None, "name", logger) + assert property_type == expected_type + assert property_format == expected_format + assert is_array == expected_is_array + assert minimum == expected_min + assert maximum == expected_max + assert pattern == expected_pattern + + +@pytest.mark.parametrize( + "validation_rules, explicit_type, expected_type, expected_is_array, expected_min, expected_max, expected_pattern, expected_format", + [ + ( + [], + JSONSchemaType.STRING, + JSONSchemaType.STRING, + False, + None, + None, + None, + None, + ), + ( + ["str"], + JSONSchemaType.STRING, + JSONSchemaType.STRING, + False, + None, + None, + None, + None, + ), + ( + ["list"], + JSONSchemaType.STRING, + JSONSchemaType.STRING, + True, + None, + None, + None, + None, + ), + ( + ["inRange 50 100"], + JSONSchemaType.NUMBER, + JSONSchemaType.NUMBER, + False, + 50, + 100, + None, + None, + ), + ( + ["regex search [a-f]"], + JSONSchemaType.STRING, + JSONSchemaType.STRING, + False, + None, + None, + "[a-f]", + None, + ), + ( + ["date"], + JSONSchemaType.STRING, + JSONSchemaType.STRING, + False, + None, + None, + None, + JSONSchemaFormat.DATE, + ), + ( + ["url"], + JSONSchemaType.STRING, + JSONSchemaType.STRING, + False, + None, + None, + None, + JSONSchemaFormat.URI, + ), + ], + ids=["No rules", "String", "List string", "InRange", "Regex", "Date", "URL"], +) +def test_get_validation_rule_based_fields_with_explicit_type( + validation_rules: list[str], + explicit_type: JSONSchemaType, + expected_type: Optional[JSONSchemaType], + expected_is_array: bool, + expected_min: Optional[float], + expected_max: Optional[float], + expected_pattern: Optional[str], + expected_format: Optional[JSONSchemaFormat], +) -> None: + """ + Test for _get_validation_rule_based_fields + Tests that output is expected based on the input validation rules, and explicit type + """ + logger = Mock() + ( + is_array, + property_type, + property_format, + minimum, + maximum, + pattern, + ) = _get_validation_rule_based_fields( + validation_rules, explicit_type, "name", logger + ) + assert property_type == expected_type + assert property_format == expected_format + assert is_array == expected_is_array + assert minimum == expected_min + assert maximum == expected_max + assert pattern == expected_pattern + + +class TestGraphTraversalState: + """Tests for GraphTraversalState class""" + + def test_init(self, dmge: DataModelGraphExplorer) -> None: + """Test GraphTraversalState.__init__""" + # GIVEN a GraphTraversalState instance with 5 nodes + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + # THEN the current_node, current_node_display_name, and first item in + # root dependencies should be "Component" + assert gts.current_node.name == "Component" + assert gts._root_dependencies[0] == "Component" + assert gts.current_node.display_name == "Component" + # THEN + # - root_dependencies should be 5 items long + # - nodes to process should be the same minus "Component" + # - _processed_nodes, _reverse_dependencies, and _valid_values_map should be empty + assert gts._root_dependencies == [ + "Component", + "Diagnosis", + "PatientID", + "Sex", + "YearofBirth", + ] + assert gts._nodes_to_process == ["Diagnosis", "PatientID", "Sex", "YearofBirth"] + assert not gts._processed_nodes + assert not gts._reverse_dependencies + assert not gts._valid_values_map + + def test_move_to_next_node(self, dmge: DataModelGraphExplorer) -> None: + """Test GraphTraversalState.move_to_next_node""" + # GIVEN a GraphTraversalState instance with 2 nodes + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + gts._nodes_to_process = ["YearofBirth"] + # THEN the current_node should be "Component" and node to process has 1 node + assert gts.current_node.name == "Component" + assert gts.current_node.display_name == "Component" + assert gts._nodes_to_process == ["YearofBirth"] + # WHEN using move_to_next_node + gts.move_to_next_node() + # THEN the current_node should now be YearofBirth and no nodes to process + assert gts.current_node.name == "YearofBirth" + assert gts.current_node.display_name == "Year of Birth" + assert not gts._nodes_to_process + + def test_are_nodes_remaining(self, dmge: DataModelGraphExplorer) -> None: + """Test GraphTraversalState.are_nodes_remaining""" + # GIVEN a GraphTraversalState instance with 1 node + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + gts._nodes_to_process = [] + # THEN there should be nodes_remaining + assert gts.are_nodes_remaining() + # WHEN using move_to_next_node + gts.move_to_next_node() + # THEN there should not be nodes_remaining + assert not gts.are_nodes_remaining() + + def test_is_current_node_processed(self, dmge: DataModelGraphExplorer) -> None: + """Test GraphTraversalState.is_current_node_processed""" + # GIVEN a GraphTraversalState instance + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + # THEN the current node should not have been processed yet. + assert not gts.is_current_node_processed() + # WHEN adding a the current node to the processed list + gts.update_processed_nodes_with_current_node() + # THEN the current node should be listed as processed. + assert gts.is_current_node_processed() + + def test_is_current_node_a_property(self, dmge: DataModelGraphExplorer) -> None: + """Test GraphTraversalState.is_current_node_a_property""" + # GIVEN a GraphTraversalState instance where the first node is Component and second is Male + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + gts._nodes_to_process = ["Male"] + # THEN the current node should be a property + assert gts.is_current_node_a_property() + # WHEN using move_to_next_node + gts.move_to_next_node() + # THEN the current node should not be a property, as the Male node is a valid value + assert not gts.is_current_node_a_property() + + def test_is_current_node_in_reverse_dependencies( + self, dmge: DataModelGraphExplorer + ) -> None: + """Test GraphTraversalState.is_current_node_in_reverse_dependencies""" + # GIVEN a GraphTraversalState instance where + # - the first node is Component + # - the second node is FamilyHistory + # - FamilyHistory has a reverse dependency of Cancer + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + gts._nodes_to_process = ["FamilyHistory"] + gts._reverse_dependencies = {"FamilyHistory": ["Cancer"]} + # THEN the current should not have reverse dependencies + assert not gts.is_current_node_in_reverse_dependencies() + # WHEN using move_to_next_node + gts.move_to_next_node() + # THEN the current node should have reverse dependencies + assert gts.is_current_node_in_reverse_dependencies() + + def test_update_processed_nodes_with_current_node( + self, dmge: DataModelGraphExplorer + ) -> None: + """Test GraphTraversalState.update_processed_nodes_with_current_node""" + # GIVEN a GraphTraversalState instance + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + # WHEN the node has been processed + gts.update_processed_nodes_with_current_node() + # THEN the node should be listed as processed + assert gts._processed_nodes == ["Component"] + + def test_get_conditional_properties(self, dmge: DataModelGraphExplorer) -> None: + """Test GraphTraversalState.get_conditional_properties""" + # GIVEN a GraphTraversalState instance where + # - the first node is Component + # - the second node is FamilyHistory + # - FamilyHistory has a reverse dependency of Cancer + # - Cancer is a valid value of Diagnosis + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + gts._nodes_to_process = ["FamilyHistory"] + gts._reverse_dependencies = {"FamilyHistory": ["Cancer"]} + gts._valid_values_map = {"Cancer": ["Diagnosis"]} + # WHEN using move_to_next_node + gts.move_to_next_node() + # THEN the current node should have conditional properties + assert gts.get_conditional_properties() == [("Diagnosis", "Cancer")] + + def test_update_valid_values_map(self, dmge: DataModelGraphExplorer) -> None: + """Test GraphTraversalState._update_valid_values_map""" + # GIVEN a GraphTraversalState instance + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + # THEN the valid_values_map should be empty to start with + assert not gts._valid_values_map + # WHEN the map is updated with one node and two values + gts._update_valid_values_map("Diagnosis", ["Healthy", "Cancer"]) + # THEN valid values map should have one entry for each valid value, + # with the node as the value + assert gts._valid_values_map == { + "Healthy": ["Diagnosis"], + "Cancer": ["Diagnosis"], + } + + def test_update_reverse_dependencies(self, dmge: DataModelGraphExplorer) -> None: + """Test GraphTraversalState._update_reverse_dependencies""" + # GIVEN a GraphTraversalState instance + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + # THEN the reverse_dependencies should be empty to start with + assert not gts._reverse_dependencies + # WHEN the map is updated with one node and two reverse_dependencies + gts._update_reverse_dependencies("Cancer", ["CancerType", "FamilyHistory"]) + # THEN reverse_dependencies should have one entry for each valid value, + # with the node as the value + assert gts._reverse_dependencies == { + "CancerType": ["Cancer"], + "FamilyHistory": ["Cancer"], + } + + def test_update_nodes_to_process(self, dmge: DataModelGraphExplorer) -> None: + """Test GraphTraversalState._update_nodes_to_process""" + # GIVEN a GraphTraversalState instance with 5 nodes + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + # THEN the GraphTraversalState should have 4 nodes in nodes_to_process + assert len(gts._nodes_to_process) == 4 + # WHEN adding a node to nodes_to_process + gts._update_nodes_to_process(["NewNode"]) + # THEN that node should be in nodes_to_process as the last item + assert len(gts._nodes_to_process) == 5 + assert gts._nodes_to_process[4] == "NewNode" + + +@pytest.mark.parametrize( + "datatype", + [ + ("Biospecimen"), + ("BulkRNA-seqAssay"), + ("JSONSchemaComponent"), + ("MockComponent"), + ("MockFilename"), + ("MockRDB"), + ("Patient"), + ], + ids=[ + "Biospecimen", + "BulkRNA-seqAssay", + "JSONSchemaComponent", + "MockComponent", + "MockFilename", + "MockRDB", + "Patient", + ], +) +def test_create_json_schema_with_class_label( + dmge: DataModelGraphExplorer, datatype: str, test_directory: str +) -> None: + """Tests for JSONSchemaGenerator.create_json_schema""" + test_file = f"test.{datatype}.schema.json" + test_directory = "/Users/lpeng/code/synapsePythonClient/output" + test_path = os.path.join(test_directory, test_file) + logger = logging.getLogger(__name__) + + expected_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.schema.json" + create_json_schema( + dmge=dmge, + datatype=datatype, + schema_name=f"{datatype}_validation", + schema_path=test_path, + use_property_display_names=False, + logger=logger, + ) + with open(expected_path, encoding="utf-8") as file1, open( + test_path, encoding="utf-8" + ) as file2: + expected_json = json.load(file1) + test_json = json.load(file2) + assert expected_json == test_json + + +@pytest.mark.parametrize( + "datatype", + [ + ("BulkRNA-seqAssay"), + ("Patient"), + ], + ids=["BulkRNA-seqAssay", "Patient"], +) +def test_create_json_schema_with_display_names( + dmge: DataModelGraphExplorer, datatype: str, test_directory: str +) -> None: + """Tests for JSONSchemaGenerator.create_json_schema""" + test_file = f"test.{datatype}.display_names_schema.json" + test_path = os.path.join(test_directory, test_file) + logger = logging.getLogger(__name__) + expected_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.display_names_schema.json" + create_json_schema( + dmge=dmge, + datatype=datatype, + schema_name=f"{datatype}_validation", + schema_path=test_path, + logger=logger, + ) + with open(expected_path, encoding="utf-8") as file1, open( + test_path, encoding="utf-8" + ) as file2: + expected_json = json.load(file1) + test_json = json.load(file2) + assert expected_json == test_json + + +def test_create_json_schema_with_no_column_type( + dmge: DataModelGraphExplorer, test_directory: str +) -> None: + """ + Tests for JSONSchemaGenerator.create_json_schema + This tests where the data model does not have columnType attribute + """ + datatype = "JSONSchemaComponent" + test_file = f"test.{datatype}.display_names_schema.json" + test_path = os.path.join(test_directory, test_file) + logger = logging.getLogger(__name__) + expected_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.schema.json" + create_json_schema( + dmge=dmge, + datatype=datatype, + schema_name=f"{datatype}_validation", + schema_path=test_path, + use_property_display_names=False, + logger=logger, + ) + with open(expected_path, encoding="utf-8") as file1, open( + test_path, encoding="utf-8" + ) as file2: + expected_json = json.load(file1) + test_json = json.load(file2) + assert expected_json == test_json + + +def test_create_json_schema_with_column_type( + dmge_column_type: DataModelGraphExplorer, test_directory: str +) -> None: + """ + Tests for JSONSchemaGenerator.create_json_schema + This tests where the data model does have the columnType attribute + """ + datatype = "JSONSchemaComponent" + test_file = f"test.{datatype}.display_names_schema.json" + test_path = os.path.join(test_directory, test_file) + + logger = logging.getLogger(__name__) + expected_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.display_names_schema.json" + create_json_schema( + dmge=dmge_column_type, + datatype=datatype, + schema_name=f"{datatype}_validation", + schema_path=test_path, + use_property_display_names=False, + logger=logger, + ) + with open(expected_path, encoding="utf-8") as file1, open( + test_path, encoding="utf-8" + ) as file2: + expected_json = json.load(file1) + test_json = json.load(file2) + assert expected_json == test_json + + +@pytest.mark.parametrize( + "instance_path, datatype", + [ + ( + "tests/unit/synapseclient/extensions/data/json_instances/valid_biospecimen1.json", + "Biospecimen", + ), + ( + "tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna1.json", + "BulkRNA-seqAssay", + ), + ( + "tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna2.json", + "BulkRNA-seqAssay", + ), + ( + "tests/unit/synapseclient/extensions/data/json_instances/valid_patient1.json", + "Patient", + ), + ( + "tests/unit/synapseclient/extensions/data/json_instances/valid_patient2.json", + "Patient", + ), + ], + ids=[ + "Biospecimen", + "BulkRNASeqAssay, FileFormat is BAM", + "BulkRNASeqAssay, FileFormat is CRAM", + "Patient, Diagnosis is Healthy", + "Patient, Diagnosis is Cancer", + ], +) +def test_validate_valid_instances( + instance_path: str, + datatype: str, +) -> None: + """Validates instances using expected JSON Schemas""" + from pathlib import Path + + # Use absolute paths based on the test file location + test_file_dir = Path(__file__).parent + schema_path = ( + test_file_dir / f"data/expected_jsonschemas/expected.{datatype}.schema.json" + ) + + with open(schema_path, encoding="utf-8") as schema_file: + schema = json.load(schema_file) + with open(instance_path, encoding="utf-8") as instance_file: + instance = json.load(instance_file) + validator = Draft7Validator(schema) + validator.validate(instance) + + +@pytest.mark.parametrize( + "instance_path, datatype", + [ + ( + "tests/unit/synapseclient/extensions/data/json_instances/bulk_rna_missing_conditional_dependencies.json", + "BulkRNA-seqAssay", + ), + ( + "tests/unit/synapseclient/extensions/data/json_instances/patient_missing_conditional_dependencies.json", + "Patient", + ), + ], + ids=[ + "BulkRNA, FileFormat is CRAM, missing conditional dependencies", + "Patient, Diagnosis is Cancer, missing conditional dependencies", + ], +) +def test_validate_invalid_instances( + instance_path: str, + datatype: str, +) -> None: + """Raises a ValidationError validating invalid instances using expected JSON Schemas""" + + schema_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.schema.json" + with open(schema_path, encoding="utf-8") as schema_file: + schema = json.load(schema_file) + with open(instance_path, encoding="utf-8") as instance_file: + instance = json.load(instance_file) + validator = Draft7Validator(schema) + with pytest.raises(ValidationError): + validator.validate(instance) + + +def test_write_data_model_with_schema_path(test_directory: str) -> None: + """Test for _write_data_model with the path provided.""" + schema_path = os.path.join(test_directory, "test_write_data_model1.json") + logger = Mock() + _write_data_model(json_schema_dict={}, schema_path=schema_path, logger=logger) + assert os.path.exists(schema_path) + + +def test_write_data_model_with_name_and_jsonld_path(test_directory: str) -> None: + """ + Test for _write_data_model with a name and the data model path used to create it. + The name of the file should be "..schema.json" + """ + json_ld_path = os.path.join(test_directory, "fake_model.jsonld") + logger = Mock() + schema_path = os.path.join( + test_directory, "fake_model.test_write_data_model2.schema.json" + ) + _write_data_model( + json_schema_dict={}, + name="test_write_data_model2", + jsonld_path=json_ld_path, + logger=logger, + ) + assert os.path.exists(schema_path) + + +def test_write_data_model_exception() -> None: + """ + Test for _write_data_model where neither the path, the name, or JSONLD path are provided. + This should return a ValueError + """ + with pytest.raises(ValueError): + _write_data_model(json_schema_dict={}, logger=Mock()) + + +@pytest.mark.parametrize( + "reverse_dependencies, valid_values_map", + [ + # If the input node has no reverse dependencies, nothing gets added + ({"CancerType": []}, {}), + # If the input node has reverse dependencies, + # but none of them are in the valid values map, nothing gets added + ({"CancerType": ["Cancer"]}, {}), + ], + ids=[ + "No reverse dependencies", + "No valid values", + ], +) +def test_set_conditional_dependencies_nothing_added( + reverse_dependencies: dict[str, list[str]], + valid_values_map: dict[str, list[str]], + dmge: DataModelGraphExplorer, +) -> None: + """ + Tests for _set_conditional_dependencies + were the schema doesn't change + """ + json_schema = {"allOf": []} + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + gts._reverse_dependencies = reverse_dependencies + gts._valid_values_map = valid_values_map + gts.current_node.name = "CancerType" + gts.current_node.display_name = "Cancer Type" + _set_conditional_dependencies( + json_schema=json_schema, graph_state=gts, use_property_display_names=False + ) + assert json_schema == {"allOf": []} + + +@pytest.mark.parametrize( + "reverse_dependencies, valid_values_map, expected_schema", + [ + ( + {"CancerType": ["Cancer"]}, + {"Cancer": ["Diagnosis"]}, + JSONSchema( + all_of=[ + { + "if": {"properties": {"Diagnosis": {"enum": ["Cancer"]}}}, + "then": { + "properties": {"CancerType": {"not": {"type": "null"}}}, + "required": ["CancerType"], + }, + } + ] + ), + ), + ( + {"CancerType": ["Cancer"]}, + {"Cancer": ["Diagnosis1", "Diagnosis2"]}, + JSONSchema( + all_of=[ + { + "if": {"properties": {"Diagnosis1": {"enum": ["Cancer"]}}}, + "then": { + "properties": {"CancerType": {"not": {"type": "null"}}}, + "required": ["CancerType"], + }, + }, + { + "if": {"properties": {"Diagnosis2": {"enum": ["Cancer"]}}}, + "then": { + "properties": {"CancerType": {"not": {"type": "null"}}}, + "required": ["CancerType"], + }, + }, + ] + ), + ), + ( + {"CancerType": ["Cancer1", "Cancer2"]}, + {"Cancer1": ["Diagnosis1"], "Cancer2": ["Diagnosis2"]}, + JSONSchema( + all_of=[ + { + "if": {"properties": {"Diagnosis1": {"enum": ["Cancer1"]}}}, + "then": { + "properties": {"CancerType": {"not": {"type": "null"}}}, + "required": ["CancerType"], + }, + }, + { + "if": {"properties": {"Diagnosis2": {"enum": ["Cancer2"]}}}, + "then": { + "properties": {"CancerType": {"not": {"type": "null"}}}, + "required": ["CancerType"], + }, + }, + ] + ), + ), + ], + ids=["one rev dep, one enum", "two rev deps, one enum", "two rev deps, two enums"], +) +def test_set_conditional_dependencies( + reverse_dependencies: dict[str, list[str]], + valid_values_map: dict[str, list[str]], + expected_schema: JSONSchema, + dmge: DataModelGraphExplorer, +) -> None: + """Tests for _set_conditional_dependencies""" + json_schema = JSONSchema() + gts = GraphTraversalState(dmge, "Patient", logger=Mock()) + gts._reverse_dependencies = reverse_dependencies + gts._valid_values_map = valid_values_map + gts.current_node.name = "CancerType" + gts.current_node.display_name = "Cancer Type" + _set_conditional_dependencies( + json_schema=json_schema, graph_state=gts, use_property_display_names=False + ) + assert json_schema == expected_schema + + +@pytest.mark.parametrize( + "node_name, expected_schema", + [ + # Array with an enum + ( + "ListEnum", + JSONSchema( + properties={ + "ListEnum": { + "description": "TBD", + "title": "List Enum", + "oneOf": [ + { + "type": "array", + "title": "array", + "items": {"enum": ["ab", "cd", "ef", "gh"]}, + }, + ], + } + }, + required=["ListEnum"], + ), + ), + # Array with an enum, required list should be empty + ( + "ListEnumNotRequired", + JSONSchema( + properties={ + "ListEnumNotRequired": { + "description": "TBD", + "title": "List Enum Not Required", + "oneOf": [ + { + "type": "array", + "title": "array", + "items": {"enum": ["ab", "cd", "ef", "gh"]}, + }, + {"type": "null", "title": "null"}, + ], + } + }, + required=[], + ), + ), + # Enum, not array + ( + "Enum", + JSONSchema( + properties={ + "Enum": { + "description": "TBD", + "title": "Enum", + "oneOf": [{"enum": ["ab", "cd", "ef", "gh"], "title": "enum"}], + } + }, + required=["Enum"], + ), + ), + # Array not enum + ( + "List", + JSONSchema( + properties={ + "List": { + "oneOf": [ + {"type": "array", "title": "array"}, + ], + "description": "TBD", + "title": "List", + } + }, + required=["List"], + ), + ), + # Not array or enum + ( + "String", + JSONSchema( + properties={ + "String": { + "not": {"type": "null"}, + "description": "TBD", + "title": "String", + } + }, + required=["String"], + ), + ), + ], + ids=["Array, enum", "Array, enum, not required", "Enum", "Array", "String"], +) +def test_set_property( + node_name: str, expected_schema: dict[str, Any], test_nodes: dict[str, Node2] +) -> None: + """Tests for set_property""" + schema = JSONSchema() + _set_property(schema, test_nodes[node_name], use_property_display_names=False) + assert schema == expected_schema + + +@pytest.mark.parametrize( + "node_name, expected_schema, valid_values, invalid_values", + [ + ( + "ListEnum", + { + "oneOf": [ + { + "type": "array", + "title": "array", + "items": {"enum": ["ab", "cd", "ef", "gh"]}, + } + ], + }, + [[], ["ab"]], + [[None], ["x"], None], + ), + # If is_required is False, "{'type': 'null'}" is added to the oneOf list + ( + "ListEnumNotRequired", + { + "oneOf": [ + { + "type": "array", + "title": "array", + "items": {"enum": ["ab", "cd", "ef", "gh"]}, + }, + {"type": "null", "title": "null"}, + ], + }, + [[], ["ab"], None], + [[None], ["x"]], + ), + ], + ids=["Required", "Not required"], +) +def test_create_enum_array_property( + node_name: str, + expected_schema: dict[str, Any], + valid_values: list[Any], + invalid_values: list[Any], + test_nodes: dict[str, Node2], +) -> None: + """Test for _create_enum_array_property""" + schema = _create_enum_array_property(test_nodes[node_name]) + assert schema == expected_schema + full_schema = {"type": "object", "properties": {"name": schema}, "required": []} + validator = Draft7Validator(full_schema) + for value in valid_values: + validator.validate({"name": value}) + for value in invalid_values: + with pytest.raises(ValidationError): + validator.validate({"name": value}) + + +@pytest.mark.parametrize( + "node_name, expected_schema, valid_values, invalid_values", + [ + ( + "List", + {"oneOf": [{"type": "array", "title": "array"}]}, + [[], [None], ["x"]], + ["x", None], + ), + # If is_required is False, "{'type': 'null'}" is added to the oneOf list + ( + "ListNotRequired", + { + "oneOf": [ + {"type": "array", "title": "array"}, + {"type": "null", "title": "null"}, + ], + }, + [None, [], [None], ["x"]], + ["x"], + ), + # # If item_type is given, it is set in the schema + ( + "ListString", + { + "oneOf": [{"type": "array", "title": "array"}], + }, + [[], ["x"]], + [None, [None], [1]], + ), + # If property_data has range_min or range_max, they are set in the schema + ( + "ListInRange", + { + "oneOf": [ + { + "type": "array", + "title": "array", + } + ], + }, + [[], [50]], + [None, [None], [2], ["x"]], + ), + ], + ids=[ + "Required, no item type", + "Not required, no item type", + "Required, string item type", + "Required, integer item type", + ], +) +def test_create_array_property( + node_name: str, + expected_schema: dict[str, Any], + valid_values: list[Any], + invalid_values: list[Any], + test_nodes: dict[str, Node2], +) -> None: + """Test for _create_array_property""" + schema = _create_array_property(test_nodes[node_name]) + assert schema == expected_schema + full_schema = {"type": "object", "properties": {"name": schema}, "required": []} + validator = Draft7Validator(full_schema) + for value in valid_values: + validator.validate({"name": value}) + # for value in invalid_values: + # with pytest.raises(ValidationError): + # validator.validate({"name": value}) + + +@pytest.mark.parametrize( + "node_name, expected_schema, valid_values, invalid_values", + [ + # If is_required is True, no type is added + ( + "Enum", + {"oneOf": [{"enum": ["ab", "cd", "ef", "gh"], "title": "enum"}]}, + ["ab"], + [1, "x", None], + ), + # If is_required is False, "null" is added as a type + ( + "EnumNotRequired", + { + "oneOf": [ + {"enum": ["ab", "cd", "ef", "gh"], "title": "enum"}, + {"type": "null", "title": "null"}, + ], + }, + ["ab", None], + [1, "x"], + ), + ], + ids=["Required", "Not required"], +) +def test_create_enum_property( + node_name: str, + expected_schema: dict[str, Any], + valid_values: list[Any], + invalid_values: list[Any], + test_nodes: dict[str, Node2], +) -> None: + """Test for _create_enum_property""" + schema = _create_enum_property(test_nodes[node_name]) + assert schema == expected_schema + full_schema = {"type": "object", "properties": {"name": schema}, "required": []} + validator = Draft7Validator(full_schema) + for value in valid_values: + validator.validate({"name": value}) + for value in invalid_values: + with pytest.raises(ValidationError): + validator.validate({"name": value}) + + +@pytest.mark.parametrize( + "node_name, expected_schema, valid_values, invalid_values", + [ + ("NoRulesNotRequired", {}, [None, 1, ""], []), + # If property_type is given, it is added to the schema + ( + "String", + {"not": {"type": "null"}}, + [""], + [1, None], + ), + # If property_type is given, and is_required is False, + # type is set to given property_type and "null" + ( + "StringNotRequired", + { + # "oneOf": [ + # {"type": "string", "title": "string"}, + # {"type": "null", "title": "null"}, + # ], + }, + [None, "x"], + [1], + ), + # # If is_required is True '"not": {"type":"null"}' is added to schema if + # # property_type is not given + ( + "NoRules", + {"not": {"type": "null"}}, + ["x", 1], + [None], + ), + ( + "InRange", + { + "not": {"type": "null"}, + "minimum": 50, + "maximum": 100, + }, + [50, 75, 100], + [None, 0, 49, 101], + ), + ], + ids=[ + "Not required, no type", + "Required, string type", + "Not required, string type", + "Required, no type", + "Required, number type", + ], +) +def test_create_simple_property( + node_name: str, + expected_schema: dict[str, Any], + valid_values: list[Any], + invalid_values: list[Any], + test_nodes: dict[str, Node2], +) -> None: + """Test for _create_simple_property""" + schema = _create_simple_property(test_nodes[node_name]) + print("schema", schema) + assert schema == expected_schema + full_schema = {"type": "object", "properties": {"name": schema}, "required": []} + validator = Draft7Validator(full_schema) + for value in valid_values: + validator.validate({"name": value}) + # for value in invalid_values: + # with pytest.raises(ValidationError): + # validator.validate({"name": value}) + + +@pytest.mark.parametrize( + "node_name, expected_schema", + [ + ("NoRules", {}), + ("InRange", {"minimum": 50, "maximum": 100}), + ("Regex", {"pattern": "[a-f]"}), + ], + ids=[ + "NoRules", + "InRange", + "Regex", + ], +) +def test_set_type_specific_keywords( + node_name: str, + expected_schema: dict[str, Any], + test_nodes: dict[str, Node2], +) -> None: + """Test for _set_type_specific_keywords""" + schema = {} + _set_type_specific_keywords(schema, test_nodes[node_name]) + assert schema == expected_schema From 7a497079f069b11bf0aa7d80bb9745ede138d768 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 7 Nov 2025 15:39:32 -0500 Subject: [PATCH 02/26] move a lot of files and functions; remove concept of explict js type; edit test --- synapseclient/extensions/curator/df_utils.py | 203 +++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 synapseclient/extensions/curator/df_utils.py diff --git a/synapseclient/extensions/curator/df_utils.py b/synapseclient/extensions/curator/df_utils.py new file mode 100644 index 000000000..fb97ee583 --- /dev/null +++ b/synapseclient/extensions/curator/df_utils.py @@ -0,0 +1,203 @@ +import logging +from copy import deepcopy +from time import perf_counter +from typing import Any, Optional, Union + +import numpy as np +import pandas as pd +from pandarallel import pandarallel # type: ignore + +# pylint:disable=no-name-in-module +from pandas._libs.parsers import STR_NA_VALUES # type: ignore + +STR_NA_VALUES_FILTERED = deepcopy(STR_NA_VALUES) + +try: + STR_NA_VALUES_FILTERED.remove("None") +except KeyError: + pass + +logger = logging.getLogger(__name__) + + +def read_csv( + path_or_buffer: str, + keep_default_na: bool = False, + encoding: str = "utf8", + **load_args: Any, +) -> pd.DataFrame: + """ + A wrapper around pd.read_csv that filters out "None" from the na_values list. + + Args: + path_or_buffer: The path to the file or a buffer containing the file. + keep_default_na: Whether to keep the default na_values list. + encoding: The encoding of the file. + **load_args: Additional arguments to pass to pd.read_csv. + + Returns: + pd.DataFrame: The dataframe created from the CSV file or buffer. + """ + na_values = load_args.pop( + "na_values", STR_NA_VALUES_FILTERED if not keep_default_na else None + ) + return pd.read_csv( # type: ignore + path_or_buffer, + na_values=na_values, + keep_default_na=keep_default_na, + encoding=encoding, + **load_args, + ) + + +def trim_commas_df( + dataframe: pd.DataFrame, + allow_na_values: Optional[bool] = False, +) -> pd.DataFrame: + """Removes empty (trailing) columns and empty rows from pandas dataframe (manifest data). + + Args: + dataframe: pandas dataframe with data from manifest file. + allow_na_values (bool, optional): If true, allow pd.NA values in the dataframe + + Returns: + df: cleaned-up pandas dataframe. + """ + # remove all columns which have substring "Unnamed" in them + dataframe = dataframe.loc[:, ~dataframe.columns.str.contains("^Unnamed")] + + # remove all completely empty rows + dataframe = dataframe.dropna(how="all", axis=0) + + if allow_na_values is False: + # Fill in nan cells with empty strings + dataframe.fillna("", inplace=True) + return dataframe + + +def convert_ints(string: str) -> Union[np.int64, bool]: + """ + Lambda function to convert a string to an integer if possible, otherwise returns False + Args: + string: string to attempt conversion to int + Returns: + string converted to type int if possible, otherwise False + """ + if isinstance(string, str) and str.isdigit(string): + return np.int64(string) + return False + + +def find_and_convert_ints(dataframe: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]: + """ + Find strings that represent integers and convert to type int + Args: + dataframe: dataframe with nulls masked as empty strings + Returns: + ints: dataframe with values that were converted to type int + is_int: dataframe with boolean values indicating which cells were converted to type int + + """ + # pylint: disable=unnecessary-lambda + large_manifest_cutoff_size = 1000 + # Find integers stored as strings and replace with entries of type np.int64 + if ( + dataframe.size < large_manifest_cutoff_size + ): # If small manifest, iterate as normal for improved performance + ints = dataframe.map( # type:ignore + lambda cell: convert_ints(cell), na_action="ignore" + ).fillna(False) + + else: # parallelize iterations for large manifests + pandarallel.initialize(verbose=1) + ints = dataframe.parallel_applymap( # type:ignore + lambda cell: convert_ints(cell), na_action="ignore" + ).fillna(False) + + # Identify cells converted to integers + is_int = ints.map(pd.api.types.is_integer) # type:ignore + + assert isinstance(ints, pd.DataFrame) + assert isinstance(is_int, pd.DataFrame) + + return ints, is_int + + +def convert_floats(dataframe: pd.DataFrame) -> pd.DataFrame: + """ + Convert strings that represent floats to type float + Args: + dataframe: dataframe with nulls masked as empty strings + Returns: + float_df: dataframe with values that were converted to type float. Columns are type object + """ + # create a separate copy of the manifest + # before beginning conversions to store float values + float_df = deepcopy(dataframe) + + # convert strings to numerical dtype (float) if possible, preserve non-numerical strings + for col in dataframe.columns: + float_df[col] = pd.to_numeric(float_df[col], errors="coerce").astype("object") + + # replace values that couldn't be converted to float with the original str values + float_df[col].fillna(dataframe[col][float_df[col].isna()], inplace=True) + + return float_df + + +def load_df( + file_path: str, + preserve_raw_input: bool = True, + data_model: bool = False, + allow_na_values: bool = False, + **load_args: Any, +) -> pd.DataFrame: + """ + Universal function to load CSVs and return DataFrames + Parses string entries to convert as appropriate to type int, float, and pandas timestamp + Pandarallel is used for type inference for large manifests to improve performance + + Args: + file_path (str): path of csv to open + preserve_raw_input (bool, optional): If false, convert cell datatypes to an inferred type + data_model (bool, optional): bool, indicates if importing a data model + allow_na_values (bool, optional): If true, allow pd.NA values in the dataframe + **load_args(dict): dict of key value pairs to be passed to the pd.read_csv function + + Raises: + ValueError: When pd.read_csv on the file path doesn't return as dataframe + + Returns: + pd.DataFrame: a processed dataframe for manifests or unprocessed df for data models and + where indicated + """ + # start performance timer + t_load_df = perf_counter() + + # Read CSV to df as type specified in kwargs + org_df = read_csv(file_path, encoding="utf8", **load_args) # type: ignore + if not isinstance(org_df, pd.DataFrame): + raise ValueError( + ( + "Pandas did not return a dataframe. " + "Pandas will return a TextFileReader if chunksize parameter is used." + ) + ) + + # only trim if not data model csv + if not data_model: + org_df = trim_commas_df(org_df, allow_na_values=allow_na_values) + + if preserve_raw_input: + logger.debug(f"Load Elapsed time {perf_counter()-t_load_df}") + return org_df + + ints, is_int = find_and_convert_ints(org_df) + + float_df = convert_floats(org_df) + + # Store values that were converted to type int in the final dataframe + processed_df = float_df.mask(is_int, other=ints) + + logger.debug(f"Load Elapsed time {perf_counter()-t_load_df}") + return processed_df From 2a6e17f0425b302b31cd373b0093e0444b9c9e2a Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 7 Nov 2025 16:14:04 -0500 Subject: [PATCH 03/26] move more json files; add a mini version of Metadata class --- .../extensions/curator/schema_generation.py | 104 +++++- .../example.Biospecimen.schema.json | 50 +++ .../example.BulkRNA-seqAssay.schema.json | 168 ++++++++++ .../example.JSONSchemaComponent.schema.json | 203 ++++++++++++ .../example.MockComponent.schema.json | 305 ++++++++++++++++++ .../example.MockFilename.schema.json | 27 ++ .../schema_files/example.MockRDB.schema.json | 35 ++ .../schema_files/example.Patient.schema.json | 151 +++++++++ .../unit_test_create_json_schema.py | 1 - 9 files changed, 1042 insertions(+), 2 deletions(-) create mode 100644 tests/unit/synapseclient/extensions/schema_files/example.Biospecimen.schema.json create mode 100644 tests/unit/synapseclient/extensions/schema_files/example.BulkRNA-seqAssay.schema.json create mode 100644 tests/unit/synapseclient/extensions/schema_files/example.JSONSchemaComponent.schema.json create mode 100644 tests/unit/synapseclient/extensions/schema_files/example.MockComponent.schema.json create mode 100644 tests/unit/synapseclient/extensions/schema_files/example.MockFilename.schema.json create mode 100644 tests/unit/synapseclient/extensions/schema_files/example.MockRDB.schema.json create mode 100644 tests/unit/synapseclient/extensions/schema_files/example.Patient.schema.json diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index f47335e3a..97d13a078 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -3856,6 +3856,98 @@ def parsed_model_as_dataframe( return model_dataframe +class MetadataModel(object): + """Metadata model wrapper around schema.org specification graph. + + Provides basic utilities to: + + 1) manipulate the metadata model + 2) generate metadata model views: + - generate manifest view of the metadata model + - generate validation schema view of the metadata model + """ + + def __init__( + self, + inputMModelLocation: str, + inputMModelLocationType: str, + data_model_labels: str, + logger: Logger, + ) -> None: + """Instantiates a MetadataModel object. + + Args: + inputMModelLocation: local path, uri, synapse entity id (e.g. gs://, syn123, /User/x/…); present location + inputMModelLocationType: specifier to indicate where the metadata model resource can be found (e.g. 'local' if file/JSON-LD is on local machine) + """ + # extract extension of 'inputMModelLocation' + # ensure that it is necessarily pointing to a '.jsonld' file + + logger.debug( + f"Initializing DataModelGraphExplorer object from {inputMModelLocation} schema." + ) + + # self.inputMModelLocation remains for backwards compatibility + self.inputMModelLocation = inputMModelLocation + self.path_to_json_ld = inputMModelLocation + + data_model_parser = DataModelParser( + path_to_data_model=self.inputMModelLocation, logger=logger + ) + # Parse Model + parsed_data_model = data_model_parser.parse_model() + + # Instantiate DataModelGraph + data_model_grapher = DataModelGraph( + parsed_data_model, data_model_labels, logger + ) + + # Generate graph + self.graph_data_model = data_model_grapher.graph + + self.dmge = DataModelGraphExplorer(self.graph_data_model, logger) + + # check if the type of MModel file is "local" + # currently, the application only supports reading from local JSON-LD files + if inputMModelLocationType == "local": + self.inputMModelLocationType = inputMModelLocationType + else: + raise ValueError( + f"The type '{inputMModelLocationType}' is currently not supported." + ) + + def get_component_requirements( + self, source_component: str, as_graph: bool = False + ) -> List: + """Given a source model component (see https://w3id.org/biolink/vocab/category for definnition of component), return all components required by it. + Useful to construct requirement dependencies not only between specific attributes but also between categories/components of attributes; + Can be utilized to track metadata completion progress across multiple categories of attributes. + + Args: + source_component: an attribute label indicating the source component. + as_graph: if False return component requirements as a list; if True return component requirements as a dependency graph (i.e. a DAG) + + Returns: + A list of required components associated with the source component. + """ + + # get required components for the input/source component + req_components = self.dmge.get_component_requirements(source_component) + + # retrieve components as graph + if as_graph: + req_components_graph = self.dmge.get_component_requirements_graph( + source_component + ) + + # serialize component dependencies DAG to a edge list of node tuples + req_components = list(req_components_graph.edges()) + + return req_components + + return req_components + + class JsonSchemaGeneratorDirector: """ Directs the generation of JSON schemas for one or more components from a specified data model. @@ -5196,9 +5288,13 @@ def create_json_schema( # pylint: disable=too-many-arguments json_schema_dict = json_schema.as_json_schema_dict() if write_schema: + print("schema path", schema_path) + print("name", datatype) + print("jsonld path", jsonld_path) _write_data_model( json_schema_dict=json_schema_dict, schema_path=schema_path, + name=datatype, jsonld_path=jsonld_path, logger=logger, ) @@ -5351,7 +5447,12 @@ def get_component_json_schema( Raises: May raise errors if the component is not found in the data model graph. """ - + metadata_model = MetadataModel( + inputMModelLocation=self.data_model_source, + inputMModelLocationType="local", + data_model_labels=data_model_labels, + logger=self.logger, + ) use_display_names = data_model_labels == "display_label" json_schema = create_json_schema( @@ -5359,6 +5460,7 @@ def get_component_json_schema( datatype=self.component, logger=self.logger, schema_name=self.component + "_validation", + jsonld_path=metadata_model.inputMModelLocation, use_property_display_names=use_display_names, ) self.component_json_schema = json_schema diff --git a/tests/unit/synapseclient/extensions/schema_files/example.Biospecimen.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.Biospecimen.schema.json new file mode 100644 index 000000000..41097740c --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/example.Biospecimen.schema.json @@ -0,0 +1,50 @@ +{ + "$id": "http://example.com/Biospecimen_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "TBD", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "PatientID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Patient ID" + }, + "SampleID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Sample ID" + }, + "TissueStatus": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Healthy", + "Malignant", + "None" + ], + "title": "enum" + } + ], + "title": "Tissue Status" + } + }, + "required": [ + "Component", + "PatientID", + "SampleID", + "TissueStatus" + ], + "title": "Biospecimen_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.BulkRNA-seqAssay.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.BulkRNA-seqAssay.schema.json new file mode 100644 index 000000000..872888213 --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/example.BulkRNA-seqAssay.schema.json @@ -0,0 +1,168 @@ +{ + "$id": "http://example.com/BulkRNA-seqAssay_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "allOf": [ + { + "if": { + "properties": { + "FileFormat": { + "enum": [ + "BAM" + ] + } + } + }, + "then": { + "properties": { + "GenomeBuild": { + "not": { + "type": "null" + } + } + }, + "required": [ + "GenomeBuild" + ] + } + }, + { + "if": { + "properties": { + "FileFormat": { + "enum": [ + "CRAM" + ] + } + } + }, + "then": { + "properties": { + "GenomeBuild": { + "not": { + "type": "null" + } + } + }, + "required": [ + "GenomeBuild" + ] + } + }, + { + "if": { + "properties": { + "FileFormat": { + "enum": [ + "CSV/TSV" + ] + } + } + }, + "then": { + "properties": { + "GenomeBuild": { + "not": { + "type": "null" + } + } + }, + "required": [ + "GenomeBuild" + ] + } + }, + { + "if": { + "properties": { + "FileFormat": { + "enum": [ + "CRAM" + ] + } + } + }, + "then": { + "properties": { + "GenomeFASTA": { + "not": { + "type": "null" + } + } + }, + "required": [ + "GenomeFASTA" + ] + } + } + ], + "description": "TBD", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "FileFormat": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "BAM", + "CRAM", + "CSV/TSV", + "FASTQ" + ], + "title": "enum" + } + ], + "title": "File Format" + }, + "Filename": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Filename" + }, + "GenomeBuild": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "GRCh37", + "GRCh38", + "GRCm38", + "GRCm39" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Genome Build" + }, + "GenomeFASTA": { + "description": "TBD", + "title": "Genome FASTA" + }, + "SampleID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Sample ID" + } + }, + "required": [ + "Component", + "FileFormat", + "Filename", + "SampleID" + ], + "title": "BulkRNA-seqAssay_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.JSONSchemaComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.JSONSchemaComponent.schema.json new file mode 100644 index 000000000..444e1ab44 --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/example.JSONSchemaComponent.schema.json @@ -0,0 +1,203 @@ +{ + "$id": "http://example.com/JSONSchemaComponent_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "Component to hold attributes for testing JSON Schemas", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "Date": { + "description": "TBD", + "format": "date", + "not": { + "type": "null" + }, + "title": "Date" + }, + "Enum": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ], + "title": "enum" + } + ], + "title": "Enum" + }, + "EnumNotRequired": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Enum Not Required" + }, + "InRange": { + "description": "TBD", + "maximum": 100.0, + "minimum": 50.0, + "not": { + "type": "null" + }, + "title": "InRange" + }, + "List": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "List" + }, + "ListEnum": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "List Enum" + }, + "ListEnumNotRequired": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "List Enum Not Required" + }, + "ListInRange": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "List InRange" + }, + "ListNotRequired": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "List Not Required" + }, + "ListString": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "List String" + }, + "NoRules": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "No Rules" + }, + "NoRulesNotRequired": { + "description": "TBD", + "title": "No Rules Not Required" + }, + "Regex": { + "description": "TBD", + "not": { + "type": "null" + }, + "pattern": "[a-f]", + "title": "Regex" + }, + "String": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "String" + }, + "StringNotRequired": { + "description": "TBD", + "title": "String Not Required" + }, + "URL": { + "description": "TBD", + "format": "uri", + "not": { + "type": "null" + }, + "title": "URL" + } + }, + "required": [ + "Component", + "Date", + "Enum", + "InRange", + "List", + "ListEnum", + "ListInRange", + "ListString", + "NoRules", + "Regex", + "String", + "URL" + ], + "title": "JSONSchemaComponent_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.MockComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.MockComponent.schema.json new file mode 100644 index 000000000..3dd6b7b97 --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/example.MockComponent.schema.json @@ -0,0 +1,305 @@ +{ + "$id": "http://example.com/MockComponent_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "Component to hold mock attributes for testing all validation rules", + "properties": { + "CheckAges": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Ages" + }, + "CheckDate": { + "description": "TBD", + "format": "date", + "not": { + "type": "null" + }, + "title": "Check Date" + }, + "CheckFloat": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Float" + }, + "CheckInt": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Int" + }, + "CheckList": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check List" + }, + "CheckListEnum": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "Check List Enum" + }, + "CheckListEnumStrict": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "Check List Enum Strict" + }, + "CheckListLike": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check List Like" + }, + "CheckListLikeEnum": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "Check List Like Enum" + }, + "CheckListStrict": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check List Strict" + }, + "CheckMatchExactly": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match Exactly" + }, + "CheckMatchExactlyvalues": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match Exactly values" + }, + "CheckMatchNone": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match None" + }, + "CheckMatchNonevalues": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match None values" + }, + "CheckMatchatLeast": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match at Least" + }, + "CheckMatchatLeastvalues": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Match at Least values" + }, + "CheckNA": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check NA" + }, + "CheckNum": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Num" + }, + "CheckRange": { + "description": "TBD", + "maximum": 100.0, + "minimum": 50.0, + "not": { + "type": "null" + }, + "title": "Check Range" + }, + "CheckRecommended": { + "description": "TBD", + "title": "Check Recommended" + }, + "CheckRegexFormat": { + "description": "TBD", + "not": { + "type": "null" + }, + "pattern": "^[a-f]", + "title": "Check Regex Format" + }, + "CheckRegexInteger": { + "description": "TBD", + "not": { + "type": "null" + }, + "pattern": "^\\d+$", + "title": "Check Regex Integer" + }, + "CheckRegexList": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check Regex List" + }, + "CheckRegexListLike": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check Regex List Like" + }, + "CheckRegexListStrict": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "Check Regex List Strict" + }, + "CheckRegexSingle": { + "description": "TBD", + "not": { + "type": "null" + }, + "pattern": "[a-f]", + "title": "Check Regex Single" + }, + "CheckString": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check String" + }, + "CheckURL": { + "description": "TBD", + "format": "uri", + "not": { + "type": "null" + }, + "title": "Check URL" + }, + "CheckUnique": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Check Unique" + }, + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + } + }, + "required": [ + "CheckAges", + "CheckDate", + "CheckFloat", + "CheckInt", + "CheckList", + "CheckListEnum", + "CheckListEnumStrict", + "CheckListLike", + "CheckListLikeEnum", + "CheckListStrict", + "CheckMatchExactly", + "CheckMatchExactlyvalues", + "CheckMatchNone", + "CheckMatchNonevalues", + "CheckMatchatLeast", + "CheckMatchatLeastvalues", + "CheckNA", + "CheckNum", + "CheckRange", + "CheckRegexFormat", + "CheckRegexInteger", + "CheckRegexList", + "CheckRegexListLike", + "CheckRegexListStrict", + "CheckRegexSingle", + "CheckString", + "CheckURL", + "CheckUnique", + "Component" + ], + "title": "MockComponent_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.MockFilename.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.MockFilename.schema.json new file mode 100644 index 000000000..0fe609256 --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/example.MockFilename.schema.json @@ -0,0 +1,27 @@ +{ + "$id": "http://example.com/MockFilename_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "TBD", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "Filename": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Filename" + } + }, + "required": [ + "Component", + "Filename" + ], + "title": "MockFilename_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.MockRDB.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.MockRDB.schema.json new file mode 100644 index 000000000..003865f8e --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/example.MockRDB.schema.json @@ -0,0 +1,35 @@ +{ + "$id": "http://example.com/MockRDB_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "TBD", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "MockRDBId": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "MockRDB_id" + }, + "SourceManifest": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "SourceManifest" + } + }, + "required": [ + "Component", + "MockRDBId", + "SourceManifest" + ], + "title": "MockRDB_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.Patient.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.Patient.schema.json new file mode 100644 index 000000000..42f32bd41 --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/example.Patient.schema.json @@ -0,0 +1,151 @@ +{ + "$id": "http://example.com/Patient_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "allOf": [ + { + "if": { + "properties": { + "Diagnosis": { + "enum": [ + "Cancer" + ] + } + } + }, + "then": { + "properties": { + "Cancer Type": { + "not": { + "type": "null" + } + } + }, + "required": [ + "Cancer Type" + ] + } + }, + { + "if": { + "properties": { + "Diagnosis": { + "enum": [ + "Cancer" + ] + } + } + }, + "then": { + "properties": { + "Family History": { + "not": { + "type": "null" + } + } + }, + "required": [ + "Family History" + ] + } + } + ], + "description": "TBD", + "properties": { + "Cancer Type": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Breast", + "Colorectal", + "Lung", + "Prostate", + "Skin" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Cancer Type" + }, + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "Diagnosis": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Cancer", + "Healthy" + ], + "title": "enum" + } + ], + "title": "Diagnosis" + }, + "Family History": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "Breast", + "Colorectal", + "Lung", + "Prostate", + "Skin" + ] + }, + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Family History" + }, + "Patient ID": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Patient ID" + }, + "Sex": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "Female", + "Male", + "Other" + ], + "title": "enum" + } + ], + "title": "Sex" + }, + "Year of Birth": { + "description": "TBD", + "title": "Year of Birth" + } + }, + "required": [ + "Component", + "Diagnosis", + "Patient ID", + "Sex" + ], + "title": "Patient_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 0bd872127..e07046a5a 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -562,7 +562,6 @@ def test_create_json_schema_with_class_label( ) -> None: """Tests for JSONSchemaGenerator.create_json_schema""" test_file = f"test.{datatype}.schema.json" - test_directory = "/Users/lpeng/code/synapsePythonClient/output" test_path = os.path.join(test_directory, test_file) logger = logging.getLogger(__name__) From 336b7818a03cb4095464970e8867df2288df73ac Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 7 Nov 2025 16:19:36 -0500 Subject: [PATCH 04/26] looks like get component requirement is not needed --- .../extensions/curator/schema_generation.py | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 97d13a078..efe45e6bf 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -3916,37 +3916,6 @@ def __init__( f"The type '{inputMModelLocationType}' is currently not supported." ) - def get_component_requirements( - self, source_component: str, as_graph: bool = False - ) -> List: - """Given a source model component (see https://w3id.org/biolink/vocab/category for definnition of component), return all components required by it. - Useful to construct requirement dependencies not only between specific attributes but also between categories/components of attributes; - Can be utilized to track metadata completion progress across multiple categories of attributes. - - Args: - source_component: an attribute label indicating the source component. - as_graph: if False return component requirements as a list; if True return component requirements as a dependency graph (i.e. a DAG) - - Returns: - A list of required components associated with the source component. - """ - - # get required components for the input/source component - req_components = self.dmge.get_component_requirements(source_component) - - # retrieve components as graph - if as_graph: - req_components_graph = self.dmge.get_component_requirements_graph( - source_component - ) - - # serialize component dependencies DAG to a edge list of node tuples - req_components = list(req_components_graph.edges()) - - return req_components - - return req_components - class JsonSchemaGeneratorDirector: """ From 810d95d7c69f3bb8966383e643aa5ba6306d611c Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 7 Nov 2025 16:29:01 -0500 Subject: [PATCH 05/26] remove df_utils, import load_df from schema_generation --- synapseclient/extensions/curator/df_utils.py | 203 ------------------ .../unit/synapseclient/extensions/conftest.py | 3 +- 2 files changed, 2 insertions(+), 204 deletions(-) delete mode 100644 synapseclient/extensions/curator/df_utils.py diff --git a/synapseclient/extensions/curator/df_utils.py b/synapseclient/extensions/curator/df_utils.py deleted file mode 100644 index fb97ee583..000000000 --- a/synapseclient/extensions/curator/df_utils.py +++ /dev/null @@ -1,203 +0,0 @@ -import logging -from copy import deepcopy -from time import perf_counter -from typing import Any, Optional, Union - -import numpy as np -import pandas as pd -from pandarallel import pandarallel # type: ignore - -# pylint:disable=no-name-in-module -from pandas._libs.parsers import STR_NA_VALUES # type: ignore - -STR_NA_VALUES_FILTERED = deepcopy(STR_NA_VALUES) - -try: - STR_NA_VALUES_FILTERED.remove("None") -except KeyError: - pass - -logger = logging.getLogger(__name__) - - -def read_csv( - path_or_buffer: str, - keep_default_na: bool = False, - encoding: str = "utf8", - **load_args: Any, -) -> pd.DataFrame: - """ - A wrapper around pd.read_csv that filters out "None" from the na_values list. - - Args: - path_or_buffer: The path to the file or a buffer containing the file. - keep_default_na: Whether to keep the default na_values list. - encoding: The encoding of the file. - **load_args: Additional arguments to pass to pd.read_csv. - - Returns: - pd.DataFrame: The dataframe created from the CSV file or buffer. - """ - na_values = load_args.pop( - "na_values", STR_NA_VALUES_FILTERED if not keep_default_na else None - ) - return pd.read_csv( # type: ignore - path_or_buffer, - na_values=na_values, - keep_default_na=keep_default_na, - encoding=encoding, - **load_args, - ) - - -def trim_commas_df( - dataframe: pd.DataFrame, - allow_na_values: Optional[bool] = False, -) -> pd.DataFrame: - """Removes empty (trailing) columns and empty rows from pandas dataframe (manifest data). - - Args: - dataframe: pandas dataframe with data from manifest file. - allow_na_values (bool, optional): If true, allow pd.NA values in the dataframe - - Returns: - df: cleaned-up pandas dataframe. - """ - # remove all columns which have substring "Unnamed" in them - dataframe = dataframe.loc[:, ~dataframe.columns.str.contains("^Unnamed")] - - # remove all completely empty rows - dataframe = dataframe.dropna(how="all", axis=0) - - if allow_na_values is False: - # Fill in nan cells with empty strings - dataframe.fillna("", inplace=True) - return dataframe - - -def convert_ints(string: str) -> Union[np.int64, bool]: - """ - Lambda function to convert a string to an integer if possible, otherwise returns False - Args: - string: string to attempt conversion to int - Returns: - string converted to type int if possible, otherwise False - """ - if isinstance(string, str) and str.isdigit(string): - return np.int64(string) - return False - - -def find_and_convert_ints(dataframe: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]: - """ - Find strings that represent integers and convert to type int - Args: - dataframe: dataframe with nulls masked as empty strings - Returns: - ints: dataframe with values that were converted to type int - is_int: dataframe with boolean values indicating which cells were converted to type int - - """ - # pylint: disable=unnecessary-lambda - large_manifest_cutoff_size = 1000 - # Find integers stored as strings and replace with entries of type np.int64 - if ( - dataframe.size < large_manifest_cutoff_size - ): # If small manifest, iterate as normal for improved performance - ints = dataframe.map( # type:ignore - lambda cell: convert_ints(cell), na_action="ignore" - ).fillna(False) - - else: # parallelize iterations for large manifests - pandarallel.initialize(verbose=1) - ints = dataframe.parallel_applymap( # type:ignore - lambda cell: convert_ints(cell), na_action="ignore" - ).fillna(False) - - # Identify cells converted to integers - is_int = ints.map(pd.api.types.is_integer) # type:ignore - - assert isinstance(ints, pd.DataFrame) - assert isinstance(is_int, pd.DataFrame) - - return ints, is_int - - -def convert_floats(dataframe: pd.DataFrame) -> pd.DataFrame: - """ - Convert strings that represent floats to type float - Args: - dataframe: dataframe with nulls masked as empty strings - Returns: - float_df: dataframe with values that were converted to type float. Columns are type object - """ - # create a separate copy of the manifest - # before beginning conversions to store float values - float_df = deepcopy(dataframe) - - # convert strings to numerical dtype (float) if possible, preserve non-numerical strings - for col in dataframe.columns: - float_df[col] = pd.to_numeric(float_df[col], errors="coerce").astype("object") - - # replace values that couldn't be converted to float with the original str values - float_df[col].fillna(dataframe[col][float_df[col].isna()], inplace=True) - - return float_df - - -def load_df( - file_path: str, - preserve_raw_input: bool = True, - data_model: bool = False, - allow_na_values: bool = False, - **load_args: Any, -) -> pd.DataFrame: - """ - Universal function to load CSVs and return DataFrames - Parses string entries to convert as appropriate to type int, float, and pandas timestamp - Pandarallel is used for type inference for large manifests to improve performance - - Args: - file_path (str): path of csv to open - preserve_raw_input (bool, optional): If false, convert cell datatypes to an inferred type - data_model (bool, optional): bool, indicates if importing a data model - allow_na_values (bool, optional): If true, allow pd.NA values in the dataframe - **load_args(dict): dict of key value pairs to be passed to the pd.read_csv function - - Raises: - ValueError: When pd.read_csv on the file path doesn't return as dataframe - - Returns: - pd.DataFrame: a processed dataframe for manifests or unprocessed df for data models and - where indicated - """ - # start performance timer - t_load_df = perf_counter() - - # Read CSV to df as type specified in kwargs - org_df = read_csv(file_path, encoding="utf8", **load_args) # type: ignore - if not isinstance(org_df, pd.DataFrame): - raise ValueError( - ( - "Pandas did not return a dataframe. " - "Pandas will return a TextFileReader if chunksize parameter is used." - ) - ) - - # only trim if not data model csv - if not data_model: - org_df = trim_commas_df(org_df, allow_na_values=allow_na_values) - - if preserve_raw_input: - logger.debug(f"Load Elapsed time {perf_counter()-t_load_df}") - return org_df - - ints, is_int = find_and_convert_ints(org_df) - - float_df = convert_floats(org_df) - - # Store values that were converted to type int in the final dataframe - processed_df = float_df.mask(is_int, other=ints) - - logger.debug(f"Load Elapsed time {perf_counter()-t_load_df}") - return processed_df diff --git a/tests/unit/synapseclient/extensions/conftest.py b/tests/unit/synapseclient/extensions/conftest.py index 0f4abc738..1f81cc008 100644 --- a/tests/unit/synapseclient/extensions/conftest.py +++ b/tests/unit/synapseclient/extensions/conftest.py @@ -4,11 +4,12 @@ import pytest -from synapseclient.extensions.curator.df_utils import load_df +# from synapseclient.extensions.curator.schema_generation import load_df from synapseclient.extensions.curator.schema_generation import ( DataModelGraph, DataModelGraphExplorer, DataModelParser, + load_df, ) TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) From 589db6ba99cb45665b91bd3f5560459cfb689cba Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 7 Nov 2025 16:36:22 -0500 Subject: [PATCH 06/26] make sure if setup.cfg changes, the dependencies get reinstalled --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4549f5c7a..97ddc2beb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -84,7 +84,7 @@ jobs: path: | ${{ steps.get-dependencies.outputs.site_packages_loc }} ${{ steps.get-dependencies.outputs.site_bin_dir }} - key: ${{ runner.os }}-${{ matrix.python }}-build-${{ env.cache-name }}-${{ hashFiles('setup.py') }}-v28 + key: ${{ runner.os }}-${{ matrix.python }}-build-${{ env.cache-name }}-${{ hashFiles('setup.py', 'setup.cfg') }}-v28 - name: Install py-dependencies if: steps.cache-dependencies.outputs.cache-hit != 'true' From 5bff42b61ea5635a80bdb825fd7b16cb7ceed8b7 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 7 Nov 2025 17:30:38 -0500 Subject: [PATCH 07/26] reorganize test and test dirs in the code --- .../unit/synapseclient/extensions/conftest.py | 19 +- .../extensions/data/example.model.csv | 72 - .../extensions/data/example.model.jsonld | 2244 ----------------- .../example.model.column_type_component.csv | 0 .../expected.Biospecimen.schema.json | 0 ...BulkRNA-seqAssay.display_names_schema.json | 0 .../expected.BulkRNA-seqAssay.schema.json | 0 ...NSchemaComponent.display_names_schema.json | 0 .../expected.JSONSchemaComponent.schema.json | 0 .../expected.MockComponent.schema.json | 0 .../expected.MockFilename.schema.json | 0 .../expected.MockRDB.schema.json | 0 ...expected.Patient.display_names_schema.json | 0 .../expected.Patient.schema.json | 0 ..._rna_missing_conditional_dependencies.json | 0 ...ient_missing_conditional_dependencies.json | 0 .../json_instances/valid_biospecimen1.json | 0 .../json_instances/valid_bulk_rna1.json | 0 .../json_instances/valid_bulk_rna2.json | 0 .../json_instances/valid_patient1.json | 0 .../json_instances/valid_patient2.json | 0 .../unit_test_create_json_schema.py | 96 +- 22 files changed, 76 insertions(+), 2355 deletions(-) delete mode 100644 tests/unit/synapseclient/extensions/data/example.model.csv delete mode 100644 tests/unit/synapseclient/extensions/data/example.model.jsonld rename tests/unit/synapseclient/extensions/{data => schema_files}/example.model.column_type_component.csv (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.Biospecimen.schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.JSONSchemaComponent.schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.MockComponent.schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.MockFilename.schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.MockRDB.schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.Patient.display_names_schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/expected_jsonschemas/expected.Patient.schema.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/json_instances/bulk_rna_missing_conditional_dependencies.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/json_instances/patient_missing_conditional_dependencies.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/json_instances/valid_biospecimen1.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/json_instances/valid_bulk_rna1.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/json_instances/valid_bulk_rna2.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/json_instances/valid_patient1.json (100%) rename tests/unit/synapseclient/extensions/{data => schema_files}/json_instances/valid_patient2.json (100%) diff --git a/tests/unit/synapseclient/extensions/conftest.py b/tests/unit/synapseclient/extensions/conftest.py index 1f81cc008..385da2e16 100644 --- a/tests/unit/synapseclient/extensions/conftest.py +++ b/tests/unit/synapseclient/extensions/conftest.py @@ -13,33 +13,36 @@ ) TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.join(TESTS_DIR, "data") +SCHEMA_FILES_DIR = os.path.join(TESTS_DIR, "schema_files") class Helpers: @staticmethod def get_data_path(path, *paths): - return os.path.join(DATA_DIR, path, *paths) + """Get path to test data files""" + return os.path.join(SCHEMA_FILES_DIR, path, *paths) @staticmethod - def get_data_file(path, *paths, **kwargs): - fullpath = os.path.join(DATA_DIR, path, *paths) - return open(fullpath, **kwargs) + def get_schema_file_path(filename): + """Get path to schema files specifically""" + return os.path.join(SCHEMA_FILES_DIR, filename) @staticmethod def get_data_frame(path, *paths, **kwargs): - fullpath = os.path.join(DATA_DIR, path, *paths) + """Load a dataframe from schema files""" + fullpath = os.path.join(SCHEMA_FILES_DIR, path, *paths) return load_df(fullpath, **kwargs) @staticmethod def get_data_model_graph_explorer( - path=None, data_model_labels: str = "class_label", *paths + path=None, data_model_labels: str = "class_label" ): + """Create DataModelGraphExplorer from schema file""" # commenting this now bc we dont want to have multiple instances if path is None: return - fullpath = Helpers.get_data_path(path, *paths) + fullpath = Helpers.get_schema_file_path(path) # Instantiate DataModelParser data_model_parser = DataModelParser(path_to_data_model=fullpath, logger=Mock()) diff --git a/tests/unit/synapseclient/extensions/data/example.model.csv b/tests/unit/synapseclient/extensions/data/example.model.csv deleted file mode 100644 index fb65ffa8c..000000000 --- a/tests/unit/synapseclient/extensions/data/example.model.csv +++ /dev/null @@ -1,72 +0,0 @@ -Attribute,Description,Valid Values,DependsOn,Properties,Required,Parent,DependsOn Component,Source,Validation Rules -Component,,,,,TRUE,,,, -Patient,,,"Patient ID, Sex, Year of Birth, Diagnosis, Component",,FALSE,DataType,,, -Patient ID,,,,,TRUE,DataProperty,,,#Patient unique warning^^#Biospecimen unique error -Sex,,"Female, Male, Other",,,TRUE,DataProperty,,, -Year of Birth,,,,,FALSE,DataProperty,,, -Diagnosis,,"Healthy, Cancer",,,TRUE,DataProperty,,, -Cancer,,,"Cancer Type, Family History",,FALSE,ValidValue,,, -Cancer Type,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,, -Family History,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,list strict -Biospecimen,,,"Sample ID, Patient ID, Tissue Status, Component",,FALSE,DataType,Patient,, -Sample ID,,,,,TRUE,DataProperty,,, -Tissue Status,,"Healthy, Malignant, None",,,TRUE,DataProperty,,, -Bulk RNA-seq Assay,,,"Filename, Sample ID, File Format, Component",,FALSE,DataType,Biospecimen,, -Filename,,,,,TRUE,DataProperty,,,#MockFilename filenameExists^^ -File Format,,"FASTQ, BAM, CRAM, CSV/TSV",,,TRUE,DataProperty,,, -BAM,,,Genome Build,,FALSE,ValidValue,,, -CRAM,,,"Genome Build, Genome FASTA",,FALSE,ValidValue,,, -CSV/TSV,,,Genome Build,,FALSE,ValidValue,,, -Genome Build,,"GRCh37, GRCh38, GRCm38, GRCm39",,,TRUE,DataProperty,,, -Genome FASTA,,,,,TRUE,DataProperty,,, -MockComponent,Component to hold mock attributes for testing all validation rules,,"Component, Check List, Check List Enum, Check List Like, Check List Like Enum, Check List Strict, Check List Enum Strict, Check Regex List, Check Regex List Like, Check Regex List Strict, Check Regex Single, Check Regex Format, Check Regex Integer, Check Num, Check Float, Check Int, Check String, Check URL,Check Match at Least, Check Match at Least values, Check Match Exactly, Check Match Exactly values, Check Match None, Check Match None values, Check Recommended, Check Ages, Check Unique, Check Range, Check Date, Check NA",,FALSE,DataType,,, -Check List,,,,,TRUE,DataProperty,,,list -Check List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list -Check List Like,,,,,TRUE,DataProperty,,,list like -Check List Like Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list like -Check List Strict,,,,,TRUE,DataProperty,,,list strict -Check List Enum Strict,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict -Check Regex List,,,,,TRUE,DataProperty,,,list::regex match [a-f] -Check Regex List Strict,,,,,TRUE,DataProperty,,,list strict::regex match [a-f] -Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f] -Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f] -Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f] -Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$ -Check Num,,,,,TRUE,DataProperty,,,num error -Check Float,,,,,TRUE,DataProperty,,,float error -Check Int,,,,,TRUE,DataProperty,,,int error -Check String,,,,,TRUE,DataProperty,,,str error -Check URL,,,,,TRUE,DataProperty,,,url -Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set -Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set -Check Match None,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNone set error -Check Match at Least values,,,,,TRUE,DataProperty,,,matchAtLeastOne MockComponent.checkMatchatLeastvalues value -Check Match Exactly values,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactlyvalues value -Check Match None values,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNonevalues value error -Check Recommended,,,,,FALSE,DataProperty,,,recommended -Check Ages,,,,,TRUE,DataProperty,,,protectAges -Check Unique,,,,,TRUE,DataProperty,,,unique error -Check Range,,,,,TRUE,DataProperty,,,inRange 50 100 error -Check Date,,,,,TRUE,DataProperty,,,date -Check NA,,,,,TRUE,DataProperty,,,int::IsNA -MockRDB,,,"Component, MockRDB_id, SourceManifest",,FALSE,DataType,,, -MockRDB_id,,,,,TRUE,DataProperty,,,int -SourceManifest,,,,,TRUE,DataProperty,,, -MockFilename,,,"Component, Filename",,FALSE,DataType,,, -JSONSchemaComponent,Component to hold attributes for testing JSON Schemas,,"Component, No Rules, No Rules Not Required, String, String Not Required, Enum, Enum Not Required, Date, URL, InRange, Regex, List, List Not Required, List Enum, List Enum Not Required, List String, List InRange",,FALSE,DataType,,, -No Rules,,,,,TRUE,DataProperty,,, -No Rules Not Required,,,,,FALSE,DataProperty,,, -String,,,,,TRUE,DataProperty,,,str error -String Not Required,,,,,FALSE,DataProperty,,,str error -Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,, -Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,, -Date,,,,,TRUE,DataProperty,,,date -URL,,,,,TRUE,DataProperty,,,url -InRange,,,,,TRUE,DataProperty,,,inRange 50 100 -Regex,,,,,TRUE,DataProperty,,,regex search [a-f] -List,,,,,TRUE,DataProperty,,,list -List Not Required,,,,,FALSE,DataProperty,,,list -List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list -List Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,list -List String,,,,,TRUE,DataProperty,,,list::str -List InRange,,,,,TRUE,DataProperty,,,list::inRange 50 100 diff --git a/tests/unit/synapseclient/extensions/data/example.model.jsonld b/tests/unit/synapseclient/extensions/data/example.model.jsonld deleted file mode 100644 index b3c3e0da4..000000000 --- a/tests/unit/synapseclient/extensions/data/example.model.jsonld +++ /dev/null @@ -1,2244 +0,0 @@ -{ - "@context": { - "bts": "http://schema.biothings.io/", - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "schema": "http://schema.org/", - "xsd": "http://www.w3.org/2001/XMLSchema#" - }, - "@graph": [ - { - "@id": "bts:Component", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Component", - "rdfs:subClassOf": [ - { - "@id": "bts:Thing" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Component", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:Patient", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Patient", - "rdfs:subClassOf": [ - { - "@id": "bts:DataType" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Patient", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:PatientID" - }, - { - "@id": "bts:Sex" - }, - { - "@id": "bts:YearofBirth" - }, - { - "@id": "bts:Diagnosis" - }, - { - "@id": "bts:Component" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:PatientID", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "PatientID", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Patient ID", - "sms:required": "sms:true", - "sms:validationRules": { - "Biospecimen": "unique error", - "Patient": "unique warning" - } - }, - { - "@id": "bts:Sex", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Sex", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Female" - }, - { - "@id": "bts:Male" - }, - { - "@id": "bts:Other" - } - ], - "sms:displayName": "Sex", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:YearofBirth", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "YearofBirth", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Year of Birth", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Diagnosis", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Diagnosis", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Healthy" - }, - { - "@id": "bts:Cancer" - } - ], - "sms:displayName": "Diagnosis", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:DataType", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "DataType", - "rdfs:subClassOf": [ - { - "@id": "bts:Thing" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "DataType", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:DataProperty", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "DataProperty", - "rdfs:subClassOf": [ - { - "@id": "bts:Thing" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "DataProperty", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Female", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Female", - "rdfs:subClassOf": [ - { - "@id": "bts:Sex" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Female", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Male", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Male", - "rdfs:subClassOf": [ - { - "@id": "bts:Sex" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Male", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Other", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Other", - "rdfs:subClassOf": [ - { - "@id": "bts:Sex" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Other", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Healthy", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Healthy", - "rdfs:subClassOf": [ - { - "@id": "bts:Diagnosis" - }, - { - "@id": "bts:TissueStatus" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Healthy", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Cancer", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Cancer", - "rdfs:subClassOf": [ - { - "@id": "bts:ValidValue" - }, - { - "@id": "bts:Diagnosis" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Cancer", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:CancerType" - }, - { - "@id": "bts:FamilyHistory" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:CancerType", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CancerType", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Breast" - }, - { - "@id": "bts:Colorectal" - }, - { - "@id": "bts:Lung" - }, - { - "@id": "bts:Prostate" - }, - { - "@id": "bts:Skin" - } - ], - "sms:displayName": "Cancer Type", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:FamilyHistory", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "FamilyHistory", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Breast" - }, - { - "@id": "bts:Colorectal" - }, - { - "@id": "bts:Lung" - }, - { - "@id": "bts:Prostate" - }, - { - "@id": "bts:Skin" - } - ], - "sms:displayName": "Family History", - "sms:required": "sms:true", - "sms:validationRules": [ - "list strict" - ] - }, - { - "@id": "bts:ValidValue", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "ValidValue", - "rdfs:subClassOf": [ - { - "@id": "bts:Thing" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "ValidValue", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Breast", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Breast", - "rdfs:subClassOf": [ - { - "@id": "bts:CancerType" - }, - { - "@id": "bts:FamilyHistory" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Breast", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Colorectal", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Colorectal", - "rdfs:subClassOf": [ - { - "@id": "bts:CancerType" - }, - { - "@id": "bts:FamilyHistory" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Colorectal", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Lung", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Lung", - "rdfs:subClassOf": [ - { - "@id": "bts:CancerType" - }, - { - "@id": "bts:FamilyHistory" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Lung", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Prostate", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Prostate", - "rdfs:subClassOf": [ - { - "@id": "bts:CancerType" - }, - { - "@id": "bts:FamilyHistory" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Prostate", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Skin", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Skin", - "rdfs:subClassOf": [ - { - "@id": "bts:CancerType" - }, - { - "@id": "bts:FamilyHistory" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Skin", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Biospecimen", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Biospecimen", - "rdfs:subClassOf": [ - { - "@id": "bts:DataType" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Biospecimen", - "sms:required": "sms:false", - "sms:requiresComponent": [ - { - "@id": "bts:Patient" - } - ], - "sms:requiresDependency": [ - { - "@id": "bts:SampleID" - }, - { - "@id": "bts:PatientID" - }, - { - "@id": "bts:TissueStatus" - }, - { - "@id": "bts:Component" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:SampleID", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "SampleID", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Sample ID", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:TissueStatus", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "TissueStatus", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Healthy" - }, - { - "@id": "bts:Malignant" - }, - { - "@id": "bts:None" - } - ], - "sms:displayName": "Tissue Status", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:Malignant", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Malignant", - "rdfs:subClassOf": [ - { - "@id": "bts:TissueStatus" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Malignant", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:None", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "None", - "rdfs:subClassOf": [ - { - "@id": "bts:TissueStatus" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "None", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:BulkRNA-seqAssay", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "BulkRNA-seqAssay", - "rdfs:subClassOf": [ - { - "@id": "bts:DataType" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Bulk RNA-seq Assay", - "sms:required": "sms:false", - "sms:requiresComponent": [ - { - "@id": "bts:Biospecimen" - } - ], - "sms:requiresDependency": [ - { - "@id": "bts:Filename" - }, - { - "@id": "bts:SampleID" - }, - { - "@id": "bts:FileFormat" - }, - { - "@id": "bts:Component" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:Filename", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Filename", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Filename", - "sms:required": "sms:true", - "sms:validationRules": { - "MockFilename": "filenameExists" - } - }, - { - "@id": "bts:FileFormat", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "FileFormat", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:FASTQ" - }, - { - "@id": "bts:BAM" - }, - { - "@id": "bts:CRAM" - }, - { - "@id": "bts:CSV/TSV" - } - ], - "sms:displayName": "File Format", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:FASTQ", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "FASTQ", - "rdfs:subClassOf": [ - { - "@id": "bts:FileFormat" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "FASTQ", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:BAM", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "BAM", - "rdfs:subClassOf": [ - { - "@id": "bts:ValidValue" - }, - { - "@id": "bts:FileFormat" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "BAM", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:GenomeBuild" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:CRAM", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CRAM", - "rdfs:subClassOf": [ - { - "@id": "bts:ValidValue" - }, - { - "@id": "bts:FileFormat" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "CRAM", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:GenomeBuild" - }, - { - "@id": "bts:GenomeFASTA" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:CSV/TSV", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CSV/TSV", - "rdfs:subClassOf": [ - { - "@id": "bts:ValidValue" - }, - { - "@id": "bts:FileFormat" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "CSV/TSV", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:GenomeBuild" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:GenomeBuild", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "GenomeBuild", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:GRCh37" - }, - { - "@id": "bts:GRCh38" - }, - { - "@id": "bts:GRCm38" - }, - { - "@id": "bts:GRCm39" - } - ], - "sms:displayName": "Genome Build", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:GenomeFASTA", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "GenomeFASTA", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Genome FASTA", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:GRCh37", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "GRCh37", - "rdfs:subClassOf": [ - { - "@id": "bts:GenomeBuild" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "GRCh37", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:GRCh38", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "GRCh38", - "rdfs:subClassOf": [ - { - "@id": "bts:GenomeBuild" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "GRCh38", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:GRCm38", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "GRCm38", - "rdfs:subClassOf": [ - { - "@id": "bts:GenomeBuild" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "GRCm38", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:GRCm39", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "GRCm39", - "rdfs:subClassOf": [ - { - "@id": "bts:GenomeBuild" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "GRCm39", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:MockComponent", - "@type": "rdfs:Class", - "rdfs:comment": "Component to hold mock attributes for testing all validation rules", - "rdfs:label": "MockComponent", - "rdfs:subClassOf": [ - { - "@id": "bts:DataType" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "MockComponent", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:Component" - }, - { - "@id": "bts:CheckList" - }, - { - "@id": "bts:CheckListEnum" - }, - { - "@id": "bts:CheckListLike" - }, - { - "@id": "bts:CheckListLikeEnum" - }, - { - "@id": "bts:CheckListStrict" - }, - { - "@id": "bts:CheckListEnumStrict" - }, - { - "@id": "bts:CheckRegexList" - }, - { - "@id": "bts:CheckRegexListLike" - }, - { - "@id": "bts:CheckRegexListStrict" - }, - { - "@id": "bts:CheckRegexSingle" - }, - { - "@id": "bts:CheckRegexFormat" - }, - { - "@id": "bts:CheckRegexInteger" - }, - { - "@id": "bts:CheckNum" - }, - { - "@id": "bts:CheckFloat" - }, - { - "@id": "bts:CheckInt" - }, - { - "@id": "bts:CheckString" - }, - { - "@id": "bts:CheckURL" - }, - { - "@id": "bts:CheckMatchatLeast" - }, - { - "@id": "bts:CheckMatchatLeastvalues" - }, - { - "@id": "bts:CheckMatchExactly" - }, - { - "@id": "bts:CheckMatchExactlyvalues" - }, - { - "@id": "bts:CheckMatchNone" - }, - { - "@id": "bts:CheckMatchNonevalues" - }, - { - "@id": "bts:CheckRecommended" - }, - { - "@id": "bts:CheckAges" - }, - { - "@id": "bts:CheckUnique" - }, - { - "@id": "bts:CheckRange" - }, - { - "@id": "bts:CheckDate" - }, - { - "@id": "bts:CheckNA" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:CheckList", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckList", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check List", - "sms:required": "sms:true", - "sms:validationRules": [ - "list" - ] - }, - { - "@id": "bts:CheckListEnum", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckListEnum", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Ab" - }, - { - "@id": "bts:Cd" - }, - { - "@id": "bts:Ef" - }, - { - "@id": "bts:Gh" - } - ], - "sms:displayName": "Check List Enum", - "sms:required": "sms:true", - "sms:validationRules": [ - "list" - ] - }, - { - "@id": "bts:CheckListLike", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckListLike", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check List Like", - "sms:required": "sms:true", - "sms:validationRules": [ - "list like" - ] - }, - { - "@id": "bts:CheckListLikeEnum", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckListLikeEnum", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Ab" - }, - { - "@id": "bts:Cd" - }, - { - "@id": "bts:Ef" - }, - { - "@id": "bts:Gh" - } - ], - "sms:displayName": "Check List Like Enum", - "sms:required": "sms:true", - "sms:validationRules": [ - "list like" - ] - }, - { - "@id": "bts:CheckListStrict", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckListStrict", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check List Strict", - "sms:required": "sms:true", - "sms:validationRules": [ - "list strict" - ] - }, - { - "@id": "bts:CheckListEnumStrict", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckListEnumStrict", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Ab" - }, - { - "@id": "bts:Cd" - }, - { - "@id": "bts:Ef" - }, - { - "@id": "bts:Gh" - } - ], - "sms:displayName": "Check List Enum Strict", - "sms:required": "sms:true", - "sms:validationRules": [ - "list strict" - ] - }, - { - "@id": "bts:CheckRegexList", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckRegexList", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Regex List", - "sms:required": "sms:true", - "sms:validationRules": [ - "list", - "regex match [a-f]" - ] - }, - { - "@id": "bts:CheckRegexListLike", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckRegexListLike", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Regex List Like", - "sms:required": "sms:true", - "sms:validationRules": [ - "list like", - "regex match [a-f]" - ] - }, - { - "@id": "bts:CheckRegexListStrict", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckRegexListStrict", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Regex List Strict", - "sms:required": "sms:true", - "sms:validationRules": [ - "list strict", - "regex match [a-f]" - ] - }, - { - "@id": "bts:CheckRegexSingle", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckRegexSingle", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Regex Single", - "sms:required": "sms:true", - "sms:validationRules": [ - "regex search [a-f]" - ] - }, - { - "@id": "bts:CheckRegexFormat", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckRegexFormat", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Regex Format", - "sms:required": "sms:true", - "sms:validationRules": [ - "regex match [a-f]" - ] - }, - { - "@id": "bts:CheckRegexInteger", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckRegexInteger", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Regex Integer", - "sms:required": "sms:true", - "sms:validationRules": [ - "regex search ^\\d+$" - ] - }, - { - "@id": "bts:CheckNum", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckNum", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Num", - "sms:required": "sms:true", - "sms:validationRules": [ - "num error" - ] - }, - { - "@id": "bts:CheckFloat", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckFloat", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Float", - "sms:required": "sms:true", - "sms:validationRules": [ - "float error" - ] - }, - { - "@id": "bts:CheckInt", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckInt", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Int", - "sms:required": "sms:true", - "sms:validationRules": [ - "int error" - ] - }, - { - "@id": "bts:CheckString", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckString", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check String", - "sms:required": "sms:true", - "sms:validationRules": [ - "str error" - ] - }, - { - "@id": "bts:CheckURL", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckURL", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check URL", - "sms:required": "sms:true", - "sms:validationRules": [ - "url" - ] - }, - { - "@id": "bts:CheckMatchatLeast", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckMatchatLeast", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Match at Least", - "sms:required": "sms:true", - "sms:validationRules": [ - "matchAtLeastOne Patient.PatientID set" - ] - }, - { - "@id": "bts:CheckMatchatLeastvalues", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckMatchatLeastvalues", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Match at Least values", - "sms:required": "sms:true", - "sms:validationRules": [ - "matchAtLeastOne MockComponent.checkMatchatLeastvalues value" - ] - }, - { - "@id": "bts:CheckMatchExactly", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckMatchExactly", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Match Exactly", - "sms:required": "sms:true", - "sms:validationRules": [ - "matchExactlyOne MockComponent.checkMatchExactly set" - ] - }, - { - "@id": "bts:CheckMatchExactlyvalues", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckMatchExactlyvalues", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Match Exactly values", - "sms:required": "sms:true", - "sms:validationRules": [ - "matchExactlyOne MockComponent.checkMatchExactlyvalues value" - ] - }, - { - "@id": "bts:CheckMatchNone", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckMatchNone", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Match None", - "sms:required": "sms:true", - "sms:validationRules": [ - "matchNone MockComponent.checkMatchNone set error" - ] - }, - { - "@id": "bts:CheckMatchNonevalues", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckMatchNonevalues", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Match None values", - "sms:required": "sms:true", - "sms:validationRules": [ - "matchNone MockComponent.checkMatchNonevalues value error" - ] - }, - { - "@id": "bts:CheckRecommended", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckRecommended", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Recommended", - "sms:required": "sms:false", - "sms:validationRules": [ - "recommended" - ] - }, - { - "@id": "bts:CheckAges", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckAges", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Ages", - "sms:required": "sms:true", - "sms:validationRules": [ - "protectAges" - ] - }, - { - "@id": "bts:CheckUnique", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckUnique", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Unique", - "sms:required": "sms:true", - "sms:validationRules": [ - "unique error" - ] - }, - { - "@id": "bts:CheckRange", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckRange", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Range", - "sms:required": "sms:true", - "sms:validationRules": [ - "inRange 50 100 error" - ] - }, - { - "@id": "bts:CheckDate", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckDate", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check Date", - "sms:required": "sms:true", - "sms:validationRules": [ - "date" - ] - }, - { - "@id": "bts:CheckNA", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CheckNA", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Check NA", - "sms:required": "sms:true", - "sms:validationRules": [ - "int", - "IsNA" - ] - }, - { - "@id": "bts:Ab", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Ab", - "rdfs:subClassOf": [ - { - "@id": "bts:CheckListEnum" - }, - { - "@id": "bts:CheckListLikeEnum" - }, - { - "@id": "bts:CheckListEnumStrict" - }, - { - "@id": "bts:Enum" - }, - { - "@id": "bts:EnumNotRequired" - }, - { - "@id": "bts:ListEnum" - }, - { - "@id": "bts:ListEnumNotRequired" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "ab", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Cd", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Cd", - "rdfs:subClassOf": [ - { - "@id": "bts:CheckListEnum" - }, - { - "@id": "bts:CheckListLikeEnum" - }, - { - "@id": "bts:CheckListEnumStrict" - }, - { - "@id": "bts:Enum" - }, - { - "@id": "bts:EnumNotRequired" - }, - { - "@id": "bts:ListEnum" - }, - { - "@id": "bts:ListEnumNotRequired" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "cd", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Ef", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Ef", - "rdfs:subClassOf": [ - { - "@id": "bts:CheckListEnum" - }, - { - "@id": "bts:CheckListLikeEnum" - }, - { - "@id": "bts:CheckListEnumStrict" - }, - { - "@id": "bts:Enum" - }, - { - "@id": "bts:EnumNotRequired" - }, - { - "@id": "bts:ListEnum" - }, - { - "@id": "bts:ListEnumNotRequired" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "ef", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Gh", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Gh", - "rdfs:subClassOf": [ - { - "@id": "bts:CheckListEnum" - }, - { - "@id": "bts:CheckListLikeEnum" - }, - { - "@id": "bts:CheckListEnumStrict" - }, - { - "@id": "bts:Enum" - }, - { - "@id": "bts:EnumNotRequired" - }, - { - "@id": "bts:ListEnum" - }, - { - "@id": "bts:ListEnumNotRequired" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "gh", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:MockRDB", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "MockRDB", - "rdfs:subClassOf": [ - { - "@id": "bts:DataType" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "MockRDB", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:Component" - }, - { - "@id": "bts:MockRDBId" - }, - { - "@id": "bts:SourceManifest" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:MockRDBId", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "MockRDBId", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "MockRDB_id", - "sms:required": "sms:true", - "sms:validationRules": [ - "int" - ] - }, - { - "@id": "bts:SourceManifest", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "SourceManifest", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "SourceManifest", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:MockFilename", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "MockFilename", - "rdfs:subClassOf": [ - { - "@id": "bts:DataType" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "MockFilename", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:Component" - }, - { - "@id": "bts:Filename" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:JSONSchemaComponent", - "@type": "rdfs:Class", - "rdfs:comment": "Component to hold attributes for testing JSON Schemas", - "rdfs:label": "JSONSchemaComponent", - "rdfs:subClassOf": [ - { - "@id": "bts:DataType" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "JSONSchemaComponent", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:Component" - }, - { - "@id": "bts:NoRules" - }, - { - "@id": "bts:NoRulesNotRequired" - }, - { - "@id": "bts:String" - }, - { - "@id": "bts:StringNotRequired" - }, - { - "@id": "bts:Enum" - }, - { - "@id": "bts:EnumNotRequired" - }, - { - "@id": "bts:Date" - }, - { - "@id": "bts:URL" - }, - { - "@id": "bts:InRange" - }, - { - "@id": "bts:Regex" - }, - { - "@id": "bts:List" - }, - { - "@id": "bts:ListNotRequired" - }, - { - "@id": "bts:ListEnum" - }, - { - "@id": "bts:ListEnumNotRequired" - }, - { - "@id": "bts:ListString" - }, - { - "@id": "bts:ListInRange" - } - ], - "sms:validationRules": [] - }, - { - "@id": "bts:NoRules", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "NoRules", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "No Rules", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:NoRulesNotRequired", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "NoRulesNotRequired", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "No Rules Not Required", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:String", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "String", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "String", - "sms:required": "sms:true", - "sms:validationRules": [ - "str error" - ] - }, - { - "@id": "bts:StringNotRequired", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "StringNotRequired", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "String Not Required", - "sms:required": "sms:false", - "sms:validationRules": [ - "str error" - ] - }, - { - "@id": "bts:Enum", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Enum", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Ab" - }, - { - "@id": "bts:Cd" - }, - { - "@id": "bts:Ef" - }, - { - "@id": "bts:Gh" - } - ], - "sms:displayName": "Enum", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:EnumNotRequired", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "EnumNotRequired", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Ab" - }, - { - "@id": "bts:Cd" - }, - { - "@id": "bts:Ef" - }, - { - "@id": "bts:Gh" - } - ], - "sms:displayName": "Enum Not Required", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:Date", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Date", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Date", - "sms:required": "sms:true", - "sms:validationRules": [ - "date" - ] - }, - { - "@id": "bts:URL", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "URL", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "URL", - "sms:required": "sms:true", - "sms:validationRules": [ - "url" - ] - }, - { - "@id": "bts:InRange", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "InRange", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "InRange", - "sms:required": "sms:true", - "sms:validationRules": [ - "inRange 50 100" - ] - }, - { - "@id": "bts:Regex", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Regex", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Regex", - "sms:required": "sms:true", - "sms:validationRules": [ - "regex search [a-f]" - ] - }, - { - "@id": "bts:List", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "List", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "List", - "sms:required": "sms:true", - "sms:validationRules": [ - "list" - ] - }, - { - "@id": "bts:ListNotRequired", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "ListNotRequired", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "List Not Required", - "sms:required": "sms:false", - "sms:validationRules": [ - "list" - ] - }, - { - "@id": "bts:ListEnum", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "ListEnum", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Ab" - }, - { - "@id": "bts:Cd" - }, - { - "@id": "bts:Ef" - }, - { - "@id": "bts:Gh" - } - ], - "sms:displayName": "List Enum", - "sms:required": "sms:true", - "sms:validationRules": [ - "list" - ] - }, - { - "@id": "bts:ListEnumNotRequired", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "ListEnumNotRequired", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "schema:rangeIncludes": [ - { - "@id": "bts:Ab" - }, - { - "@id": "bts:Cd" - }, - { - "@id": "bts:Ef" - }, - { - "@id": "bts:Gh" - } - ], - "sms:displayName": "List Enum Not Required", - "sms:required": "sms:false", - "sms:validationRules": [ - "list" - ] - }, - { - "@id": "bts:ListString", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "ListString", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "List String", - "sms:required": "sms:true", - "sms:validationRules": [ - "list", - "str" - ] - }, - { - "@id": "bts:ListInRange", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "ListInRange", - "rdfs:subClassOf": [ - { - "@id": "bts:DataProperty" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "List InRange", - "sms:required": "sms:true", - "sms:validationRules": [ - "list", - "inRange 50 100" - ] - } - ], - "@id": "http://schema.biothings.io/#0.1" -} diff --git a/tests/unit/synapseclient/extensions/data/example.model.column_type_component.csv b/tests/unit/synapseclient/extensions/schema_files/example.model.column_type_component.csv similarity index 100% rename from tests/unit/synapseclient/extensions/data/example.model.column_type_component.csv rename to tests/unit/synapseclient/extensions/schema_files/example.model.column_type_component.csv diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Biospecimen.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Biospecimen.schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.JSONSchemaComponent.schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockComponent.schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockFilename.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockFilename.schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockRDB.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.MockRDB.schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.display_names_schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.display_names_schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json diff --git a/tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.Patient.schema.json rename to tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json diff --git a/tests/unit/synapseclient/extensions/data/json_instances/bulk_rna_missing_conditional_dependencies.json b/tests/unit/synapseclient/extensions/schema_files/json_instances/bulk_rna_missing_conditional_dependencies.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/json_instances/bulk_rna_missing_conditional_dependencies.json rename to tests/unit/synapseclient/extensions/schema_files/json_instances/bulk_rna_missing_conditional_dependencies.json diff --git a/tests/unit/synapseclient/extensions/data/json_instances/patient_missing_conditional_dependencies.json b/tests/unit/synapseclient/extensions/schema_files/json_instances/patient_missing_conditional_dependencies.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/json_instances/patient_missing_conditional_dependencies.json rename to tests/unit/synapseclient/extensions/schema_files/json_instances/patient_missing_conditional_dependencies.json diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_biospecimen1.json b/tests/unit/synapseclient/extensions/schema_files/json_instances/valid_biospecimen1.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/json_instances/valid_biospecimen1.json rename to tests/unit/synapseclient/extensions/schema_files/json_instances/valid_biospecimen1.json diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna1.json b/tests/unit/synapseclient/extensions/schema_files/json_instances/valid_bulk_rna1.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna1.json rename to tests/unit/synapseclient/extensions/schema_files/json_instances/valid_bulk_rna1.json diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna2.json b/tests/unit/synapseclient/extensions/schema_files/json_instances/valid_bulk_rna2.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna2.json rename to tests/unit/synapseclient/extensions/schema_files/json_instances/valid_bulk_rna2.json diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_patient1.json b/tests/unit/synapseclient/extensions/schema_files/json_instances/valid_patient1.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/json_instances/valid_patient1.json rename to tests/unit/synapseclient/extensions/schema_files/json_instances/valid_patient1.json diff --git a/tests/unit/synapseclient/extensions/data/json_instances/valid_patient2.json b/tests/unit/synapseclient/extensions/schema_files/json_instances/valid_patient2.json similarity index 100% rename from tests/unit/synapseclient/extensions/data/json_instances/valid_patient2.json rename to tests/unit/synapseclient/extensions/schema_files/json_instances/valid_patient2.json diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index e07046a5a..90def6070 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -39,6 +39,47 @@ # pylint: disable=too-many-positional-arguments +# Test data paths - change these when files move +TEST_DATA_BASE_PATH = "tests/unit/synapseclient/extensions" +SCHEMA_FILES_DIR = f"{TEST_DATA_BASE_PATH}/schema_files" +EXPECTED_SCHEMAS_DIR = f"{SCHEMA_FILES_DIR}/expected_jsonschemas" +JSON_INSTANCES_DIR = f"{SCHEMA_FILES_DIR}/json_instances" + +# Schema file patterns +EXPECTED_SCHEMA_PATTERN = "{datatype}.schema.json" +EXPECTED_DISPLAY_NAMES_SCHEMA_PATTERN = "{datatype}.display_names_schema.json" +TEST_SCHEMA_PATTERN = "test.{datatype}.schema.json" +TEST_DISPLAY_NAMES_SCHEMA_PATTERN = "test.{datatype}.display_names_schema.json" + + +# Helper functions for path construction +def get_expected_schema_path(datatype: str, display_names: bool = False) -> str: + """Get path to expected schema file""" + pattern = ( + EXPECTED_DISPLAY_NAMES_SCHEMA_PATTERN + if display_names + else EXPECTED_SCHEMA_PATTERN + ) + filename = f"expected.{pattern.format(datatype=datatype)}" + return f"{EXPECTED_SCHEMAS_DIR}/{filename}" + + +def get_json_instance_path(filename: str) -> str: + """Get path to JSON instance file""" + return f"{JSON_INSTANCES_DIR}/{filename}" + + +def get_test_schema_path( + test_directory: str, datatype: str, display_names: bool = False +) -> str: + """Get path for generated test schema file""" + pattern = ( + TEST_DISPLAY_NAMES_SCHEMA_PATTERN if display_names else TEST_SCHEMA_PATTERN + ) + filename = pattern.format(datatype=datatype) + return os.path.join(test_directory, filename) + + @pytest.fixture(name="test_directory", scope="session") def fixture_test_directory(request) -> str: """Returns a directory for creating test jSON Schemas in""" @@ -561,11 +602,10 @@ def test_create_json_schema_with_class_label( dmge: DataModelGraphExplorer, datatype: str, test_directory: str ) -> None: """Tests for JSONSchemaGenerator.create_json_schema""" - test_file = f"test.{datatype}.schema.json" - test_path = os.path.join(test_directory, test_file) + test_path = get_test_schema_path(test_directory, datatype) + expected_path = get_expected_schema_path(datatype) logger = logging.getLogger(__name__) - expected_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.schema.json" create_json_schema( dmge=dmge, datatype=datatype, @@ -594,10 +634,9 @@ def test_create_json_schema_with_display_names( dmge: DataModelGraphExplorer, datatype: str, test_directory: str ) -> None: """Tests for JSONSchemaGenerator.create_json_schema""" - test_file = f"test.{datatype}.display_names_schema.json" - test_path = os.path.join(test_directory, test_file) logger = logging.getLogger(__name__) - expected_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.display_names_schema.json" + test_path = get_test_schema_path(test_directory, datatype, display_names=True) + expected_path = get_expected_schema_path(datatype, display_names=True) create_json_schema( dmge=dmge, datatype=datatype, @@ -621,10 +660,9 @@ def test_create_json_schema_with_no_column_type( This tests where the data model does not have columnType attribute """ datatype = "JSONSchemaComponent" - test_file = f"test.{datatype}.display_names_schema.json" - test_path = os.path.join(test_directory, test_file) + test_path = get_test_schema_path(test_directory, datatype, display_names=True) + expected_path = get_expected_schema_path(datatype) logger = logging.getLogger(__name__) - expected_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.schema.json" create_json_schema( dmge=dmge, datatype=datatype, @@ -649,11 +687,10 @@ def test_create_json_schema_with_column_type( This tests where the data model does have the columnType attribute """ datatype = "JSONSchemaComponent" - test_file = f"test.{datatype}.display_names_schema.json" - test_path = os.path.join(test_directory, test_file) + test_path = get_test_schema_path(test_directory, datatype, display_names=True) + expected_path = get_expected_schema_path(datatype, display_names=True) logger = logging.getLogger(__name__) - expected_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.display_names_schema.json" create_json_schema( dmge=dmge_column_type, datatype=datatype, @@ -671,26 +708,26 @@ def test_create_json_schema_with_column_type( @pytest.mark.parametrize( - "instance_path, datatype", + "instance_filename, datatype", [ ( - "tests/unit/synapseclient/extensions/data/json_instances/valid_biospecimen1.json", + "valid_biospecimen1.json", "Biospecimen", ), ( - "tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna1.json", + "valid_bulk_rna1.json", "BulkRNA-seqAssay", ), ( - "tests/unit/synapseclient/extensions/data/json_instances/valid_bulk_rna2.json", + "valid_bulk_rna2.json", "BulkRNA-seqAssay", ), ( - "tests/unit/synapseclient/extensions/data/json_instances/valid_patient1.json", + "valid_patient1.json", "Patient", ), ( - "tests/unit/synapseclient/extensions/data/json_instances/valid_patient2.json", + "valid_patient2.json", "Patient", ), ], @@ -703,17 +740,12 @@ def test_create_json_schema_with_column_type( ], ) def test_validate_valid_instances( - instance_path: str, + instance_filename: str, datatype: str, ) -> None: """Validates instances using expected JSON Schemas""" - from pathlib import Path - - # Use absolute paths based on the test file location - test_file_dir = Path(__file__).parent - schema_path = ( - test_file_dir / f"data/expected_jsonschemas/expected.{datatype}.schema.json" - ) + schema_path = get_expected_schema_path(datatype) + instance_path = get_json_instance_path(instance_filename) with open(schema_path, encoding="utf-8") as schema_file: schema = json.load(schema_file) @@ -724,14 +756,14 @@ def test_validate_valid_instances( @pytest.mark.parametrize( - "instance_path, datatype", + "instance_filename, datatype", [ ( - "tests/unit/synapseclient/extensions/data/json_instances/bulk_rna_missing_conditional_dependencies.json", + "bulk_rna_missing_conditional_dependencies.json", "BulkRNA-seqAssay", ), ( - "tests/unit/synapseclient/extensions/data/json_instances/patient_missing_conditional_dependencies.json", + "patient_missing_conditional_dependencies.json", "Patient", ), ], @@ -741,12 +773,14 @@ def test_validate_valid_instances( ], ) def test_validate_invalid_instances( - instance_path: str, + instance_filename: str, datatype: str, ) -> None: """Raises a ValidationError validating invalid instances using expected JSON Schemas""" - schema_path = f"tests/unit/synapseclient/extensions/data/expected_jsonschemas/expected.{datatype}.schema.json" + schema_path = get_expected_schema_path(datatype) + instance_path = get_json_instance_path(instance_filename) + with open(schema_path, encoding="utf-8") as schema_file: schema = json.load(schema_file) with open(instance_path, encoding="utf-8") as instance_file: From 0e216f9a245f537facf66ec38d9c83e0c40ae6c3 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 7 Nov 2025 17:32:04 -0500 Subject: [PATCH 08/26] update cmments --- .../synapseclient/extensions/unit_test_create_json_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 90def6070..66dfee94b 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -39,7 +39,7 @@ # pylint: disable=too-many-positional-arguments -# Test data paths - change these when files move +# Test data paths TEST_DATA_BASE_PATH = "tests/unit/synapseclient/extensions" SCHEMA_FILES_DIR = f"{TEST_DATA_BASE_PATH}/schema_files" EXPECTED_SCHEMAS_DIR = f"{SCHEMA_FILES_DIR}/expected_jsonschemas" From 67e8728a476acd8e54f2092e59b01b4e1ac98239 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 7 Nov 2025 17:46:41 -0500 Subject: [PATCH 09/26] add back example model jsonld --- .../schema_files/example.model.jsonld | 2244 +++++++++++++++++ .../unit_test_create_json_schema.py | 1 - 2 files changed, 2244 insertions(+), 1 deletion(-) create mode 100644 tests/unit/synapseclient/extensions/schema_files/example.model.jsonld diff --git a/tests/unit/synapseclient/extensions/schema_files/example.model.jsonld b/tests/unit/synapseclient/extensions/schema_files/example.model.jsonld new file mode 100644 index 000000000..b3c3e0da4 --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/example.model.jsonld @@ -0,0 +1,2244 @@ +{ + "@context": { + "bts": "http://schema.biothings.io/", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "schema": "http://schema.org/", + "xsd": "http://www.w3.org/2001/XMLSchema#" + }, + "@graph": [ + { + "@id": "bts:Component", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Component", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Component", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Patient", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Patient", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Patient", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:PatientID" + }, + { + "@id": "bts:Sex" + }, + { + "@id": "bts:YearofBirth" + }, + { + "@id": "bts:Diagnosis" + }, + { + "@id": "bts:Component" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:PatientID", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "PatientID", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Patient ID", + "sms:required": "sms:true", + "sms:validationRules": { + "Biospecimen": "unique error", + "Patient": "unique warning" + } + }, + { + "@id": "bts:Sex", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Sex", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Female" + }, + { + "@id": "bts:Male" + }, + { + "@id": "bts:Other" + } + ], + "sms:displayName": "Sex", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:YearofBirth", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "YearofBirth", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Year of Birth", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Diagnosis", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Diagnosis", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Healthy" + }, + { + "@id": "bts:Cancer" + } + ], + "sms:displayName": "Diagnosis", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:DataType", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "DataType", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "DataType", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:DataProperty", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "DataProperty", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "DataProperty", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Female", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Female", + "rdfs:subClassOf": [ + { + "@id": "bts:Sex" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Female", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Male", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Male", + "rdfs:subClassOf": [ + { + "@id": "bts:Sex" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Male", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Other", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Other", + "rdfs:subClassOf": [ + { + "@id": "bts:Sex" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Other", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Healthy", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Healthy", + "rdfs:subClassOf": [ + { + "@id": "bts:Diagnosis" + }, + { + "@id": "bts:TissueStatus" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Healthy", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Cancer", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Cancer", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:Diagnosis" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Cancer", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CancerType", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CancerType", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Breast" + }, + { + "@id": "bts:Colorectal" + }, + { + "@id": "bts:Lung" + }, + { + "@id": "bts:Prostate" + }, + { + "@id": "bts:Skin" + } + ], + "sms:displayName": "Cancer Type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:FamilyHistory", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "FamilyHistory", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Breast" + }, + { + "@id": "bts:Colorectal" + }, + { + "@id": "bts:Lung" + }, + { + "@id": "bts:Prostate" + }, + { + "@id": "bts:Skin" + } + ], + "sms:displayName": "Family History", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict" + ] + }, + { + "@id": "bts:ValidValue", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ValidValue", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ValidValue", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Breast", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Breast", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Breast", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Colorectal", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Colorectal", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Colorectal", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Lung", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Lung", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Lung", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Prostate", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Prostate", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Prostate", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Skin", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Skin", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Skin", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Biospecimen", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Biospecimen", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Biospecimen", + "sms:required": "sms:false", + "sms:requiresComponent": [ + { + "@id": "bts:Patient" + } + ], + "sms:requiresDependency": [ + { + "@id": "bts:SampleID" + }, + { + "@id": "bts:PatientID" + }, + { + "@id": "bts:TissueStatus" + }, + { + "@id": "bts:Component" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:SampleID", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "SampleID", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Sample ID", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:TissueStatus", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "TissueStatus", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Healthy" + }, + { + "@id": "bts:Malignant" + }, + { + "@id": "bts:None" + } + ], + "sms:displayName": "Tissue Status", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Malignant", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Malignant", + "rdfs:subClassOf": [ + { + "@id": "bts:TissueStatus" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Malignant", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:None", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "None", + "rdfs:subClassOf": [ + { + "@id": "bts:TissueStatus" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "None", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:BulkRNA-seqAssay", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "BulkRNA-seqAssay", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Bulk RNA-seq Assay", + "sms:required": "sms:false", + "sms:requiresComponent": [ + { + "@id": "bts:Biospecimen" + } + ], + "sms:requiresDependency": [ + { + "@id": "bts:Filename" + }, + { + "@id": "bts:SampleID" + }, + { + "@id": "bts:FileFormat" + }, + { + "@id": "bts:Component" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:Filename", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Filename", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Filename", + "sms:required": "sms:true", + "sms:validationRules": { + "MockFilename": "filenameExists" + } + }, + { + "@id": "bts:FileFormat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "FileFormat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:FASTQ" + }, + { + "@id": "bts:BAM" + }, + { + "@id": "bts:CRAM" + }, + { + "@id": "bts:CSV/TSV" + } + ], + "sms:displayName": "File Format", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:FASTQ", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "FASTQ", + "rdfs:subClassOf": [ + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "FASTQ", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:BAM", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "BAM", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "BAM", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:GenomeBuild" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CRAM", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CRAM", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "CRAM", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:GenomeBuild" + }, + { + "@id": "bts:GenomeFASTA" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CSV/TSV", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CSV/TSV", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "CSV/TSV", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:GenomeBuild" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:GenomeBuild", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GenomeBuild", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:GRCh37" + }, + { + "@id": "bts:GRCh38" + }, + { + "@id": "bts:GRCm38" + }, + { + "@id": "bts:GRCm39" + } + ], + "sms:displayName": "Genome Build", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:GenomeFASTA", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GenomeFASTA", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Genome FASTA", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCh37", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCh37", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCh37", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCh38", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCh38", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCh38", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCm38", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCm38", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCm38", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCm39", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCm39", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCm39", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:MockComponent", + "@type": "rdfs:Class", + "rdfs:comment": "Component to hold mock attributes for testing all validation rules", + "rdfs:label": "MockComponent", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockComponent", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:CheckList" + }, + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLike" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListStrict" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:CheckRegexList" + }, + { + "@id": "bts:CheckRegexListLike" + }, + { + "@id": "bts:CheckRegexListStrict" + }, + { + "@id": "bts:CheckRegexSingle" + }, + { + "@id": "bts:CheckRegexFormat" + }, + { + "@id": "bts:CheckRegexInteger" + }, + { + "@id": "bts:CheckNum" + }, + { + "@id": "bts:CheckFloat" + }, + { + "@id": "bts:CheckInt" + }, + { + "@id": "bts:CheckString" + }, + { + "@id": "bts:CheckURL" + }, + { + "@id": "bts:CheckMatchatLeast" + }, + { + "@id": "bts:CheckMatchatLeastvalues" + }, + { + "@id": "bts:CheckMatchExactly" + }, + { + "@id": "bts:CheckMatchExactlyvalues" + }, + { + "@id": "bts:CheckMatchNone" + }, + { + "@id": "bts:CheckMatchNonevalues" + }, + { + "@id": "bts:CheckRecommended" + }, + { + "@id": "bts:CheckAges" + }, + { + "@id": "bts:CheckUnique" + }, + { + "@id": "bts:CheckRange" + }, + { + "@id": "bts:CheckDate" + }, + { + "@id": "bts:CheckNA" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CheckList", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckList", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check List", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:CheckListEnum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListEnum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Check List Enum", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:CheckListLike", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListLike", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check List Like", + "sms:required": "sms:true", + "sms:validationRules": [ + "list like" + ] + }, + { + "@id": "bts:CheckListLikeEnum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListLikeEnum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Check List Like Enum", + "sms:required": "sms:true", + "sms:validationRules": [ + "list like" + ] + }, + { + "@id": "bts:CheckListStrict", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListStrict", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check List Strict", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict" + ] + }, + { + "@id": "bts:CheckListEnumStrict", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListEnumStrict", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Check List Enum Strict", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict" + ] + }, + { + "@id": "bts:CheckRegexList", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexList", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex List", + "sms:required": "sms:true", + "sms:validationRules": [ + "list", + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexListLike", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexListLike", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex List Like", + "sms:required": "sms:true", + "sms:validationRules": [ + "list like", + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexListStrict", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexListStrict", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex List Strict", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict", + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexSingle", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexSingle", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex Single", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex search [a-f]" + ] + }, + { + "@id": "bts:CheckRegexFormat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexFormat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex Format", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexInteger", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexInteger", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex Integer", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex search ^\\d+$" + ] + }, + { + "@id": "bts:CheckNum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckNum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Num", + "sms:required": "sms:true", + "sms:validationRules": [ + "num error" + ] + }, + { + "@id": "bts:CheckFloat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckFloat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Float", + "sms:required": "sms:true", + "sms:validationRules": [ + "float error" + ] + }, + { + "@id": "bts:CheckInt", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckInt", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Int", + "sms:required": "sms:true", + "sms:validationRules": [ + "int error" + ] + }, + { + "@id": "bts:CheckString", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckString", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check String", + "sms:required": "sms:true", + "sms:validationRules": [ + "str error" + ] + }, + { + "@id": "bts:CheckURL", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckURL", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check URL", + "sms:required": "sms:true", + "sms:validationRules": [ + "url" + ] + }, + { + "@id": "bts:CheckMatchatLeast", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchatLeast", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match at Least", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchAtLeastOne Patient.PatientID set" + ] + }, + { + "@id": "bts:CheckMatchatLeastvalues", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchatLeastvalues", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match at Least values", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchAtLeastOne MockComponent.checkMatchatLeastvalues value" + ] + }, + { + "@id": "bts:CheckMatchExactly", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchExactly", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match Exactly", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchExactlyOne MockComponent.checkMatchExactly set" + ] + }, + { + "@id": "bts:CheckMatchExactlyvalues", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchExactlyvalues", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match Exactly values", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchExactlyOne MockComponent.checkMatchExactlyvalues value" + ] + }, + { + "@id": "bts:CheckMatchNone", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchNone", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match None", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchNone MockComponent.checkMatchNone set error" + ] + }, + { + "@id": "bts:CheckMatchNonevalues", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchNonevalues", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match None values", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchNone MockComponent.checkMatchNonevalues value error" + ] + }, + { + "@id": "bts:CheckRecommended", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRecommended", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Recommended", + "sms:required": "sms:false", + "sms:validationRules": [ + "recommended" + ] + }, + { + "@id": "bts:CheckAges", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckAges", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Ages", + "sms:required": "sms:true", + "sms:validationRules": [ + "protectAges" + ] + }, + { + "@id": "bts:CheckUnique", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckUnique", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Unique", + "sms:required": "sms:true", + "sms:validationRules": [ + "unique error" + ] + }, + { + "@id": "bts:CheckRange", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRange", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Range", + "sms:required": "sms:true", + "sms:validationRules": [ + "inRange 50 100 error" + ] + }, + { + "@id": "bts:CheckDate", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckDate", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Date", + "sms:required": "sms:true", + "sms:validationRules": [ + "date" + ] + }, + { + "@id": "bts:CheckNA", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckNA", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check NA", + "sms:required": "sms:true", + "sms:validationRules": [ + "int", + "IsNA" + ] + }, + { + "@id": "bts:Ab", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Ab", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ab", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Cd", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Cd", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "cd", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Ef", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Ef", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ef", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Gh", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Gh", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "gh", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:MockRDB", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MockRDB", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockRDB", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:MockRDBId" + }, + { + "@id": "bts:SourceManifest" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:MockRDBId", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MockRDBId", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockRDB_id", + "sms:required": "sms:true", + "sms:validationRules": [ + "int" + ] + }, + { + "@id": "bts:SourceManifest", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "SourceManifest", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "SourceManifest", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:MockFilename", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MockFilename", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockFilename", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:Filename" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:JSONSchemaComponent", + "@type": "rdfs:Class", + "rdfs:comment": "Component to hold attributes for testing JSON Schemas", + "rdfs:label": "JSONSchemaComponent", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "JSONSchemaComponent", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:NoRules" + }, + { + "@id": "bts:NoRulesNotRequired" + }, + { + "@id": "bts:String" + }, + { + "@id": "bts:StringNotRequired" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:Date" + }, + { + "@id": "bts:URL" + }, + { + "@id": "bts:InRange" + }, + { + "@id": "bts:Regex" + }, + { + "@id": "bts:List" + }, + { + "@id": "bts:ListNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + }, + { + "@id": "bts:ListString" + }, + { + "@id": "bts:ListInRange" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:NoRules", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "NoRules", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "No Rules", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:NoRulesNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "NoRulesNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "No Rules Not Required", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:String", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "String", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "String", + "sms:required": "sms:true", + "sms:validationRules": [ + "str error" + ] + }, + { + "@id": "bts:StringNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "StringNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "String Not Required", + "sms:required": "sms:false", + "sms:validationRules": [ + "str error" + ] + }, + { + "@id": "bts:Enum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Enum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Enum", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:EnumNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "EnumNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Enum Not Required", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Date", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Date", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Date", + "sms:required": "sms:true", + "sms:validationRules": [ + "date" + ] + }, + { + "@id": "bts:URL", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "URL", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "URL", + "sms:required": "sms:true", + "sms:validationRules": [ + "url" + ] + }, + { + "@id": "bts:InRange", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "InRange", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "InRange", + "sms:required": "sms:true", + "sms:validationRules": [ + "inRange 50 100" + ] + }, + { + "@id": "bts:Regex", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Regex", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Regex", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex search [a-f]" + ] + }, + { + "@id": "bts:List", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "List", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "List", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:ListNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "List Not Required", + "sms:required": "sms:false", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:ListEnum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListEnum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "List Enum", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:ListEnumNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListEnumNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "List Enum Not Required", + "sms:required": "sms:false", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:ListString", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListString", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "List String", + "sms:required": "sms:true", + "sms:validationRules": [ + "list", + "str" + ] + }, + { + "@id": "bts:ListInRange", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListInRange", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "List InRange", + "sms:required": "sms:true", + "sms:validationRules": [ + "list", + "inRange 50 100" + ] + } + ], + "@id": "http://schema.biothings.io/#0.1" +} diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 66dfee94b..7155bf121 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -1275,7 +1275,6 @@ def test_create_simple_property( ) -> None: """Test for _create_simple_property""" schema = _create_simple_property(test_nodes[node_name]) - print("schema", schema) assert schema == expected_schema full_schema = {"type": "object", "properties": {"name": schema}, "required": []} validator = Draft7Validator(full_schema) From 41c5184a2a11d8303b231ff39fc4180ba61a4966 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 11:25:39 -0500 Subject: [PATCH 10/26] rewrite some tests using dmge_column_type --- .../unit_test_create_json_schema.py | 70 ++++++++++++++----- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 7155bf121..7a62df6e9 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -120,6 +120,35 @@ def fixture_test_nodes( return nodes +@pytest.fixture(name="test_nodes_column_types") +def fixture_test_nodes_column_types( + dmge_column_type: DataModelGraphExplorer, +): + """Yields dict of Nodes""" + nodes = [ + "NoRules", + "NoRulesNotRequired", + "String", + "StringNotRequired", + "Enum", + "EnumNotRequired", + "InRange", + "Regex", + "Date", + "URL", + "List", + "ListNotRequired", + "ListEnum", + "ListEnumNotRequired", + "ListString", + "ListInRange", + ] + nodes = { + node: Node2(node, "JSONSchemaComponent", dmge_column_type) for node in nodes + } + return nodes + + class TestJSONSchema: """Tests for JSONSchema""" @@ -1125,7 +1154,9 @@ def test_create_enum_array_property( ( "ListString", { - "oneOf": [{"type": "array", "title": "array"}], + "oneOf": [ + {"type": "array", "title": "array", "items": {"type": "string"}} + ], }, [[], ["x"]], [None, [None], [1]], @@ -1138,6 +1169,7 @@ def test_create_enum_array_property( { "type": "array", "title": "array", + "items": {"type": "number", "minimum": 50.0, "maximum": 100.0}, } ], }, @@ -1157,18 +1189,18 @@ def test_create_array_property( expected_schema: dict[str, Any], valid_values: list[Any], invalid_values: list[Any], - test_nodes: dict[str, Node2], + test_nodes_column_types: dict[str, Node2], ) -> None: """Test for _create_array_property""" - schema = _create_array_property(test_nodes[node_name]) + schema = _create_array_property(test_nodes_column_types[node_name]) assert schema == expected_schema full_schema = {"type": "object", "properties": {"name": schema}, "required": []} validator = Draft7Validator(full_schema) for value in valid_values: validator.validate({"name": value}) - # for value in invalid_values: - # with pytest.raises(ValidationError): - # validator.validate({"name": value}) + for value in invalid_values: + with pytest.raises(ValidationError): + validator.validate({"name": value}) @pytest.mark.parametrize( @@ -1222,7 +1254,7 @@ def test_create_enum_property( # If property_type is given, it is added to the schema ( "String", - {"not": {"type": "null"}}, + {"type": "string"}, [""], [1, None], ), @@ -1231,16 +1263,16 @@ def test_create_enum_property( ( "StringNotRequired", { - # "oneOf": [ - # {"type": "string", "title": "string"}, - # {"type": "null", "title": "null"}, - # ], + "oneOf": [ + {"type": "string", "title": "string"}, + {"type": "null", "title": "null"}, + ], }, [None, "x"], [1], ), - # # If is_required is True '"not": {"type":"null"}' is added to schema if - # # property_type is not given + # If is_required is True '"not": {"type":"null"}' is added to schema if + # property_type is not given ( "NoRules", {"not": {"type": "null"}}, @@ -1250,7 +1282,7 @@ def test_create_enum_property( ( "InRange", { - "not": {"type": "null"}, + "type": "number", "minimum": 50, "maximum": 100, }, @@ -1271,18 +1303,18 @@ def test_create_simple_property( expected_schema: dict[str, Any], valid_values: list[Any], invalid_values: list[Any], - test_nodes: dict[str, Node2], + test_nodes_column_types: dict[str, Node2], ) -> None: """Test for _create_simple_property""" - schema = _create_simple_property(test_nodes[node_name]) + schema = _create_simple_property(test_nodes_column_types[node_name]) assert schema == expected_schema full_schema = {"type": "object", "properties": {"name": schema}, "required": []} validator = Draft7Validator(full_schema) for value in valid_values: validator.validate({"name": value}) - # for value in invalid_values: - # with pytest.raises(ValidationError): - # validator.validate({"name": value}) + for value in invalid_values: + with pytest.raises(ValidationError): + validator.validate({"name": value}) @pytest.mark.parametrize( From 8a261461e461de0bb73549176b9025814b89f863 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 11:40:56 -0500 Subject: [PATCH 11/26] remove unused comment --- tests/unit/synapseclient/extensions/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/synapseclient/extensions/conftest.py b/tests/unit/synapseclient/extensions/conftest.py index 385da2e16..5a8d3b7b2 100644 --- a/tests/unit/synapseclient/extensions/conftest.py +++ b/tests/unit/synapseclient/extensions/conftest.py @@ -4,7 +4,6 @@ import pytest -# from synapseclient.extensions.curator.schema_generation import load_df from synapseclient.extensions.curator.schema_generation import ( DataModelGraph, DataModelGraphExplorer, From 71ae5472c6980204dbacd8b462a388730903084c Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 11:49:41 -0500 Subject: [PATCH 12/26] trim down the helpper function --- .../unit/synapseclient/extensions/conftest.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/unit/synapseclient/extensions/conftest.py b/tests/unit/synapseclient/extensions/conftest.py index 5a8d3b7b2..1921c7acf 100644 --- a/tests/unit/synapseclient/extensions/conftest.py +++ b/tests/unit/synapseclient/extensions/conftest.py @@ -8,7 +8,6 @@ DataModelGraph, DataModelGraphExplorer, DataModelParser, - load_df, ) TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -16,22 +15,11 @@ class Helpers: - @staticmethod - def get_data_path(path, *paths): - """Get path to test data files""" - return os.path.join(SCHEMA_FILES_DIR, path, *paths) - @staticmethod def get_schema_file_path(filename): """Get path to schema files specifically""" return os.path.join(SCHEMA_FILES_DIR, filename) - @staticmethod - def get_data_frame(path, *paths, **kwargs): - """Load a dataframe from schema files""" - fullpath = os.path.join(SCHEMA_FILES_DIR, path, *paths) - return load_df(fullpath, **kwargs) - @staticmethod def get_data_model_graph_explorer( path=None, data_model_labels: str = "class_label" @@ -62,13 +50,6 @@ def get_data_model_graph_explorer( return DMGE - @staticmethod - def get_python_version(): - version = sys.version - base_version = ".".join(version.split(".")[0:2]) - - return base_version - @pytest.fixture(scope="function") def helpers(): From 4a73fa591cf2669db112c932d07e659f9b1302ad Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 11:52:46 -0500 Subject: [PATCH 13/26] update the reason --- synapseclient/extensions/curator/schema_generation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index efe45e6bf..60b75adc2 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -4063,7 +4063,7 @@ def _generate_jsonschema( @deprecated( version="4.11.0", - reason="This function is going to be deprecated. Please use columnType to define types.", + reason="This function is going to be deprecated. Use of validation rules will be removed in the future.", ) def filter_unused_inputted_rules( inputted_rules: list[str], logger: Logger @@ -4098,7 +4098,7 @@ def filter_unused_inputted_rules( @deprecated( version="4.11.0", - reason="This function is going to be deprecated. Please use columnType to define types.", + reason="This function is going to be deprecated. Use of validation rules will be removed in the future.", ) def check_for_duplicate_inputted_rules(inputted_rules: list[str]) -> None: """Checks that there are no rules with duplicate names @@ -4116,7 +4116,7 @@ def check_for_duplicate_inputted_rules(inputted_rules: list[str]) -> None: @deprecated( version="4.11.0", - reason="This function is going to be deprecated. Please use columnType to define types.", + reason="This function is going to be deprecated. Use of validation rules will be removed in the future.", ) def check_for_conflicting_inputted_rules(inputted_rules: list[str]) -> None: """Checks that each rule has no conflicts with any other rule @@ -4262,7 +4262,7 @@ def get_regex_parameters_from_inputted_rule( @deprecated( version="4.11.0", - reason="This function is going to be deprecated. Please use columnType to define types.", + reason="This function is going to be deprecated. Use of validation rules will be removed in the future.", ) def get_validation_rule_names_from_inputted_rules( inputted_rules: list[str], @@ -4282,7 +4282,7 @@ def get_validation_rule_names_from_inputted_rules( @deprecated( version="4.11.0", - reason="This function is going to be deprecated. Please use columnType to define types.", + reason="This function is going to be deprecated. Use of validation rules will be removed in the future.", ) def get_names_from_inputted_rules(inputted_rules: list[str]) -> list[str]: """Gets the names from a list of inputted rules From 139198337e25bb58e2e924a28e9f0ffaa37496a8 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 12:24:59 -0500 Subject: [PATCH 14/26] remove js_type in _get_validation_rule_based_fields --- .../extensions/curator/schema_generation.py | 30 +------ .../unit_test_create_json_schema.py | 80 ++++++------------- 2 files changed, 29 insertions(+), 81 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 60b75adc2..06f44cdef 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -4351,8 +4351,6 @@ def _get_rules_by_names(names: list[str]) -> list[ValidationRule]: def _get_validation_rule_based_fields( validation_rules: list[str], - js_type: Optional[JSONSchemaType], - name: str, logger: Logger, ) -> tuple[ bool, @@ -4368,14 +4366,12 @@ def _get_validation_rule_based_fields( JSON Schema docs: Array: https://json-schema.org/understanding-json-schema/reference/array - Types: https://json-schema.org/understanding-json-schema/reference/type#type-specific-keywords Format: https://json-schema.org/understanding-json-schema/reference/type#format Pattern: https://json-schema.org/understanding-json-schema/reference/string#regexp Min/max: https://json-schema.org/understanding-json-schema/reference/numeric#range Arguments: validation_rules: A list of input validation rules - js_type: A JSONSchemaType if set explicitly in the data model, otherwise None name: The name of the node the validation rules belong to Raises: @@ -4386,14 +4382,12 @@ def _get_validation_rule_based_fields( Returns: A tuple containing fields for a Node object: - js_is_array: Whether or not the Node should be an array in JSON Schema - - js_type: The JSON Schema type - js_format: The JSON Schema format - js_minimum: If the type is numeric the JSON Schema minimum - js_maximum: If the type is numeric the JSON Schema maximum - js_pattern: If the type is string the JSON Schema pattern """ js_is_array = False - js_type = js_type js_format = None js_minimum = None js_maximum = None @@ -4412,15 +4406,6 @@ def _get_validation_rule_based_fields( js_is_array = ValidationRuleName.LIST in validation_rule_names - # The implicit JSON Schema type is the one implied by the presence - # of certain validation rules - - implicit_js_type = get_js_type_from_inputted_rules(validation_rules) - if implicit_js_type: - logger.warning( - f"Detected implicit type: '{implicit_js_type}' for property: '{name}'. Please note that type can only be set explicitly via the columnType column in the data model going forward." - ) - if ValidationRuleName.URL in validation_rule_names: js_format = JSONSchemaFormat.URI elif ValidationRuleName.DATE in validation_rule_names: @@ -4442,7 +4427,6 @@ def _get_validation_rule_based_fields( return ( js_is_array, - js_type, js_format, js_minimum, js_maximum, @@ -4516,19 +4500,16 @@ def __post_init__(self) -> None: self.description = self.dmge.get_node_comment( node_display_name=self.display_name ) - js_type = self.dmge.get_node_column_type(node_display_name=self.display_name) + self.type = self.dmge.get_node_column_type(node_display_name=self.display_name) ( self.is_array, - self.type, self.format, self.minimum, self.maximum, self.pattern, ) = _get_validation_rule_based_fields( validation_rules=validation_rules, - js_type=js_type, - name=self.name, logger=self.logger, ) @@ -5816,17 +5797,12 @@ def __post_init__(self) -> None: self.description = self.dmge.get_node_comment( node_display_name=self.display_name ) - explicit_js_type = self.dmge.get_node_column_type( - node_display_name=self.display_name - ) + self.type = self.dmge.get_node_column_type(node_display_name=self.display_name) ( self.is_array, - self.type, self.format, self.minimum, self.maximum, self.pattern, - ) = _get_validation_rule_based_fields( - validation_rules, explicit_js_type, self.name, logger=self.dmge.logger - ) + ) = _get_validation_rule_based_fields(validation_rules, logger=self.dmge.logger) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 7a62df6e9..c33fabd66 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -220,23 +220,21 @@ def test_update_property(self) -> None: @pytest.mark.parametrize( "node_name, expected_type, expected_is_array, expected_min, expected_max, expected_pattern, expected_format", [ - # If there are no type validation rules the type is None + # Node with no validation rules - all constraint fields should be None/False ("NoRules", None, False, None, None, None, None), - # If there is one type validation rule the type is set to the - # JSON Schema remains None + # Node with "str" validation rule - type remains None, constraints not set in Node initialization ("String", None, False, None, None, None, None), - # If there are any list type validation rules then is_array is set to True + # Node with "list" validation rule - is_array is set to True, other fields remain None ("List", None, True, None, None, None, None), - # If there are any list type validation rules and one type validation rule - # then is_array is set to True, and the type remains None + # Node with both "list" and "str" validation rules - is_array is True, type remains None ("ListString", None, True, None, None, None, None), - # If there is an inRange rule the min and max will be set. + # Node with "inRange 50 100" validation rule - minimum and maximum are extracted and set ("InRange", None, False, 50, 100, None, None), - # If there is a regex rule, then the pattern should be set + # Node with "regex search [a-f]" validation rule - pattern is extracted and set ("Regex", None, False, None, None, "[a-f]", None), - # If there is a date rule, then the format should be set to "date" + # Node with "date" validation rule - format is set to JSONSchemaFormat.DATE ("Date", None, False, None, None, None, JSONSchemaFormat.DATE), - # If there is a URL rule, then the format should be set to "uri" + # Node with "url" validation rule - format is set to JSONSchemaFormat.URI ("URL", None, False, None, None, None, JSONSchemaFormat.URI), ], ids=["None", "String", "List", "ListString", "InRange", "Regex", "Date", "URI"], @@ -262,47 +260,43 @@ def test_node_init( @pytest.mark.parametrize( - "validation_rules, expected_type, expected_is_array, expected_min, expected_max, expected_pattern, expected_format", + "validation_rules, expected_is_array, expected_min, expected_max, expected_pattern, expected_format", [ - # If there are no type validation rules the type is None - ([], None, False, None, None, None, None), - # If there is one type validation rule the type, it remains None - (["str"], None, False, None, None, None, None), - # If there are any list type validation rules then is_array is set to True - (["list"], None, True, None, None, None, None), - # If there are any list type validation rules and one type validation rule - # then is_array is set to True, and the type still remains None - (["list", "str"], None, True, None, None, None, None), - # If there is an inRange rule the min and max will be set - (["inRange 50 100"], None, False, 50, 100, None, None), - # If there is a regex rule, then the pattern should be set, but type remains None + # If there are no validation rules, all fields should be None/False + ([], False, None, None, None, None), + # If there is a "str" validation rule, only format/constraints are set, no explicit type + (["str"], False, None, None, None, None), + # If there is a "list" validation rule, is_array is set to True + (["list"], True, None, None, None, None), + # If there are both "list" and "str" validation rules, is_array is True, other fields remain None + (["list", "str"], True, None, None, None, None), + # If there is an "inRange" rule, minimum and maximum are extracted and set + (["inRange 50 100"], False, 50, 100, None, None), + # If there is a "regex search" rule, the pattern is extracted and set ( ["regex search [a-f]"], - None, False, None, None, "[a-f]", None, ), - # If there is a date rule, then the format should be set to "date", but type remains None + # If there is a "date" rule, the format is set to JSONSchemaFormat.DATE ( ["date"], - None, False, None, None, None, JSONSchemaFormat.DATE, ), - # If there is a URL rule, then the format should be set to "uri", but type remains None - (["url"], None, False, None, None, None, JSONSchemaFormat.URI), + # If there is a "url" rule, the format is set to JSONSchemaFormat.URI + (["url"], False, None, None, None, JSONSchemaFormat.URI), ], ids=["No rules", "String", "List", "ListString", "InRange", "Regex", "Date", "URL"], ) def test_get_validation_rule_based_fields_no_explicit_type( validation_rules: list[str], - expected_type: Optional[JSONSchemaType], expected_is_array: bool, expected_min: Optional[float], expected_max: Optional[float], @@ -316,13 +310,11 @@ def test_get_validation_rule_based_fields_no_explicit_type( logger = Mock() ( is_array, - property_type, property_format, minimum, maximum, pattern, - ) = _get_validation_rule_based_fields(validation_rules, None, "name", logger) - assert property_type == expected_type + ) = _get_validation_rule_based_fields(validation_rules, logger) assert property_format == expected_format assert is_array == expected_is_array assert minimum == expected_min @@ -331,12 +323,10 @@ def test_get_validation_rule_based_fields_no_explicit_type( @pytest.mark.parametrize( - "validation_rules, explicit_type, expected_type, expected_is_array, expected_min, expected_max, expected_pattern, expected_format", + "validation_rules, expected_is_array, expected_min, expected_max, expected_pattern, expected_format", [ ( [], - JSONSchemaType.STRING, - JSONSchemaType.STRING, False, None, None, @@ -345,8 +335,6 @@ def test_get_validation_rule_based_fields_no_explicit_type( ), ( ["str"], - JSONSchemaType.STRING, - JSONSchemaType.STRING, False, None, None, @@ -355,8 +343,6 @@ def test_get_validation_rule_based_fields_no_explicit_type( ), ( ["list"], - JSONSchemaType.STRING, - JSONSchemaType.STRING, True, None, None, @@ -365,8 +351,6 @@ def test_get_validation_rule_based_fields_no_explicit_type( ), ( ["inRange 50 100"], - JSONSchemaType.NUMBER, - JSONSchemaType.NUMBER, False, 50, 100, @@ -375,8 +359,6 @@ def test_get_validation_rule_based_fields_no_explicit_type( ), ( ["regex search [a-f]"], - JSONSchemaType.STRING, - JSONSchemaType.STRING, False, None, None, @@ -385,8 +367,6 @@ def test_get_validation_rule_based_fields_no_explicit_type( ), ( ["date"], - JSONSchemaType.STRING, - JSONSchemaType.STRING, False, None, None, @@ -395,8 +375,6 @@ def test_get_validation_rule_based_fields_no_explicit_type( ), ( ["url"], - JSONSchemaType.STRING, - JSONSchemaType.STRING, False, None, None, @@ -408,8 +386,6 @@ def test_get_validation_rule_based_fields_no_explicit_type( ) def test_get_validation_rule_based_fields_with_explicit_type( validation_rules: list[str], - explicit_type: JSONSchemaType, - expected_type: Optional[JSONSchemaType], expected_is_array: bool, expected_min: Optional[float], expected_max: Optional[float], @@ -423,15 +399,11 @@ def test_get_validation_rule_based_fields_with_explicit_type( logger = Mock() ( is_array, - property_type, property_format, minimum, maximum, pattern, - ) = _get_validation_rule_based_fields( - validation_rules, explicit_type, "name", logger - ) - assert property_type == expected_type + ) = _get_validation_rule_based_fields(validation_rules, logger) assert property_format == expected_format assert is_array == expected_is_array assert minimum == expected_min From e96e87fcbab2c9ab003e85bfe1106671673f85e0 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 12:49:59 -0500 Subject: [PATCH 15/26] remove js type in ValidationRule --- .../extensions/curator/schema_generation.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 06f44cdef..0090d8ac2 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -126,16 +126,11 @@ class ValidationRule: Attributes: name: The name of the validation rule - js_type: The JSON Schema type this rule indicates. - For example type rules map over to their equivalent JSON Schema type: str -> string - Other rules have an implicit type. For example the regex rule maps to the JSON - Schema pattern keyword. The pattern keyword requires the type to be string incompatible_rules: Other validation rules this rule can not be paired with parameters: Parameters for the validation rule that need to be collected for the JSON Schema """ name: ValidationRuleName - js_type: Optional[JSONSchemaType] incompatible_rules: list[ValidationRuleName] parameters: Optional[list[str]] = None @@ -143,12 +138,10 @@ class ValidationRule: _VALIDATION_RULES = { "list": ValidationRule( name=ValidationRuleName.LIST, - js_type=None, incompatible_rules=[], ), "date": ValidationRule( name=ValidationRuleName.DATE, - js_type=JSONSchemaType.STRING, incompatible_rules=[ ValidationRuleName.IN_RANGE, ValidationRuleName.URL, @@ -160,7 +153,6 @@ class ValidationRule: ), "url": ValidationRule( name=ValidationRuleName.URL, - js_type=JSONSchemaType.STRING, incompatible_rules=[ ValidationRuleName.IN_RANGE, ValidationRuleName.DATE, @@ -172,7 +164,6 @@ class ValidationRule: ), "regex": ValidationRule( name=ValidationRuleName.REGEX, - js_type=JSONSchemaType.STRING, incompatible_rules=[ ValidationRuleName.IN_RANGE, ValidationRuleName.INT, @@ -184,7 +175,6 @@ class ValidationRule: ), "inRange": ValidationRule( name=ValidationRuleName.IN_RANGE, - js_type=JSONSchemaType.NUMBER, incompatible_rules=[ ValidationRuleName.URL, ValidationRuleName.DATE, @@ -196,7 +186,6 @@ class ValidationRule: ), "str": ValidationRule( name=ValidationRuleName.STR, - js_type=JSONSchemaType.STRING, incompatible_rules=[ ValidationRuleName.IN_RANGE, ValidationRuleName.INT, @@ -207,7 +196,6 @@ class ValidationRule: ), "float": ValidationRule( name=ValidationRuleName.FLOAT, - js_type=JSONSchemaType.NUMBER, incompatible_rules=[ ValidationRuleName.URL, ValidationRuleName.DATE, @@ -220,7 +208,6 @@ class ValidationRule: ), "int": ValidationRule( name=ValidationRuleName.INT, - js_type=JSONSchemaType.INTEGER, incompatible_rules=[ ValidationRuleName.URL, ValidationRuleName.DATE, @@ -233,7 +220,6 @@ class ValidationRule: ), "num": ValidationRule( name=ValidationRuleName.NUM, - js_type=JSONSchemaType.NUMBER, incompatible_rules=[ ValidationRuleName.URL, ValidationRuleName.DATE, @@ -4354,7 +4340,6 @@ def _get_validation_rule_based_fields( logger: Logger, ) -> tuple[ bool, - Optional[JSONSchemaType], Optional[JSONSchemaFormat], Optional[float], Optional[float], From 699e54337b9e98e5afa0c8cbcb75afe1d118529c Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 12:57:25 -0500 Subject: [PATCH 16/26] completely remove get_js_type_from_inputted_rules --- .../extensions/curator/schema_generation.py | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 0090d8ac2..0ec5a607c 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -4153,35 +4153,6 @@ def get_rule_from_inputted_rules( return inputted_rules[0] -def get_js_type_from_inputted_rules( - inputted_rules: list[str], -) -> Optional[JSONSchemaType]: - """Gets the JSON Schema type from a list of rules - - Arguments: - inputted_rules: A list of inputted validation rules - - Raises: - ValueError: If there are multiple type rules in the list - - Returns: - The JSON Schema type if a type rule is found, otherwise None - """ - rule_names = get_names_from_inputted_rules(inputted_rules) - validation_rules = _get_rules_by_names(rule_names) - # A set of js_types of the validation rules - json_schema_types = { - rule.js_type for rule in validation_rules if rule.js_type is not None - } - if len(json_schema_types) > 1: - raise ValueError( - f"Validation rules contain more than one implied JSON Schema type: {inputted_rules}" - ) - if len(json_schema_types) == 0: - return None - return list(json_schema_types)[0] - - def get_in_range_parameters_from_inputted_rule( inputted_rule: str, ) -> tuple[Optional[float], Optional[float]]: From 09d3c7f9e759b3e149cb7a7690b8e2a7de0371d2 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 13:09:27 -0500 Subject: [PATCH 17/26] replace node2 with traversal node --- .../extensions/curator/schema_generation.py | 148 +++++++++--------- .../unit_test_create_json_schema.py | 30 ++-- 2 files changed, 93 insertions(+), 85 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 0ec5a607c..ba2e8fff5 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -5688,77 +5688,77 @@ class labels. Use cautiously as this can affect downstream compatibility. return jsonld_data_model -@dataclass -class Node2: # pylint: disable=too-many-instance-attributes - """ - A Dataclass representing data about a node in a data model in graph form - A DataModelGraphExplorer is used to infer most of the fields from the name of the node - - Attributes: - name: The name of the node - source_node: The name of the node where the graph traversal started - dmge: A DataModelGraphExplorer with the data model loaded - display_name: The display name of the node - valid_values: The valid values of the node if any - valid_value_display_names: The display names of the valid values of the node if any - is_required: Whether or not this node is required - dependencies: This nodes dependencies - description: This nodes description, gotten from the comment in the data model - is_array: Whether or not the property is an array (inferred from validation_rules) - type: The type of the property (inferred from validation_rules) - format: The format of the property (inferred from validation_rules) - minimum: The minimum value of the property (if numeric) (inferred from validation_rules) - maximum: The maximum value of the property (if numeric) (inferred from validation_rules) - pattern: The regex pattern of the property (inferred from validation_rules) - """ - - name: str - source_node: str - dmge: DataModelGraphExplorer - display_name: str = field(init=False) - valid_values: list[str] = field(init=False) - valid_value_display_names: list[str] = field(init=False) - is_required: bool = field(init=False) - dependencies: list[str] = field(init=False) - description: str = field(init=False) - is_array: bool = field(init=False) - type: Optional[JSONSchemaType] = field(init=False) - format: Optional[JSONSchemaFormat] = field(init=False) - minimum: Optional[float] = field(init=False) - maximum: Optional[float] = field(init=False) - pattern: Optional[str] = field(init=False) - - def __post_init__(self) -> None: - """ - Uses the dmge to fill in most of the fields of the dataclass - """ - self.display_name = self.dmge.get_nodes_display_names([self.name])[0] - self.valid_values = sorted(self.dmge.get_node_range(node_label=self.name)) - self.valid_value_display_names = sorted( - self.dmge.get_node_range(node_label=self.name, display_names=True) - ) - validation_rules = self.dmge.get_component_node_validation_rules( - manifest_component=self.source_node, node_display_name=self.display_name - ) - self.is_required = self.dmge.get_component_node_required( - manifest_component=self.source_node, - node_validation_rules=validation_rules, - node_display_name=self.display_name, - ) - self.dependencies = sorted( - self.dmge.get_node_dependencies( - self.name, display_names=False, schema_ordered=False - ) - ) - self.description = self.dmge.get_node_comment( - node_display_name=self.display_name - ) - self.type = self.dmge.get_node_column_type(node_display_name=self.display_name) - - ( - self.is_array, - self.format, - self.minimum, - self.maximum, - self.pattern, - ) = _get_validation_rule_based_fields(validation_rules, logger=self.dmge.logger) +# @dataclass +# class Node2: # pylint: disable=too-many-instance-attributes +# """ +# A Dataclass representing data about a node in a data model in graph form +# A DataModelGraphExplorer is used to infer most of the fields from the name of the node + +# Attributes: +# name: The name of the node +# source_node: The name of the node where the graph traversal started +# dmge: A DataModelGraphExplorer with the data model loaded +# display_name: The display name of the node +# valid_values: The valid values of the node if any +# valid_value_display_names: The display names of the valid values of the node if any +# is_required: Whether or not this node is required +# dependencies: This nodes dependencies +# description: This nodes description, gotten from the comment in the data model +# is_array: Whether or not the property is an array (inferred from validation_rules) +# type: The type of the property (inferred from validation_rules) +# format: The format of the property (inferred from validation_rules) +# minimum: The minimum value of the property (if numeric) (inferred from validation_rules) +# maximum: The maximum value of the property (if numeric) (inferred from validation_rules) +# pattern: The regex pattern of the property (inferred from validation_rules) +# """ + +# name: str +# source_node: str +# dmge: DataModelGraphExplorer +# display_name: str = field(init=False) +# valid_values: list[str] = field(init=False) +# valid_value_display_names: list[str] = field(init=False) +# is_required: bool = field(init=False) +# dependencies: list[str] = field(init=False) +# description: str = field(init=False) +# is_array: bool = field(init=False) +# type: Optional[JSONSchemaType] = field(init=False) +# format: Optional[JSONSchemaFormat] = field(init=False) +# minimum: Optional[float] = field(init=False) +# maximum: Optional[float] = field(init=False) +# pattern: Optional[str] = field(init=False) + +# def __post_init__(self) -> None: +# """ +# Uses the dmge to fill in most of the fields of the dataclass +# """ +# self.display_name = self.dmge.get_nodes_display_names([self.name])[0] +# self.valid_values = sorted(self.dmge.get_node_range(node_label=self.name)) +# self.valid_value_display_names = sorted( +# self.dmge.get_node_range(node_label=self.name, display_names=True) +# ) +# validation_rules = self.dmge.get_component_node_validation_rules( +# manifest_component=self.source_node, node_display_name=self.display_name +# ) +# self.is_required = self.dmge.get_component_node_required( +# manifest_component=self.source_node, +# node_validation_rules=validation_rules, +# node_display_name=self.display_name, +# ) +# self.dependencies = sorted( +# self.dmge.get_node_dependencies( +# self.name, display_names=False, schema_ordered=False +# ) +# ) +# self.description = self.dmge.get_node_comment( +# node_display_name=self.display_name +# ) +# self.type = self.dmge.get_node_column_type(node_display_name=self.display_name) + +# ( +# self.is_array, +# self.format, +# self.minimum, +# self.maximum, +# self.pattern, +# ) = _get_validation_rule_based_fields(validation_rules, logger=self.dmge.logger) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index c33fabd66..892e223f4 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -21,7 +21,7 @@ JSONSchema, JSONSchemaFormat, JSONSchemaType, - Node2, + TraversalNode, _create_array_property, _create_enum_array_property, _create_enum_property, @@ -96,7 +96,7 @@ def delete_folder(): @pytest.fixture(name="test_nodes") def fixture_test_nodes( dmge: DataModelGraphExplorer, -) -> dict[str, Node2]: +) -> dict[str, TraversalNode]: """Yields dict of Nodes""" nodes = [ "NoRules", @@ -116,7 +116,10 @@ def fixture_test_nodes( "ListString", "ListInRange", ] - nodes = {node: Node2(node, "JSONSchemaComponent", dmge) for node in nodes} + nodes = { + node: TraversalNode(node, "JSONSchemaComponent", dmge, logger=Mock()) + for node in nodes + } return nodes @@ -144,7 +147,10 @@ def fixture_test_nodes_column_types( "ListInRange", ] nodes = { - node: Node2(node, "JSONSchemaComponent", dmge_column_type) for node in nodes + node: TraversalNode( + node, "JSONSchemaComponent", dmge_column_type, logger=Mock() + ) + for node in nodes } return nodes @@ -247,7 +253,7 @@ def test_node_init( expected_max: Optional[float], expected_pattern: Optional[str], expected_format: Optional[JSONSchemaFormat], - test_nodes: dict[str, Node2], + test_nodes: dict[str, TraversalNode], ) -> None: """Tests for Node class""" node = test_nodes[node_name] @@ -1038,7 +1044,9 @@ def test_set_conditional_dependencies( ids=["Array, enum", "Array, enum, not required", "Enum", "Array", "String"], ) def test_set_property( - node_name: str, expected_schema: dict[str, Any], test_nodes: dict[str, Node2] + node_name: str, + expected_schema: dict[str, Any], + test_nodes: dict[str, TraversalNode], ) -> None: """Tests for set_property""" schema = JSONSchema() @@ -1087,7 +1095,7 @@ def test_create_enum_array_property( expected_schema: dict[str, Any], valid_values: list[Any], invalid_values: list[Any], - test_nodes: dict[str, Node2], + test_nodes: dict[str, TraversalNode], ) -> None: """Test for _create_enum_array_property""" schema = _create_enum_array_property(test_nodes[node_name]) @@ -1161,7 +1169,7 @@ def test_create_array_property( expected_schema: dict[str, Any], valid_values: list[Any], invalid_values: list[Any], - test_nodes_column_types: dict[str, Node2], + test_nodes_column_types: dict[str, TraversalNode], ) -> None: """Test for _create_array_property""" schema = _create_array_property(test_nodes_column_types[node_name]) @@ -1205,7 +1213,7 @@ def test_create_enum_property( expected_schema: dict[str, Any], valid_values: list[Any], invalid_values: list[Any], - test_nodes: dict[str, Node2], + test_nodes: dict[str, TraversalNode], ) -> None: """Test for _create_enum_property""" schema = _create_enum_property(test_nodes[node_name]) @@ -1275,7 +1283,7 @@ def test_create_simple_property( expected_schema: dict[str, Any], valid_values: list[Any], invalid_values: list[Any], - test_nodes_column_types: dict[str, Node2], + test_nodes_column_types: dict[str, TraversalNode], ) -> None: """Test for _create_simple_property""" schema = _create_simple_property(test_nodes_column_types[node_name]) @@ -1305,7 +1313,7 @@ def test_create_simple_property( def test_set_type_specific_keywords( node_name: str, expected_schema: dict[str, Any], - test_nodes: dict[str, Node2], + test_nodes: dict[str, TraversalNode], ) -> None: """Test for _set_type_specific_keywords""" schema = {} From 4cf904f4a7b77a68f4032bb9486b0f45a24c96ef Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 13:56:48 -0500 Subject: [PATCH 18/26] completely remove node2 --- .../extensions/curator/schema_generation.py | 76 ------------------- 1 file changed, 76 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index ba2e8fff5..18160e0e2 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -5686,79 +5686,3 @@ class labels. Use cautiously as this can affect downstream compatibility. ) ) return jsonld_data_model - - -# @dataclass -# class Node2: # pylint: disable=too-many-instance-attributes -# """ -# A Dataclass representing data about a node in a data model in graph form -# A DataModelGraphExplorer is used to infer most of the fields from the name of the node - -# Attributes: -# name: The name of the node -# source_node: The name of the node where the graph traversal started -# dmge: A DataModelGraphExplorer with the data model loaded -# display_name: The display name of the node -# valid_values: The valid values of the node if any -# valid_value_display_names: The display names of the valid values of the node if any -# is_required: Whether or not this node is required -# dependencies: This nodes dependencies -# description: This nodes description, gotten from the comment in the data model -# is_array: Whether or not the property is an array (inferred from validation_rules) -# type: The type of the property (inferred from validation_rules) -# format: The format of the property (inferred from validation_rules) -# minimum: The minimum value of the property (if numeric) (inferred from validation_rules) -# maximum: The maximum value of the property (if numeric) (inferred from validation_rules) -# pattern: The regex pattern of the property (inferred from validation_rules) -# """ - -# name: str -# source_node: str -# dmge: DataModelGraphExplorer -# display_name: str = field(init=False) -# valid_values: list[str] = field(init=False) -# valid_value_display_names: list[str] = field(init=False) -# is_required: bool = field(init=False) -# dependencies: list[str] = field(init=False) -# description: str = field(init=False) -# is_array: bool = field(init=False) -# type: Optional[JSONSchemaType] = field(init=False) -# format: Optional[JSONSchemaFormat] = field(init=False) -# minimum: Optional[float] = field(init=False) -# maximum: Optional[float] = field(init=False) -# pattern: Optional[str] = field(init=False) - -# def __post_init__(self) -> None: -# """ -# Uses the dmge to fill in most of the fields of the dataclass -# """ -# self.display_name = self.dmge.get_nodes_display_names([self.name])[0] -# self.valid_values = sorted(self.dmge.get_node_range(node_label=self.name)) -# self.valid_value_display_names = sorted( -# self.dmge.get_node_range(node_label=self.name, display_names=True) -# ) -# validation_rules = self.dmge.get_component_node_validation_rules( -# manifest_component=self.source_node, node_display_name=self.display_name -# ) -# self.is_required = self.dmge.get_component_node_required( -# manifest_component=self.source_node, -# node_validation_rules=validation_rules, -# node_display_name=self.display_name, -# ) -# self.dependencies = sorted( -# self.dmge.get_node_dependencies( -# self.name, display_names=False, schema_ordered=False -# ) -# ) -# self.description = self.dmge.get_node_comment( -# node_display_name=self.display_name -# ) -# self.type = self.dmge.get_node_column_type(node_display_name=self.display_name) - -# ( -# self.is_array, -# self.format, -# self.minimum, -# self.maximum, -# self.pattern, -# ) = _get_validation_rule_based_fields(validation_rules, logger=self.dmge.logger) From 626b06e709bda366295c52f3c9c17c8edcf31f1c Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 14:20:19 -0500 Subject: [PATCH 19/26] remove type validation rule name --- .../extensions/curator/schema_generation.py | 65 ------------------- 1 file changed, 65 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 18160e0e2..9e1abb6c6 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -96,11 +96,6 @@ class ValidationRuleName(Enum): URL = "url" REGEX = "regex" IN_RANGE = "inRange" - STR = "str" - FLOAT = "float" - INT = "int" - BOOL = "bool" - NUM = "num" class JSONSchemaType(Enum): @@ -145,10 +140,6 @@ class ValidationRule: incompatible_rules=[ ValidationRuleName.IN_RANGE, ValidationRuleName.URL, - ValidationRuleName.INT, - ValidationRuleName.FLOAT, - ValidationRuleName.BOOL, - ValidationRuleName.NUM, ], ), "url": ValidationRule( @@ -156,20 +147,12 @@ class ValidationRule: incompatible_rules=[ ValidationRuleName.IN_RANGE, ValidationRuleName.DATE, - ValidationRuleName.INT, - ValidationRuleName.FLOAT, - ValidationRuleName.BOOL, - ValidationRuleName.NUM, ], ), "regex": ValidationRule( name=ValidationRuleName.REGEX, incompatible_rules=[ ValidationRuleName.IN_RANGE, - ValidationRuleName.INT, - ValidationRuleName.FLOAT, - ValidationRuleName.BOOL, - ValidationRuleName.NUM, ], parameters=["module", "pattern"], ), @@ -179,57 +162,9 @@ class ValidationRule: ValidationRuleName.URL, ValidationRuleName.DATE, ValidationRuleName.REGEX, - ValidationRuleName.STR, - ValidationRuleName.BOOL, ], parameters=["minimum", "maximum"], ), - "str": ValidationRule( - name=ValidationRuleName.STR, - incompatible_rules=[ - ValidationRuleName.IN_RANGE, - ValidationRuleName.INT, - ValidationRuleName.FLOAT, - ValidationRuleName.NUM, - ValidationRuleName.BOOL, - ], - ), - "float": ValidationRule( - name=ValidationRuleName.FLOAT, - incompatible_rules=[ - ValidationRuleName.URL, - ValidationRuleName.DATE, - ValidationRuleName.REGEX, - ValidationRuleName.STR, - ValidationRuleName.BOOL, - ValidationRuleName.INT, - ValidationRuleName.NUM, - ], - ), - "int": ValidationRule( - name=ValidationRuleName.INT, - incompatible_rules=[ - ValidationRuleName.URL, - ValidationRuleName.DATE, - ValidationRuleName.REGEX, - ValidationRuleName.STR, - ValidationRuleName.BOOL, - ValidationRuleName.NUM, - ValidationRuleName.FLOAT, - ], - ), - "num": ValidationRule( - name=ValidationRuleName.NUM, - incompatible_rules=[ - ValidationRuleName.URL, - ValidationRuleName.DATE, - ValidationRuleName.REGEX, - ValidationRuleName.STR, - ValidationRuleName.BOOL, - ValidationRuleName.INT, - ValidationRuleName.FLOAT, - ], - ), } From c8758f7525f26a94aa65856d1b3f48729dc45a88 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 14:40:16 -0500 Subject: [PATCH 20/26] abolish jsonschema type --- .../extensions/curator/schema_generation.py | 26 ++++++------------- .../unit_test_create_json_schema.py | 3 +-- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 9e1abb6c6..cef29bbcd 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -98,15 +98,6 @@ class ValidationRuleName(Enum): IN_RANGE = "inRange" -class JSONSchemaType(Enum): - """This enum is the currently supported JSON Schema types""" - - STRING = "string" - NUMBER = "number" - INTEGER = "integer" - BOOLEAN = "boolean" - - class RegexModule(Enum): """This enum are allowed modules for the regex validation rule""" @@ -1645,7 +1636,7 @@ def is_class_in_schema(self, node_label: str) -> bool: def get_node_column_type( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None - ) -> Optional[JSONSchemaType]: + ) -> str: """Gets the column type of the node Args: @@ -1658,9 +1649,8 @@ def get_node_column_type( node_label = self._get_node_label(node_label, node_display_name) rel_node_label = self.dmr.get_relationship_value("columnType", "node_label") type_string = self.graph.nodes[node_label][rel_node_label] - if type_string is None: - return type_string - return JSONSchemaType(type_string) + return type_string + # return JSONSchemaType(type_string) def _get_node_label( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None @@ -2778,7 +2768,7 @@ def define_data_model_relationships(self) -> dict: "required_header": False, "edge_rel": False, "node_attr_dict": {"default": None}, - "allowed_values": [enum.value for enum in JSONSchemaType], + "allowed_values": ["string", "number", "integer", "boolean"], }, } @@ -4360,7 +4350,7 @@ class TraversalNode: # pylint: disable=too-many-instance-attributes dependencies: list[str] = field(init=False) description: str = field(init=False) is_array: bool = field(init=False) - type: Optional[JSONSchemaType] = field(init=False) + type: str = field(init=False) format: Optional[JSONSchemaFormat] = field(init=False) minimum: Optional[float] = field(init=False) maximum: Optional[float] = field(init=False) @@ -4846,7 +4836,7 @@ def _create_array_property(node: Node) -> Property: items: Items = {} if node.type: - items["type"] = node.type.value + items["type"] = node.type _set_type_specific_keywords(items, node) array_type_dict: TypeDict = {"type": "array", "title": "array"} @@ -4919,10 +4909,10 @@ def _create_simple_property(node: Node) -> Property: if node.type: if node.is_required: - prop["type"] = node.type.value + prop["type"] = node.type else: prop["oneOf"] = [ - {"type": node.type.value, "title": node.type.value}, + {"type": node.type, "title": node.type}, {"type": "null", "title": "null"}, ] elif node.is_required: diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 892e223f4..bd029be56 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -20,7 +20,6 @@ GraphTraversalState, JSONSchema, JSONSchemaFormat, - JSONSchemaType, TraversalNode, _create_array_property, _create_enum_array_property, @@ -247,7 +246,7 @@ def test_update_property(self) -> None: ) def test_node_init( node_name: str, - expected_type: Optional[JSONSchemaType], + expected_type: str, expected_is_array: bool, expected_min: Optional[float], expected_max: Optional[float], From b6a83401b079655502b9eedb752f5215c05c2ae0 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 14:41:27 -0500 Subject: [PATCH 21/26] remove comment --- synapseclient/extensions/curator/schema_generation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index cef29bbcd..734353eb7 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -1650,7 +1650,6 @@ def get_node_column_type( rel_node_label = self.dmr.get_relationship_value("columnType", "node_label") type_string = self.graph.nodes[node_label][rel_node_label] return type_string - # return JSONSchemaType(type_string) def _get_node_label( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None From fe48b0827741cd2bd4740ffe94d1d096f4e3c0b5 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 15:07:25 -0500 Subject: [PATCH 22/26] revert changes to the json schema type --- .../extensions/curator/schema_generation.py | 25 +++++++++++++------ .../unit_test_create_json_schema.py | 3 ++- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 734353eb7..9e1abb6c6 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -98,6 +98,15 @@ class ValidationRuleName(Enum): IN_RANGE = "inRange" +class JSONSchemaType(Enum): + """This enum is the currently supported JSON Schema types""" + + STRING = "string" + NUMBER = "number" + INTEGER = "integer" + BOOLEAN = "boolean" + + class RegexModule(Enum): """This enum are allowed modules for the regex validation rule""" @@ -1636,7 +1645,7 @@ def is_class_in_schema(self, node_label: str) -> bool: def get_node_column_type( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None - ) -> str: + ) -> Optional[JSONSchemaType]: """Gets the column type of the node Args: @@ -1649,7 +1658,9 @@ def get_node_column_type( node_label = self._get_node_label(node_label, node_display_name) rel_node_label = self.dmr.get_relationship_value("columnType", "node_label") type_string = self.graph.nodes[node_label][rel_node_label] - return type_string + if type_string is None: + return type_string + return JSONSchemaType(type_string) def _get_node_label( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None @@ -2767,7 +2778,7 @@ def define_data_model_relationships(self) -> dict: "required_header": False, "edge_rel": False, "node_attr_dict": {"default": None}, - "allowed_values": ["string", "number", "integer", "boolean"], + "allowed_values": [enum.value for enum in JSONSchemaType], }, } @@ -4349,7 +4360,7 @@ class TraversalNode: # pylint: disable=too-many-instance-attributes dependencies: list[str] = field(init=False) description: str = field(init=False) is_array: bool = field(init=False) - type: str = field(init=False) + type: Optional[JSONSchemaType] = field(init=False) format: Optional[JSONSchemaFormat] = field(init=False) minimum: Optional[float] = field(init=False) maximum: Optional[float] = field(init=False) @@ -4835,7 +4846,7 @@ def _create_array_property(node: Node) -> Property: items: Items = {} if node.type: - items["type"] = node.type + items["type"] = node.type.value _set_type_specific_keywords(items, node) array_type_dict: TypeDict = {"type": "array", "title": "array"} @@ -4908,10 +4919,10 @@ def _create_simple_property(node: Node) -> Property: if node.type: if node.is_required: - prop["type"] = node.type + prop["type"] = node.type.value else: prop["oneOf"] = [ - {"type": node.type, "title": node.type}, + {"type": node.type.value, "title": node.type.value}, {"type": "null", "title": "null"}, ] elif node.is_required: diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index bd029be56..892e223f4 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -20,6 +20,7 @@ GraphTraversalState, JSONSchema, JSONSchemaFormat, + JSONSchemaType, TraversalNode, _create_array_property, _create_enum_array_property, @@ -246,7 +247,7 @@ def test_update_property(self) -> None: ) def test_node_init( node_name: str, - expected_type: str, + expected_type: Optional[JSONSchemaType], expected_is_array: bool, expected_min: Optional[float], expected_max: Optional[float], From 1872909c2493b8c6e61fb6159a35a3f3ca7b916e Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 15:30:58 -0500 Subject: [PATCH 23/26] updated dmge to use get_validation_rule_based_fields_no_explicit_type --- .../unit/synapseclient/extensions/conftest.py | 7 -- .../unit_test_create_json_schema.py | 75 ++++++++++++------- 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/tests/unit/synapseclient/extensions/conftest.py b/tests/unit/synapseclient/extensions/conftest.py index 1921c7acf..8cd6074f1 100644 --- a/tests/unit/synapseclient/extensions/conftest.py +++ b/tests/unit/synapseclient/extensions/conftest.py @@ -58,13 +58,6 @@ def helpers(): @pytest.fixture(name="dmge", scope="function") def DMGE(helpers: Helpers) -> DataModelGraphExplorer: - """Fixture to instantiate a DataModelGraphExplorer object.""" - dmge = helpers.get_data_model_graph_explorer(path="example.model.jsonld") - return dmge - - -@pytest.fixture(name="dmge_column_type", scope="function") -def DMGE_column_type(helpers: Helpers) -> DataModelGraphExplorer: """Fixture to instantiate a DataModelGraphExplorer object using the data model with column types""" dmge = helpers.get_data_model_graph_explorer( path="example.model.column_type_component.csv" diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 892e223f4..11bc0a4ad 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -125,7 +125,7 @@ def fixture_test_nodes( @pytest.fixture(name="test_nodes_column_types") def fixture_test_nodes_column_types( - dmge_column_type: DataModelGraphExplorer, + dmge: DataModelGraphExplorer, ): """Yields dict of Nodes""" nodes = [ @@ -147,16 +147,14 @@ def fixture_test_nodes_column_types( "ListInRange", ] nodes = { - node: TraversalNode( - node, "JSONSchemaComponent", dmge_column_type, logger=Mock() - ) + node: TraversalNode(node, "JSONSchemaComponent", dmge, logger=Mock()) for node in nodes } return nodes class TestJSONSchema: - """Tests for JSONSchema""" + """Tests for JSONSchema class - validates JSON schema object creation and manipulation.""" def test_init(self) -> None: """Test the JSONSchema.init method""" @@ -210,7 +208,12 @@ def test_add_to_all_of_list(self) -> None: assert schema.all_of == [{"if": {}, "then": {}}, {"if2": {}, "then2": {}}] def test_update_property(self) -> None: - """Test the JSONSchema.update_property method""" + """ + Test JSONSchema.update_property method. + + Verifies that properties can be added and updated in the schema's + properties dictionary, maintaining existing properties while adding new ones. + """ # GIVEN a JSONSchema instance schema = JSONSchema() # WHEN updating the properties dict @@ -226,22 +229,22 @@ def test_update_property(self) -> None: @pytest.mark.parametrize( "node_name, expected_type, expected_is_array, expected_min, expected_max, expected_pattern, expected_format", [ - # Node with no validation rules - all constraint fields should be None/False + # Node with no columnType - all constraint fields should be None/False ("NoRules", None, False, None, None, None, None), - # Node with "str" validation rule - type remains None, constraints not set in Node initialization - ("String", None, False, None, None, None, None), - # Node with "list" validation rule - is_array is set to True, other fields remain None + # Node with columnType "string" - type is set to STRING via columnType + ("String", JSONSchemaType.STRING, False, None, None, None, None), + # Node with "list" validation rule - is_array is set to True, type remains None (no columnType) ("List", None, True, None, None, None, None), - # Node with both "list" and "str" validation rules - is_array is True, type remains None - ("ListString", None, True, None, None, None, None), - # Node with "inRange 50 100" validation rule - minimum and maximum are extracted and set - ("InRange", None, False, 50, 100, None, None), - # Node with "regex search [a-f]" validation rule - pattern is extracted and set - ("Regex", None, False, None, None, "[a-f]", None), - # Node with "date" validation rule - format is set to JSONSchemaFormat.DATE - ("Date", None, False, None, None, None, JSONSchemaFormat.DATE), - # Node with "url" validation rule - format is set to JSONSchemaFormat.URI - ("URL", None, False, None, None, None, JSONSchemaFormat.URI), + # Node with both "list" validation rules and columnType "string" - is_array is True, type is STRING + ("ListString", JSONSchemaType.STRING, True, None, None, None, None), + # Node with "inRange 50 100" validation rule and columnType "number" - min/max are set, type is NUMBER + ("InRange", JSONSchemaType.NUMBER, False, 50, 100, None, None), + # Node with "regex search [a-f]" validation rule and columnType "string" - pattern is set, type is STRING + ("Regex", JSONSchemaType.STRING, False, None, None, "[a-f]", None), + # Node with "date" validation rule and columnType "string" - format is set to DATE, type is STRING + ("Date", JSONSchemaType.STRING, False, None, None, None, JSONSchemaFormat.DATE), + # Node with "url" validation rule and columnType "string" - format is set to URI, type is STRING + ("URL", JSONSchemaType.STRING, False, None, None, None, JSONSchemaFormat.URI), ], ids=["None", "String", "List", "ListString", "InRange", "Regex", "Date", "URI"], ) @@ -255,7 +258,17 @@ def test_node_init( expected_format: Optional[JSONSchemaFormat], test_nodes: dict[str, TraversalNode], ) -> None: - """Tests for Node class""" + """ + Tests for TraversalNode class initialization. + + Verifies that TraversalNode objects are correctly initialized with: + - Types derived from columnType attribute in the data model + - Validation constraints extracted from validation rules (format, pattern, min/max, array flag) + - Proper combination of columnType and validation rule parsing + + The type property comes from the columnType field, while constraints + come from parsing validation rules like "str", "inRange", "regex", etc. + """ node = test_nodes[node_name] assert node.type == expected_type assert node.format == expected_format @@ -687,7 +700,7 @@ def test_create_json_schema_with_no_column_type( def test_create_json_schema_with_column_type( - dmge_column_type: DataModelGraphExplorer, test_directory: str + dmge: DataModelGraphExplorer, test_directory: str ) -> None: """ Tests for JSONSchemaGenerator.create_json_schema @@ -699,7 +712,7 @@ def test_create_json_schema_with_column_type( logger = logging.getLogger(__name__) create_json_schema( - dmge=dmge_column_type, + dmge=dmge, datatype=datatype, schema_name=f"{datatype}_validation", schema_path=test_path, @@ -807,8 +820,10 @@ def test_write_data_model_with_schema_path(test_directory: str) -> None: def test_write_data_model_with_name_and_jsonld_path(test_directory: str) -> None: """ - Test for _write_data_model with a name and the data model path used to create it. - The name of the file should be "..schema.json" + Test _write_data_model with a name and JSONLD path. + + When provided with a name and jsonld_path, the function should create + a schema file with the format: "..schema.json" """ json_ld_path = os.path.join(test_directory, "fake_model.jsonld") logger = Mock() @@ -826,8 +841,10 @@ def test_write_data_model_with_name_and_jsonld_path(test_directory: str) -> None def test_write_data_model_exception() -> None: """ - Test for _write_data_model where neither the path, the name, or JSONLD path are provided. - This should return a ValueError + Test _write_data_model error handling. + + When neither schema_path nor (name + jsonld_path) are provided, + the function should raise a ValueError. """ with pytest.raises(ValueError): _write_data_model(json_schema_dict={}, logger=Mock()) @@ -1032,8 +1049,8 @@ def test_set_conditional_dependencies( JSONSchema( properties={ "String": { - "not": {"type": "null"}, "description": "TBD", + "type": "string", "title": "String", } }, @@ -1130,7 +1147,7 @@ def test_create_enum_array_property( [None, [], [None], ["x"]], ["x"], ), - # # If item_type is given, it is set in the schema + # If item_type is given, it is set in the schema ( "ListString", { From 1d6abfc517300ebd8fc40b9b4d97266550669049 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 10 Nov 2025 15:59:52 -0500 Subject: [PATCH 24/26] delete temporary files generated by unit test --- .../example.Biospecimen.schema.json | 50 --- .../example.BulkRNA-seqAssay.schema.json | 168 ---------- .../example.JSONSchemaComponent.schema.json | 203 ------------ .../example.MockComponent.schema.json | 305 ------------------ .../example.MockFilename.schema.json | 27 -- .../schema_files/example.MockRDB.schema.json | 35 -- .../expected.JSONSchemaComponent.schema.json | 48 +-- .../unit_test_create_json_schema.py | 16 +- 8 files changed, 33 insertions(+), 819 deletions(-) delete mode 100644 tests/unit/synapseclient/extensions/schema_files/example.Biospecimen.schema.json delete mode 100644 tests/unit/synapseclient/extensions/schema_files/example.BulkRNA-seqAssay.schema.json delete mode 100644 tests/unit/synapseclient/extensions/schema_files/example.JSONSchemaComponent.schema.json delete mode 100644 tests/unit/synapseclient/extensions/schema_files/example.MockComponent.schema.json delete mode 100644 tests/unit/synapseclient/extensions/schema_files/example.MockFilename.schema.json delete mode 100644 tests/unit/synapseclient/extensions/schema_files/example.MockRDB.schema.json diff --git a/tests/unit/synapseclient/extensions/schema_files/example.Biospecimen.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.Biospecimen.schema.json deleted file mode 100644 index 41097740c..000000000 --- a/tests/unit/synapseclient/extensions/schema_files/example.Biospecimen.schema.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "$id": "http://example.com/Biospecimen_validation", - "$schema": "http://json-schema.org/draft-07/schema#", - "description": "TBD", - "properties": { - "Component": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Component" - }, - "PatientID": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Patient ID" - }, - "SampleID": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Sample ID" - }, - "TissueStatus": { - "description": "TBD", - "oneOf": [ - { - "enum": [ - "Healthy", - "Malignant", - "None" - ], - "title": "enum" - } - ], - "title": "Tissue Status" - } - }, - "required": [ - "Component", - "PatientID", - "SampleID", - "TissueStatus" - ], - "title": "Biospecimen_validation", - "type": "object" -} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.BulkRNA-seqAssay.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.BulkRNA-seqAssay.schema.json deleted file mode 100644 index 872888213..000000000 --- a/tests/unit/synapseclient/extensions/schema_files/example.BulkRNA-seqAssay.schema.json +++ /dev/null @@ -1,168 +0,0 @@ -{ - "$id": "http://example.com/BulkRNA-seqAssay_validation", - "$schema": "http://json-schema.org/draft-07/schema#", - "allOf": [ - { - "if": { - "properties": { - "FileFormat": { - "enum": [ - "BAM" - ] - } - } - }, - "then": { - "properties": { - "GenomeBuild": { - "not": { - "type": "null" - } - } - }, - "required": [ - "GenomeBuild" - ] - } - }, - { - "if": { - "properties": { - "FileFormat": { - "enum": [ - "CRAM" - ] - } - } - }, - "then": { - "properties": { - "GenomeBuild": { - "not": { - "type": "null" - } - } - }, - "required": [ - "GenomeBuild" - ] - } - }, - { - "if": { - "properties": { - "FileFormat": { - "enum": [ - "CSV/TSV" - ] - } - } - }, - "then": { - "properties": { - "GenomeBuild": { - "not": { - "type": "null" - } - } - }, - "required": [ - "GenomeBuild" - ] - } - }, - { - "if": { - "properties": { - "FileFormat": { - "enum": [ - "CRAM" - ] - } - } - }, - "then": { - "properties": { - "GenomeFASTA": { - "not": { - "type": "null" - } - } - }, - "required": [ - "GenomeFASTA" - ] - } - } - ], - "description": "TBD", - "properties": { - "Component": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Component" - }, - "FileFormat": { - "description": "TBD", - "oneOf": [ - { - "enum": [ - "BAM", - "CRAM", - "CSV/TSV", - "FASTQ" - ], - "title": "enum" - } - ], - "title": "File Format" - }, - "Filename": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Filename" - }, - "GenomeBuild": { - "description": "TBD", - "oneOf": [ - { - "enum": [ - "GRCh37", - "GRCh38", - "GRCm38", - "GRCm39" - ], - "title": "enum" - }, - { - "title": "null", - "type": "null" - } - ], - "title": "Genome Build" - }, - "GenomeFASTA": { - "description": "TBD", - "title": "Genome FASTA" - }, - "SampleID": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Sample ID" - } - }, - "required": [ - "Component", - "FileFormat", - "Filename", - "SampleID" - ], - "title": "BulkRNA-seqAssay_validation", - "type": "object" -} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.JSONSchemaComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.JSONSchemaComponent.schema.json deleted file mode 100644 index 444e1ab44..000000000 --- a/tests/unit/synapseclient/extensions/schema_files/example.JSONSchemaComponent.schema.json +++ /dev/null @@ -1,203 +0,0 @@ -{ - "$id": "http://example.com/JSONSchemaComponent_validation", - "$schema": "http://json-schema.org/draft-07/schema#", - "description": "Component to hold attributes for testing JSON Schemas", - "properties": { - "Component": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Component" - }, - "Date": { - "description": "TBD", - "format": "date", - "not": { - "type": "null" - }, - "title": "Date" - }, - "Enum": { - "description": "TBD", - "oneOf": [ - { - "enum": [ - "ab", - "cd", - "ef", - "gh" - ], - "title": "enum" - } - ], - "title": "Enum" - }, - "EnumNotRequired": { - "description": "TBD", - "oneOf": [ - { - "enum": [ - "ab", - "cd", - "ef", - "gh" - ], - "title": "enum" - }, - { - "title": "null", - "type": "null" - } - ], - "title": "Enum Not Required" - }, - "InRange": { - "description": "TBD", - "maximum": 100.0, - "minimum": 50.0, - "not": { - "type": "null" - }, - "title": "InRange" - }, - "List": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - } - ], - "title": "List" - }, - "ListEnum": { - "description": "TBD", - "oneOf": [ - { - "items": { - "enum": [ - "ab", - "cd", - "ef", - "gh" - ] - }, - "title": "array", - "type": "array" - } - ], - "title": "List Enum" - }, - "ListEnumNotRequired": { - "description": "TBD", - "oneOf": [ - { - "items": { - "enum": [ - "ab", - "cd", - "ef", - "gh" - ] - }, - "title": "array", - "type": "array" - }, - { - "title": "null", - "type": "null" - } - ], - "title": "List Enum Not Required" - }, - "ListInRange": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - } - ], - "title": "List InRange" - }, - "ListNotRequired": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - }, - { - "title": "null", - "type": "null" - } - ], - "title": "List Not Required" - }, - "ListString": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - } - ], - "title": "List String" - }, - "NoRules": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "No Rules" - }, - "NoRulesNotRequired": { - "description": "TBD", - "title": "No Rules Not Required" - }, - "Regex": { - "description": "TBD", - "not": { - "type": "null" - }, - "pattern": "[a-f]", - "title": "Regex" - }, - "String": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "String" - }, - "StringNotRequired": { - "description": "TBD", - "title": "String Not Required" - }, - "URL": { - "description": "TBD", - "format": "uri", - "not": { - "type": "null" - }, - "title": "URL" - } - }, - "required": [ - "Component", - "Date", - "Enum", - "InRange", - "List", - "ListEnum", - "ListInRange", - "ListString", - "NoRules", - "Regex", - "String", - "URL" - ], - "title": "JSONSchemaComponent_validation", - "type": "object" -} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.MockComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.MockComponent.schema.json deleted file mode 100644 index 3dd6b7b97..000000000 --- a/tests/unit/synapseclient/extensions/schema_files/example.MockComponent.schema.json +++ /dev/null @@ -1,305 +0,0 @@ -{ - "$id": "http://example.com/MockComponent_validation", - "$schema": "http://json-schema.org/draft-07/schema#", - "description": "Component to hold mock attributes for testing all validation rules", - "properties": { - "CheckAges": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Ages" - }, - "CheckDate": { - "description": "TBD", - "format": "date", - "not": { - "type": "null" - }, - "title": "Check Date" - }, - "CheckFloat": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Float" - }, - "CheckInt": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Int" - }, - "CheckList": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - } - ], - "title": "Check List" - }, - "CheckListEnum": { - "description": "TBD", - "oneOf": [ - { - "items": { - "enum": [ - "ab", - "cd", - "ef", - "gh" - ] - }, - "title": "array", - "type": "array" - } - ], - "title": "Check List Enum" - }, - "CheckListEnumStrict": { - "description": "TBD", - "oneOf": [ - { - "items": { - "enum": [ - "ab", - "cd", - "ef", - "gh" - ] - }, - "title": "array", - "type": "array" - } - ], - "title": "Check List Enum Strict" - }, - "CheckListLike": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - } - ], - "title": "Check List Like" - }, - "CheckListLikeEnum": { - "description": "TBD", - "oneOf": [ - { - "items": { - "enum": [ - "ab", - "cd", - "ef", - "gh" - ] - }, - "title": "array", - "type": "array" - } - ], - "title": "Check List Like Enum" - }, - "CheckListStrict": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - } - ], - "title": "Check List Strict" - }, - "CheckMatchExactly": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Match Exactly" - }, - "CheckMatchExactlyvalues": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Match Exactly values" - }, - "CheckMatchNone": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Match None" - }, - "CheckMatchNonevalues": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Match None values" - }, - "CheckMatchatLeast": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Match at Least" - }, - "CheckMatchatLeastvalues": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Match at Least values" - }, - "CheckNA": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check NA" - }, - "CheckNum": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Num" - }, - "CheckRange": { - "description": "TBD", - "maximum": 100.0, - "minimum": 50.0, - "not": { - "type": "null" - }, - "title": "Check Range" - }, - "CheckRecommended": { - "description": "TBD", - "title": "Check Recommended" - }, - "CheckRegexFormat": { - "description": "TBD", - "not": { - "type": "null" - }, - "pattern": "^[a-f]", - "title": "Check Regex Format" - }, - "CheckRegexInteger": { - "description": "TBD", - "not": { - "type": "null" - }, - "pattern": "^\\d+$", - "title": "Check Regex Integer" - }, - "CheckRegexList": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - } - ], - "title": "Check Regex List" - }, - "CheckRegexListLike": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - } - ], - "title": "Check Regex List Like" - }, - "CheckRegexListStrict": { - "description": "TBD", - "oneOf": [ - { - "title": "array", - "type": "array" - } - ], - "title": "Check Regex List Strict" - }, - "CheckRegexSingle": { - "description": "TBD", - "not": { - "type": "null" - }, - "pattern": "[a-f]", - "title": "Check Regex Single" - }, - "CheckString": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check String" - }, - "CheckURL": { - "description": "TBD", - "format": "uri", - "not": { - "type": "null" - }, - "title": "Check URL" - }, - "CheckUnique": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Check Unique" - }, - "Component": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Component" - } - }, - "required": [ - "CheckAges", - "CheckDate", - "CheckFloat", - "CheckInt", - "CheckList", - "CheckListEnum", - "CheckListEnumStrict", - "CheckListLike", - "CheckListLikeEnum", - "CheckListStrict", - "CheckMatchExactly", - "CheckMatchExactlyvalues", - "CheckMatchNone", - "CheckMatchNonevalues", - "CheckMatchatLeast", - "CheckMatchatLeastvalues", - "CheckNA", - "CheckNum", - "CheckRange", - "CheckRegexFormat", - "CheckRegexInteger", - "CheckRegexList", - "CheckRegexListLike", - "CheckRegexListStrict", - "CheckRegexSingle", - "CheckString", - "CheckURL", - "CheckUnique", - "Component" - ], - "title": "MockComponent_validation", - "type": "object" -} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.MockFilename.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.MockFilename.schema.json deleted file mode 100644 index 0fe609256..000000000 --- a/tests/unit/synapseclient/extensions/schema_files/example.MockFilename.schema.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "$id": "http://example.com/MockFilename_validation", - "$schema": "http://json-schema.org/draft-07/schema#", - "description": "TBD", - "properties": { - "Component": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Component" - }, - "Filename": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Filename" - } - }, - "required": [ - "Component", - "Filename" - ], - "title": "MockFilename_validation", - "type": "object" -} diff --git a/tests/unit/synapseclient/extensions/schema_files/example.MockRDB.schema.json b/tests/unit/synapseclient/extensions/schema_files/example.MockRDB.schema.json deleted file mode 100644 index 003865f8e..000000000 --- a/tests/unit/synapseclient/extensions/schema_files/example.MockRDB.schema.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "$id": "http://example.com/MockRDB_validation", - "$schema": "http://json-schema.org/draft-07/schema#", - "description": "TBD", - "properties": { - "Component": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "Component" - }, - "MockRDBId": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "MockRDB_id" - }, - "SourceManifest": { - "description": "TBD", - "not": { - "type": "null" - }, - "title": "SourceManifest" - } - }, - "required": [ - "Component", - "MockRDBId", - "SourceManifest" - ], - "title": "MockRDB_validation", - "type": "object" -} diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json index 444e1ab44..b378f46d2 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json @@ -13,10 +13,8 @@ "Date": { "description": "TBD", "format": "date", - "not": { - "type": "null" - }, - "title": "Date" + "title": "Date", + "type": "string" }, "Enum": { "description": "TBD", @@ -56,10 +54,8 @@ "description": "TBD", "maximum": 100.0, "minimum": 50.0, - "not": { - "type": "null" - }, - "title": "InRange" + "title": "InRange", + "type": "number" }, "List": { "description": "TBD", @@ -115,6 +111,11 @@ "description": "TBD", "oneOf": [ { + "items": { + "maximum": 100.0, + "minimum": 50.0, + "type": "number" + }, "title": "array", "type": "array" } @@ -139,6 +140,9 @@ "description": "TBD", "oneOf": [ { + "items": { + "type": "string" + }, "title": "array", "type": "array" } @@ -158,30 +162,34 @@ }, "Regex": { "description": "TBD", - "not": { - "type": "null" - }, "pattern": "[a-f]", - "title": "Regex" + "title": "Regex", + "type": "string" }, "String": { "description": "TBD", - "not": { - "type": "null" - }, - "title": "String" + "title": "String", + "type": "string" }, "StringNotRequired": { "description": "TBD", + "oneOf": [ + { + "title": "string", + "type": "string" + }, + { + "title": "null", + "type": "null" + } + ], "title": "String Not Required" }, "URL": { "description": "TBD", "format": "uri", - "not": { - "type": "null" - }, - "title": "URL" + "title": "URL", + "type": "string" } }, "required": [ diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 11bc0a4ad..509731771 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -80,17 +80,11 @@ def get_test_schema_path( return os.path.join(test_directory, filename) -@pytest.fixture(name="test_directory", scope="session") -def fixture_test_directory(request) -> str: - """Returns a directory for creating test jSON Schemas in""" - test_folder = f"tests/data/create_json_schema_{str(uuid.uuid4())}" - - def delete_folder(): - rmtree(test_folder) - - request.addfinalizer(delete_folder) - os.makedirs(test_folder, exist_ok=True) - return test_folder +@pytest.fixture(name="test_directory", scope="function") +def fixture_test_directory(tmp_path) -> str: + """Returns a directory for creating test JSON Schemas in""" + # pytest automatically handles cleanup + return str(tmp_path) @pytest.fixture(name="test_nodes") From abb7540d9ef1b1fddf6f17b26345f8261bfea274 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Tue, 11 Nov 2025 12:40:35 -0500 Subject: [PATCH 25/26] remove unused import --- .../synapseclient/extensions/unit_test_create_json_schema.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 509731771..3938274b2 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -6,8 +6,6 @@ import json import logging import os -import uuid -from shutil import rmtree from typing import Any, Optional from unittest.mock import Mock From 3ef114f62f71582b793edc947294384397ecff7c Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Tue, 11 Nov 2025 12:42:40 -0500 Subject: [PATCH 26/26] simply just use the key to v29 like other PRs --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 97ddc2beb..32f54ae3e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -84,7 +84,7 @@ jobs: path: | ${{ steps.get-dependencies.outputs.site_packages_loc }} ${{ steps.get-dependencies.outputs.site_bin_dir }} - key: ${{ runner.os }}-${{ matrix.python }}-build-${{ env.cache-name }}-${{ hashFiles('setup.py', 'setup.cfg') }}-v28 + key: ${{ runner.os }}-${{ matrix.python }}-build-${{ env.cache-name }}-${{ hashFiles('setup.py') }}-v29 - name: Install py-dependencies if: steps.cache-dependencies.outputs.cache-hit != 'true'