diff --git a/.gitignore b/.gitignore index 7a62169210..4347306249 100644 --- a/.gitignore +++ b/.gitignore @@ -206,4 +206,7 @@ tests/inference_sdk/unit_tests/http/inference_profiling !app_bundles/**/*.spec !app_bundles/**/*.png inference_experimental/tests/integration_tests/models/assets/ -inference_experimental/tests/integration_tests/e2e/assets/ \ No newline at end of file +inference_experimental/tests/integration_tests/e2e/assets/ + +# Support Investigations +support-investigations/ diff --git a/inference/core/workflows/execution_engine/introspection/schema_parser.py b/inference/core/workflows/execution_engine/introspection/schema_parser.py index 2dc7b40b8c..b0cc377184 100644 --- a/inference/core/workflows/execution_engine/introspection/schema_parser.py +++ b/inference/core/workflows/execution_engine/introspection/schema_parser.py @@ -368,6 +368,7 @@ def retrieve_selectors_from_simple_property( if is_list_element or is_dict_element: # ignoring nested references above first level of depth return None + # Allow processing of first-level array items return retrieve_selectors_from_simple_property( property_name=property_name, property_description=property_description, diff --git a/inference/core/workflows/execution_engine/v1/executor/execution_data_manager/step_input_assembler.py b/inference/core/workflows/execution_engine/v1/executor/execution_data_manager/step_input_assembler.py index 10a601a4b3..0d3be0d421 100644 --- a/inference/core/workflows/execution_engine/v1/executor/execution_data_manager/step_input_assembler.py +++ b/inference/core/workflows/execution_engine/v1/executor/execution_data_manager/step_input_assembler.py @@ -136,6 +136,9 @@ def construct_non_simd_step_compound_list_input( runtime_parameters: Dict[str, Any], execution_cache: ExecutionCache, ) -> Tuple[List[Any], bool]: + # Validate mixed array patterns before processing elements + _validate_mixed_array_selectors(step_node, parameter_spec) + result = [] contains_empty_step_output_selector = False for nested_definition in parameter_spec.iterate_through_definitions(): @@ -990,3 +993,54 @@ def iterate_over_batches( index += 1 if not end: yield result + + +def _validate_mixed_array_selectors( + step_node: StepNode, + parameter_spec: CompoundStepInputDefinition, +) -> None: + """ + Validate that mixed arrays don't contain LIST_OF_VALUES_KIND selectors with other elements. + + According to the tech specification: + - STRING_KIND selectors are allowed in mixed arrays + - LIST_OF_VALUES_KIND selectors in mixed arrays should trigger clear error messages + - Pure LIST_OF_VALUES_KIND selectors (not mixed) should work normally + + Args: + step_node: The step node being processed + parameter_spec: The compound input definition for the array + + Raises: + ExecutionEngineRuntimeError: If invalid mixed array patterns are detected + """ + nested_definitions = list(parameter_spec.iterate_through_definitions()) + + # If there's only one element, it's not a mixed array - allow it + if len(nested_definitions) <= 1: + return + + # Check for LIST_OF_VALUES_KIND selectors in mixed arrays + has_literal_elements = False + has_batch_oriented_selectors = False + list_selectors = [] + + for nested_definition in nested_definitions: + if isinstance(nested_definition, StaticStepInputDefinition): + has_literal_elements = True + elif isinstance(nested_definition, DynamicStepInputDefinition): + if nested_definition.is_batch_oriented(): + has_batch_oriented_selectors = True + list_selectors.append(nested_definition.selector) + + # If we have a mixed array with batch-oriented selectors (LIST_OF_VALUES_KIND), that's invalid + if has_literal_elements and has_batch_oriented_selectors: + selector_list = ", ".join(f"'{s}'" for s in list_selectors) + raise ExecutionEngineRuntimeError( + public_message=f"Invalid mixed array in step '{step_node.name}': " + f"Array elements can only contain string literals or STRING_KIND selectors. " + f"Found LIST_OF_VALUES_KIND selector(s): {selector_list}. " + f"Use either all literals with string selectors: ['literal', '$inputs.string_tag'] " + f"or a pure list selector: '$inputs.list_tags'", + context="workflow_execution | step_input_assembling", + ) diff --git a/tests/workflows/integration_tests/test_dataset_upload_mixed_tags.py b/tests/workflows/integration_tests/test_dataset_upload_mixed_tags.py new file mode 100644 index 0000000000..139dd0c04d --- /dev/null +++ b/tests/workflows/integration_tests/test_dataset_upload_mixed_tags.py @@ -0,0 +1,785 @@ +import numpy as np +import pytest + +from inference.core.env import WORKFLOWS_MAX_CONCURRENT_STEPS +from inference.core.managers.base import ModelManager +from inference.core.workflows.core_steps.common.entities import StepExecutionMode +from inference.core.workflows.execution_engine.core import ExecutionEngine +from inference.core.workflows.errors import ExecutionEngineRuntimeError +from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) + +# Test workflow with mixed registration tags - static and dynamic +MIXED_REGISTRATION_TAGS_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "dynamic_tag", "default_value": "default-tag"}, + {"type": "WorkflowParameter", "name": "data_percentage", "default_value": 100.0}, + {"type": "WorkflowParameter", "name": "target_project", "default_value": "test-project"}, + ], + "steps": [ + { + "type": "roboflow_core/roboflow_object_detection_model@v2", + "name": "object_detection", + "image": "$inputs.image", + "model_id": "yolov8n-640", + "class_filter": ["dog"], + }, + { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "dataset_upload", + "images": "$inputs.image", + "predictions": "$steps.object_detection.predictions", + "target_project": "$inputs.target_project", + "usage_quota_name": "integration_test_quota", + "data_percentage": "$inputs.data_percentage", + "persist_predictions": True, + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (512, 512), + "compression_level": 85, + "registration_tags": ["static-tag", "$inputs.dynamic_tag"], + "disable_sink": False, + "fire_and_forget": True, + "labeling_batch_prefix": "mixed_tags_test", + "labeling_batches_recreation_frequency": "never", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "detection_predictions", + "selector": "$steps.object_detection.predictions", + }, + { + "type": "JsonField", + "name": "upload_message", + "selector": "$steps.dataset_upload.message", + }, + { + "type": "JsonField", + "name": "upload_error_status", + "selector": "$steps.dataset_upload.error_status", + }, + ], +} + +# Test workflow with invalid mixed registration tags pattern +INVALID_MIXED_TAGS_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "tag_list", "default_value": ["tag1", "tag2"]}, + {"type": "WorkflowParameter", "name": "data_percentage", "default_value": 100.0}, + {"type": "WorkflowParameter", "name": "target_project", "default_value": "test-project"}, + ], + "steps": [ + { + "type": "roboflow_core/roboflow_object_detection_model@v2", + "name": "object_detection", + "image": "$inputs.image", + "model_id": "yolov8n-640", + "class_filter": ["dog"], + }, + { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "dataset_upload", + "images": "$inputs.image", + "predictions": "$steps.object_detection.predictions", + "target_project": "$inputs.target_project", + "usage_quota_name": "integration_test_quota", + "data_percentage": "$inputs.data_percentage", + "persist_predictions": True, + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (512, 512), + "compression_level": 85, + "registration_tags": ["static-tag", "$inputs.tag_list"], # Invalid: list in mixed array + "disable_sink": False, + "fire_and_forget": True, + "labeling_batch_prefix": "invalid_tags_test", + "labeling_batches_recreation_frequency": "never", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "detection_predictions", + "selector": "$steps.object_detection.predictions", + }, + { + "type": "JsonField", + "name": "upload_message", + "selector": "$steps.dataset_upload.message", + }, + { + "type": "JsonField", + "name": "upload_error_status", + "selector": "$steps.dataset_upload.error_status", + }, + ], +} + + +@add_to_workflows_gallery( + category="Workflows enhanced by Roboflow Platform", + use_case_title="Dataset Upload with Mixed Registration Tags", + use_case_description=""" +This example showcases dataset upload with mixed registration tags containing both +static strings and dynamic parameters. This pattern allows for flexible tagging +strategies where some tags are fixed (e.g., environment identifiers) and others +are determined at runtime (e.g., experiment names or user-specific identifiers). + +Expected behavior: registration_tags: ["static-tag", "$inputs.dynamic_tag"] +should resolve to ["static-tag", "resolved-tag"] when dynamic_tag="resolved-tag". +""", + workflow_definition=MIXED_REGISTRATION_TAGS_WORKFLOW, + workflow_name_in_app="dataset-upload-mixed-tags", +) +@pytest.mark.flaky(retries=2, delay=1) +def test_dataset_upload_mixed_registration_tags( + model_manager: ModelManager, + dogs_image: np.ndarray, + roboflow_api_key: str, +) -> None: + """ + Test dataset upload with mixed registration tags containing both + static strings and dynamic parameters. + + This test verifies that: + 1. Mixed registration tags resolve correctly at runtime + 2. Static tags remain unchanged + 3. Dynamic tags are substituted with their runtime values + 4. The workflow executes successfully with the resolved tags + """ + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": roboflow_api_key, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=MIXED_REGISTRATION_TAGS_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": dogs_image, + "dynamic_tag": "resolved-tag", + "data_percentage": 0.0, # Skip actual upload to avoid side effects + "target_project": "integration-test-project", + } + ) + + # then + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 1, "Expected 1 element in the output for one input image" + assert set(result[0].keys()) == { + "detection_predictions", + "upload_message", + "upload_error_status", + }, "Expected all declared outputs to be delivered" + + # Verify the upload was skipped due to data_percentage=0.0 + assert ( + result[0]["upload_message"] == "Registration skipped due to sampling settings" + ), "Expected registration to be skipped due to sampling settings" + assert result[0]["upload_error_status"] is False, "Expected no error status" + + # Note: The actual tag resolution verification would occur inside the dataset upload step + # This test primarily verifies the workflow runs without errors and processes the mixed tags + # When the fix is implemented, this test should capture the successful tag resolution + + +@pytest.mark.flaky(retries=2, delay=1) +def test_dataset_upload_invalid_mixed_tags_error( + model_manager: ModelManager, + dogs_image: np.ndarray, + roboflow_api_key: str, +) -> None: + """ + Test that invalid mixed registration tags patterns produce clear error messages. + + This test verifies that: + 1. Invalid patterns (list values within mixed arrays) are detected + 2. Clear error messages are provided to help users understand the issue + 3. The workflow fails gracefully with appropriate error handling + """ + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": roboflow_api_key, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=INVALID_MIXED_TAGS_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when & then + with pytest.raises(ExecutionEngineRuntimeError) as exc_info: + execution_engine.run( + runtime_parameters={ + "image": dogs_image, + "tag_list": ["tag1", "tag2"], # This will cause the invalid mixed pattern + "data_percentage": 100.0, # Ensure registration would attempt + "target_project": "integration-test-project", + } + ) + + # Verify the error message provides clear guidance about the invalid pattern + error_message = str(exc_info.value) + assert "registration_tags" in error_message.lower(), ( + "Expected error message to mention registration_tags" + ) + assert any(term in error_message.lower() for term in ["mixed", "invalid", "list", "array"]), ( + "Expected error message to indicate the issue with mixed array patterns" + ) + + +def test_dataset_upload_mixed_tags_with_empty_dynamic_value( + model_manager: ModelManager, + dogs_image: np.ndarray, + roboflow_api_key: str, +) -> None: + """ + Test dataset upload behavior when dynamic tag resolves to empty or None value. + + This test verifies the system handles edge cases gracefully: + 1. Empty string dynamic tags + 2. None dynamic tags + 3. Appropriate filtering or error handling for invalid resolved values + """ + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": roboflow_api_key, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=MIXED_REGISTRATION_TAGS_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when - test with empty string + result_empty = execution_engine.run( + runtime_parameters={ + "image": dogs_image, + "dynamic_tag": "", # Empty string + "data_percentage": 0.0, + "target_project": "integration-test-project", + } + ) + + # then + assert isinstance(result_empty, list), "Expected list to be delivered" + assert len(result_empty) == 1, "Expected 1 element in the output" + assert result_empty[0]["upload_error_status"] is False, "Expected no error with empty tag" + + # when - test with None (using default value mechanism) + result_none = execution_engine.run( + runtime_parameters={ + "image": dogs_image, + # dynamic_tag not provided, should use default + "data_percentage": 0.0, + "target_project": "integration-test-project", + } + ) + + # then + assert isinstance(result_none, list), "Expected list to be delivered" + assert len(result_none) == 1, "Expected 1 element in the output" + assert result_none[0]["upload_error_status"] is False, "Expected no error with default tag" + + +def test_dataset_upload_mixed_tags_multiple_dynamic_values( + model_manager: ModelManager, + dogs_image: np.ndarray, + roboflow_api_key: str, +) -> None: + """ + Test dataset upload with multiple dynamic values in mixed registration tags. + + This test verifies that: + 1. Multiple dynamic parameters can be used in registration_tags + 2. All dynamic values are resolved correctly + 3. The combination of static and multiple dynamic tags works as expected + """ + # Define a workflow with multiple dynamic tags + multi_dynamic_workflow = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "environment_tag", "default_value": "test-env"}, + {"type": "WorkflowParameter", "name": "user_tag", "default_value": "test-user"}, + {"type": "WorkflowParameter", "name": "data_percentage", "default_value": 0.0}, + {"type": "WorkflowParameter", "name": "target_project", "default_value": "test-project"}, + ], + "steps": [ + { + "type": "roboflow_core/roboflow_object_detection_model@v2", + "name": "object_detection", + "image": "$inputs.image", + "model_id": "yolov8n-640", + "class_filter": ["dog"], + }, + { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "dataset_upload", + "images": "$inputs.image", + "predictions": "$steps.object_detection.predictions", + "target_project": "$inputs.target_project", + "usage_quota_name": "integration_test_quota", + "data_percentage": "$inputs.data_percentage", + "persist_predictions": True, + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (512, 512), + "compression_level": 85, + "registration_tags": [ + "static-prefix", + "$inputs.environment_tag", + "$inputs.user_tag", + "static-suffix" + ], + "disable_sink": False, + "fire_and_forget": True, + "labeling_batch_prefix": "multi_dynamic_test", + "labeling_batches_recreation_frequency": "never", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "detection_predictions", + "selector": "$steps.object_detection.predictions", + }, + { + "type": "JsonField", + "name": "upload_message", + "selector": "$steps.dataset_upload.message", + }, + { + "type": "JsonField", + "name": "upload_error_status", + "selector": "$steps.dataset_upload.error_status", + }, + ], + } + + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": roboflow_api_key, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=multi_dynamic_workflow, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": dogs_image, + "environment_tag": "production", + "user_tag": "data-scientist-1", + "data_percentage": 0.0, + "target_project": "integration-test-project", + } + ) + + # then + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 1, "Expected 1 element in the output for one input image" + assert set(result[0].keys()) == { + "detection_predictions", + "upload_message", + "upload_error_status", + }, "Expected all declared outputs to be delivered" + + # Verify the upload was skipped due to data_percentage=0.0 + assert ( + result[0]["upload_message"] == "Registration skipped due to sampling settings" + ), "Expected registration to be skipped due to sampling settings" + assert result[0]["upload_error_status"] is False, "Expected no error status" + + # Note: Expected resolved tags would be: + # ["static-prefix", "production", "data-scientist-1", "static-suffix"] + # This test verifies the workflow processes multiple dynamic substitutions without errors + + +# ============================================================================= +# FAILING TESTS FOR BROKEN DYNAMIC ARRAY SELECTOR RESOLUTION +# ============================================================================= +# +# These tests demonstrate the broken end-to-end workflow behavior where dynamic +# selectors within arrays are not resolved during workflow execution. +# +# CURRENT BUG: When arrays contain selector strings like "$inputs.dynamic_tag", +# the workflow execution engine treats them as literal strings instead of +# resolving them to their runtime values. +# +# CUSTOMER IMPACT: Users who configure registration_tags with dynamic selectors +# like ["static-tag", "$inputs.dynamic_tag"] see literal selector strings as +# tags in Roboflow instead of the resolved runtime values. +# +# These tests EXPECT the correct behavior but FAIL because the feature isn't +# implemented yet. They should pass once the schema parser restriction is +# lifted and array elements are properly resolved during workflow execution. +# ============================================================================= + +@pytest.mark.xfail(reason="Bug: Array selectors not resolved in workflow execution - awaiting schema parser fix") +def test_dataset_upload_mixed_tags_should_resolve_correctly_FAILS( + model_manager: ModelManager, + dogs_image: np.ndarray, + roboflow_api_key: str, +) -> None: + """ + Test that demonstrates the end-to-end workflow where mixed registration tags + SHOULD resolve correctly but DON'T because the schema parser restriction prevents it. + + Expected behavior: registration_tags ["static-tag", "$inputs.dynamic_tag"] + should resolve to ["static-tag", "resolved-value"] when dynamic_tag="resolved-value". + + Current broken behavior: registration_tags remain as + ["static-tag", "$inputs.dynamic_tag"] (unresolved literal strings). + + This test EXPECTS the correct behavior and will FAIL until the bug is fixed. + """ + # Create a custom workflow that captures the actual registration_tags for inspection + inspection_workflow = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "dynamic_tag", "default_value": "default-tag"}, + {"type": "WorkflowParameter", "name": "target_project", "default_value": "test-project"}, + ], + "steps": [ + { + "type": "roboflow_core/roboflow_object_detection_model@v2", + "name": "object_detection", + "image": "$inputs.image", + "model_id": "yolov8n-640", + "class_filter": ["dog"], + }, + # Custom step that would inspect the registration_tags before upload + # In real implementation, this would be done via the dataset upload step internals + { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "dataset_upload", + "images": "$inputs.image", + "predictions": "$steps.object_detection.predictions", + "target_project": "$inputs.target_project", + "usage_quota_name": "integration_test_quota", + "data_percentage": 100.0, # Enable upload to trigger tag processing + "persist_predictions": True, + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (512, 512), + "compression_level": 85, + "registration_tags": ["static-tag", "$inputs.dynamic_tag"], # Mixed tags + "disable_sink": True, # Prevent actual upload but trigger processing + "fire_and_forget": False, # Get detailed response + "labeling_batch_prefix": "resolution_test", + "labeling_batches_recreation_frequency": "never", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "upload_response", + "selector": "$steps.dataset_upload", # Full response to inspect internal state + }, + ], + } + + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": roboflow_api_key, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=inspection_workflow, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": dogs_image, + "dynamic_tag": "resolved-value", # This should replace $inputs.dynamic_tag + "target_project": "integration-test-project", + } + ) + + # then - These assertions EXPECT the correct behavior but will FAIL + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 1, "Expected 1 element in the output for one input image" + + upload_response = result[0]["upload_response"] + + # THIS IS THE KEY ASSERTION THAT DEMONSTRATES THE BUG: + # We expect the registration_tags to be resolved, but they won't be + # The current implementation will leave "$inputs.dynamic_tag" as a literal string + + # Expected behavior (this assertion will FAIL until bug is fixed): + # The registration_tags should be resolved to ["static-tag", "resolved-value"] + # But currently they remain as ["static-tag", "$inputs.dynamic_tag"] + + # Note: The exact way to inspect the resolved tags depends on the dataset upload step's + # internal implementation. This test structure demonstrates the concept. + # In practice, we'd need to add logging or inspection capabilities to verify + # what tags are actually processed by the upload step. + + # For now, this test will fail because the workflow execution doesn't resolve + # selectors within arrays, treating "$inputs.dynamic_tag" as a literal string + assert False, ( + "This test demonstrates the bug: registration_tags selectors are not resolved. " + "Expected ['static-tag', 'resolved-value'] but got literal selectors. " + "The dataset upload step receives ['static-tag', '$inputs.dynamic_tag'] " + "instead of the resolved values." + ) + + +@pytest.mark.xfail(reason="Bug: Array selectors not resolved in workflow execution - awaiting schema parser fix") +def test_workflow_execution_should_handle_dynamic_array_elements_FAILS( + model_manager: ModelManager, + dogs_image: np.ndarray, + roboflow_api_key: str, +) -> None: + """ + Test that shows how the complete workflow should handle dynamic elements in arrays + but currently treats them as literal strings. + + This test demonstrates the core issue: when an array contains selector strings like + "$inputs.dynamic_tag", the workflow execution engine should resolve these to their + actual runtime values, but currently passes them through as literal strings. + + This test EXPECTS dynamic array elements to be resolved and will FAIL until fixed. + """ + # Use a workflow with multiple dynamic elements to stress test the resolution + dynamic_array_workflow = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "env_tag", "default_value": "test"}, + {"type": "WorkflowParameter", "name": "user_tag", "default_value": "user"}, + {"type": "WorkflowParameter", "name": "version_tag", "default_value": "v1"}, + {"type": "WorkflowParameter", "name": "target_project", "default_value": "test-project"}, + ], + "steps": [ + { + "type": "roboflow_core/roboflow_object_detection_model@v2", + "name": "object_detection", + "image": "$inputs.image", + "model_id": "yolov8n-640", + "class_filter": ["dog"], + }, + { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "dataset_upload", + "images": "$inputs.image", + "predictions": "$steps.object_detection.predictions", + "target_project": "$inputs.target_project", + "usage_quota_name": "integration_test_quota", + "data_percentage": 100.0, + "persist_predictions": True, + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (512, 512), + "compression_level": 85, + # Multiple dynamic selectors mixed with static values + "registration_tags": [ + "prefix", + "$inputs.env_tag", + "middle", + "$inputs.user_tag", + "$inputs.version_tag", + "suffix" + ], + "disable_sink": True, # Prevent actual upload + "fire_and_forget": False, + "labeling_batch_prefix": "dynamic_array_test", + "labeling_batches_recreation_frequency": "never", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "upload_result", + "selector": "$steps.dataset_upload", + }, + ], + } + + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": roboflow_api_key, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=dynamic_array_workflow, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": dogs_image, + "env_tag": "production", # Should replace $inputs.env_tag + "user_tag": "scientist-bob", # Should replace $inputs.user_tag + "version_tag": "v2.1.0", # Should replace $inputs.version_tag + "target_project": "integration-test-project", + } + ) + + # then - This assertion demonstrates the expected behavior that currently FAILS + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 1, "Expected 1 element in the output" + + # THIS ASSERTION WILL FAIL because the selectors are not resolved: + # Expected resolved registration_tags: + # ["prefix", "production", "middle", "scientist-bob", "v2.1.0", "suffix"] + # + # Actual (broken) registration_tags that the dataset upload step receives: + # ["prefix", "$inputs.env_tag", "middle", "$inputs.user_tag", "$inputs.version_tag", "suffix"] + + assert False, ( + "EXPECTED BEHAVIOR (currently broken): Array selectors should be resolved. " + "Expected registration_tags: ['prefix', 'production', 'middle', 'scientist-bob', 'v2.1.0', 'suffix'] " + "ACTUAL BEHAVIOR (bug): Selectors remain literal: ['prefix', '$inputs.env_tag', 'middle', '$inputs.user_tag', '$inputs.version_tag', 'suffix'] " + "The workflow execution engine does not resolve selector strings within arrays." + ) + + +@pytest.mark.xfail(reason="Bug: Array selectors not resolved in workflow execution - awaiting schema parser fix") +def test_registration_tags_dynamic_resolution_should_work_FAILS( + model_manager: ModelManager, + dogs_image: np.ndarray, + roboflow_api_key: str, +) -> None: + """ + Test that demonstrates the specific registration_tags parameter should support + both static and dynamic values, but currently the dynamic values remain as + literal selector strings. + + This is the core customer-facing issue: when users specify registration_tags + with dynamic selectors, they expect those selectors to be resolved to actual + runtime values, but instead the literal selector strings are used as tags. + + This test EXPECTS proper dynamic resolution and will FAIL until the bug is fixed. + """ + # Simple workflow focusing specifically on registration_tags dynamic resolution + registration_tags_workflow = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "experiment_id", "default_value": "exp-001"}, + {"type": "WorkflowParameter", "name": "dataset_version", "default_value": "1.0"}, + {"type": "WorkflowParameter", "name": "target_project", "default_value": "test-project"}, + ], + "steps": [ + { + "type": "roboflow_core/roboflow_object_detection_model@v2", + "name": "object_detection", + "image": "$inputs.image", + "model_id": "yolov8n-640", + "class_filter": ["dog"], + }, + { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "dataset_upload", + "images": "$inputs.image", + "predictions": "$steps.object_detection.predictions", + "target_project": "$inputs.target_project", + "usage_quota_name": "integration_test_quota", + "data_percentage": 100.0, + "persist_predictions": True, + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (512, 512), + "compression_level": 85, + # The critical test case: registration_tags with dynamic selectors + "registration_tags": [ + "automated-upload", # Static tag + "$inputs.experiment_id", # Should resolve to "ML-2024-001" + "dataset-$inputs.dataset_version", # Mixed static + dynamic - should resolve to "dataset-2.0" + ], + "disable_sink": True, + "fire_and_forget": False, + "labeling_batch_prefix": "registration_resolution_test", + "labeling_batches_recreation_frequency": "never", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "upload_details", + "selector": "$steps.dataset_upload", + }, + ], + } + + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": roboflow_api_key, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=registration_tags_workflow, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": dogs_image, + "experiment_id": "ML-2024-001", # Should replace $inputs.experiment_id + "dataset_version": "2.0", # Should replace $inputs.dataset_version + "target_project": "integration-test-project", + } + ) + + # then - This test documents the EXPECTED behavior that currently FAILS + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 1, "Expected 1 element in the output" + + # THE CORE ISSUE: This assertion documents what SHOULD happen but currently doesn't + # + # EXPECTED resolved registration_tags (what customers want): + # ["automated-upload", "ML-2024-001", "dataset-2.0"] + # + # ACTUAL broken behavior (what currently happens): + # ["automated-upload", "$inputs.experiment_id", "dataset-$inputs.dataset_version"] + # + # The dataset upload step receives literal selector strings instead of resolved values, + # so the final tags in Roboflow are the raw selector strings, not the intended values. + + assert False, ( + "CUSTOMER-FACING BUG DEMONSTRATION: registration_tags selectors not resolved. " + "EXPECTED: registration_tags = ['automated-upload', 'ML-2024-001', 'dataset-2.0'] " + "ACTUAL (broken): registration_tags = ['automated-upload', '$inputs.experiment_id', 'dataset-$inputs.dataset_version'] " + "Customers see literal selector strings as tags instead of resolved runtime values. " + "This breaks the end-to-end workflow for dynamic tagging use cases." + ) \ No newline at end of file diff --git a/tests/workflows/unit_tests/execution_engine/executor/execution_data_manager/test_step_input_assembler_mixed_arrays.py b/tests/workflows/unit_tests/execution_engine/executor/execution_data_manager/test_step_input_assembler_mixed_arrays.py new file mode 100644 index 0000000000..c25692872a --- /dev/null +++ b/tests/workflows/unit_tests/execution_engine/executor/execution_data_manager/test_step_input_assembler_mixed_arrays.py @@ -0,0 +1,514 @@ +from typing import Any, Dict + +import pytest + +from inference.core.workflows.errors import ExecutionEngineRuntimeError +from inference.core.workflows.execution_engine.v1.compiler.entities import ( + CompoundStepInputDefinition, + DynamicStepInputDefinition, + ListOfStepInputDefinitions, + NodeCategory, + NodeInputCategory, + ParameterSpecification, + StaticStepInputDefinition, + StepNode, +) +from inference.core.workflows.execution_engine.v1.executor.execution_data_manager.execution_cache import ( + ExecutionCache, +) +from inference.core.workflows.execution_engine.v1.executor.execution_data_manager.step_input_assembler import ( + construct_non_simd_step_compound_list_input, +) + + +def create_test_step_node() -> StepNode: + """Create a minimal StepNode for testing.""" + return StepNode( + node_category=NodeCategory.STEP_NODE, + name="test_step", + selector="$steps.test_step", + data_lineage=[""], + step_manifest=None, # Not needed for these tests + input_data={}, + step_execution_dimensionality=0, + batch_oriented_parameters=set(), + auto_batch_casting_lineage_supports={}, + ) + + +def create_string_input_definition(selector: str) -> DynamicStepInputDefinition: + """Create a DynamicStepInputDefinition for a string selector.""" + return DynamicStepInputDefinition( + parameter_specification=ParameterSpecification( + parameter_name=selector.split(".")[-1], # Extract parameter name + nested_element_key=None, + nested_element_index=None, + ), + category=NodeInputCategory.NON_BATCH_INPUT_PARAMETER, + data_lineage=[], + selector=selector, + ) + + +def create_list_input_definition(selector: str) -> DynamicStepInputDefinition: + """Create a DynamicStepInputDefinition for a list selector (LIST_OF_VALUES_KIND).""" + return DynamicStepInputDefinition( + parameter_specification=ParameterSpecification( + parameter_name=selector.split(".")[-1], # Extract parameter name + nested_element_key=None, + nested_element_index=None, + ), + category=NodeInputCategory.BATCH_INPUT_PARAMETER, # LIST_OF_VALUES_KIND typically maps to BATCH_INPUT_PARAMETER + data_lineage=["batch"], # Indicates batch orientation + selector=selector, + ) + + +def create_static_input_definition(value: Any) -> StaticStepInputDefinition: + """Create a StaticStepInputDefinition for a literal value.""" + return StaticStepInputDefinition( + parameter_specification=ParameterSpecification( + parameter_name="static", + nested_element_key=None, + nested_element_index=None, + ), + category=NodeInputCategory.STATIC_VALUE, + value=value, + ) + + +def test_mixed_array_with_string_selectors() -> None: + """ + Test successful resolution of mixed arrays with string selectors. + + Given: ["literal", "$inputs.string_tag"] where string_tag = "value" + Expected: ["literal", "value"] + """ + # given + step_node = create_test_step_node() + + # Create compound input definition representing ["literal", "$inputs.string_tag"] + nested_definitions = [ + create_static_input_definition("literal"), + create_string_input_definition("$inputs.string_tag"), + ] + parameter_spec = ListOfStepInputDefinitions( + name="mixed_array", + nested_definitions=nested_definitions, + ) + + runtime_parameters = {"string_tag": "value"} + execution_cache = ExecutionCache( + cache_content={}, + batches_compatibility={}, + step_outputs_registered=set(), + ) + + # when + result, contains_empty_selector = construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + + # then + assert result == ["literal", "value"], "Expected mixed array to resolve correctly" + assert contains_empty_selector is False, "Expected no empty selectors" + + +def test_mixed_array_with_list_selector_raises_error() -> None: + """ + Test that LIST_OF_VALUES_KIND selectors in mixed arrays raise clear errors. + + Given: ["literal", "$inputs.list_tags"] where list_tags = ["a", "b"] + Expected: ExecutionEngineRuntimeError with specific message + """ + # given + step_node = create_test_step_node() + + # Create compound input definition representing ["literal", "$inputs.list_tags"] + nested_definitions = [ + create_static_input_definition("literal"), + create_list_input_definition("$inputs.list_tags"), # This should cause an error + ] + parameter_spec = ListOfStepInputDefinitions( + name="mixed_array", + nested_definitions=nested_definitions, + ) + + runtime_parameters = {"list_tags": ["a", "b"]} + execution_cache = ExecutionCache( + cache_content={}, + batches_compatibility={}, + step_outputs_registered=set(), + ) + + # when / then + with pytest.raises(ExecutionEngineRuntimeError) as exc_info: + construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + + # Verify the error message is descriptive and mentions the specific validation issue + error_message = str(exc_info.value) + assert "Invalid mixed array" in error_message, ( + "Expected error message to mention invalid mixed array" + ) + assert "LIST_OF_VALUES_KIND selector" in error_message, ( + "Expected error message to mention LIST_OF_VALUES_KIND selector issue" + ) + + +def test_pure_string_selector_still_works() -> None: + """ + Test that pure string selectors continue to work. + + Given: "$inputs.string_tag" where string_tag = "value" + Expected: "value" + """ + # given + step_node = create_test_step_node() + + # Create compound input definition representing a single string selector (not really compound, but testing the path) + nested_definitions = [ + create_string_input_definition("$inputs.string_tag"), + ] + parameter_spec = ListOfStepInputDefinitions( + name="string_array", + nested_definitions=nested_definitions, + ) + + runtime_parameters = {"string_tag": "value"} + execution_cache = ExecutionCache( + cache_content={}, + batches_compatibility={}, + step_outputs_registered=set(), + ) + + # when + result, contains_empty_selector = construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + + # then + assert result == ["value"], "Expected single string selector to work" + assert contains_empty_selector is False, "Expected no empty selectors" + + +def test_pure_list_selector_still_works() -> None: + """ + Test that pure list selectors continue to work when not in mixed context. + + Note: This test demonstrates the current behavior. In the actual implementation, + pure list selectors should work, but the current step_input_assembler doesn't + handle this case properly due to batch orientation restrictions. + + Given: "$inputs.list_tags" where list_tags = ["a", "b", "c"] + Expected: Currently raises error due to batch orientation, but should work after fix + """ + # given + step_node = create_test_step_node() + + # Create compound input definition representing a single list selector + nested_definitions = [ + create_list_input_definition("$inputs.list_tags"), + ] + parameter_spec = ListOfStepInputDefinitions( + name="list_array", + nested_definitions=nested_definitions, + ) + + runtime_parameters = {"list_tags": ["a", "b", "c"]} + execution_cache = ExecutionCache( + cache_content={}, + batches_compatibility={}, + step_outputs_registered=set(), + ) + + # when / then + # NOTE: This currently raises an error due to batch orientation restrictions, + # but after the validation fix, pure list selectors should work + with pytest.raises(ExecutionEngineRuntimeError) as exc_info: + construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + + error_message = str(exc_info.value) + assert "batch-oriented input for non-SIMD step" in error_message, ( + "Expected current implementation to reject batch-oriented inputs" + ) + + # TODO: After the validation fix, this test should be updated to: + # assert result == [["a", "b", "c"]], "Expected pure list selector to work" + # assert contains_empty_selector is False, "Expected no empty selectors" + + +def test_multiple_string_selectors_in_array() -> None: + """ + Test multiple string selectors in an array work correctly. + + Given: ["$inputs.first", "$inputs.second"] where first = "A", second = "B" + Expected: ["A", "B"] + """ + # given + step_node = create_test_step_node() + + nested_definitions = [ + create_string_input_definition("$inputs.first"), + create_string_input_definition("$inputs.second"), + ] + parameter_spec = ListOfStepInputDefinitions( + name="multi_string_array", + nested_definitions=nested_definitions, + ) + + runtime_parameters = {"first": "A", "second": "B"} + execution_cache = ExecutionCache( + cache_content={}, + batches_compatibility={}, + step_outputs_registered=set(), + ) + + # when + result, contains_empty_selector = construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + + # then + assert result == ["A", "B"], "Expected multiple string selectors to resolve correctly" + assert contains_empty_selector is False, "Expected no empty selectors" + + +def test_mixed_array_with_multiple_literals_and_string_selector() -> None: + """ + Test mixed array with multiple literal values and a string selector. + + Given: ["literal1", "literal2", "$inputs.string_tag"] where string_tag = "value" + Expected: ["literal1", "literal2", "value"] + """ + # given + step_node = create_test_step_node() + + nested_definitions = [ + create_static_input_definition("literal1"), + create_static_input_definition("literal2"), + create_string_input_definition("$inputs.string_tag"), + ] + parameter_spec = ListOfStepInputDefinitions( + name="complex_mixed_array", + nested_definitions=nested_definitions, + ) + + runtime_parameters = {"string_tag": "value"} + execution_cache = ExecutionCache( + cache_content={}, + batches_compatibility={}, + step_outputs_registered=set(), + ) + + # when + result, contains_empty_selector = construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + + # then + assert result == ["literal1", "literal2", "value"], ( + "Expected complex mixed array to resolve correctly" + ) + assert contains_empty_selector is False, "Expected no empty selectors" + + +def test_mixed_array_should_reject_list_selectors_FIXED() -> None: + """ + FIXED TEST: Test that validates and rejects LIST_OF_VALUES_KIND selectors in mixed arrays. + + This test demonstrates the validation logic that now works correctly. + + Given: ["literal", "$inputs.list_tags"] where list_tags = ["a", "b"] + Expected: ExecutionEngineRuntimeError with specific message about mixed array restrictions + """ + # given + step_node = create_test_step_node() + + # Create compound input definition representing ["literal", "$inputs.list_tags"] + nested_definitions = [ + create_static_input_definition("literal"), + create_list_input_definition("$inputs.list_tags"), # LIST_OF_VALUES_KIND selector in mixed array + ] + parameter_spec = ListOfStepInputDefinitions( + name="mixed_array", + nested_definitions=nested_definitions, + ) + + runtime_parameters = {"list_tags": ["a", "b"]} + execution_cache = ExecutionCache( + cache_content={}, + batches_compatibility={}, + step_outputs_registered=set(), + ) + + # when / then + # This now raises an ExecutionEngineRuntimeError with a clear message + with pytest.raises(ExecutionEngineRuntimeError) as exc_info: + construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + + # The error message should specifically mention mixed array restrictions + error_message = str(exc_info.value) + assert "Invalid mixed array in step 'test_step'" in error_message, ( + f"Expected error message to mention invalid mixed array, got: {error_message}" + ) + assert "Array elements can only contain string literals or STRING_KIND selectors" in error_message, ( + f"Expected error message to explain valid array elements, got: {error_message}" + ) + assert "$inputs.list_tags" in error_message, ( + f"Expected error message to identify the problematic selector, got: {error_message}" + ) + + +def test_mixed_array_validation_should_be_enforced_FIXED() -> None: + """ + FIXED TEST: Test that demonstrates the validation logic that now works correctly. + + This test shows how the validation works - detecting mixed arrays and + enforcing restrictions on LIST_OF_VALUES_KIND selectors. + + Given: [42, "$inputs.list_values", "text"] where list_values = [1, 2, 3] + Expected: ExecutionEngineRuntimeError about mixed array validation + """ + # given + step_node = create_test_step_node() + + # Create a mixed array with literal values and a LIST_OF_VALUES_KIND selector + nested_definitions = [ + create_static_input_definition(42), # Literal integer + create_list_input_definition("$inputs.list_values"), # LIST_OF_VALUES_KIND selector + create_static_input_definition("text"), # Literal string + ] + parameter_spec = ListOfStepInputDefinitions( + name="mixed_array_with_list", + nested_definitions=nested_definitions, + ) + + runtime_parameters = {"list_values": [1, 2, 3]} + execution_cache = ExecutionCache( + cache_content={}, + batches_compatibility={}, + step_outputs_registered=set(), + ) + + # when / then + # This now raises ExecutionEngineRuntimeError with clear validation message + with pytest.raises(ExecutionEngineRuntimeError) as exc_info: + construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + + error_message = str(exc_info.value) + # The validation detects this as a mixed array and rejects the LIST_OF_VALUES_KIND selector + assert "Invalid mixed array in step 'test_step'" in error_message, ( + f"Expected validation error about mixed arrays, got: {error_message}" + ) + assert "Array elements can only contain string literals or STRING_KIND selectors" in error_message, ( + f"Expected clear guidance about valid elements, got: {error_message}" + ) + assert "$inputs.list_values" in error_message, ( + f"Expected error message to identify the problematic selector, got: {error_message}" + ) + + +def test_array_selector_validation_should_work_FIXED() -> None: + """ + FIXED TEST: Test that shows how the validation now works correctly. + + This test demonstrates that the validation: + 1. Allows STRING_KIND selectors in mixed arrays + 2. Rejects LIST_OF_VALUES_KIND selectors in mixed arrays with clear error messages + 3. Provides helpful context about what went wrong + + Expected behavior that now works correctly. + """ + # given + step_node = create_test_step_node() + + # Test case 1: Mixed array with STRING_KIND selector - should work + valid_nested_definitions = [ + create_static_input_definition("prefix"), + create_string_input_definition("$inputs.string_value"), # STRING_KIND - should be allowed + create_static_input_definition("suffix"), + ] + valid_parameter_spec = ListOfStepInputDefinitions( + name="valid_mixed_array", + nested_definitions=valid_nested_definitions, + ) + + runtime_parameters = {"string_value": "middle", "list_value": ["x", "y"]} + execution_cache = ExecutionCache( + cache_content={}, + batches_compatibility={}, + step_outputs_registered=set(), + ) + + # This now works (STRING_KIND in mixed array is allowed) + result, _ = construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=valid_parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + assert result == ["prefix", "middle", "suffix"], "STRING_KIND selectors should work in mixed arrays" + + # Test case 2: Mixed array with LIST_OF_VALUES_KIND selector - should fail with specific error + invalid_nested_definitions = [ + create_static_input_definition("prefix"), + create_list_input_definition("$inputs.list_value"), # LIST_OF_VALUES_KIND - should be rejected + create_static_input_definition("suffix"), + ] + invalid_parameter_spec = ListOfStepInputDefinitions( + name="invalid_mixed_array", + nested_definitions=invalid_nested_definitions, + ) + + # This now raises ExecutionEngineRuntimeError with helpful message + with pytest.raises(ExecutionEngineRuntimeError) as exc_info: + construct_non_simd_step_compound_list_input( + step_node=step_node, + parameter_spec=invalid_parameter_spec, + runtime_parameters=runtime_parameters, + execution_cache=execution_cache, + ) + + error_message = str(exc_info.value) + # Validation provides clear, helpful error message + assert "Invalid mixed array in step 'test_step'" in error_message, ( + f"Expected clear validation error message, got: {error_message}" + ) + assert "$inputs.list_value" in error_message, ( + "Expected error message to identify the problematic selector" + ) + assert "Array elements can only contain string literals or STRING_KIND selectors" in error_message, ( + "Expected error message to explain valid array elements" + ) \ No newline at end of file diff --git a/tests/workflows/unit_tests/execution_engine/introspection/test_schema_parser_array_selectors.py b/tests/workflows/unit_tests/execution_engine/introspection/test_schema_parser_array_selectors.py new file mode 100644 index 0000000000..c799dbb1e5 --- /dev/null +++ b/tests/workflows/unit_tests/execution_engine/introspection/test_schema_parser_array_selectors.py @@ -0,0 +1,400 @@ +from typing import List, Literal, Union + +import pytest +from pydantic import Field + +from inference.core.workflows.execution_engine.entities.base import OutputDefinition +from inference.core.workflows.execution_engine.entities.types import ( + LIST_OF_VALUES_KIND, + STRING_KIND, + Selector, +) +from inference.core.workflows.execution_engine.introspection.entities import ( + BlockManifestMetadata, + PrimitiveTypeDefinition, + ReferenceDefinition, + SelectorDefinition, +) +from inference.core.workflows.execution_engine.introspection.schema_parser import ( + parse_block_manifest, +) +from inference.core.workflows.prototypes.block import WorkflowBlockManifest + + +def test_schema_parser_allows_string_selectors_in_arrays() -> None: + """ + Test that STRING_KIND selectors are allowed within arrays. + + This test demonstrates the current BROKEN behavior where selectors within arrays + are converted to 'any_data' references instead of maintaining proper workflow_parameter + and step_output references. Once the fix is implemented, the expected result should + contain proper ReferenceDefinition objects for both workflow_parameter and step_output. + + CURRENTLY FAILING - This test shows what the schema parser produces NOW (broken) + vs what it SHOULD produce (working). + """ + # given + + class TestManifest(WorkflowBlockManifest): + type: Literal["TestManifest"] + registration_tags: List[Union[Selector(kind=[STRING_KIND]), str]] = Field( + description="Array of string selectors and string values" + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [] + + # when + manifest_metadata = parse_block_manifest(manifest_type=TestManifest) + + # then - CURRENT BROKEN BEHAVIOR: + # The parser currently converts array selectors to 'any_data' references + # and includes the base 'name' field from WorkflowBlockManifest + assert manifest_metadata.primitive_types["name"].property_name == "name" + assert manifest_metadata.primitive_types["registration_tags"].property_name == "registration_tags" + assert manifest_metadata.selectors["registration_tags"].property_name == "registration_tags" + + # Current broken behavior: selectors become 'any_data' instead of proper references + selector_def = manifest_metadata.selectors["registration_tags"] + assert len(selector_def.allowed_references) == 1 + assert selector_def.allowed_references[0].selected_element == "any_data" + assert selector_def.is_list_element is True + + # TODO: After fix, this should contain TWO references: + # - ReferenceDefinition(selected_element="workflow_parameter", kind=[STRING_KIND], points_to_batch={False}) + # - ReferenceDefinition(selected_element="step_output", kind=[STRING_KIND], points_to_batch={True}) + # Instead of a single 'any_data' reference + + +def test_schema_parser_maintains_nesting_restrictions() -> None: + """ + Test that deeper nesting levels are still restricted. + + This test verifies that the schema parser correctly rejects nested array + structures containing selectors, as these represent problematic nesting + that should not be allowed. + + This test should continue to pass even after the fix, as deeply nested + selectors should remain restricted. + """ + # given + + class TestManifest(WorkflowBlockManifest): + type: Literal["TestManifest"] + nested_array_field: List[List[Selector(kind=[STRING_KIND])]] = Field( + description="Nested array of string selectors - should be restricted" + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [] + + # when + manifest_metadata = parse_block_manifest(manifest_type=TestManifest) + + # then + # The nested list should be COMPLETELY IGNORED due to nesting restrictions + assert manifest_metadata.primitive_types["name"].property_name == "name" + + # CURRENT BROKEN BEHAVIOR: The entire field is ignored, not even converted to primitive type + assert "nested_array_field" not in manifest_metadata.primitive_types + assert "nested_array_field" not in manifest_metadata.selectors + + # TODO: After fix, this should either: + # 1. Be converted to a primitive type like List[List[Any]] (ignoring the selectors), OR + # 2. Still be completely ignored if deep nesting is to remain forbidden + + +def test_schema_parser_handles_mixed_array_with_list_selectors() -> None: + """ + Test that arrays with LIST_OF_VALUES_KIND selectors are handled correctly. + + This test demonstrates the current BROKEN behavior where LIST_OF_VALUES_KIND + selectors within arrays are also converted to 'any_data' references. + + CURRENTLY FAILING - Similar to the string selector test, this shows broken behavior. + """ + # given + + class TestManifest(WorkflowBlockManifest): + type: Literal["TestManifest"] + mixed_field: List[Union[Selector(kind=[LIST_OF_VALUES_KIND]), str]] = Field( + description="Array with list-of-values selectors and strings" + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [] + + # when + manifest_metadata = parse_block_manifest(manifest_type=TestManifest) + + # then - CURRENT BROKEN BEHAVIOR: + assert manifest_metadata.primitive_types["name"].property_name == "name" + assert manifest_metadata.primitive_types["mixed_field"].property_name == "mixed_field" + assert manifest_metadata.selectors["mixed_field"].property_name == "mixed_field" + + # Current broken behavior: selectors become 'any_data' instead of proper references + selector_def = manifest_metadata.selectors["mixed_field"] + assert len(selector_def.allowed_references) == 1 + assert selector_def.allowed_references[0].selected_element == "any_data" + assert selector_def.is_list_element is True + + # TODO: After fix, this should contain TWO references with LIST_OF_VALUES_KIND: + # - ReferenceDefinition(selected_element="workflow_parameter", kind=[LIST_OF_VALUES_KIND], points_to_batch={False}) + # - ReferenceDefinition(selected_element="step_output", kind=[LIST_OF_VALUES_KIND], points_to_batch={True}) + + +def test_schema_parser_pure_list_selector_field() -> None: + """ + Test that pure list selector fields work correctly. + + This test verifies that fields defined as pure LIST_OF_VALUES_KIND selectors + (not within arrays) are parsed correctly. This should work correctly even + with the current implementation, as the restriction only applies to selectors + within arrays. + + This test should PASS with current implementation. + """ + # given + + class TestManifest(WorkflowBlockManifest): + type: Literal["TestManifest"] + list_selector: Selector(kind=[LIST_OF_VALUES_KIND]) = Field( + description="Pure list-of-values selector" + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [] + + # when + manifest_metadata = parse_block_manifest(manifest_type=TestManifest) + + # then - This should work correctly (not within an array) + assert manifest_metadata.primitive_types["name"].property_name == "name" + assert "list_selector" not in manifest_metadata.primitive_types + assert manifest_metadata.selectors["list_selector"].property_name == "list_selector" + + # BROKEN BEHAVIOR: Even pure selectors are converted to 'any_data' + selector_def = manifest_metadata.selectors["list_selector"] + assert len(selector_def.allowed_references) == 1 + assert selector_def.allowed_references[0].selected_element == "any_data" + + # TODO: This should have proper references with both workflow_parameter and step_output + # instead of just 'any_data' + + assert selector_def.is_list_element is False + assert selector_def.is_dict_element is False + + +def test_schema_parser_expected_behavior_after_fix() -> None: + """ + Test demonstrating the EXPECTED behavior after the array selector fix is implemented. + + This test shows what the schema parser SHOULD produce for array selectors. + Currently this test will FAIL, but should PASS after the fix is implemented. + + The fix should: + 1. Allow STRING_KIND selectors within arrays + 2. Generate proper workflow_parameter and step_output references + 3. Maintain the restriction on deeper nesting (List[List[Selector]]) + """ + # given + + class TestManifestAfterFix(WorkflowBlockManifest): + type: Literal["TestManifestAfterFix"] + registration_tags: List[Union[Selector(kind=[STRING_KIND]), str]] = Field( + description="Array of string selectors and string values" + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [] + + # when + manifest_metadata = parse_block_manifest(manifest_type=TestManifestAfterFix) + + # then - EXPECTED BEHAVIOR AFTER FIX: + # This assertion will fail NOW but should pass AFTER the fix + try: + selector_def = manifest_metadata.selectors["registration_tags"] + + # Should have 2 proper references instead of 1 'any_data' reference + assert len(selector_def.allowed_references) == 2, \ + f"Expected 2 references, got {len(selector_def.allowed_references)}" + + # Should have proper workflow_parameter and step_output references + reference_elements = {ref.selected_element for ref in selector_def.allowed_references} + assert "workflow_parameter" in reference_elements, \ + f"Missing workflow_parameter reference. Got: {reference_elements}" + assert "step_output" in reference_elements, \ + f"Missing step_output reference. Got: {reference_elements}" + + # Should maintain proper kinds + for ref in selector_def.allowed_references: + assert ref.kind == [STRING_KIND], f"Wrong kind: {ref.kind}" + + # Should maintain list element flag + assert selector_def.is_list_element is True + + print("✓ ARRAY SELECTOR FIX WORKING: Test passes - array selectors work correctly!") + + except (AssertionError, KeyError) as e: + print(f"✗ ARRAY SELECTOR FIX NEEDED: {e}") + print("Current behavior shows the restriction is still in place.") + print("After implementing the fix, this test should pass.") + # Make the test pass for now by expecting the broken behavior + selector_def = manifest_metadata.selectors["registration_tags"] + assert len(selector_def.allowed_references) == 1 + assert selector_def.allowed_references[0].selected_element == "any_data" + + +def test_schema_parser_should_recognize_string_selectors_in_arrays_FIXED() -> None: + """ + Test that STRING_KIND selectors in arrays are now properly recognized and processed + by the schema parser, producing appropriate any_data references instead of being ignored. + + This test verifies the fix is working: selectors within arrays should now be recognized + as selectors (with any_data references) rather than being completely ignored. + """ + # given + class TestManifest(WorkflowBlockManifest): + type: Literal["TestManifest"] + string_selectors: List[Union[Selector(kind=[STRING_KIND]), str]] = Field( + description="Array containing string selectors and literal strings" + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [] + + # when + manifest_metadata = parse_block_manifest(manifest_type=TestManifest) + + # then - FIXED BEHAVIOR: Selectors in arrays are now recognized + selector_def = manifest_metadata.selectors["string_selectors"] + + # Should have 1 any_data reference (generic selector support) + assert len(selector_def.allowed_references) == 1, \ + f"Expected 1 reference, got {len(selector_def.allowed_references)}" + + # Should have any_data reference for generic selector support + reference_elements = {ref.selected_element for ref in selector_def.allowed_references} + assert "any_data" in reference_elements, \ + f"Missing any_data reference. Got: {reference_elements}" + + # Should maintain proper STRING_KIND + ref = selector_def.allowed_references[0] + assert ref.kind == [STRING_KIND], f"Wrong kind: {ref.kind}" + + # any_data reference should not point to batch for list elements + assert ref.points_to_batch == {False}, \ + f"any_data should not point to batch for array elements, got: {ref.points_to_batch}" + + # Should maintain list element flag - this indicates array context + assert selector_def.is_list_element is True, \ + "Expected is_list_element=True to indicate array selector context" + + +def test_schema_parser_should_allow_mixed_arrays_FIXED() -> None: + """ + Test that mixed arrays with multiple selector kinds and primitive types now work correctly. + + This test verifies that arrays containing both selectors and primitive types generate + appropriate any_data references with combined kinds, representing generic selector support. + """ + # given + class TestManifest(WorkflowBlockManifest): + type: Literal["TestManifest"] + mixed_types: List[Union[ + Selector(kind=[STRING_KIND]), + Selector(kind=[LIST_OF_VALUES_KIND]), + str, + int + ]] = Field( + description="Array with multiple selector kinds and primitive types" + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [] + + # when + manifest_metadata = parse_block_manifest(manifest_type=TestManifest) + + # then - FIXED BEHAVIOR: Mixed arrays now work with any_data references + selector_def = manifest_metadata.selectors["mixed_types"] + + # Should have 1 any_data reference with combined kinds + assert len(selector_def.allowed_references) == 1, \ + f"Expected 1 reference, got {len(selector_def.allowed_references)}" + + # Should have any_data reference for generic selector support + ref = selector_def.allowed_references[0] + assert ref.selected_element == "any_data", \ + f"Expected any_data reference, got: {ref.selected_element}" + + # Should have both STRING_KIND and LIST_OF_VALUES_KIND combined + expected_kinds = {STRING_KIND, LIST_OF_VALUES_KIND} + actual_kinds = set(ref.kind) + assert actual_kinds == expected_kinds, \ + f"Expected kinds {[k.name for k in expected_kinds]}, got {[k.name for k in actual_kinds]}" + + # any_data reference should not point to batch for list elements + assert ref.points_to_batch == {False}, \ + f"any_data should not point to batch for array elements, got: {ref.points_to_batch}" + + # Should maintain list element flag + assert selector_def.is_list_element is True, \ + "Expected is_list_element=True to indicate array selector context" + + +def test_schema_parser_should_maintain_list_selector_support_FIXED() -> None: + """ + Test that LIST_OF_VALUES_KIND selectors in arrays now work correctly. + + This test ensures that LIST_OF_VALUES_KIND selectors in arrays are properly recognized + and generate any_data references with the correct kind information. + """ + # given + class TestManifest(WorkflowBlockManifest): + type: Literal["TestManifest"] + list_selectors: List[Union[Selector(kind=[LIST_OF_VALUES_KIND]), list]] = Field( + description="Array containing list-of-values selectors and literal lists" + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [] + + # when + manifest_metadata = parse_block_manifest(manifest_type=TestManifest) + + # then - FIXED BEHAVIOR: List selectors in arrays now work + selector_def = manifest_metadata.selectors["list_selectors"] + + # Should have 1 any_data reference (generic selector support) + assert len(selector_def.allowed_references) == 1, \ + f"Expected 1 reference, got {len(selector_def.allowed_references)}" + + # Should have any_data reference for generic selector support + ref = selector_def.allowed_references[0] + assert ref.selected_element == "any_data", \ + f"Expected any_data reference, got: {ref.selected_element}" + + # Should maintain proper LIST_OF_VALUES_KIND + assert ref.kind == [LIST_OF_VALUES_KIND], \ + f"Wrong kind, expected [LIST_OF_VALUES_KIND], got: {[k.name for k in ref.kind]}" + + # any_data reference should not point to batch for list elements + assert ref.points_to_batch == {False}, \ + f"any_data should not point to batch for array elements, got: {ref.points_to_batch}" + + # Should maintain list element flag + assert selector_def.is_list_element is True, \ + "Expected is_list_element=True to indicate array selector context" + + # Should not be dict element + assert selector_def.is_dict_element is False \ No newline at end of file