Commit c3956e1

Merge branch 'main' into feature-load-image-from-url-workflow-block

2 parents ec4d635 + a8ba225
File tree: 89 files changed, +8492 −498 lines

docs/workflows/create_workflow_block.md

Lines changed: 18 additions & 8 deletions

````diff
@@ -1528,7 +1528,7 @@ the method signatures.
 In this example, the block visualises crops predictions and creates tiles
 presenting all crops predictions in single output image.
 
-```{ .py linenums="1" hl_lines="29-31 48-49 59-60"}
+```{ .py linenums="1" hl_lines="30-32 34-36 53-55 65-66"}
 from typing import List, Literal, Type, Union
 
 import supervision as sv
@@ -1556,10 +1556,15 @@ the method signatures.
     crops_predictions: Selector(
         kind=[OBJECT_DETECTION_PREDICTION_KIND]
    )
+    scalar_parameter: Union[float, Selector()]
 
    @classmethod
    def get_output_dimensionality_offset(cls) -> int:
        return -1
+
+    @classmethod
+    def get_parameters_enforcing_auto_batch_casting(cls) -> List[str]:
+        return ["crops", "crops_predictions"]
 
    @classmethod
    def describe_outputs(cls) -> List[OutputDefinition]:
@@ -1578,6 +1583,7 @@ the method signatures.
        self,
        crops: Batch[WorkflowImageData],
        crops_predictions: Batch[sv.Detections],
+        scalar_parameter: float,
    ) -> BlockResult:
        annotator = sv.BoxAnnotator()
        visualisations = []
@@ -1591,18 +1597,22 @@ the method signatures.
        return {"visualisations": tile}
 ```
 
-* in lines `29-31` manifest class declares output dimensionality
+* in lines `30-32` manifest class declares output dimensionality
 offset - value `-1` should be understood as decreasing dimensionality level by `1`
 
-* in lines `48-49` you can see the impact of output dimensionality decrease
-on the method signature. Both inputs are artificially wrapped in `Batch[]` container.
-This is done by Execution Engine automatically on output dimensionality decrease when
-all inputs have the same dimensionality to enable access to all elements occupying
-the last dimensionality level. Obviously, only elements related to the same element
+* in lines `34-36` manifest class declares `run(...)` method inputs that will be subject to auto-batch casting,
+ensuring that the signature is always stable. Auto-batch casting was introduced in Execution Engine `v1.6.0`
+- refer to [changelog](./execution_engine_changelog.md) for more details.
+
+* in lines `53-55` you can see the impact of output dimensionality decrease
+on the method signature. The first two inputs (declared in line `36`) are artificially wrapped in a `Batch[]`
+container, whereas `scalar_parameter` remains a primitive type. This is done by the Execution Engine automatically
+on output dimensionality decrease when all inputs have the same dimensionality, to enable access to
+all elements occupying the last dimensionality level. Only elements related to the same element
 from top-level batch will be grouped. For instance, if you had two input images that you
 cropped - crops from those two different images will be grouped separately.
 
-* lines `59-60` illustrate how output is constructed - single value is returned and that value
+* lines `65-66` illustrate how output is constructed - single value is returned and that value
 will be indexed by Execution Engine in output batch with reduced dimensionality
 
 === "different input dimensionalities"
````
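The grouping behaviour described in the bullets above can be sketched in plain Python (an illustrative model only, not the Execution Engine's internals): elements at the last dimensionality level are collected by their parent index, which is why crops originating from two different input images land in two separate `Batch[]` containers.

```python
# Sketch: group elements of the last dimensionality level by their parent index.
from collections import OrderedDict
from typing import Any, List, Tuple


def group_by_parent(indexed_elements: List[Tuple[Tuple[int, ...], Any]]) -> List[List[Any]]:
    """Group (index, element) pairs by every index dimension except the last one."""
    groups: "OrderedDict[Tuple[int, ...], List[Any]]" = OrderedDict()
    for index, element in indexed_elements:
        groups.setdefault(index[:-1], []).append(element)
    return list(groups.values())


# Two input images, each producing crops at dimensionality level 2:
crops = [
    ((0, 0), "img0-crop0"), ((0, 1), "img0-crop1"),
    ((1, 0), "img1-crop0"),
]
print(group_by_parent(crops))
# [['img0-crop0', 'img0-crop1'], ['img1-crop0']]
```

Each inner list corresponds to what the block's `run(...)` method receives as one `Batch[]` when output dimensionality is decreased.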

docs/workflows/execution_engine_changelog.md

Lines changed: 267 additions & 65 deletions (large diff not rendered)

docs/workflows/workflow_execution.md

Lines changed: 18 additions & 0 deletions

```diff
@@ -124,6 +124,14 @@ influencing the processing for all elements in the batch and this type of data w
 the reference images remain unchanged as you process each input. Thus, the reference images are considered
 *scalar* data, while the list of input images is *batch-oriented*.
 
+**Great news!**
+
+Since Execution Engine `v1.6.0`, the practical aspects of dealing with *scalars* and *batches* are offloaded to
+the Execution Engine (refer to [changelog](./execution_engine_changelog.md) for more details). As a block
+developer, it is still important to understand the difference, but when building blocks you are no longer forced
+to think about the nuances that much.
+
 To illustrate the distinction, Workflow definitions hold inputs of the two categories:
 
 - **Scalar inputs** - like `WorkflowParameter`
@@ -356,6 +364,16 @@ execution excludes steps at higher `dimensionality levels` from producing output
 output field selecting that values will be presented as nested list of empty lists, with depth matching
 `dimensionality level - 1` of referred output.
 
+Since Execution Engine `v1.6.0`, blocks within a workflow may collapse batches into scalars, as well as create new
+batches from scalar inputs. The first scenario is easy to understand - each dictionary in the output list will
+simply be populated with the same scalar value. The case of an *emergent* batch is slightly more complicated.
+In such a case we can find a batch at dimensionality level 1 whose shape or element order is not compliant
+with the input batches. To prevent semantic ambiguity, we treat such a batch as if its dimensionality were one level
+higher (as if **there were an additional batch-oriented input of size one attached to the input of the block creating
+the batch dynamically**). Such virtually nested outputs are broadcast, so that each dictionary in the output list is
+given a new key holding the same nested output. This nesting property is preserved even if there are no input-derived
+outputs for a given workflow - in that case, the output is a list of size 1 containing a dictionary with the nested output.
+
 Some outputs would require serialisation when Workflows Execution Engine runs behind HTTP API. We use the following
 serialisation strategies:
```
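The broadcasting rules in the added paragraph can be illustrated with a small sketch. All names here are illustrative assumptions, not Execution Engine API: a collapsed scalar is copied into every element's output dictionary, while an emergent batch is attached whole, as a nested output, under a new key in each dictionary.

```python
# Sketch of the broadcasting semantics described above (illustrative names).
from typing import Any, Dict, List


def broadcast_outputs(
    batch_outputs: List[Dict[str, Any]],
    collapsed_scalars: Dict[str, Any],
    emergent_batches: Dict[str, List[Any]],
) -> List[Dict[str, Any]]:
    # With no input-derived outputs, the result is a single-element list
    # that still carries the nested outputs.
    if not batch_outputs:
        batch_outputs = [{}]
    results = []
    for element in batch_outputs:
        merged = dict(element)
        merged.update(collapsed_scalars)  # same scalar value in every dictionary
        for name, batch in emergent_batches.items():
            merged[name] = list(batch)  # whole nested batch under a new key
        results.append(merged)
    return results


print(broadcast_outputs(
    batch_outputs=[{"prediction": "p0"}, {"prediction": "p1"}],
    collapsed_scalars={"summary": 0.9},
    emergent_batches={"generated": ["a", "b", "c"]},
))
```

Every output dictionary ends up with the same `summary` scalar and the same nested `generated` list, matching the "same nested output per dictionary" behaviour described above.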

docs/workflows/workflows_execution_engine.md

Lines changed: 13 additions & 0 deletions

```diff
@@ -86,6 +86,19 @@ batch-oriented input, it will be treated as a SIMD step.
 Non-SIMD steps, by contrast, are expected to deliver a single result for the input data. In the case of non-SIMD
 flow-control steps, they affect all downstream steps as a whole, rather than individually for each element in a batch.
 
+Historically, the Execution Engine could not handle all scenarios in which non-SIMD steps' outputs were fed into SIMD
+steps' inputs - causing compilation errors due to the lack of ability to automatically cast such outputs into batches
+when feeding them into SIMD steps. Starting with Execution Engine `v1.6.0`, the handling of SIMD and non-SIMD blocks
+has been improved through the introduction of **Auto Batch Casting**:
+
+* When a SIMD input is detected but receives scalar data, the Execution Engine automatically casts it into a batch.
+
+* The dimensionality of the batch is determined at compile time, using *lineage* information from other
+batch-oriented inputs when available. Missing dimensions are generated in a manner similar to `torch.unsqueeze(...)`.
+
+* Outputs are evaluated against the casting context - leaving them as scalars when the block keeps or decreases output
+dimensionality, or **creating new batches** when an increase of dimensionality is expected.
+
 
 ### Preparing step inputs
```
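The `torch.unsqueeze(...)`-like behaviour from the second bullet can be sketched in a few lines. This is assumed semantics for illustration only - the real engine derives the target dimensionality from lineage metadata at compile time:

```python
# Sketch: cast a scalar into nested size-one batches, one wrap per missing dimension.
from typing import Any


def auto_batch_cast(value: Any, target_dimensionality: int) -> Any:
    """Wrap a scalar in size-one batch dimensions until it reaches the target level."""
    for _ in range(target_dimensionality):
        value = [value]
    return value


print(auto_batch_cast(0.5, 1))  # [0.5]   - scalar fed into a level-1 SIMD input
print(auto_batch_cast(0.5, 2))  # [[0.5]] - two missing dimensions, two wraps
```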

inference/core/cache/model_artifacts.py

Lines changed: 40 additions & 9 deletions

```diff
@@ -1,4 +1,5 @@
 import errno
+import json
 import os.path
 import re
 import shutil
@@ -7,12 +8,16 @@
 
 from filelock import FileLock
 
-from inference.core.env import MODEL_CACHE_DIR
+from inference.core.env import ATOMIC_CACHE_WRITES_ENABLED, MODEL_CACHE_DIR
+from inference.core.exceptions import ModelArtefactError
 from inference.core.logger import logger
 from inference.core.utils.file_system import (
     dump_bytes,
+    dump_bytes_atomic,
     dump_json,
+    dump_json_atomic,
     dump_text_lines,
+    dump_text_lines_atomic,
     read_json,
     read_text_file,
 )
@@ -67,7 +72,10 @@ def load_json_from_cache(
     file: str, model_id: Optional[str] = None, **kwargs
 ) -> Optional[Union[dict, list]]:
     cached_file_path = get_cache_file_path(file=file, model_id=model_id)
-    return read_json(path=cached_file_path, **kwargs)
+    try:
+        return read_json(path=cached_file_path, **kwargs)
+    except json.JSONDecodeError as e:
+        raise ModelArtefactError(f"Error loading JSON from cache: {e}")
 
 
 def save_bytes_in_cache(
@@ -77,7 +85,14 @@ def save_bytes_in_cache(
     allow_override: bool = True,
 ) -> None:
     cached_file_path = get_cache_file_path(file=file, model_id=model_id)
-    dump_bytes(path=cached_file_path, content=content, allow_override=allow_override)
+    if ATOMIC_CACHE_WRITES_ENABLED:
+        dump_bytes_atomic(
+            path=cached_file_path, content=content, allow_override=allow_override
+        )
+    else:
+        dump_bytes(
+            path=cached_file_path, content=content, allow_override=allow_override
+        )
 
 
 def save_json_in_cache(
@@ -88,9 +103,20 @@ def save_json_in_cache(
     **kwargs,
 ) -> None:
     cached_file_path = get_cache_file_path(file=file, model_id=model_id)
-    dump_json(
-        path=cached_file_path, content=content, allow_override=allow_override, **kwargs
-    )
+    if ATOMIC_CACHE_WRITES_ENABLED:
+        dump_json_atomic(
+            path=cached_file_path,
+            content=content,
+            allow_override=allow_override,
+            **kwargs,
+        )
+    else:
+        dump_json(
+            path=cached_file_path,
+            content=content,
+            allow_override=allow_override,
+            **kwargs,
+        )
 
 
 def save_text_lines_in_cache(
@@ -100,9 +126,14 @@ def save_text_lines_in_cache(
     allow_override: bool = True,
 ) -> None:
     cached_file_path = get_cache_file_path(file=file, model_id=model_id)
-    dump_text_lines(
-        path=cached_file_path, content=content, allow_override=allow_override
-    )
+    if ATOMIC_CACHE_WRITES_ENABLED:
+        dump_text_lines_atomic(
+            path=cached_file_path, content=content, allow_override=allow_override
+        )
+    else:
+        dump_text_lines(
+            path=cached_file_path, content=content, allow_override=allow_override
+        )
 
 
 def get_cache_file_path(file: str, model_id: Optional[str] = None) -> str:
```

inference/core/env.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -55,6 +55,8 @@
 
 MD5_VERIFICATION_ENABLED = str2bool(os.getenv("MD5_VERIFICATION_ENABLED", False))
 
+ATOMIC_CACHE_WRITES_ENABLED = str2bool(os.getenv("ATOMIC_CACHE_WRITES_ENABLED", False))
+
 # Base URL for metrics collector
 METRICS_COLLECTOR_BASE_URL = os.getenv(
     "METRICS_COLLECTOR_BASE_URL",
```

inference/core/models/classification_base.py

Lines changed: 4 additions & 3 deletions

```diff
@@ -185,9 +185,10 @@ def postprocess(
         )
 
     def predict(self, img_in: np.ndarray, **kwargs) -> Tuple[np.ndarray]:
-        predictions = run_session_via_iobinding(
-            self.onnx_session, self.input_name, img_in
-        )
+        with self._session_lock:
+            predictions = run_session_via_iobinding(
+                self.onnx_session, self.input_name, img_in
+            )
         return (predictions,)
 
     def preprocess(
```
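The lock added above serialises calls into the shared ONNX session. A stand-alone sketch of the same guarded-call pattern (no real ONNX session involved - `run` here is a stand-in for `run_session_via_iobinding`, and the lock presumably protects shared io-binding state from concurrent reuse):

```python
# Sketch: serialise access to a shared inference session with a lock,
# mirroring the `with self._session_lock:` pattern in predict().
import threading


class GuardedSession:
    def __init__(self) -> None:
        self._session_lock = threading.Lock()
        self._in_flight = 0
        self.max_concurrency_observed = 0

    def run(self, x: int) -> int:
        with self._session_lock:  # only one caller inside at a time
            self._in_flight += 1
            self.max_concurrency_observed = max(
                self.max_concurrency_observed, self._in_flight
            )
            result = x * 2  # stand-in for the actual session call
            self._in_flight -= 1
            return result


session = GuardedSession()
threads = [threading.Thread(target=session.run, args=(i,)) for i in range(8)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(session.max_concurrency_observed)  # 1 - the lock prevents overlap
```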

inference/core/models/roboflow.py

Lines changed: 3 additions & 1 deletion

```diff
@@ -5,6 +5,7 @@
 from collections import OrderedDict
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
+from threading import Lock
 from time import perf_counter
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -748,9 +749,10 @@ def __init__(
             expanded_execution_providers.append(ep)
         self.onnxruntime_execution_providers = expanded_execution_providers
 
-        self.initialize_model()
         self.image_loader_threadpool = ThreadPoolExecutor(max_workers=None)
+        self._session_lock = Lock()
         try:
+            self.initialize_model()
             self.validate_model()
         except ModelArtefactError as e:
             logger.error(f"Unable to validate model artifacts, clearing cache: {e}")
```

inference/core/utils/file_system.py

Lines changed: 104 additions & 2 deletions

```diff
@@ -1,9 +1,70 @@
 import json
+import os
 import os.path
 import re
+import tempfile
 from typing import List, Optional, Union
 
 
+class AtomicPath:
+    """Context manager for atomic file writes.
+
+    Ensures that files are either written completely or not at all,
+    preventing partial/corrupted files from power failures or crashes.
+
+    Usage:
+        with AtomicPath(target_path, allow_override=False) as temp_path:
+            # Write to temp_path
+            with open(temp_path, 'w') as f:
+                f.write(data)
+        # File is atomically moved to target_path on successful exit
+    """
+
+    def __init__(self, target_path: str, allow_override: bool = False):
+        self.target_path = target_path
+        self.allow_override = allow_override
+        self.temp_path: Optional[str] = None
+        self.temp_file = None
+
+    def __enter__(self) -> str:
+        ensure_write_is_allowed(
+            path=self.target_path, allow_override=self.allow_override
+        )
+        ensure_parent_dir_exists(path=self.target_path)
+
+        dir_name = os.path.dirname(os.path.abspath(self.target_path))
+        base_name = os.path.basename(self.target_path)
+        self.temp_file = tempfile.NamedTemporaryFile(
+            dir=dir_name, prefix=".tmp_", suffix="_" + base_name, delete=False
+        )
+        self.temp_path = self.temp_file.name
+        self.temp_file.close()
+        return self.temp_path
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type is None:
+            try:
+                if os.name == "nt":  # Windows
+                    if os.path.exists(self.target_path):
+                        os.remove(self.target_path)
+                    os.rename(self.temp_path, self.target_path)
+                else:  # POSIX
+                    os.replace(self.temp_path, self.target_path)
+            except Exception:
+                try:
+                    os.unlink(self.temp_path)
+                except OSError:
+                    pass
+                raise
+        else:
+            # Error occurred - clean up temp file
+            try:
+                os.unlink(self.temp_path)
+            except OSError:
+                pass
+        return False  # Don't suppress exceptions
+
+
 def read_text_file(
     path: str,
     split_lines: bool = False,
@@ -28,31 +89,72 @@ def read_json(path: str, **kwargs) -> Optional[Union[dict, list]]:
 
 
 def dump_json(
-    path: str, content: Union[dict, list], allow_override: bool = False, **kwargs
+    path: str,
+    content: Union[dict, list],
+    allow_override: bool = False,
+    fsync: bool = False,
+    **kwargs,
 ) -> None:
     ensure_write_is_allowed(path=path, allow_override=allow_override)
     ensure_parent_dir_exists(path=path)
     with open(path, "w") as f:
         json.dump(content, fp=f, **kwargs)
+        if fsync:
+            os.fsync(f.fileno())
+
+
+def dump_json_atomic(
+    path: str, content: Union[dict, list], allow_override: bool = False, **kwargs
+) -> None:
+    with AtomicPath(path, allow_override=allow_override) as temp_path:
+        dump_json(temp_path, content, allow_override=True, fsync=True, **kwargs)
 
 
 def dump_text_lines(
     path: str,
     content: List[str],
     allow_override: bool = False,
     lines_connector: str = "\n",
+    fsync: bool = False,
 ) -> None:
     ensure_write_is_allowed(path=path, allow_override=allow_override)
     ensure_parent_dir_exists(path=path)
     with open(path, "w") as f:
         f.write(lines_connector.join(content))
+        if fsync:
+            os.fsync(f.fileno())
 
 
-def dump_bytes(path: str, content: bytes, allow_override: bool = False) -> None:
+def dump_text_lines_atomic(
+    path: str,
+    content: List[str],
+    allow_override: bool = False,
+    lines_connector: str = "\n",
+) -> None:
+    with AtomicPath(path, allow_override=allow_override) as temp_path:
+        dump_text_lines(
+            temp_path,
+            content,
+            allow_override=True,
+            lines_connector=lines_connector,
+            fsync=True,
+        )
+
+
+def dump_bytes(
+    path: str, content: bytes, allow_override: bool = False, fsync: bool = False
+) -> None:
     ensure_write_is_allowed(path=path, allow_override=allow_override)
     ensure_parent_dir_exists(path=path)
     with open(path, "wb") as f:
         f.write(content)
+        if fsync:
+            os.fsync(f.fileno())
+
+
+def dump_bytes_atomic(path: str, content: bytes, allow_override: bool = False) -> None:
+    with AtomicPath(path, allow_override=allow_override) as temp_path:
+        dump_bytes(temp_path, content, allow_override=True, fsync=True)
 
 
 def ensure_parent_dir_exists(path: str) -> None:
```
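The write-to-temp-then-rename pattern that `AtomicPath` implements can be exercised end to end with a small self-contained variant (`atomic_write_text` is an illustrative helper, not part of the module): `os.replace()` swaps the file in as a single operation on POSIX, and overwrites the destination on Windows as well, so a reader never observes a partially written file.

```python
# Self-contained sketch of the atomic-write pattern used by AtomicPath.
import os
import tempfile


def atomic_write_text(target_path: str, data: str) -> None:
    dir_name = os.path.dirname(os.path.abspath(target_path))
    # Temp file in the same directory, so the final rename stays on one filesystem.
    fd, temp_path = tempfile.mkstemp(dir=dir_name, prefix=".tmp_")
    try:
        with os.fdopen(fd, "w") as f:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())  # push to disk before the rename
        os.replace(temp_path, target_path)  # atomic swap-in
    except Exception:
        try:
            os.unlink(temp_path)  # leave no stray temp file behind
        except OSError:
            pass
        raise


with tempfile.TemporaryDirectory() as d:
    path = os.path.join(d, "config.json")
    atomic_write_text(path, '{"ok": true}')
    print(open(path).read())  # {"ok": true}
```

Keeping the temp file in the target's own directory matters: `os.replace()` is only atomic within a single filesystem, which is why `AtomicPath` does the same.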
