diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py
index 7ff43093b1..27f4a07c5f 100644
--- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py
+++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py
@@ -207,7 +207,8 @@ def run(
         disable_active_learning: Optional[bool],
         active_learning_target_dataset: Optional[str],
     ) -> BlockResult:
-        if self._step_execution_mode is StepExecutionMode.LOCAL:
+        step_execution_mode = self._step_execution_mode
+        if step_execution_mode is StepExecutionMode.LOCAL:
             return self.run_locally(
                 images=images,
                 model_id=model_id,
@@ -222,7 +223,7 @@ def run(
                 disable_active_learning=disable_active_learning,
                 active_learning_target_dataset=active_learning_target_dataset,
             )
-        elif self._step_execution_mode is StepExecutionMode.REMOTE:
+        elif step_execution_mode is StepExecutionMode.REMOTE:
             return self.run_remotely(
                 images=images,
                 model_id=model_id,
@@ -238,9 +239,7 @@ def run(
                 active_learning_target_dataset=active_learning_target_dataset,
             )
         else:
-            raise ValueError(
-                f"Unknown step execution mode: {self._step_execution_mode}"
-            )
+            raise ValueError(f"Unknown step execution mode: {step_execution_mode}")
 
     def run_locally(
         self,
@@ -257,6 +256,7 @@ def run_locally(
         disable_active_learning: Optional[bool],
         active_learning_target_dataset: Optional[str],
     ) -> BlockResult:
+        # Convert workflow images to the payload format expected by the inference request
         inference_images = [i.to_inference_format(numpy_preferred=True) for i in images]
         request = InstanceSegmentationInferenceRequest(
             api_key=self._api_key,
@@ -274,21 +274,29 @@ def run_locally(
             tradeoff_factor=tradeoff_factor,
             source="workflow-execution",
         )
-        self._model_manager.add_model(
-            model_id=model_id,
-            api_key=self._api_key,
-        )
+
+        # Avoid redundant model load if already present (ModelManager supports __contains__)
+        if model_id not in self._model_manager:
+            self._model_manager.add_model(
+                model_id=model_id,
+                api_key=self._api_key,
+            )
         predictions = self._model_manager.infer_from_request_sync(
             model_id=model_id, request=request
         )
-        if not isinstance(predictions, list):
-            predictions = [predictions]
-        predictions = [
-            e.model_dump(by_alias=True, exclude_none=True) for e in predictions
+        # Normalize the response to a list; a single prediction may be returned
+        if isinstance(predictions, list):
+            prediction_list = predictions
+        else:
+            prediction_list = [predictions]
+
+        # Serialize each prediction to a plain dict for post-processing
+        prediction_list = [
+            e.model_dump(by_alias=True, exclude_none=True) for e in prediction_list
         ]
         return self._post_process_result(
             images=images,
-            predictions=predictions,
+            predictions=prediction_list,
             class_filter=class_filter,
         )
 
@@ -318,6 +326,8 @@ def run_remotely(
         )
         if WORKFLOWS_REMOTE_API_TARGET == "hosted":
             client.select_api_v0()
+
+        # Build the client configuration once for the whole batch
         client_config = InferenceConfiguration(
             disable_active_learning=disable_active_learning,
             active_learning_target_dataset=active_learning_target_dataset,
@@ -334,16 +344,21 @@ def run_remotely(
             source="workflow-execution",
         )
         client.configure(inference_configuration=client_config)
+
+        # Collect base64 payloads for the remote inference call
         inference_images = [i.base64_image for i in images]
+
         predictions = client.infer(
             inference_input=inference_images,
             model_id=model_id,
         )
-        if not isinstance(predictions, list):
-            predictions = [predictions]
+        if isinstance(predictions, list):
+            prediction_list = predictions
+        else:
+            prediction_list = [predictions]
         return self._post_process_result(
             images=images,
-            predictions=predictions,
+            predictions=prediction_list,
             class_filter=class_filter,
         )
 
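Note: after this change, run_locally and run_remotely repeat the same isinstance normalization. A minimal sketch of a shared helper that could replace both branches in a follow-up (ensure_list is a hypothetical name, not part of this diff):

    from typing import List, TypeVar, Union

    T = TypeVar("T")

    def ensure_list(value: Union[T, List[T]]) -> List[T]:
        # Pass lists through unchanged; wrap a single prediction in a list.
        if isinstance(value, list):
            return value
        return [value]

    # Usage in either method:
    # prediction_list = ensure_list(predictions)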