|
20 | 20 | import os |
21 | 21 | import re |
22 | 22 | import copy |
23 | | -from typing import List, Dict, Optional, Union |
| 23 | +from typing import List, Dict, Optional, Union, Any |
24 | 24 |
|
25 | 25 | import sagemaker |
26 | 26 | from sagemaker import ( |
|
66 | 66 | resolve_nested_dict_value_from_config, |
67 | 67 | format_tags, |
68 | 68 | Tags, |
| 69 | + _resolve_routing_config, |
69 | 70 | ) |
70 | 71 | from sagemaker.async_inference import AsyncInferenceConfig |
71 | 72 | from sagemaker.predictor_async import AsyncPredictor |
@@ -1309,6 +1310,7 @@ def deploy( |
1309 | 1310 | resources: Optional[ResourceRequirements] = None, |
1310 | 1311 | endpoint_type: EndpointType = EndpointType.MODEL_BASED, |
1311 | 1312 | managed_instance_scaling: Optional[str] = None, |
| 1313 | + routing_config: Optional[Dict[str, Any]] = None, |
1312 | 1314 | **kwargs, |
1313 | 1315 | ): |
1314 | 1316 | """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``. |
@@ -1406,6 +1408,15 @@ def deploy( |
1406 | 1408 | Endpoint. (Default: None). |
1407 | 1409 | endpoint_type (Optional[EndpointType]): The type of an endpoint used to deploy models. |
1408 | 1410 | (Default: EndpointType.MODEL_BASED). |
| 1411 | + routing_config (Optional[Dict[str, Any]]): Settings that control how the endpoint routes incoming |
| 1412 | + traffic to the instances that the endpoint hosts. |
| 1413 | + Currently supports the dictionary key ``RoutingStrategy``. |
| 1414 | +
|
| 1415 | + .. code:: python |
| 1416 | +
|
| 1417 | + { |
| 1418 | + "RoutingStrategy": sagemaker.enums.RoutingStrategy.RANDOM |
| 1419 | + } |
1409 | 1420 | Raises: |
1410 | 1421 | ValueError: If arguments combination check failed in these circumstances: |
1411 | 1422 | - If no role is specified or |
@@ -1458,6 +1469,8 @@ def deploy( |
1458 | 1469 | if self.role is None: |
1459 | 1470 | raise ValueError("Role can not be null for deploying a model") |
1460 | 1471 |
|
| 1472 | + routing_config = _resolve_routing_config(routing_config) |
| 1473 | + |
1461 | 1474 | if ( |
1462 | 1475 | inference_recommendation_id is not None |
1463 | 1476 | or self.inference_recommender_job_results is not None |
@@ -1543,6 +1556,7 @@ def deploy( |
1543 | 1556 | model_data_download_timeout=model_data_download_timeout, |
1544 | 1557 | container_startup_health_check_timeout=container_startup_health_check_timeout, |
1545 | 1558 | managed_instance_scaling=managed_instance_scaling_config, |
| 1559 | + routing_config=routing_config, |
1546 | 1560 | ) |
1547 | 1561 |
|
1548 | 1562 | self.sagemaker_session.endpoint_from_production_variants( |
@@ -1625,6 +1639,7 @@ def deploy( |
1625 | 1639 | volume_size=volume_size, |
1626 | 1640 | model_data_download_timeout=model_data_download_timeout, |
1627 | 1641 | container_startup_health_check_timeout=container_startup_health_check_timeout, |
| 1642 | + routing_config=routing_config, |
1628 | 1643 | ) |
1629 | 1644 | if endpoint_name: |
1630 | 1645 | self.endpoint_name = endpoint_name |
|
0 commit comments