@@ -67,14 +67,16 @@ def __init__(self, model_data, image, role=None, predictor_cls=None, env=None, n
         self._model_name = None
         self._is_compiled_model = False

-    def prepare_container_def(self, instance_type):  # pylint: disable=unused-argument
+    def prepare_container_def(self, instance_type, accelerator_type=None):  # pylint: disable=unused-argument
         """Return a dict created by ``sagemaker.container_def()`` for deploying this model to a specified instance type.

         Subclasses can override this to provide custom container definitions for
         deployment to a specific instance type. Called by ``deploy()``.

         Args:
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and
+                making inferences to the model. For example, 'ml.eia1.medium'.

         Returns:
             dict: A container definition object usable with the CreateModel API.
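To illustrate how the widened hook can be used, here is a minimal sketch of a subclass override that consumes accelerator_type; the MyModel class, the image URIs, and the image-selection logic are illustrative assumptions, not part of this change:

import sagemaker
from sagemaker.model import Model

class MyModel(Model):
    """Hypothetical subclass that chooses a serving image based on the accelerator."""

    def prepare_container_def(self, instance_type, accelerator_type=None):
        # Assumed image URIs; a real subclass would resolve these for its own framework.
        if accelerator_type is not None:
            image = '123456789012.dkr.ecr.us-west-2.amazonaws.com/my-serving-image:latest-eia'
        else:
            image = '123456789012.dkr.ecr.us-west-2.amazonaws.com/my-serving-image:latest'
        return sagemaker.container_def(image, model_data_url=self.model_data, env=self.env)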
@@ -168,7 +170,7 @@ def compile(self, target_instance_family, input_shape, output_path, role,
         self._is_compiled_model = True
         return self

-    def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
+    def deploy(self, initial_instance_count, instance_type, accelerator_type=None, endpoint_name=None, tags=None):
         """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

         Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this ``Model``.
@@ -184,6 +186,10 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
             initial_instance_count (int): The initial number of instances to run in the
                 ``Endpoint`` created from this ``Model``.
+            accelerator_type (str): Type of Elastic Inference accelerator to deploy this model for model loading
+                and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator
+                will be attached to the endpoint.
+                For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
             endpoint_name (str): The name of the endpoint to create (default: None).
                 If not specified, a unique endpoint name will be created.
             tags(List[dict[str, str]]): The list of tags to attach to this specific endpoint.
@@ -199,14 +205,15 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags
                 self.sagemaker_session = session.Session()

         compiled_model_suffix = '-'.join(instance_type.split('.')[:-1])
-        container_def = self.prepare_container_def(instance_type)
+        container_def = self.prepare_container_def(instance_type, accelerator_type=accelerator_type)
         self.name = self.name or utils.name_from_image(container_def['Image'])
         if self.role is None:
             raise ValueError("Role can not be null for deploying a model")
         if self._is_compiled_model:
             self.name += compiled_model_suffix
         self.sagemaker_session.create_model(self.name, self.role, container_def, vpc_config=self.vpc_config)
-        production_variant = sagemaker.production_variant(self.name, instance_type, initial_instance_count)
+        production_variant = sagemaker.production_variant(self.name, instance_type, initial_instance_count,
+                                                          accelerator_type=accelerator_type)
         if endpoint_name:
             self.endpoint_name = endpoint_name
         else:
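For reference, a minimal usage sketch of the new deploy argument; the bucket, image URI, role ARN, and instance types below are placeholders rather than values taken from this change:

from sagemaker.model import Model

model = Model(model_data='s3://my-bucket/model.tar.gz',  # hypothetical model artifact
              image='123456789012.dkr.ecr.us-west-2.amazonaws.com/my-serving-image:latest',
              role='arn:aws:iam::123456789012:role/MySageMakerRole')

# Passing accelerator_type attaches an Elastic Inference accelerator to the endpoint;
# omitting it keeps the previous behavior (no accelerator).
model.deploy(initial_instance_count=1,
             instance_type='ml.m4.xlarge',
             accelerator_type='ml.eia1.medium')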
@@ -294,13 +301,15 @@ def __init__(self, model_data, image, role, entry_point, source_dir=None, predic
         self.bucket, self.key_prefix = None, None
         self.uploaded_code = None

-    def prepare_container_def(self, instance_type):  # pylint disable=unused-argument
+    def prepare_container_def(self, instance_type, accelerator_type=None):  # pylint disable=unused-argument
         """Return a container definition with framework configuration set in model environment variables.

         This also uploads user-supplied code to S3.

         Args:
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and
+                making inferences to the model. For example, 'ml.eia1.medium'.

         Returns:
             dict[str, str]: A container definition object usable with the CreateModel API.