@@ -67,14 +67,16 @@ def __init__(self, model_data, image, role=None, predictor_cls=None, env=None, n
         self._model_name = None
         self._is_compiled_model = False

-    def prepare_container_def(self, instance_type):  # pylint: disable=unused-argument
+    def prepare_container_def(self, instance_type, accelerator_type=None):  # pylint: disable=unused-argument
         """Return a dict created by ``sagemaker.container_def()`` for deploying this model to a specified instance type.

         Subclasses can override this to provide custom container definitions for
         deployment to a specific instance type. Called by ``deploy()``.

         Args:
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and
+                making inferences to the model. For example, 'ml.eia1.medium'.

         Returns:
             dict: A container definition object usable with the CreateModel API.
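To illustrate how the widened hook can be used, here is a minimal sketch of a subclass override that consumes accelerator_type; the MyModel class, the image URIs, and the image-selection logic are illustrative assumptions, not part of this change:

import sagemaker
from sagemaker.model import Model

class MyModel(Model):
    """Hypothetical subclass that chooses a serving image based on the accelerator."""

    def prepare_container_def(self, instance_type, accelerator_type=None):
        # Assumed image URIs; a real subclass would resolve these for its own framework.
        if accelerator_type is not None:
            image = '123456789012.dkr.ecr.us-west-2.amazonaws.com/my-serving-image:latest-eia'
        else:
            image = '123456789012.dkr.ecr.us-west-2.amazonaws.com/my-serving-image:latest'
        return sagemaker.container_def(image, model_data_url=self.model_data, env=self.env)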
@@ -168,7 +170,7 @@ def compile(self, target_instance_family, input_shape, output_path, role,
         self._is_compiled_model = True
         return self

-    def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
+    def deploy(self, initial_instance_count, instance_type, accelerator_type=None, endpoint_name=None, tags=None):
         """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

         Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this ``Model``.
@@ -184,6 +186,10 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
             initial_instance_count (int): The initial number of instances to run in the
                 ``Endpoint`` created from this ``Model``.
+            accelerator_type (str): Type of Elastic Inference accelerator to deploy this model for model loading
+                and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator
+                will be attached to the endpoint.
+                For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
             endpoint_name (str): The name of the endpoint to create (default: None).
                 If not specified, a unique endpoint name will be created.
             tags(List[dict[str, str]]): The list of tags to attach to this specific endpoint.
@@ -199,14 +205,15 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags
                 self.sagemaker_session = session.Session()

         compiled_model_suffix = '-'.join(instance_type.split('.')[:-1])
-        container_def = self.prepare_container_def(instance_type)
+        container_def = self.prepare_container_def(instance_type, accelerator_type=accelerator_type)
         self.name = self.name or utils.name_from_image(container_def['Image'])
         if self.role is None:
             raise ValueError("Role can not be null for deploying a model")
         if self._is_compiled_model:
             self.name += compiled_model_suffix
         self.sagemaker_session.create_model(self.name, self.role, container_def, vpc_config=self.vpc_config)
-        production_variant = sagemaker.production_variant(self.name, instance_type, initial_instance_count)
+        production_variant = sagemaker.production_variant(self.name, instance_type, initial_instance_count,
+                                                          accelerator_type=accelerator_type)
         if endpoint_name:
             self.endpoint_name = endpoint_name
         else:
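For reference, a minimal usage sketch of the new deploy argument; the bucket, image URI, role ARN, and instance types below are placeholders rather than values taken from this change:

from sagemaker.model import Model

model = Model(model_data='s3://my-bucket/model.tar.gz',  # hypothetical model artifact
              image='123456789012.dkr.ecr.us-west-2.amazonaws.com/my-serving-image:latest',
              role='arn:aws:iam::123456789012:role/MySageMakerRole')

# Passing accelerator_type attaches an Elastic Inference accelerator to the endpoint;
# omitting it keeps the previous behavior (no accelerator).
model.deploy(initial_instance_count=1,
             instance_type='ml.m4.xlarge',
             accelerator_type='ml.eia1.medium')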
@@ -294,13 +301,15 @@ def __init__(self, model_data, image, role, entry_point, source_dir=None, predic
         self.bucket, self.key_prefix = None, None
         self.uploaded_code = None

-    def prepare_container_def(self, instance_type):  # pylint disable=unused-argument
+    def prepare_container_def(self, instance_type, accelerator_type=None):  # pylint disable=unused-argument
         """Return a container definition with framework configuration set in model environment variables.

         This also uploads user-supplied code to S3.

         Args:
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and
+                making inferences to the model. For example, 'ml.eia1.medium'.

         Returns:
             dict[str, str]: A container definition object usable with the CreateModel API.