|
13 | 13 | from __future__ import absolute_import |
14 | 14 |
|
15 | 15 | import os |
| 16 | +import time |
16 | 17 |
|
17 | 18 | import pytest |
18 | 19 |
|
| 20 | +from botocore.exceptions import ClientError |
19 | 21 | from sagemaker import image_uris |
20 | 22 | from sagemaker.model import Model |
21 | 23 | from sagemaker.sklearn.model import SKLearnModel, SKLearnPredictor |
|
40 | 42 | IR_SKLEARN_FRAMEWORK_VERSION = "1.0-1" |
41 | 43 |
|
42 | 44 |
|
def retry_and_back_off(right_size_fn, *args, **kwargs):
    """Call ``right_size_fn`` and retry with a growing delay when it is throttled.

    Args:
        right_size_fn: The callable to invoke. A non-callable value is
            returned unchanged, which keeps call sites that pass an
            already-evaluated result working; such call sites get no retry
            protection because the call has finished before this function
            runs. Pass a callable (e.g. a ``lambda`` or
            ``functools.partial``) to have throttling retried.
        *args: Positional arguments forwarded to ``right_size_fn``.
        **kwargs: Keyword arguments forwarded to ``right_size_fn``.

    Returns:
        Whatever ``right_size_fn`` returns, or ``right_size_fn`` itself when
        it is not callable.

    Raises:
        ClientError: Immediately when the error code is not
            ``"ThrottlingException"``, or after the last attempt when the
            call is still being throttled.
    """
    if not callable(right_size_fn):
        # Nothing left to retry: the value was computed by the caller.
        return right_size_fn

    tot_retries = 3
    for attempt in range(1, tot_retries + 1):
        try:
            return right_size_fn(*args, **kwargs)
        except ClientError as e:
            error_code = e.response.get("Error", {}).get("Code")
            # Only throttling is worth retrying; re-raise anything else, and
            # re-raise the final throttling error instead of returning None.
            if error_code != "ThrottlingException" or attempt == tot_retries:
                raise
            # Wait longer after each throttled attempt (5s, then 10s).
            time.sleep(5 * attempt)
| 55 | + |
| 56 | + |
43 | 57 | @pytest.fixture(scope="module") |
44 | 58 | def default_right_sized_model(sagemaker_session, cpu_instance_type): |
45 | 59 | with timeout(minutes=45): |
@@ -68,13 +82,15 @@ def default_right_sized_model(sagemaker_session, cpu_instance_type): |
68 | 82 | ) |
69 | 83 |
|
70 | 84 | return ( |
71 | | - sklearn_model_package.right_size( |
72 | | - job_name=ir_job_name, |
73 | | - sample_payload_url=payload_data, |
74 | | - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
75 | | - supported_instance_types=[cpu_instance_type], |
76 | | - framework=IR_SKLEARN_FRAMEWORK, |
77 | | - log_level="Quiet", |
| 85 | + retry_and_back_off( |
| 86 | + sklearn_model_package.right_size( |
| 87 | + job_name=ir_job_name, |
| 88 | + sample_payload_url=payload_data, |
| 89 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 90 | + supported_instance_types=[cpu_instance_type], |
| 91 | + framework=IR_SKLEARN_FRAMEWORK, |
| 92 | + log_level="Quiet", |
| 93 | + ) |
78 | 94 | ), |
79 | 95 | model_package_group_name, |
80 | 96 | ir_job_name, |
@@ -133,17 +149,19 @@ def advanced_right_sized_model(sagemaker_session, cpu_instance_type): |
133 | 149 | ] |
134 | 150 |
|
135 | 151 | return ( |
136 | | - sklearn_model_package.right_size( |
137 | | - sample_payload_url=payload_data, |
138 | | - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
139 | | - framework=IR_SKLEARN_FRAMEWORK, |
140 | | - job_duration_in_seconds=3600, |
141 | | - hyperparameter_ranges=hyperparameter_ranges, |
142 | | - phases=phases, |
143 | | - model_latency_thresholds=model_latency_thresholds, |
144 | | - max_invocations=100, |
145 | | - max_tests=5, |
146 | | - max_parallel_tests=5, |
| 152 | + retry_and_back_off( |
| 153 | + sklearn_model_package.right_size( |
| 154 | + sample_payload_url=payload_data, |
| 155 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 156 | + framework=IR_SKLEARN_FRAMEWORK, |
| 157 | + job_duration_in_seconds=3600, |
| 158 | + hyperparameter_ranges=hyperparameter_ranges, |
| 159 | + phases=phases, |
| 160 | + model_latency_thresholds=model_latency_thresholds, |
| 161 | + max_invocations=100, |
| 162 | + max_tests=5, |
| 163 | + max_parallel_tests=5, |
| 164 | + ) |
147 | 165 | ), |
148 | 166 | model_package_group_name, |
149 | 167 | ) |
@@ -175,13 +193,15 @@ def default_right_sized_unregistered_model(sagemaker_session, cpu_instance_type) |
175 | 193 | ) |
176 | 194 |
|
177 | 195 | return ( |
178 | | - sklearn_model.right_size( |
179 | | - job_name=ir_job_name, |
180 | | - sample_payload_url=payload_data, |
181 | | - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
182 | | - supported_instance_types=[cpu_instance_type], |
183 | | - framework=IR_SKLEARN_FRAMEWORK, |
184 | | - log_level="Quiet", |
| 196 | + retry_and_back_off( |
| 197 | + sklearn_model.right_size( |
| 198 | + job_name=ir_job_name, |
| 199 | + sample_payload_url=payload_data, |
| 200 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 201 | + supported_instance_types=[cpu_instance_type], |
| 202 | + framework=IR_SKLEARN_FRAMEWORK, |
| 203 | + log_level="Quiet", |
| 204 | + ) |
185 | 205 | ), |
186 | 206 | ir_job_name, |
187 | 207 | ) |
@@ -224,18 +244,20 @@ def advanced_right_sized_unregistered_model(sagemaker_session, cpu_instance_type |
224 | 244 | ModelLatencyThreshold(percentile="P95", value_in_milliseconds=100) |
225 | 245 | ] |
226 | 246 |
|
227 | | - return sklearn_model.right_size( |
228 | | - sample_payload_url=payload_data, |
229 | | - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
230 | | - framework=IR_SKLEARN_FRAMEWORK, |
231 | | - job_duration_in_seconds=3600, |
232 | | - hyperparameter_ranges=hyperparameter_ranges, |
233 | | - phases=phases, |
234 | | - model_latency_thresholds=model_latency_thresholds, |
235 | | - max_invocations=100, |
236 | | - max_tests=5, |
237 | | - max_parallel_tests=5, |
238 | | - log_level="Quiet", |
| 247 | + return retry_and_back_off( |
| 248 | + sklearn_model.right_size( |
| 249 | + sample_payload_url=payload_data, |
| 250 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 251 | + framework=IR_SKLEARN_FRAMEWORK, |
| 252 | + job_duration_in_seconds=3600, |
| 253 | + hyperparameter_ranges=hyperparameter_ranges, |
| 254 | + phases=phases, |
| 255 | + model_latency_thresholds=model_latency_thresholds, |
| 256 | + max_invocations=100, |
| 257 | + max_tests=5, |
| 258 | + max_parallel_tests=5, |
| 259 | + log_level="Quiet", |
| 260 | + ) |
239 | 261 | ) |
240 | 262 |
|
241 | 263 | except Exception: |
@@ -265,13 +287,15 @@ def default_right_sized_unregistered_base_model(sagemaker_session, cpu_instance_ |
265 | 287 | ) |
266 | 288 |
|
267 | 289 | return ( |
268 | | - model.right_size( |
269 | | - job_name=ir_job_name, |
270 | | - sample_payload_url=payload_data, |
271 | | - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
272 | | - supported_instance_types=[cpu_instance_type], |
273 | | - framework=IR_SKLEARN_FRAMEWORK, |
274 | | - log_level="Quiet", |
| 290 | + retry_and_back_off( |
| 291 | + model.right_size( |
| 292 | + job_name=ir_job_name, |
| 293 | + sample_payload_url=payload_data, |
| 294 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 295 | + supported_instance_types=[cpu_instance_type], |
| 296 | + framework=IR_SKLEARN_FRAMEWORK, |
| 297 | + log_level="Quiet", |
| 298 | + ) |
275 | 299 | ), |
276 | 300 | ir_job_name, |
277 | 301 | ) |
|
0 commit comments