16 | 16 | from unittest import mock |
17 | 17 |
18 | 18 | import pytest |
| 19 | + |
19 | 20 | import tests.integ |
20 | 21 |
21 | 22 | from sagemaker.jumpstart.model import JumpStartModel |
30 | 31 | get_tabular_data, |
31 | 32 | ) |
32 | 33 |
33 | | -from sagemaker.enums import EndpointType |
| 34 | +INF2_SUPPORTED_REGIONS = { |
| 35 | + "us-west-2", |
| 36 | + "us-east-1", |
| 37 | + "us-east-2", |
| 38 | +} |
34 | 39 |
35 | 40 | MAX_INIT_TIME_SECONDS = 5 |
36 | 41 |
37 | | -GATED_INFERENCE_MODEL_SUPPORTED_REGIONS = { |
| 42 | +GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS = { |
38 | 43 | "us-west-2", |
39 | 44 | "us-east-1", |
40 | 45 | "eu-west-1", |
@@ -67,89 +72,108 @@ def test_non_prepacked_jumpstart_model(setup): |
67 | 72 | assert response is not None |
68 | 73 |
69 | 74 |
| 75 | +def test_prepacked_jumpstart_model(setup): |
| 76 | + |
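| | + # "prepacked" models bundle the inference script with the model artifact, so deploy() |
| | + # needs no separate inference script (our reading of the JumpStart naming) |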
| 77 | + model_id = "huggingface-txt2img-conflictx-complex-lineart" |
| 78 | + |
| 79 | + model = JumpStartModel( |
| 80 | + model_id=model_id, |
| 81 | + role=get_sm_session().get_caller_identity_arn(), |
| 82 | + sagemaker_session=get_sm_session(), |
| 83 | + ) |
| 84 | + |
| 85 | + # uses ml.p3.2xlarge instance |
| 86 | + predictor = model.deploy( |
| 87 | + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], |
| 88 | + ) |
| 89 | + |
| 90 | + response = predictor.predict("hello world!") |
| 91 | + |
| 92 | + assert response is not None |
| 93 | + |
| 94 | + |
70 | 95 | @pytest.mark.skipif( |
71 | | - tests.integ.test_region() not in tests.integ.INFERENCE_COMPONENT_SUPPORTED_REGIONS, |
72 | | - reason="inference component based endpoint is not supported in certain regions", |
| 96 | + tests.integ.test_region() not in GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS, |
| 97 | + reason=f"JumpStart model-package inference models unavailable in {tests.integ.test_region()}.", |
73 | 98 | ) |
74 | | -def test_non_prepacked_jumpstart_model_deployed_on_inference_component_based_endpoint(setup): |
| 99 | +def test_model_package_arn_jumpstart_model(setup): |
75 | 100 |
76 | | - model_id = "huggingface-llm-falcon-7b-instruct-bf16" # default g5.2xlarge |
| 101 | + model_id = "meta-textgeneration-llama-2-7b" |
77 | 102 |
78 | 103 | model = JumpStartModel( |
79 | 104 | model_id=model_id, |
| 105 | + model_version="2.*", # version <3.0.0 uses model packages |
80 | 106 | role=get_sm_session().get_caller_identity_arn(), |
81 | 107 | sagemaker_session=get_sm_session(), |
82 | 108 | ) |
83 | 109 |
84 | | - predictor = model.deploy(endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED) |
85 | | - |
86 | | - inference_input = { |
87 | | - "inputs": "Girafatron is obsessed with giraffes, the most glorious animal on the " |
88 | | - + "face of this Earth. Giraftron believes all other animals are irrelevant when compared " |
89 | | - + "to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:", |
90 | | - "parameters": { |
91 | | - "max_new_tokens": 50, |
92 | | - "top_k": 10, |
93 | | - "return_full_text": False, |
94 | | - "do_sample": True, |
95 | | - }, |
96 | | - } |
| 110 | + # uses ml.g5.2xlarge instance |
| 111 | + predictor = model.deploy( |
| 112 | + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], |
| 113 | + ) |
97 | 114 |
98 | | - response = predictor.predict(inference_input) |
99 | | - assert response is not None |
| 115 | + payload = { |
| 116 | + "inputs": "some-payload", |
| 117 | + "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6}, |
| 118 | + } |
100 | 119 |
101 | | - # Delete predictor |
102 | | - predictor.delete_predictor(wait=True) |
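| | + # model-package-backed gated models accept the EULA per request via custom_attributes |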
| 120 | + response = predictor.predict(payload, custom_attributes="accept_eula=true") |
103 | 121 |
104 | | - # Delete endpoint |
105 | | - predictor.delete_endpoint() |
| 122 | + assert response is not None |
106 | 123 |
107 | 124 |
108 | | -def test_prepacked_jumpstart_model(setup): |
| 125 | +@pytest.mark.skipif( |
| 126 | + tests.integ.test_region() not in INF2_SUPPORTED_REGIONS, |
| 127 | + reason=f"INF2 instances unavailable in {tests.integ.test_region()}.", |
| 128 | +) |
| 129 | +def test_jumpstart_gated_model_neuron(setup): |
109 | 130 |
110 | | - model_id = "huggingface-txt2img-conflictx-complex-lineart" |
| 131 | + model_id = "meta-textgenerationneuron-llama-2-7b" |
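| | + # the "neuron" variant is compiled for AWS Inferentia2, hence the INF2 region guard above |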
111 | 132 |
112 | 133 | model = JumpStartModel( |
113 | 134 | model_id=model_id, |
114 | 135 | role=get_sm_session().get_caller_identity_arn(), |
115 | 136 | sagemaker_session=get_sm_session(), |
116 | 137 | ) |
117 | 138 |
118 | | - # uses ml.p3.2xlarge instance |
| 139 | + # uses ml.inf2.xlarge instance |
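| | + # gated model artifacts require explicit EULA acceptance at deploy time |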
119 | 140 | predictor = model.deploy( |
120 | 141 | tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], |
| 142 | + accept_eula=True, |
121 | 143 | ) |
122 | 144 |
123 | | - response = predictor.predict("hello world!") |
| 145 | + payload = { |
| 146 | + "inputs": "some-payload", |
| 147 | + } |
| 148 | + |
| 149 | + response = predictor.predict(payload) |
124 | 150 |
125 | 151 | assert response is not None |
126 | 152 |
127 | 153 |
128 | | -@pytest.mark.skipif( |
129 | | - tests.integ.test_region() not in GATED_INFERENCE_MODEL_SUPPORTED_REGIONS, |
130 | | - reason=f"JumpStart gated inference models unavailable in {tests.integ.test_region()}.", |
131 | | -) |
132 | | -def test_model_package_arn_jumpstart_model(setup): |
| 154 | +def test_jumpstart_gated_model(setup): |
133 | 155 |
134 | 156 | model_id = "meta-textgeneration-llama-2-7b" |
135 | 157 |
136 | 158 | model = JumpStartModel( |
137 | 159 | model_id=model_id, |
| 160 | + model_version="3.*", # version >=3.0.0 stores artifacts in jumpstart-private-cache-* buckets |
138 | 161 | role=get_sm_session().get_caller_identity_arn(), |
139 | 162 | sagemaker_session=get_sm_session(), |
140 | 163 | ) |
141 | 164 |
142 | 165 | # uses ml.g5.2xlarge instance |
143 | 166 | predictor = model.deploy( |
144 | 167 | tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], |
| 168 | + accept_eula=True, |
145 | 169 | ) |
146 | 170 |
|
147 | 171 | payload = { |
148 | 172 | "inputs": "some-payload", |
149 | 173 | "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6}, |
150 | 174 | } |
151 | 175 |
152 | | - response = predictor.predict(payload, custom_attributes="accept_eula=true") |
| 176 | + response = predictor.predict(payload) |
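| | + # no custom_attributes needed here: the EULA was already accepted at deploy() |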
153 | 177 |
154 | 178 | assert response is not None |
155 | 179 |