16 | 16 | from unittest import mock |
17 | 17 |
18 | 18 | import pytest |
| 19 | + |
19 | 20 | import tests.integ |
20 | 21 |
21 | 22 | from sagemaker.jumpstart.model import JumpStartModel |
30 | 31 | get_tabular_data, |
31 | 32 | ) |
32 | 33 |
33 | | -from sagemaker.enums import EndpointType |
| 34 | +INF2_SUPPORTED_REGIONS = { |
| 35 | + "us-west-2", |
| 36 | + "us-east-1", |
| 37 | + "us-east-2", |
| 38 | +} |
34 | 39 |
35 | 40 | MAX_INIT_TIME_SECONDS = 5 |
36 | 41 |
37 | | -GATED_INFERENCE_MODEL_SUPPORTED_REGIONS = { |
| 42 | +GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS = { |
38 | 43 | "us-west-2", |
39 | 44 | "us-east-1", |
40 | 45 | "eu-west-1", |
@@ -67,89 +72,108 @@ def test_non_prepacked_jumpstart_model(setup): |
67 | 72 | assert response is not None |
68 | 73 |
69 | 74 |
| 75 | +def test_prepacked_jumpstart_model(setup): |
| 76 | + |
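| | + # "prepacked" models bundle the inference script with the model artifact, so deploy() |
| | + # needs no separate inference script (our reading of the JumpStart naming) |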
| 77 | + model_id = "huggingface-txt2img-conflictx-complex-lineart" |
| 78 | + |
| 79 | + model = JumpStartModel( |
| 80 | + model_id=model_id, |
| 81 | + role=get_sm_session().get_caller_identity_arn(), |
| 82 | + sagemaker_session=get_sm_session(), |
| 83 | + ) |
| 84 | + |
| 85 | + # uses ml.p3.2xlarge instance |
| 86 | + predictor = model.deploy( |
| 87 | + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], |
| 88 | + ) |
| 89 | + |
| 90 | + response = predictor.predict("hello world!") |
| 91 | + |
| 92 | + assert response is not None |
| 93 | + |
| 94 | + |
70 | 95 | @pytest.mark.skipif( |
71 | | - tests.integ.test_region() not in tests.integ.INFERENCE_COMPONENT_SUPPORTED_REGIONS, |
72 | | - reason="inference component based endpoint is not supported in certain regions", |
| 96 | + tests.integ.test_region() not in GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS, |
| 97 | + reason=f"JumpStart model-package inference models unavailable in {tests.integ.test_region()}.", |
73 | 98 | ) |
74 | | -def test_non_prepacked_jumpstart_model_deployed_on_inference_component_based_endpoint(setup): |
| 99 | +def test_model_package_arn_jumpstart_model(setup): |
75 | 100 |
76 | | - model_id = "huggingface-llm-falcon-7b-instruct-bf16" # default g5.2xlarge |
| 101 | + model_id = "meta-textgeneration-llama-2-7b" |
77 | 102 |
78 | 103 | model = JumpStartModel( |
79 | 104 | model_id=model_id, |
| 105 | + model_version="2.*", # version <3.0.0 uses model packages |
80 | 106 | role=get_sm_session().get_caller_identity_arn(), |
81 | 107 | sagemaker_session=get_sm_session(), |
82 | 108 | ) |
83 | 109 |
84 | | - predictor = model.deploy(endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED) |
85 | | - |
86 | | - inference_input = { |
87 | | - "inputs": "Girafatron is obsessed with giraffes, the most glorious animal on the " |
88 | | - + "face of this Earth. Giraftron believes all other animals are irrelevant when compared " |
89 | | - + "to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:", |
90 | | - "parameters": { |
91 | | - "max_new_tokens": 50, |
92 | | - "top_k": 10, |
93 | | - "return_full_text": False, |
94 | | - "do_sample": True, |
95 | | - }, |
96 | | - } |
| 110 | + # uses ml.g5.2xlarge instance |
| 111 | + predictor = model.deploy( |
| 112 | + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], |
| 113 | + ) |
97 | 114 |
98 | | - response = predictor.predict(inference_input) |
99 | | - assert response is not None |
| 115 | + payload = { |
| 116 | + "inputs": "some-payload", |
| 117 | + "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6}, |
| 118 | + } |
100 | 119 |
101 | | - # Delete predictor |
102 | | - predictor.delete_predictor(wait=True) |
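| | + # model-package-backed gated models accept the EULA per request via custom_attributes |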
| 120 | + response = predictor.predict(payload, custom_attributes="accept_eula=true") |
103 | 121 |
104 | | - # Delete endpoint |
105 | | - predictor.delete_endpoint() |
| 122 | + assert response is not None |
106 | 123 |
107 | 124 |
108 | | -def test_prepacked_jumpstart_model(setup): |
| 125 | +@pytest.mark.skipif( |
| 126 | + tests.integ.test_region() not in INF2_SUPPORTED_REGIONS, |
| 127 | + reason=f"INF2 instances unavailable in {tests.integ.test_region()}.", |
| 128 | +) |
| 129 | +def test_jumpstart_gated_model_neuron(setup): |
109 | 130 |
110 | | - model_id = "huggingface-txt2img-conflictx-complex-lineart" |
| 131 | + model_id = "meta-textgenerationneuron-llama-2-7b" |
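| | + # the "neuron" variant is compiled for AWS Inferentia2, hence the INF2 region guard above |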
111 | 132 |
112 | 133 | model = JumpStartModel( |
113 | 134 | model_id=model_id, |
114 | 135 | role=get_sm_session().get_caller_identity_arn(), |
115 | 136 | sagemaker_session=get_sm_session(), |
116 | 137 | ) |
117 | 138 |
118 | | - # uses ml.p3.2xlarge instance |
| 139 | + # uses ml.inf2.xlarge instance |
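| | + # gated model artifacts require explicit EULA acceptance at deploy time |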
119 | 140 | predictor = model.deploy( |
120 | 141 | tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], |
| 142 | + accept_eula=True, |
121 | 143 | ) |
122 | 144 |
123 | | - response = predictor.predict("hello world!") |
| 145 | + payload = { |
| 146 | + "inputs": "some-payload", |
| 147 | + } |
| 148 | + |
| 149 | + response = predictor.predict(payload) |
124 | 150 |
125 | 151 | assert response is not None |
126 | 152 |
127 | 153 |
128 | | -@pytest.mark.skipif( |
129 | | - tests.integ.test_region() not in GATED_INFERENCE_MODEL_SUPPORTED_REGIONS, |
130 | | - reason=f"JumpStart gated inference models unavailable in {tests.integ.test_region()}.", |
131 | | -) |
132 | | -def test_model_package_arn_jumpstart_model(setup): |
| 154 | +def test_jumpstart_gated_model(setup): |
133 | 155 |
134 | 156 | model_id = "meta-textgeneration-llama-2-7b" |
135 | 157 |
136 | 158 | model = JumpStartModel( |
137 | 159 | model_id=model_id, |
| 160 | + model_version="3.*", # version >=3.0.0 stores artifacts in jumpstart-private-cache-* buckets |
138 | 161 | role=get_sm_session().get_caller_identity_arn(), |
139 | 162 | sagemaker_session=get_sm_session(), |
140 | 163 | ) |
141 | 164 |
142 | 165 | # uses ml.g5.2xlarge instance |
143 | 166 | predictor = model.deploy( |
144 | 167 | tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], |
| 168 | + accept_eula=True, |
145 | 169 | ) |
146 | 170 |
|
147 | 171 | payload = { |
148 | 172 | "inputs": "some-payload", |
149 | 173 | "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6}, |
150 | 174 | } |
151 | 175 |
152 | | - response = predictor.predict(payload, custom_attributes="accept_eula=true") |
| 176 | + response = predictor.predict(payload) |
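| | + # no custom_attributes needed here: the EULA was already accepted at deploy() |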
153 | 177 |
154 | 178 | assert response is not None |
155 | 179 |