Update test module and guide for SageMaker-Endpoint

kwonyulchoi · kwonyulchoi · commit 873bbb073825 · 2021-08-23T15:29:15.000+09:00
diff --git a/README.md b/README.md
@@ -163,7 +163,7 @@ Download sample data by running the following command:
 sh codes/glue/churn-xgboost/script/download_data.sh
 ```
 
-A sample data will be downloaded in `codes/glue/churn-xgboost/data/input.csv`.
+The sample data will be downloaded to `codes/glue/churn-xgboost/data/input.csv`, and the double quotes in this file will be removed so that it is plain `csv`.
 
 ### **Trigger the StateMachine in Step Functions**
 
@@ -231,6 +231,51 @@ AWS Glue ETL Job Result in AWS S3 Bucket
 Amazon SageMaker Training Job Result in AWS S3 Bucket
 ![sagemaker-training-output](docs/asset/sagemaker-training-output.png)
 
+### **How to invoke**
+
+Finally, let's invoke the `SageMaker Endpoint` to make sure it works well.
+
+Before invocation, open `codes/glue/churn-xgboost/script/test_invoke.py` file, and update `profile name` and `endpoint name` according to your configuration.
+
+```python
+...
+...
+
+os.environ['AWS_PROFILE'] = 'cdk-demo'
+_endpoint_name = 'MLOpsPipelineDemo-churn-xgboost'
+
+...
+...
+```
+
+Invoke the endpoint by executing the following command:
+
+```bash
+python3 codes/glue/churn-xgboost/script/test_invoke.py
+...
+...
+0 Invocation ------------------
+>>input:  106,0,274.4,120,198.6,82,160.8,62,6.0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0
+>>label:  0
+>>prediction:  0.37959378957748413
+1 Invocation ------------------
+>>input:  28,0,187.8,94,248.6,86,208.8,124,10.6,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0
+>>label:  0
+>>prediction:  0.03738965839147568
+2 Invocation ------------------
+>>input:  148,0,279.3,104,201.6,87,280.8,99,7.9,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0
+>>label:  1
+>>prediction:  0.9195730090141296
+3 Invocation ------------------
+>>input:  132,0,191.9,107,206.9,127,272.0,88,12.6,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0
+>>label:  0
+>>prediction:  0.025062650442123413
+4 Invocation ------------------
+>>input:  92,29,155.4,110,188.5,104,254.9,118,8.0,4,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1
+>>label:  0
+>>prediction:  0.028299745172262192
+```
+
 ## How to re-use or upgrade
 
 ### **How to re-trigger the StateMachine in Step Functions**
diff --git a/codes/glue/churn-xgboost/script/download_data.sh b/codes/glue/churn-xgboost/script/download_data.sh
@@ -5,4 +5,7 @@ DATA_FILE=input.csv
 
 mkdir $DATA_PATH
 
-curl -o $DATA_PATH/$DATA_FILE https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/master/step-functions-data-science-sdk/automate_model_retraining_workflow/data/customer-churn.csv
+curl -o $DATA_PATH/$DATA_FILE https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/master/step-functions-data-science-sdk/automate_model_retraining_workflow/data/customer-churn.csv
+
+# sed -i 's/"//g' $DATA_PATH/$DATA_FILE
+sed -i'' -e 's/"//g' $DATA_PATH/$DATA_FILE
diff --git a/codes/glue/churn-xgboost/script/test_invoke.py b/codes/glue/churn-xgboost/script/test_invoke.py
@@ -0,0 +1,35 @@
+import os
+import csv
+import boto3
+
+
+os.environ['AWS_PROFILE'] = 'cdk-demo'  # AWS profile to use; change to match your configuration
+_endpoint_name = 'MLOpsPipelineDemo-churn-xgboost'  # endpoint to invoke; change to match your deployment
+
+_input_file = 'codes/glue/churn-xgboost/data/input.csv'  # relative path, resolved against the current working directory
+_sagemaker = boto3.client('sagemaker-runtime')  # runtime client used by test_invoke; created after AWS_PROFILE is set
+
+def test_invoke(endpoint_name: str, input_file: str, loop_count: int):
+    with open(input_file) as reader:
+        for index, line in enumerate(reader):
+            if index == loop_count:
+                break
+            
+            print(f'{index} Invocation ------------------')
+            line_arr = line.rstrip('\n').split(',')
+            input = ','.join(line_arr[1:])
+            label = line_arr[0]
+            print('>>input: ', input)
+            print('>>label: ', label)
+
+            response = _sagemaker.invoke_endpoint(
+                        EndpointName=endpoint_name,
+                        Body=input,
+                        ContentType='text/csv',
+                        Accept='Accept'
+                    )
+            print('>>prediction: ', response['Body'].read().decode())
+
+
+if __name__ == '__main__':  # run only when executed as a script
+    test_invoke(_endpoint_name, _input_file, 5)  # invoke the endpoint with the first 5 rows of the input file
diff --git a/config/app-config-demo.json b/config/app-config-demo.json
@@ -16,16 +16,17 @@
             "GlueJobFilePath": "codes/glue/churn-xgboost/src/glue_etl.py",
             "GlueJobTimeoutInMin": 30,
 
-            "TrainContainerImage": "825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:1",
+            "TrainContainerImage": "825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest",
             "TrainParameters": {
                 "max_depth": "5",
+                "eval_metric": "error",
                 "eta": "0.2",
                 "gamma": "4",
                 "min_child_weight": "6",
-                "subsample": "0.7",
-                "objective": "multi:softprob",
-                "num_class": "2",   
-                "num_round": "50"
+                "subsample": "0.8",
+                "objective": "binary:logistic",
+                "silent": "0",
+                "num_round": "100"
             },
             "TrainInputContent": "text/csv",
             "TrainInstanceType": "c5.xlarge",
diff --git a/package-lock.json b/package-lock.json