From 5c2eded23f1ef5fff65339745791d590d35ee0c4 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Wed, 3 Jul 2024 04:18:08 +0300 Subject: [PATCH 01/16] Add functioning boiler plate --- mwaa-etl-processor/Makefile | 38 +++++++++++++++++++ .../airflow-bucket/dags/etl_dag.py | 26 +++++++++++++ .../airflow-bucket/requirements.txt | 1 + mwaa-etl-processor/run.sh | 23 +++++++++++ 4 files changed, 88 insertions(+) create mode 100644 mwaa-etl-processor/Makefile create mode 100644 mwaa-etl-processor/airflow-bucket/dags/etl_dag.py create mode 100644 mwaa-etl-processor/airflow-bucket/requirements.txt create mode 100755 mwaa-etl-processor/run.sh diff --git a/mwaa-etl-processor/Makefile b/mwaa-etl-processor/Makefile new file mode 100644 index 0000000..979a9e5 --- /dev/null +++ b/mwaa-etl-processor/Makefile @@ -0,0 +1,38 @@ +export DEBUG=1 + +SHELL := /bin/bash + +usage: ## Show this help + @fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##//' + +install: ## Install dependencies + @which localstack || pip install localstack + @which awslocal || pip install awscli-local + +run: ## Run MWAA ETL Processor + ./run.sh + +start: ## Start LocalStack + @if [ -z `docker network ls --filter name=localstack -q` ]; then \ + echo "Creating localstack network"; \ + docker network create --attachable localstack; \ + fi + DOCKER_FLAGS='--network localstack --name=localhost.localstack.cloud' localstack start -d + +stop: ## Stop LocalStack + @echo + MAIN_CONTAINER_NAME=localhost.localstack.cloud localstack stop + docker rm -f $$(docker network inspect localstack | jq -r '.[0].Containers | keys[0]') + +ready: ## Wait for LocalStack to be ready + @echo Waiting on the LocalStack container... + @localstack wait -t 30 && echo Localstack is ready to use! || (echo Gave up waiting on LocalStack, exiting. 
&& exit 1) + +logs: ## Retrieve logs from LocalStack + @localstack logs > logs.txt + +test-ci: ## Run CI test + make start install ready run; return_code=`echo $$?`;\ + make logs; make stop; exit $$return_code; + +.PHONY: usage install start run stop ready logs test-ci diff --git a/mwaa-etl-processor/airflow-bucket/dags/etl_dag.py b/mwaa-etl-processor/airflow-bucket/dags/etl_dag.py new file mode 100644 index 0000000..131bf6b --- /dev/null +++ b/mwaa-etl-processor/airflow-bucket/dags/etl_dag.py @@ -0,0 +1,26 @@ +from airflow.decorators import dag, task +from airflow.utils.dates import days_ago +from airflow.providers.amazon.aws.hooks.s3 import S3Hook + +@dag(schedule_interval=None, start_date=days_ago(1), tags=['example']) +def example_s3_list_dag(): + + @task + def list_environment_variables(): + import os + for key, value in os.environ.items(): + print(f"Found env var: {key}={value}") + return dict(os.environ.items()) + + @task + def list_s3_bucket(env_items): + hook = S3Hook(aws_conn_id='aws_default') + bucket_name = 'airflow' # Replace with your S3 bucket name + keys = hook.list_keys(bucket_name) + for key in keys: + print(f"Found key: {key}") + return keys + + list_s3_bucket(list_environment_variables()) + +dag = example_s3_list_dag() diff --git a/mwaa-etl-processor/airflow-bucket/requirements.txt b/mwaa-etl-processor/airflow-bucket/requirements.txt new file mode 100644 index 0000000..72b587d --- /dev/null +++ b/mwaa-etl-processor/airflow-bucket/requirements.txt @@ -0,0 +1 @@ +apache-airflow[amazon]==2.8.1 diff --git a/mwaa-etl-processor/run.sh b/mwaa-etl-processor/run.sh new file mode 100755 index 0000000..d657660 --- /dev/null +++ b/mwaa-etl-processor/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -euxo pipefail + +# Create default AWS connection for MWAA environment +awslocal secretsmanager create-secret \ + --name airflow/connections/aws_default \ + --secret-string '{"conn_type": "aws", "login": "test", "password": "test", "extra": {"region_name": "us-east-1", "endpoint_url": "https://localhost.localstack.cloud:4566"}}' + +# Create MWAA environment with default AWS connection +awslocal s3 mb s3://airflow +awslocal mwaa create-environment \ + --name my-mwaa-env \ + --airflow-version 2.8.1 \ + --dag-s3-path /dags \ + --airflow-configuration-options '{"secrets.backend": "airflow.providers.amazon.aws.secrets.secrets_manager.SecretsManagerBackend", "secrets.backend_kwargs": "{\"connections_prefix\": \"airflow/connections/\", \"variables_prefix\": \"airflow/variables/\", \"endpoint_url\": \"https://localhost.localstack.cloud:4566\", \"aws_access_key_id\": \"test\", \"aws_secret_access_key\": \"test\", \"region_name\": \"us-east-1\"}"}' \ + --execution-role-arn arn:aws:iam::000000000000:role/airflow-role \ + --source-bucket-arn arn:aws:s3:::airflow \ + --network-configuration {} \ + --endpoint-url http://localhost.localstack.cloud:4566 + +# Upload DAG to MWAA environment +awslocal s3 cp --recursive airflow-bucket s3://airflow + From 0839ce959b101430fbdfd18e20ac8e617324929d Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Fri, 5 Jul 2024 05:15:45 +0300 Subject: [PATCH 02/16] WIP model training sample in MWAA --- mwaa-etl-processor/Makefile | 2 +- .../airflow-bucket/dags/etl_dag.py | 26 ---- .../airflow-bucket/dags/train_and_deploy.py | 129 ++++++++++++++++++ .../airflow-bucket/requirements.txt | 4 + mwaa-etl-processor/proxy.conf | 10 ++ mwaa-etl-processor/run.sh | 6 +- 6 files changed, 149 insertions(+), 28 deletions(-) delete mode 100644 
mwaa-etl-processor/airflow-bucket/dags/etl_dag.py create mode 100644 mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py create mode 100644 mwaa-etl-processor/proxy.conf diff --git a/mwaa-etl-processor/Makefile b/mwaa-etl-processor/Makefile index 979a9e5..99deb48 100644 --- a/mwaa-etl-processor/Makefile +++ b/mwaa-etl-processor/Makefile @@ -17,7 +17,7 @@ start: ## Start LocalStack echo "Creating localstack network"; \ docker network create --attachable localstack; \ fi - DOCKER_FLAGS='--network localstack --name=localhost.localstack.cloud' localstack start -d + DOCKER_FLAGS='--network localstack --name=localhost.localstack.cloud -e GATEWAY_SERVER=hypercorn' localstack start -d stop: ## Stop LocalStack @echo diff --git a/mwaa-etl-processor/airflow-bucket/dags/etl_dag.py b/mwaa-etl-processor/airflow-bucket/dags/etl_dag.py deleted file mode 100644 index 131bf6b..0000000 --- a/mwaa-etl-processor/airflow-bucket/dags/etl_dag.py +++ /dev/null @@ -1,26 +0,0 @@ -from airflow.decorators import dag, task -from airflow.utils.dates import days_ago -from airflow.providers.amazon.aws.hooks.s3 import S3Hook - -@dag(schedule_interval=None, start_date=days_ago(1), tags=['example']) -def example_s3_list_dag(): - - @task - def list_environment_variables(): - import os - for key, value in os.environ.items(): - print(f"Found env var: {key}={value}") - return dict(os.environ.items()) - - @task - def list_s3_bucket(env_items): - hook = S3Hook(aws_conn_id='aws_default') - bucket_name = 'airflow' # Replace with your S3 bucket name - keys = hook.list_keys(bucket_name) - for key in keys: - print(f"Found key: {key}") - return keys - - list_s3_bucket(list_environment_variables()) - -dag = example_s3_list_dag() diff --git a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py new file mode 100644 index 0000000..5b6e88d --- /dev/null +++ b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py @@ -0,0 +1,129 @@ +from airflow.decorators import dag, task +from airflow.utils.dates import days_ago +from airflow.models import Variable + +from pydantic import BaseModel +from typing import List, Dict, Optional + +import hashlib +import pandas as pd + +from sklearn.preprocessing import LabelEncoder +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import DecisionTreeClassifier + +class DatasetSpec(BaseModel): + url: str + name: str + feature_columns: List[str] + target_column: str + +@dag(schedule_interval=None, start_date=days_ago(1), tags=['example']) +def train_and_deploy_classifier_model(): + @task + def retrieve_dataset(): + # Retrieve the dataset from the Airflow Variable. + dataset_spec = Variable.get( + "dataset_spec", + deserialize_json=True, + ) + if dataset_spec is None: + raise ValueError("Dataset URL is not defined") + + try: + DatasetSpec(**dataset_spec) + except Exception as e: + raise ValueError(f"Invalid dataset spec: {e}") + + return dataset_spec + + @task + def read_dataset(dataset_spec): + dataset = DatasetSpec(**dataset_spec) + + # Read the dataset from the specified URL. + df = pd.read_csv(dataset.url) + print(df.head()) + + # Compute dataset ID. + dataset_id = hashlib.sha256(pd.util.hash_pandas_object(df, index=True).values).hexdigest() + print(f"Dataset ID: {dataset_id}") + + # Return the dataset and its ID. 
+ return { + "dataset": df.to_json(), + "dataset_id": dataset_id, + } + + @task + def train_model(dataset_spec: dict, dataset: dict, algorithm: str): + df_json: Dict = dataset["dataset"] + dataset_id: str = dataset["dataset_id"] + + dataset_spec: DatasetSpec = DatasetSpec(**dataset_spec) + data: pd.DataFrame = pd.DataFrame.from_dict(df_json) + + # Split the dataset into training and testing sets. + X_train, Y_train, X_test, Y_test = train_test_split(data, test_size=0.2) + X_train = X_train[dataset_spec.feature_columns] + X_test = X_test[dataset_spec.feature_columns] + Y_train = Y_train[dataset_spec.target_column] + Y_test = Y_test[dataset_spec.target_column] + + # Encode the target column. + label_encoder = LabelEncoder() + Y_train_encoded = label_encoder.fit_transform(Y_train) + Y_test_encoded = label_encoder.transform(Y_test) + Y_train = Y_train_encoded + Y_test = Y_test_encoded + + # Print the dataset information. + print(f"Feature columns: {dataset_spec.feature_columns}") + print(f"Target column: {dataset_spec.target_column}") + print(f"Train size: {len(X_train)}") + print(f"Test size: {len(X_test)}") + print(f"Training model using {algorithm} algorithm") + + # Train the model using the specified algorithm. + if algorithm == "SVM": + model = svm.SVC() + elif algorithm == "LogisticRegression": + model = LogisticRegression() + elif algorithm == "DecisionTreeClassifier": + model = DecisionTreeClassifier() + else: + raise ValueError(f"Unsupported algorithm: {algorithm}") + + # Train the model. + model.fit(X_train, Y_train) + + # Predict the target values. + Y_pred = model.predict(X_test) + + # Compute the accuracy of the model. + accuracy = metrics.accuracy_score(Y_test, Y_pred) + precision = metrics.precision_score(Y_test, Y_pred) + recall = metrics.recall_score(Y_test, Y_pred) + f1 = metrics.f1_score(Y_test, Y_pred) + conf_matrix = metrics.confusion_matrix(Y_test, Y_pred) + + # Print or log the evaluation metrics + print(f"Accuracy: {accuracy}") + print(f"Precision: {precision}") + print(f"Recall: {recall}") + print(f"F1 Score: {f1}") + print(f"Confusion Matrix:\n{conf_matrix}") + + dataset_spec: Dict = retrieve_dataset() + dataset = read_dataset(dataset_spec) + + ml_algorithms = ["SVM", "LogisticRegression", "DecisionTreeClassifier"] + for algorithm in ml_algorithms: + train_model(dataset_spec, dataset, algorithm) + + +dag = train_and_deploy_classifier_model() diff --git a/mwaa-etl-processor/airflow-bucket/requirements.txt b/mwaa-etl-processor/airflow-bucket/requirements.txt index 72b587d..f68086b 100644 --- a/mwaa-etl-processor/airflow-bucket/requirements.txt +++ b/mwaa-etl-processor/airflow-bucket/requirements.txt @@ -1 +1,5 @@ apache-airflow[amazon]==2.8.1 +pandas +pydantic +scikit-learn +scipy diff --git a/mwaa-etl-processor/proxy.conf b/mwaa-etl-processor/proxy.conf new file mode 100644 index 0000000..a42aecf --- /dev/null +++ b/mwaa-etl-processor/proxy.conf @@ -0,0 +1,10 @@ +services: + secretsmanager: + resources: + # list of ARNs of secrets to proxy to real AWS + - 'arn:aws:secretsmanager:.+:secret:airflow/variables/.*' + operations: + # list of operation name regex patterns to include all operations + - '.*' + # optionally, specify if only read requests should be allowed; false allows all operations + read_only: true diff --git a/mwaa-etl-processor/run.sh b/mwaa-etl-processor/run.sh index d657660..542510e 100755 --- a/mwaa-etl-processor/run.sh +++ b/mwaa-etl-processor/run.sh @@ -6,6 +6,11 @@ awslocal secretsmanager create-secret \ --name airflow/connections/aws_default \ 
--secret-string '{"conn_type": "aws", "login": "test", "password": "test", "extra": {"region_name": "us-east-1", "endpoint_url": "https://localhost.localstack.cloud:4566"}}' +# Create variable that holds the URL to the Iris dataset +awslocal secretsmanager create-secret \ + --name airflow/variables/dataset_spec \ + --secret-string '{"url": "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv", "name": "iris.data", "feature_columns": ["sepal_length", "sepal_width", "petal_length", "petal_width"], "target_column": "variety"}' + # Create MWAA environment with default AWS connection awslocal s3 mb s3://airflow awslocal mwaa create-environment \ @@ -20,4 +25,3 @@ awslocal mwaa create-environment \ # Upload DAG to MWAA environment awslocal s3 cp --recursive airflow-bucket s3://airflow - From b9876a3ebb7b2db39a954de069149800c59158f7 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Fri, 5 Jul 2024 17:40:16 +0300 Subject: [PATCH 03/16] WIP --- .../airflow-bucket/dags/train_and_deploy.py | 55 ++++++++++--------- mwaa-etl-processor/run.sh | 15 ++++- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py index 5b6e88d..0606b5f 100644 --- a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py +++ b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py @@ -8,14 +8,6 @@ import hashlib import pandas as pd -from sklearn.preprocessing import LabelEncoder -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import DecisionTreeClassifier - class DatasetSpec(BaseModel): url: str name: str @@ -55,31 +47,40 @@ def read_dataset(dataset_spec): # Return the dataset and its ID. return { - "dataset": df.to_json(), + "dataset": df.to_dict(), "dataset_id": dataset_id, } @task def train_model(dataset_spec: dict, dataset: dict, algorithm: str): + from sklearn import svm + from sklearn import metrics + from sklearn.preprocessing import LabelEncoder + from sklearn.linear_model import LogisticRegression + from sklearn.model_selection import train_test_split + from sklearn.neighbors import KNeighborsClassifier + from sklearn.tree import DecisionTreeClassifier + df_json: Dict = dataset["dataset"] dataset_id: str = dataset["dataset_id"] - dataset_spec: DatasetSpec = DatasetSpec(**dataset_spec) - data: pd.DataFrame = pd.DataFrame.from_dict(df_json) + data: pd.DataFrame = pd.DataFrame().from_dict(df_json) + + print(data.head()) + + # Split the dataset into feature columns and target column. + X_data = data[dataset_spec.feature_columns] + Y_data = data[dataset_spec.target_column] # Split the dataset into training and testing sets. - X_train, Y_train, X_test, Y_test = train_test_split(data, test_size=0.2) - X_train = X_train[dataset_spec.feature_columns] - X_test = X_test[dataset_spec.feature_columns] - Y_train = Y_train[dataset_spec.target_column] - Y_test = Y_test[dataset_spec.target_column] + X_train, X_test, y_train, y_test = train_test_split(X_data, Y_data, test_size=0.2) # Encode the target column. 
label_encoder = LabelEncoder() - Y_train_encoded = label_encoder.fit_transform(Y_train) - Y_test_encoded = label_encoder.transform(Y_test) - Y_train = Y_train_encoded - Y_test = Y_test_encoded + y_train_encoded = label_encoder.fit_transform(y_train) + y_test_encoded = label_encoder.transform(y_test) + y_train = y_train_encoded + y_test = y_test_encoded # Print the dataset information. print(f"Feature columns: {dataset_spec.feature_columns}") @@ -99,17 +100,17 @@ def train_model(dataset_spec: dict, dataset: dict, algorithm: str): raise ValueError(f"Unsupported algorithm: {algorithm}") # Train the model. - model.fit(X_train, Y_train) + model.fit(X_train, y_train) # Predict the target values. - Y_pred = model.predict(X_test) + y_pred = model.predict(X_test) # Compute the accuracy of the model. - accuracy = metrics.accuracy_score(Y_test, Y_pred) - precision = metrics.precision_score(Y_test, Y_pred) - recall = metrics.recall_score(Y_test, Y_pred) - f1 = metrics.f1_score(Y_test, Y_pred) - conf_matrix = metrics.confusion_matrix(Y_test, Y_pred) + accuracy = metrics.accuracy_score(y_test, y_pred) + precision = metrics.precision_score(y_test, y_pred, average=None) + recall = metrics.recall_score(y_test, y_pred, average=None) + f1 = metrics.f1_score(y_test, y_pred, average=None) + conf_matrix = metrics.confusion_matrix(y_test, y_pred) # Print or log the evaluation metrics print(f"Accuracy: {accuracy}") diff --git a/mwaa-etl-processor/run.sh b/mwaa-etl-processor/run.sh index 542510e..b24f571 100755 --- a/mwaa-etl-processor/run.sh +++ b/mwaa-etl-processor/run.sh @@ -9,7 +9,12 @@ awslocal secretsmanager create-secret \ # Create variable that holds the URL to the Iris dataset awslocal secretsmanager create-secret \ --name airflow/variables/dataset_spec \ - --secret-string '{"url": "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv", "name": "iris.data", "feature_columns": ["sepal_length", "sepal_width", "petal_length", "petal_width"], "target_column": "variety"}' + --secret-string '{ + "url": "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv", + "name": "iris.data", + "feature_columns": ["sepal.length", "sepal.width", "petal.length", "petal.width"], + "target_column": "variety" + }' # Create MWAA environment with default AWS connection awslocal s3 mb s3://airflow @@ -17,7 +22,13 @@ awslocal mwaa create-environment \ --name my-mwaa-env \ --airflow-version 2.8.1 \ --dag-s3-path /dags \ - --airflow-configuration-options '{"secrets.backend": "airflow.providers.amazon.aws.secrets.secrets_manager.SecretsManagerBackend", "secrets.backend_kwargs": "{\"connections_prefix\": \"airflow/connections/\", \"variables_prefix\": \"airflow/variables/\", \"endpoint_url\": \"https://localhost.localstack.cloud:4566\", \"aws_access_key_id\": \"test\", \"aws_secret_access_key\": \"test\", \"region_name\": \"us-east-1\"}"}' \ + --airflow-configuration-options '{ + "core.dags_are_paused_at_creation": "False", + "scheduler.min_file_process_interval": "0", + "scheduler.dag_dir_list_interval": "10", + "secrets.backend": "airflow.providers.amazon.aws.secrets.secrets_manager.SecretsManagerBackend", + "secrets.backend_kwargs": "{\"connections_prefix\": \"airflow/connections/\", \"variables_prefix\": \"airflow/variables/\", \"endpoint_url\": \"https://localhost.localstack.cloud:4566\", \"aws_access_key_id\": \"test\", \"aws_secret_access_key\": \"test\", \"region_name\": \"us-east-1\"}" + }' \ --execution-role-arn 
arn:aws:iam::000000000000:role/airflow-role \ --source-bucket-arn arn:aws:s3:::airflow \ --network-configuration {} \ From 97b42bc3d49afeadab62481fb3270476e148a215 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Mon, 8 Jul 2024 19:07:51 +0300 Subject: [PATCH 04/16] WIP --- .../airflow-bucket/dags/train_and_deploy.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py index 0606b5f..bc3bdc9 100644 --- a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py +++ b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py @@ -107,24 +107,36 @@ def train_model(dataset_spec: dict, dataset: dict, algorithm: str): # Compute the accuracy of the model. accuracy = metrics.accuracy_score(y_test, y_pred) - precision = metrics.precision_score(y_test, y_pred, average=None) - recall = metrics.recall_score(y_test, y_pred, average=None) - f1 = metrics.f1_score(y_test, y_pred, average=None) + precision = metrics.precision_score(y_test, y_pred, average="weighted") + recall = metrics.recall_score(y_test, y_pred, average="weighted") + f1 = metrics.f1_score(y_test, y_pred, average="weighted") conf_matrix = metrics.confusion_matrix(y_test, y_pred) + # Save the model and label encoder classes. + model.classes_names = label_encoder.classes_ + # Print or log the evaluation metrics print(f"Accuracy: {accuracy}") print(f"Precision: {precision}") print(f"Recall: {recall}") print(f"F1 Score: {f1}") print(f"Confusion Matrix:\n{conf_matrix}") + + return accuracy + + @task + def deploy_model(accuracies: List[float]): + print(f"Model accuracies: {accuracies}") dataset_spec: Dict = retrieve_dataset() dataset = read_dataset(dataset_spec) ml_algorithms = ["SVM", "LogisticRegression", "DecisionTreeClassifier"] + accuracies = [] for algorithm in ml_algorithms: - train_model(dataset_spec, dataset, algorithm) + accuracies += [train_model(dataset_spec, dataset, algorithm)] + + deploy_model(accuracies) dag = train_and_deploy_classifier_model() From f90454b38e95a22257df272e0105afe53ea7897b Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Mon, 8 Jul 2024 19:22:53 +0300 Subject: [PATCH 05/16] Multi-line configs --- mwaa-etl-processor/Makefile | 2 +- mwaa-etl-processor/run.sh | 36 +++++++++++++++++++++++------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/mwaa-etl-processor/Makefile b/mwaa-etl-processor/Makefile index 99deb48..d19285e 100644 --- a/mwaa-etl-processor/Makefile +++ b/mwaa-etl-processor/Makefile @@ -17,7 +17,7 @@ start: ## Start LocalStack echo "Creating localstack network"; \ docker network create --attachable localstack; \ fi - DOCKER_FLAGS='--network localstack --name=localhost.localstack.cloud -e GATEWAY_SERVER=hypercorn' localstack start -d + DOCKER_FLAGS='-e LS_LOG=trace --network localstack --name=localhost.localstack.cloud -e GATEWAY_SERVER=hypercorn' localstack start -d stop: ## Stop LocalStack @echo diff --git a/mwaa-etl-processor/run.sh b/mwaa-etl-processor/run.sh index b24f571..889ffae 100755 --- a/mwaa-etl-processor/run.sh +++ b/mwaa-etl-processor/run.sh @@ -1,6 +1,27 @@ #!/bin/bash set -euxo pipefail +AIRFLOW_CONFIG=$(cat < Date: Tue, 9 Jul 2024 02:20:11 +0300 Subject: [PATCH 06/16] More WIP --- .gitignore | 2 + .../airflow-bucket/dags/train_and_deploy.py | 60 ++++++++++++++++++- mwaa-etl-processor/lambda/main.py | 37 ++++++++++++ mwaa-etl-processor/run.sh | 15 +++++ 4 files changed, 112 insertions(+), 2 
deletions(-) create mode 100644 mwaa-etl-processor/lambda/main.py diff --git a/.gitignore b/.gitignore index ae41d4d..22542c2 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,5 @@ glacier-s3-select/*result.csv azure-functions/.python_packages/ */logs.txt +mwaa-etl-processor/.lambda-env +mwaa-etl-processor/.pkgs diff --git a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py index bc3bdc9..aa2cb03 100644 --- a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py +++ b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py @@ -1,6 +1,12 @@ from airflow.decorators import dag, task from airflow.utils.dates import days_ago from airflow.models import Variable +from airflow.providers.amazon.aws.hooks.s3 import S3Hook +from airflow.providers.amazon.aws.hooks.lambda_function import LambdaHook +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook + +import pickle +import io from pydantic import BaseModel from typing import List, Dict, Optional @@ -114,6 +120,18 @@ def train_model(dataset_spec: dict, dataset: dict, algorithm: str): # Save the model and label encoder classes. model.classes_names = label_encoder.classes_ + + # Dump the model and label encoder to S3. + s3_hook = S3Hook(aws_conn_id="aws_default") + s3_hook.create_bucket(bucket_name="models") + model_bytes = pickle.dumps(model) + model_buffer = io.BytesIO(model_bytes) + s3_hook.load_bytes( + bytes_data=model_buffer.getvalue(), + key=f"models/{dataset_id}/{algorithm}.pkl", + bucket_name="models", + replace=True, + ) # Print or log the evaluation metrics print(f"Accuracy: {accuracy}") @@ -125,8 +143,46 @@ def train_model(dataset_spec: dict, dataset: dict, algorithm: str): return accuracy @task - def deploy_model(accuracies: List[float]): + def deploy_model(ml_algorithms: List[str], accuracies: List[float], dataset: dict): print(f"Model accuracies: {accuracies}") + print(f"ML algorithms: {ml_algorithms}") + + dataset_id = dataset["dataset_id"] + best_model_index = accuracies.index(max(accuracies)) + best_ml_algorithm = ml_algorithms[best_model_index] + + print(f"Location of best model: s3://models/models/{dataset_id}/{best_ml_algorithm}.pkl") + lambda_hook = LambdaHook(aws_conn_id="aws_default") + lambda_client = lambda_hook.get_client_type() + + try: + lambda_hook.create_lambda( + function_name=f"ml-model-{best_ml_algorithm}-{dataset_id}"[:64], + runtime="python3.9", + role="arn:aws:iam::000000000000:role/lambda-role", + handler="main.lambda_handler", + code={ + "S3Bucket": "lambda", + "S3Key": "deploy_lambda.zip", + }, + environment={ + "Variables": { + "MODEL_BUCKET_NAME": "models", + "MODEL_OBJECT_KEY": f"models/{dataset_id}/{best_ml_algorithm}.pkl", + }, + }, + ) + except Exception as e: + print(f"Error creating the function: {e}") + + try: + lambda_client.create_function_url_config( + FunctionName=f"ml-model-{best_ml_algorithm}-{dataset_id}"[:64], + AuthType="NONE", + InvokeMode="BUFFERED", + ) + except Exception as e: + print(f"Error creating the function URL config: {e}") dataset_spec: Dict = retrieve_dataset() dataset = read_dataset(dataset_spec) @@ -136,7 +192,7 @@ def deploy_model(accuracies: List[float]): for algorithm in ml_algorithms: accuracies += [train_model(dataset_spec, dataset, algorithm)] - deploy_model(accuracies) + deploy_model(ml_algorithms, accuracies, dataset) dag = train_and_deploy_classifier_model() diff --git a/mwaa-etl-processor/lambda/main.py b/mwaa-etl-processor/lambda/main.py new file mode 100644 index 
0000000..2e78b87 --- /dev/null +++ b/mwaa-etl-processor/lambda/main.py @@ -0,0 +1,37 @@ +import os +import boto3 +import pickle +import json + +def lambda_handler(event, context): + # Access the JSON payload + sample = event.get("sample") + + # Specify the S3 bucket and object key + bucket_name = os.environ["MODEL_BUCKET_NAME"] + object_key = os.environ["MODEL_OBJECT_KEY"] + + # Create an S3 client + s3 = boto3.client("s3") + + # Download the file from S3 + response = s3.get_object(Bucket=bucket_name, Key=object_key) + model_data = response["Body"].read() + + # Load the model from the downloaded data + model = pickle.loads(model_data) + + # Run inference. + print(f"Running inference on input data: {sample}") + index_prediction = int(model.predict(sample)[0]) + print(f"Prediction index: {index_prediction}") + prediction = model.classes_names[index_prediction] + print(f"Prediction: {prediction}") + + return { + "statusCode": 200, + "headers": { + "Content-Type": "application/json" + }, + "body": json.dumps({"prediction": prediction}) + } diff --git a/mwaa-etl-processor/run.sh b/mwaa-etl-processor/run.sh index 889ffae..f8f5e6e 100755 --- a/mwaa-etl-processor/run.sh +++ b/mwaa-etl-processor/run.sh @@ -32,6 +32,21 @@ awslocal secretsmanager create-secret \ --name airflow/variables/dataset_spec \ --secret-string "$DATASET_CONFIG" +# Create the Lambda zip files +rm -rf .pkgs lambda/deploy_lambda.zip +docker rm -f mwaa-etl-processor-build-pip-pkgs || true +docker run -it --name mwaa-etl-processor-build-pip-pkgs \ + --entrypoint bash \ + localstack/lambda-python:3.9 \ + -c "mkdir -p /var/tmp/.pkgs && pip install scikit-learn==1.3.2 --target /var/tmp/.pkgs" +docker cp mwaa-etl-processor-build-pip-pkgs:/var/tmp/.pkgs/ . +cd .pkgs && zip -r9 ../lambda/deploy_lambda.zip . && cd .. +cd lambda && zip -g deploy_lambda.zip * && cd .. 
+ +# And upload it to S3 +awslocal s3 mb s3://lambda +awslocal s3 cp lambda/deploy_lambda.zip s3://lambda + # Create MWAA environment with default AWS connection awslocal s3 mb s3://airflow awslocal mwaa create-environment \ From 515200b735885d1f51668c8bd0929f65fcd3fa56 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Tue, 9 Jul 2024 03:11:22 +0300 Subject: [PATCH 07/16] Fixes --- .../airflow-bucket/dags/train_and_deploy.py | 28 +++++++++---- mwaa-etl-processor/lambda/main.py | 9 +++-- mwaa-etl-processor/run.sh | 40 ++++++++++++++----- 3 files changed, 56 insertions(+), 21 deletions(-) diff --git a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py index aa2cb03..b6f745b 100644 --- a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py +++ b/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py @@ -1,15 +1,14 @@ +import pickle +import io +from datetime import datetime, timedelta + from airflow.decorators import dag, task -from airflow.utils.dates import days_ago from airflow.models import Variable from airflow.providers.amazon.aws.hooks.s3 import S3Hook from airflow.providers.amazon.aws.hooks.lambda_function import LambdaHook -from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook - -import pickle -import io from pydantic import BaseModel -from typing import List, Dict, Optional +from typing import List, Dict import hashlib import pandas as pd @@ -20,7 +19,22 @@ class DatasetSpec(BaseModel): feature_columns: List[str] target_column: str -@dag(schedule_interval=None, start_date=days_ago(1), tags=['example']) +default_args = { + 'owner': 'airflow', + 'depends_on_past': False, + 'email_on_failure': False, + 'email_on_retry': False, + 'retries': 1, + 'retry_delay': timedelta(minutes=5), +} + +@dag( + default_args=default_args, + schedule_interval="@once", + start_date=datetime(2021, 1, 1), + catchup=False, + tags=["ml-classifier"] +) def train_and_deploy_classifier_model(): @task def retrieve_dataset(): diff --git a/mwaa-etl-processor/lambda/main.py b/mwaa-etl-processor/lambda/main.py index 2e78b87..f9218ec 100644 --- a/mwaa-etl-processor/lambda/main.py +++ b/mwaa-etl-processor/lambda/main.py @@ -4,8 +4,11 @@ import json def lambda_handler(event, context): - # Access the JSON payload - sample = event.get("sample") + print(f"Received event: {json.dumps(event)}") + + # Retrieve JSON from body + payload = json.loads(event["body"]) + sample = payload.get("sample") # Specify the S3 bucket and object key bucket_name = os.environ["MODEL_BUCKET_NAME"] @@ -22,7 +25,7 @@ def lambda_handler(event, context): model = pickle.loads(model_data) # Run inference. 
- print(f"Running inference on input data: {sample}") + print(f"Running inference on sample: {sample}") index_prediction = int(model.predict(sample)[0]) print(f"Prediction index: {index_prediction}") prediction = model.classes_names[index_prediction] diff --git a/mwaa-etl-processor/run.sh b/mwaa-etl-processor/run.sh index f8f5e6e..de75388 100755 --- a/mwaa-etl-processor/run.sh +++ b/mwaa-etl-processor/run.sh @@ -1,6 +1,16 @@ #!/bin/bash set -euxo pipefail +DATASET_CONFIG=$(cat < Date: Tue, 9 Jul 2024 03:24:26 +0300 Subject: [PATCH 08/16] Change name of sample app --- .gitignore | 3 +-- .../Makefile | 0 .../airflow-bucket/dags/train_and_deploy.py | 0 .../airflow-bucket/requirements.txt | 0 .../lambda/main.py | 0 .../proxy.conf | 0 .../run.sh | 8 ++++---- 7 files changed, 5 insertions(+), 6 deletions(-) rename {mwaa-etl-processor => mwaa-ml-classifier-deployment}/Makefile (100%) rename {mwaa-etl-processor => mwaa-ml-classifier-deployment}/airflow-bucket/dags/train_and_deploy.py (100%) rename {mwaa-etl-processor => mwaa-ml-classifier-deployment}/airflow-bucket/requirements.txt (100%) rename {mwaa-etl-processor => mwaa-ml-classifier-deployment}/lambda/main.py (100%) rename {mwaa-etl-processor => mwaa-ml-classifier-deployment}/proxy.conf (100%) rename {mwaa-etl-processor => mwaa-ml-classifier-deployment}/run.sh (92%) diff --git a/.gitignore b/.gitignore index 22542c2..e84db24 100644 --- a/.gitignore +++ b/.gitignore @@ -29,5 +29,4 @@ glacier-s3-select/*result.csv azure-functions/.python_packages/ */logs.txt -mwaa-etl-processor/.lambda-env -mwaa-etl-processor/.pkgs +mwaa-ml-classifier-deployment/.pkgs diff --git a/mwaa-etl-processor/Makefile b/mwaa-ml-classifier-deployment/Makefile similarity index 100% rename from mwaa-etl-processor/Makefile rename to mwaa-ml-classifier-deployment/Makefile diff --git a/mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py b/mwaa-ml-classifier-deployment/airflow-bucket/dags/train_and_deploy.py similarity index 100% rename from mwaa-etl-processor/airflow-bucket/dags/train_and_deploy.py rename to mwaa-ml-classifier-deployment/airflow-bucket/dags/train_and_deploy.py diff --git a/mwaa-etl-processor/airflow-bucket/requirements.txt b/mwaa-ml-classifier-deployment/airflow-bucket/requirements.txt similarity index 100% rename from mwaa-etl-processor/airflow-bucket/requirements.txt rename to mwaa-ml-classifier-deployment/airflow-bucket/requirements.txt diff --git a/mwaa-etl-processor/lambda/main.py b/mwaa-ml-classifier-deployment/lambda/main.py similarity index 100% rename from mwaa-etl-processor/lambda/main.py rename to mwaa-ml-classifier-deployment/lambda/main.py diff --git a/mwaa-etl-processor/proxy.conf b/mwaa-ml-classifier-deployment/proxy.conf similarity index 100% rename from mwaa-etl-processor/proxy.conf rename to mwaa-ml-classifier-deployment/proxy.conf diff --git a/mwaa-etl-processor/run.sh b/mwaa-ml-classifier-deployment/run.sh similarity index 92% rename from mwaa-etl-processor/run.sh rename to mwaa-ml-classifier-deployment/run.sh index de75388..ffd044d 100755 --- a/mwaa-etl-processor/run.sh +++ b/mwaa-ml-classifier-deployment/run.sh @@ -34,12 +34,12 @@ awslocal secretsmanager create-secret \ # Create the Lambda zip files rm -rf .pkgs lambda/deploy_lambda.zip -docker rm -f mwaa-etl-processor-build-pip-pkgs -docker run -it --name mwaa-etl-processor-build-pip-pkgs \ +docker rm -f mwaa-ml-classifier-deployment-build-pip-pkgs +docker run -it --name mwaa-ml-classifier-deployment-build-pip-pkgs \ --entrypoint bash \ localstack/lambda-python:3.9 \ -c "mkdir -p 
/var/tmp/.pkgs && pip install scikit-learn==1.3.2 --target /var/tmp/.pkgs" -docker cp mwaa-etl-processor-build-pip-pkgs:/var/tmp/.pkgs/ . +docker cp mwaa-ml-classifier-deployment-build-pip-pkgs:/var/tmp/.pkgs/ . cd .pkgs && zip -r9 ../lambda/deploy_lambda.zip . && cd .. cd lambda && zip -g deploy_lambda.zip * && cd .. @@ -64,7 +64,7 @@ awslocal s3 cp --recursive airflow-bucket s3://airflow # Clean up rm -rf .pkgs lambda/deploy_lambda.zip -docker rm -f mwaa-etl-processor-build-pip-pkgs +docker rm -f mwaa-ml-classifier-deployment-build-pip-pkgs # Wait for the DAG to be processed # Wait until a Lambda function with the "ml-model-" prefix is created From b4dbe3722073419febe0d54fccbe7604c58c7ebe Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Tue, 9 Jul 2024 03:46:45 +0300 Subject: [PATCH 09/16] Lock scikit-learn for Airflow --- mwaa-ml-classifier-deployment/airflow-bucket/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwaa-ml-classifier-deployment/airflow-bucket/requirements.txt b/mwaa-ml-classifier-deployment/airflow-bucket/requirements.txt index f68086b..193a330 100644 --- a/mwaa-ml-classifier-deployment/airflow-bucket/requirements.txt +++ b/mwaa-ml-classifier-deployment/airflow-bucket/requirements.txt @@ -1,5 +1,5 @@ apache-airflow[amazon]==2.8.1 pandas pydantic -scikit-learn +scikit-learn==1.3.2 scipy From 1c3e2dd56aa71f8fdded886803aa6cb7f5fa87f2 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Thu, 11 Jul 2024 21:13:23 +0300 Subject: [PATCH 10/16] Add README.md file --- mwaa-ml-classifier-deployment/README.md | 55 +++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 mwaa-ml-classifier-deployment/README.md diff --git a/mwaa-ml-classifier-deployment/README.md b/mwaa-ml-classifier-deployment/README.md new file mode 100644 index 0000000..7cd92af --- /dev/null +++ b/mwaa-ml-classifier-deployment/README.md @@ -0,0 +1,55 @@ +# LocalStack Demo: Training and deploying an ML classifier with MWAA + +App that creates a DAG inside MWAA that takes a dataset, and builds a classifier model based on the feature columns and target column. A classifier is trained with each of three algorithms (SVM, Logistic Regression, and Decision Tree), and the one with the best accuracy is selected. Finally, the model is deployed as a Lambda function. + +To keep it simple, no external dependencies (custom Docker images) were added, and the training happens locally in Airflow. Following that, the model gets deployed as a Lambda function. While not ideal, as usually all workloads are supposed to be off-loaded (i.e. with SageMaker, or EC2 / AWS Batch jobs), but easily trained models can still technically be run with the local executor. + +The only input the dag has is a `airflow/variables/dataset_spec` secret in `SecretsManager` service, like the following one: + +```json +{ + "url": "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv", + "name": "iris.data", + "feature_columns": ["sepal.length", "sepal.width", "petal.length", "petal.width"], + "target_column": "variety" +} +``` + +## Prerequisites + +* LocalStack +* Docker +* Python 3.8+ / Python Pip +* `make` +* `jq` +* `curl` +* `awslocal` + +## Installing + +To install the dependencies: + +```shell +make install +``` + +## Starting LocalStack + +Make sure that LocalStack is started: + +```shell +LOCALSTACK_AUTH_TOKEN=... 
make start +``` + +## Running + +Run the sample demo script: + +```shell +make run +``` + +## License + +This code is available under the Apache 2.0 license. + From da8448ee13b7a824f6e86cc50871ac91055fd6e0 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Thu, 11 Jul 2024 21:14:33 +0300 Subject: [PATCH 11/16] Capitalize dag word --- mwaa-ml-classifier-deployment/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwaa-ml-classifier-deployment/README.md b/mwaa-ml-classifier-deployment/README.md index 7cd92af..4282e60 100644 --- a/mwaa-ml-classifier-deployment/README.md +++ b/mwaa-ml-classifier-deployment/README.md @@ -4,7 +4,7 @@ App that creates a DAG inside MWAA that takes a dataset, and builds a classifier To keep it simple, no external dependencies (custom Docker images) were added, and the training happens locally in Airflow. Following that, the model gets deployed as a Lambda function. While not ideal, as usually all workloads are supposed to be off-loaded (i.e. with SageMaker, or EC2 / AWS Batch jobs), but easily trained models can still technically be run with the local executor. -The only input the dag has is a `airflow/variables/dataset_spec` secret in `SecretsManager` service, like the following one: +The only input the DAG has is a `airflow/variables/dataset_spec` secret in `SecretsManager` service, like the following one: ```json { From 00c3de280d9c50684a14e92f0c3959650bd8ecb8 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Sat, 13 Jul 2024 15:54:10 +0300 Subject: [PATCH 12/16] Add missing env var to makefile target --- mwaa-ml-classifier-deployment/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwaa-ml-classifier-deployment/Makefile b/mwaa-ml-classifier-deployment/Makefile index d19285e..eb049a8 100644 --- a/mwaa-ml-classifier-deployment/Makefile +++ b/mwaa-ml-classifier-deployment/Makefile @@ -17,7 +17,7 @@ start: ## Start LocalStack echo "Creating localstack network"; \ docker network create --attachable localstack; \ fi - DOCKER_FLAGS='-e LS_LOG=trace --network localstack --name=localhost.localstack.cloud -e GATEWAY_SERVER=hypercorn' localstack start -d + EXTRA_CORS_ALLOWED_ORIGINS='*' DOCKER_FLAGS='-e LS_LOG=trace --network localstack --name=localhost.localstack.cloud -e GATEWAY_SERVER=hypercorn' localstack start -d stop: ## Stop LocalStack @echo From 87b038340e030d7d262a1fcd42a6c52fdc3a2b48 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Mon, 12 Aug 2024 15:12:48 +0300 Subject: [PATCH 13/16] Fix make start command when using sh --- mwaa-ml-classifier-deployment/Makefile | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mwaa-ml-classifier-deployment/Makefile b/mwaa-ml-classifier-deployment/Makefile index eb049a8..c6e02d1 100644 --- a/mwaa-ml-classifier-deployment/Makefile +++ b/mwaa-ml-classifier-deployment/Makefile @@ -13,10 +13,14 @@ run: ## Run MWAA ETL Processor ./run.sh start: ## Start LocalStack - @if [ -z `docker network ls --filter name=localstack -q` ]; then \ - echo "Creating localstack network"; \ - docker network create --attachable localstack; \ - fi + @echo "Checking existing networks:" + @docker network ls --filter name=localstack -q | xargs + @if [ -z "$$(docker network ls --filter name=localstack -q | xargs)" ]; then \ + echo "Creating localstack network"; \ + docker network create --attachable localstack; \ + else \ + echo "LocalStack network already exists"; \ + fi EXTRA_CORS_ALLOWED_ORIGINS='*' DOCKER_FLAGS='-e LS_LOG=trace --network localstack 
--name=localhost.localstack.cloud -e GATEWAY_SERVER=hypercorn' localstack start -d stop: ## Stop LocalStack From b728cade011be015e22b624d7aa86023c07f4a22 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Mon, 12 Aug 2024 17:11:15 +0300 Subject: [PATCH 14/16] Improve readme doc --- mwaa-ml-classifier-deployment/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mwaa-ml-classifier-deployment/README.md b/mwaa-ml-classifier-deployment/README.md index 4282e60..07c5489 100644 --- a/mwaa-ml-classifier-deployment/README.md +++ b/mwaa-ml-classifier-deployment/README.md @@ -49,6 +49,14 @@ Run the sample demo script: make run ``` +## Proxying Secrets + +To proxy Airflow variables to upstream AWS, you can use the [proxy.conf](proxy.conf) config file to only use upstream AWS secrets as the Airflow variables. That's because we're sourcing the Airflow variables from the AWS Secrets backend. This assumes you have the `localstack-extension-aws-replicator` extension installed onto the LocalStack instance: https://pypi.org/project/localstack-extension-aws-replicator/. + +```shell +localstack aws proxy -c proxy.conf --container +``` + ## License This code is available under the Apache 2.0 license. From b8b350eea947abad1272fb493aa7c6f265df771c Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Tue, 13 Aug 2024 13:41:04 +0300 Subject: [PATCH 15/16] Use tab indentation --- mwaa-ml-classifier-deployment/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mwaa-ml-classifier-deployment/Makefile b/mwaa-ml-classifier-deployment/Makefile index c6e02d1..3e3a169 100644 --- a/mwaa-ml-classifier-deployment/Makefile +++ b/mwaa-ml-classifier-deployment/Makefile @@ -14,13 +14,13 @@ run: ## Run MWAA ETL Processor start: ## Start LocalStack @echo "Checking existing networks:" - @docker network ls --filter name=localstack -q | xargs - @if [ -z "$$(docker network ls --filter name=localstack -q | xargs)" ]; then \ - echo "Creating localstack network"; \ - docker network create --attachable localstack; \ - else \ - echo "LocalStack network already exists"; \ - fi + @docker network ls --filter name=localstack -q | xargs + @if [ -z "$$(docker network ls --filter name=localstack -q | xargs)" ]; then \ + echo "Creating localstack network"; \ + docker network create --attachable localstack; \ + else \ + echo "LocalStack network already exists"; \ + fi EXTRA_CORS_ALLOWED_ORIGINS='*' DOCKER_FLAGS='-e LS_LOG=trace --network localstack --name=localhost.localstack.cloud -e GATEWAY_SERVER=hypercorn' localstack start -d stop: ## Stop LocalStack From 6a629fb6ed66fd984e8bb0068e4f9deee348da16 Mon Sep 17 00:00:00 2001 From: Robert - Localstack Date: Tue, 13 Aug 2024 14:08:52 +0300 Subject: [PATCH 16/16] Remove if branch --- mwaa-ml-classifier-deployment/Makefile | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mwaa-ml-classifier-deployment/Makefile b/mwaa-ml-classifier-deployment/Makefile index 3e3a169..21e38e7 100644 --- a/mwaa-ml-classifier-deployment/Makefile +++ b/mwaa-ml-classifier-deployment/Makefile @@ -13,14 +13,9 @@ run: ## Run MWAA ETL Processor ./run.sh start: ## Start LocalStack - @echo "Checking existing networks:" - @docker network ls --filter name=localstack -q | xargs - @if [ -z "$$(docker network ls --filter name=localstack -q | xargs)" ]; then \ - echo "Creating localstack network"; \ - docker network create --attachable localstack; \ - else \ - echo "LocalStack network already exists"; \ - fi + @echo "Attempting to create localstack 
network (if it doesn't already exist):" + @docker network create --attachable localstack || true + @echo "Starting LocalStack:" EXTRA_CORS_ALLOWED_ORIGINS='*' DOCKER_FLAGS='-e LS_LOG=trace --network localstack --name=localhost.localstack.cloud -e GATEWAY_SERVER=hypercorn' localstack start -d stop: ## Stop LocalStack