diff --git a/.github/workflows/markdown-link-check.yaml b/.github/workflows/markdown-link-check.yaml
index 7d2a2f19..f82102d0 100644
--- a/.github/workflows/markdown-link-check.yaml
+++ b/.github/workflows/markdown-link-check.yaml
@@ -20,7 +20,7 @@ jobs:
       - uses: actions/checkout@v3
       - uses: actions/setup-node@v3
         with:
-          node-version: '18.x'
+          node-version: '20.x'
       - name: install markdown-link-check
         run: npm install -g markdown-link-check@3.10.2
       - name: markdown-link-check version
diff --git a/bin/single-new-eks-automode-opensource-observability.ts b/bin/single-new-eks-automode-opensource-observability.ts
new file mode 100644
index 00000000..c2573feb
--- /dev/null
+++ b/bin/single-new-eks-automode-opensource-observability.ts
@@ -0,0 +1,6 @@
+import { configureApp } from '../lib/common/construct-utils';
+import SingleNewEksAutoModeOpenSourcePattern from '../lib/single-new-eks-opensource-observability-pattern/automode';
+
+const app = configureApp();
+
+new SingleNewEksAutoModeOpenSourcePattern(app, 'single-new-eks-automode-opensource');
diff --git a/bin/single-new-eks-awsnative-automode-observability.ts b/bin/single-new-eks-awsnative-automode-observability.ts
new file mode 100644
index 00000000..9bc10caa
--- /dev/null
+++ b/bin/single-new-eks-awsnative-automode-observability.ts
@@ -0,0 +1,6 @@
+import SingleNewEksAutoModeAWSNativeObservabilityPattern from '../lib/single-new-eks-awsnative-automode-observability-pattern';
+import { configureApp } from '../lib/common/construct-utils';
+
+const app = configureApp();
+
+new SingleNewEksAutoModeAWSNativeObservabilityPattern(app, 'single-new-eks-awsnative-automode');
diff --git a/docs/patterns/images/automode-cluster.png b/docs/patterns/images/automode-cluster.png
new file mode 100644
index 00000000..6cae493c
Binary files /dev/null and b/docs/patterns/images/automode-cluster.png differ
diff --git a/docs/patterns/images/automode-containermap.png b/docs/patterns/images/automode-containermap.png
new file mode 100644
index 00000000..bb28d395
Binary files /dev/null and b/docs/patterns/images/automode-containermap.png differ
diff --git a/docs/patterns/images/automode-kubelet.png b/docs/patterns/images/automode-kubelet.png
new file mode 100644
index 00000000..5a9d9494
Binary files /dev/null and b/docs/patterns/images/automode-kubelet.png differ
diff --git a/docs/patterns/images/automode-nodes.png b/docs/patterns/images/automode-nodes.png
new file mode 100644
index 00000000..2a64c4f4
Binary files /dev/null and b/docs/patterns/images/automode-nodes.png differ
diff --git a/docs/patterns/images/automode-ns.png b/docs/patterns/images/automode-ns.png
new file mode 100644
index 00000000..ee00812e
Binary files /dev/null and b/docs/patterns/images/automode-ns.png differ
diff --git a/docs/patterns/images/automode-perfmonitoring.png b/docs/patterns/images/automode-perfmonitoring.png
new file mode 100644
index 00000000..1bca11b1
Binary files /dev/null and b/docs/patterns/images/automode-perfmonitoring.png differ
diff --git a/docs/patterns/images/automode-pods.png b/docs/patterns/images/automode-pods.png
new file mode 100644
index 00000000..7788c2d2
Binary files /dev/null and b/docs/patterns/images/automode-pods.png differ
diff --git a/docs/patterns/images/automode-workload.png b/docs/patterns/images/automode-workload.png
new file mode 100644
index 00000000..31e538c1
Binary files /dev/null and b/docs/patterns/images/automode-workload.png differ
diff --git a/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-automode-opensource-observability.md b/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-automode-opensource-observability.md
new file mode 100644
index 00000000..1b2ffc96
--- /dev/null
+++ b/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-automode-opensource-observability.md
@@ -0,0 +1,381 @@
+# Single Cluster Open Source Observability - EKS Auto Mode
+
+## Architecture
+
+The following figure illustrates the architecture of the pattern we will be deploying: Single EKS Cluster Open Source Observability on an EKS Auto Mode cluster, using open source tooling such as AWS Distro for OpenTelemetry (ADOT), Amazon Managed Service for Prometheus and Amazon Managed Grafana:
+
+![Architecture](../images/CDK_Architecture_diagram.png)
+
+Monitoring Amazon Elastic Kubernetes Service (Amazon EKS) for metrics falls into two categories:
+the control plane and the Amazon EKS nodes (with Kubernetes objects).
+The Amazon EKS control plane consists of control plane nodes that run the Kubernetes software,
+such as etcd and the Kubernetes API server. To read more on the components of an Amazon EKS cluster,
+please read the [service documentation](https://docs.aws.amazon.com/eks/latest/userguide/clusters.html).
+
+### EKS Auto Mode
+
+EKS Auto Mode extends AWS management of Kubernetes clusters beyond the cluster itself, allowing AWS to also set up and manage the infrastructure that enables the smooth operation of your workloads. You can delegate key infrastructure decisions and leverage the expertise of AWS for day-to-day operations. Cluster infrastructure managed by AWS includes many Kubernetes capabilities as core components rather than add-ons, such as compute autoscaling, pod and service networking, application load balancing, cluster DNS, block storage, and GPU support.
+
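+Under the hood, this pattern provisions the Auto Mode cluster through the `AutomodeClusterProvider` from EKS Blueprints. The sketch below is condensed from `lib/single-new-eks-opensource-observability-pattern/automode.ts` in this repository; `ampAddOnProps`, `addOns`, `account`, `region`, `scope`, and `stackId` are built earlier in that file and omitted here for brevity:
+
+```typescript
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+import { ObservabilityBuilder } from '@aws-quickstart/eks-blueprints';
+import * as eks from 'aws-cdk-lib/aws-eks';
+
+// Auto Mode manages compute, networking and storage for you; the provider
+// only needs a Kubernetes version and the built-in node pools to enable.
+const automodeProps: blueprints.AutomodeClusterProviderProps = {
+    version: eks.KubernetesVersion.V1_33,
+    nodePools: ['system', 'general-purpose']
+};
+
+// isAutoModeCluster flags the cluster as Auto Mode for the builder.
+ObservabilityBuilder.builder({ isAutoModeCluster: true })
+    .account(account)
+    .region(region)
+    .withAmpProps(ampAddOnProps)
+    .enableOpenSourcePatternAddOns()
+    .enableControlPlaneLogging()
+    .resourceProvider(ampWorkspaceName, new blueprints.CreateAmpProvider(ampWorkspaceName, ampWorkspaceName))
+    .clusterProvider(new blueprints.AutomodeClusterProvider(automodeProps))
+    .addOns(...addOns)
+    .build(scope, stackId);
+```
+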
+## Objective
+
+- Deploys one production-grade Amazon EKS Auto Mode cluster.
+- Enables control plane logging.
+- Deploys the AWS Distro for OpenTelemetry (ADOT) Operator and Collector for metrics and traces.
+- Collects logs with [AWS for Fluent Bit](https://github.com/aws/aws-for-fluent-bit).
+- Installs Grafana Operator to add AWS data sources and create Grafana dashboards in Amazon Managed Grafana.
+- Installs FluxCD to perform GitOps sync of a Git repo to the EKS cluster. We will use this later to create Grafana dashboards and AWS data sources in Amazon Managed Grafana. You can also use your own Git repo to sync your own Grafana resources such as dashboards and data sources. Please check our One Observability Workshop module, [GitOps with Amazon Managed Grafana](https://catalog.workshops.aws/observability/en-US/aws-managed-oss/gitops-with-amg), to learn more.
+- Installs External Secrets Operator to retrieve and sync the Grafana API keys.
+- Sets up Amazon Managed Grafana dashboards and data sources.
+- Sets up alerts and recording rules with Amazon Managed Service for Prometheus.
+
+## Prerequisites
+
+Ensure that you have installed the following tools on your machine.
+
+1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html)
+2. [kubectl](https://Kubernetes.io/docs/tasks/tools/)
+3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install)
+4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install)
+
+## Deploying
+
+1. Clone your forked repository
+
+```sh
+git clone https://github.com/aws-observability/cdk-aws-observability-accelerator.git
+```
+
+2. Install the AWS CDK Toolkit globally on your machine using
+
+```bash
+npm install -g aws-cdk
+```
+
+3. Amazon Managed Grafana workspace: To visualize the metrics collected, you need an Amazon Managed Grafana workspace. If you have an existing workspace, create the environment variables as described below. To create a new workspace, visit [our supporting example for Grafana](https://aws-observability.github.io/terraform-aws-observability-accelerator/helpers/managed-grafana/).
+
+!!! note
+    For the URL `https://g-xyz.grafana-workspace.us-east-1.amazonaws.com`, the workspace ID would be `g-xyz`
+
+```bash
+export AWS_REGION=<Your AWS Region>
+export COA_AMG_WORKSPACE_ID=g-xyz
+export COA_AMG_ENDPOINT_URL=https://g-xyz.grafana-workspace.us-east-1.amazonaws.com
+```
+
+!!! warning
+    Setting up the environment variables `COA_AMG_ENDPOINT_URL` and `AWS_REGION` is mandatory for successful execution of this pattern.
+
+4. GRAFANA API KEY: Amazon Managed Grafana provides a control plane API for generating Grafana API keys or Service Account Tokens.
+
+=== "v10.4 & v9.4 workspaces"
+
+    ```bash
+    # IMPORTANT NOTE: skip this command if you already have a service token
+    GRAFANA_SA_ID=$(aws grafana create-workspace-service-account \
+        --workspace-id $COA_AMG_WORKSPACE_ID \
+        --grafana-role ADMIN \
+        --name cdk-accelerator-eks \
+        --query 'id' \
+        --output text)
+
+    # creates a new token
+    export AMG_API_KEY=$(aws grafana create-workspace-service-account-token \
+        --workspace-id $COA_AMG_WORKSPACE_ID \
+        --name "grafana-operator-key" \
+        --seconds-to-live 432000 \
+        --service-account-id $GRAFANA_SA_ID \
+        --query 'serviceAccountToken.key' \
+        --output text)
+    ```
+
+=== "v8.4 workspaces"
+
+    ```bash
+    export AMG_API_KEY=$(aws grafana create-workspace-api-key \
+        --key-name "grafana-operator-key" \
+        --key-role "ADMIN" \
+        --seconds-to-live 432000 \
+        --workspace-id $COA_AMG_WORKSPACE_ID \
+        --query key \
+        --output text)
+    ```
+
+5. AWS SSM Parameter Store for the GRAFANA API KEY: Store the new Grafana API key as a secret in AWS SSM Parameter Store. This will be referenced by the Grafana Operator deployment of our solution to access Amazon Managed Grafana from the Amazon EKS cluster.
+
+```bash
+aws ssm put-parameter --name "/cdk-accelerator/grafana-api-key" \
+    --type "SecureString" \
+    --value $AMG_API_KEY \
+    --region $AWS_REGION
+```
+
+6. Install project dependencies by running `npm install` in the main folder of this cloned repository.
+
+7. The dashboard URL settings are expected to be specified in the CDK context, generally in the `cdk.json` file of the current directory or in `~/.cdk.json` in your home directory. The sketch after this list shows how the pattern consumes these values.
+
+Example settings: Update the context in the `cdk.json` file located in the `cdk-aws-observability-accelerator` directory:
+
+```json
+"context": {
+    "fluxRepository": {
+        "name": "grafana-dashboards",
+        "namespace": "grafana-operator",
+        "repository": {
+            "repoUrl": "https://github.com/aws-observability/aws-observability-accelerator",
+            "name": "grafana-dashboards",
+            "targetRevision": "main",
+            "path": "./artifacts/grafana-operator-manifests/eks/infrastructure"
+        },
+        "values": {
+            "GRAFANA_CLUSTER_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/cluster.json",
+            "GRAFANA_KUBELET_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/kubelet.json",
+            "GRAFANA_NSWRKLDS_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/namespace-workloads.json",
+            "GRAFANA_NODEEXP_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/nodeexporter-nodes.json",
+            "GRAFANA_NODES_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/nodes.json",
+            "GRAFANA_WORKLOADS_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/workloads.json",
+            "GRAFANA_KSH_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/ksh.json",
+            "GRAFANA_KCM_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/kcm.json"
+        },
+        "kustomizations": [
+            {
+                "kustomizationPath": "./artifacts/grafana-operator-manifests/eks/infrastructure"
+            }
+        ]
+    }
+}
+```
+
+8. Once all prerequisites are set, you are ready to deploy. Run the following commands from the root of this repository to deploy the stack:
+
+```bash
+make build
+make pattern single-new-eks-automode-opensource-observability deploy
+```
+
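+For reference, here is how the pattern consumes the `fluxRepository` block at synth time; condensed from `lib/single-new-eks-opensource-observability-pattern/automode.ts` (`region`, `ampEndpoint`, and `amgEndpointUrl` are resolved earlier in that file):
+
+```typescript
+import { utils } from '@aws-quickstart/eks-blueprints';
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+
+// "fluxRepository" comes straight from cdk.json; the values map feeds the
+// placeholder variables (AMG/AMP endpoints, dashboard URLs) used by the
+// dashboard manifests that Flux applies.
+const fluxRepository: blueprints.FluxGitRepo = utils.valueFromContext(scope, "fluxRepository", undefined);
+fluxRepository.values!.AMG_AWS_REGION = region;
+fluxRepository.values!.AMP_ENDPOINT_URL = ampEndpoint;
+fluxRepository.values!.AMG_ENDPOINT_URL = amgEndpointUrl;
+```
+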
+## Verify the resources
+
+Run the update-kubeconfig command. You should be able to get the command from the CDK output message.
+
+```bash
+aws eks update-kubeconfig --name single-new-eks-automode-opensource-observability-accelerator --region $AWS_REGION --role-arn arn:aws:iam::xxxxxxxxx:role/single-new-eks-automode-o-singleneweksautomodeopens-82N8N3BMJYYI
+```
+
+Let's verify the resources created by the steps above.
+
+```bash
+kubectl get nodes -o wide
+```
+
+Output:
+
+```console
+NAME                  STATUS   ROLES    AGE    VERSION               INTERNAL-IP   EXTERNAL-IP   OS-IMAGE                                                      KERNEL-VERSION   CONTAINER-RUNTIME
+i-05d9409ef2d7a31e5   Ready    <none>   129m   v1.33.4-eks-e386d34   10.0.2.98     3.147.48.44   Bottlerocket (EKS Auto, Standard) 2025.11.1 (aws-k8s-1.33-standard)   6.12.53   containerd://1.7.28+bottlerocket
+```
+
+Next, let's verify the namespaces in the cluster:
+
+```bash
+kubectl get ns # Output shows all namespaces
+```
+
+Output:
+
+```console
+NAME                            STATUS   AGE
+cert-manager                    Active   2d1h
+default                         Active   2d1h
+external-secrets                Active   2d1h
+flux-system                     Active   2d1h
+grafana-operator                Active   2d1h
+kube-node-lease                 Active   2d1h
+kube-public                     Active   2d1h
+kube-system                     Active   2d1h
+opentelemetry-operator-system   Active   2d1h
+prometheus-node-exporter        Active   2d1h
+```
+
+Next, let's verify all resources of the `grafana-operator` namespace:
+
+```bash
+kubectl get all --namespace=grafana-operator
+```
+
+Output:
+
+```console
+NAME                                    READY   STATUS    RESTARTS   AGE
+pod/grafana-operator-5c9d65bbb9-h9wq2   1/1     Running   0          132m
+
+NAME                                        TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)             AGE
+service/grafana-operator-metrics-service   ClusterIP   172.20.241.65   <none>        9090/TCP,8888/TCP   132m
+
+NAME                               READY   UP-TO-DATE   AVAILABLE   AGE
+deployment.apps/grafana-operator   1/1     1            1           132m
+
+NAME                                          DESIRED   CURRENT   READY   AGE
+replicaset.apps/grafana-operator-5c9d65bbb9   1         1         1       132m
+```
+
+## Visualization
+
+#### 1. Grafana dashboards
+
+Login to your Grafana workspace and navigate to the Dashboards panel. You should see a list of dashboards under the `Observability Accelerator Dashboards` folder.
+
+![Dashboard](../images/All-Dashboards.png)
+
+Open the `Cluster` dashboard and you should be able to view its visualization as shown below:
+
+![Cluster_Dashboard](../images/automode-cluster.png)
+
+Open the `Namespace (Workloads)` dashboard and you should be able to view its visualization as shown below:
+
+![Namespace_Dashboard](../images/automode-ns.png)
+
+Open the `Node (Pods)` dashboard and you should be able to view its visualization as shown below:
+
+![Node_Dashboard](../images/automode-pods.png)
+
+Open the `Workload` dashboard and you should be able to view its visualization as shown below:
+
+![Workload_Dashboard](../images/automode-workload.png)
+
+Open the `Kubelet` dashboard and you should be able to view its visualization as shown below:
+
+![Kubelet_Dashboard](../images/automode-kubelet.png)
+
+Open the `Nodes` dashboard and you should be able to view its visualization as shown below:
+
+![Nodes_Dashboard](../images/automode-nodes.png)
+
+Open the `EKS Scheduler` dashboard and you should be able to view its visualization as shown below:
+
+![EKS_Scheduler](../images/Ksh-Metrics.png)
+
+Open the `EKS Controller Manager` dashboard and you should be able to view its visualization as shown below:
+
+![EKS_Control_Manager](../images/KCM-Metrics.png)
+
+To view all the dashboards as Kubernetes objects in the cluster, run:
+
+```bash
+kubectl get grafanadashboards -A
+```
+
+```console
+NAMESPACE          NAME                                   AGE
+grafana-operator   cluster-grafanadashboard               138m
+grafana-operator   java-grafanadashboard                  143m
+grafana-operator   kubelet-grafanadashboard               13h
+grafana-operator   namespace-workloads-grafanadashboard   13h
+grafana-operator   nginx-grafanadashboard                 134m
+grafana-operator   node-exporter-grafanadashboard         13h
+grafana-operator   nodes-grafanadashboard                 13h
+grafana-operator   workloads-grafanadashboard             13h
+```
+
+You can inspect more details per dashboard using this command:
+
+```bash
+kubectl describe grafanadashboards cluster-grafanadashboard -n grafana-operator
+```
+
+Grafana Operator and Flux work together to keep your dashboards synchronized with Git, so if you delete a dashboard by accident, it will be re-provisioned automatically.
+
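+The ordering that makes this work is set in the pattern code: the Grafana Operator add-on is marked `ordered` so its installation is sequenced ahead of the Flux sync that delivers the `GrafanaDashboard` resources. Condensed from `lib/single-new-eks-opensource-observability-pattern/automode.ts` (`fluxRepository` and `GrafanaOperatorSecretAddon` are defined in that file):
+
+```typescript
+// Install Grafana Operator in order, before the Flux sync that creates the
+// GrafanaDashboard custom resources it reconciles.
+Reflect.defineMetadata("ordered", true, blueprints.addons.GrafanaOperatorAddon);
+
+const addOns: Array<blueprints.ClusterAddOn> = [
+    new blueprints.addons.XrayAdotAddOn(),
+    new blueprints.addons.FluxCDAddOn({ "repositories": [fluxRepository] }),
+    new GrafanaOperatorSecretAddon(),
+];
+```
+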
+## Viewing Logs
+
+Refer to the "Using CloudWatch Logs as a data source in Grafana" section in [Logging](../../logs.md).
+
+## Teardown
+
+You can teardown the whole CDK stack with the following command:
+
+```bash
+make pattern single-new-eks-automode-opensource-observability destroy
+```
+
+## Troubleshooting
+
+### 1. Grafana dashboards missing or Grafana API key expired
+
+In case you don't see the Grafana dashboards in your Amazon Managed Grafana console, check the logs of your Grafana Operator pod using the commands below:
+
+```bash
+kubectl get pods -n grafana-operator
+```
+
+Output:
+
+```console
+NAME                                READY   STATUS    RESTARTS   AGE
+grafana-operator-866d4446bb-nqq5c   1/1     Running   0          3h17m
+```
+
+```bash
+kubectl logs grafana-operator-866d4446bb-nqq5c -n grafana-operator
+```
+
+Output:
+
+```console
+1.6857285045556655e+09 ERROR error reconciling datasource {"controller": "grafanadatasource", "controllerGroup": "grafana.integreatly.org", "controllerKind": "GrafanaDatasource", "GrafanaDatasource": {"name":"grafanadatasource-sample-amp","namespace":"grafana-operator"}, "namespace": "grafana-operator", "name": "grafanadatasource-sample-amp", "reconcileID": "72cfd60c-a255-44a1-bfbd-88b0cbc4f90c", "datasource": "grafanadatasource-sample-amp", "grafana": "external-grafana", "error": "status: 401, body: {\"message\":\"Expired API key\"}\n"}
+github.com/grafana-operator/grafana-operator/controllers.(*GrafanaDatasourceReconciler).Reconcile
+```
+
+If you observe the above `Expired API key` error in the logs, your Grafana API key has expired. Use the following operational procedure to update your `grafana-api-key`:
+
+- First, let's create a new Grafana API key.
+
+=== "v10.4 & v9.4 workspaces"
+
+    ```bash
+    # IMPORTANT NOTE: skip this command if you already have a service token
+    GRAFANA_SA_ID=$(aws grafana create-workspace-service-account \
+        --workspace-id $COA_AMG_WORKSPACE_ID \
+        --grafana-role ADMIN \
+        --name cdk-accelerator-eks \
+        --query 'id' \
+        --output text)
+
+    # creates a new token
+    export GO_AMG_API_KEY=$(aws grafana create-workspace-service-account-token \
+        --workspace-id $COA_AMG_WORKSPACE_ID \
+        --name "grafana-operator-key" \
+        --seconds-to-live 432000 \
+        --service-account-id $GRAFANA_SA_ID \
+        --query 'serviceAccountToken.key' \
+        --output text)
+    ```
+
+=== "v8.4 workspaces"
+
+    ```bash
+    export GO_AMG_API_KEY=$(aws grafana create-workspace-api-key \
+        --key-name "grafana-operator-key" \
+        --key-role "ADMIN" \
+        --seconds-to-live 432000 \
+        --workspace-id $COA_AMG_WORKSPACE_ID \
+        --query key \
+        --output text)
+    ```
+
+- Finally, update the Grafana API key secret in AWS SSM Parameter Store with the new key:
+
+```bash
+aws ssm put-parameter --name "/cdk-accelerator/grafana-api-key" \
+    --type "SecureString" \
+    --value $GO_AMG_API_KEY \
+    --region $AWS_REGION \
+    --overwrite
+```
+
+- If the issue persists, you can force the synchronization by deleting the `externalsecret` Kubernetes object.
+
+```bash
+kubectl delete externalsecret/external-secrets-sm -n grafana-operator
+```
+
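+For reference, the Grafana credentials reach the cluster through External Secrets Operator: the pattern's `GrafanaOperatorSecretAddon` applies a `ClusterSecretStore` backed by SSM Parameter Store plus an `ExternalSecret` for the API key. A minimal sketch of the store manifest; the `spec` fields here are illustrative, not copied from the add-on:
+
+```typescript
+import { Construct } from 'constructs';
+import * as eks from 'aws-cdk-lib/aws-eks';
+
+// Illustrative only: roughly the ClusterSecretStore the add-on applies so
+// External Secrets Operator can read /cdk-accelerator/grafana-api-key.
+export function addGrafanaSecretStore(scope: Construct, cluster: eks.ICluster, region: string) {
+    return new eks.KubernetesManifest(scope, "ClusterSecretStore", {
+        cluster: cluster,
+        manifest: [{
+            apiVersion: "external-secrets.io/v1", // bumped from v1beta1 in this PR
+            kind: "ClusterSecretStore",
+            metadata: { name: "ssm-parameter-store" },
+            spec: {
+                provider: {
+                    // assumed provider shape for the SSM Parameter Store backend
+                    aws: { service: "ParameterStore", region: region }
+                }
+            }
+        }]
+    });
+}
+```
+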
diff --git a/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-awsnative-automode-observability.md b/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-awsnative-automode-observability.md
new file mode 100644
index 00000000..6ec0d627
--- /dev/null
+++ b/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-awsnative-automode-observability.md
@@ -0,0 +1,190 @@
+# Single Cluster AWS Native Observability - Auto Mode
+
+## Architecture
+
+The following figure illustrates the architecture of the pattern we will be deploying: Single EKS Cluster AWS Native Observability on an EKS Auto Mode cluster, using AWS native tools such as CloudWatch Logs and Container Insights.
+
+![Architecture](../images/cloud-native-arch.png)
+
+This example makes use of CloudWatch Container Insights as a visualization and metric-aggregation layer.
+Amazon CloudWatch Container Insights helps customers collect, aggregate, and summarize metrics and logs from containerized applications and microservices. Metrics data is collected as performance log events using the embedded metric format. These performance log events use a structured JSON schema that enables high-cardinality data to be ingested and stored at scale. From this data, CloudWatch creates aggregated metrics at the cluster, node, pod, task, and service level as CloudWatch metrics. The metrics that Container Insights collects are available in CloudWatch automatic dashboards.
+
+By combining Container Insights and CloudWatch Logs, we are able to provide a foundation for Amazon Elastic Kubernetes Service (Amazon EKS) observability. Monitoring Amazon EKS for metrics falls into two categories:
+the control plane and the Amazon EKS nodes (with Kubernetes objects).
+The Amazon EKS control plane consists of control plane nodes that run the Kubernetes software,
+such as etcd and the Kubernetes API server. To read more on the components of an Amazon EKS cluster,
+please read the [service documentation](https://docs.aws.amazon.com/eks/latest/userguide/clusters.html).
+
+## Objective
+
+- Deploys one production-grade Amazon EKS Auto Mode cluster.
+- Enables control plane logging.
+- Deploys the AWS Distro for OpenTelemetry (ADOT) Operator and Collector.
+- Collects logs with [AWS for Fluent Bit](https://github.com/aws/aws-for-fluent-bit) and CloudWatch Logs.
+- Enables CloudWatch Container Insights.
+- Installs Prometheus Node Exporter for infrastructure metrics.
+
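+The whole stack is only a few lines of CDK; the sketch below is condensed from `lib/single-new-eks-awsnative-automode-observability-pattern/index.ts` in this repository (`account`, `region`, `scope`, and `stackId` are resolved earlier in that file):
+
+```typescript
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+import { ObservabilityBuilder } from '@aws-quickstart/eks-blueprints';
+import { KubernetesVersion } from 'aws-cdk-lib/aws-eks';
+
+// Auto Mode cluster provider with the two built-in node pools enabled.
+const cluster = new blueprints.AutomodeClusterProvider({
+    version: KubernetesVersion.V1_33,
+    nodePools: ['system', 'general-purpose']
+});
+
+// enableNativePatternAddOns() wires up the AWS-native observability add-ons
+// listed in the objectives above on top of the Auto Mode cluster.
+ObservabilityBuilder.builder({ isAutoModeCluster: true })
+    .account(account)
+    .region(region)
+    .clusterProvider(cluster)
+    .enableNativePatternAddOns()
+    .enableControlPlaneLogging()
+    .build(scope, stackId);
+```
+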
+## Prerequisites
+
+Ensure that you have installed the following tools on your machine.
+
+1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html)
+2. [kubectl](https://Kubernetes.io/docs/tasks/tools/)
+3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install)
+4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install)
+
+## Deploying
+
+1. Clone your forked repository
+
+```sh
+git clone https://github.com/aws-observability/cdk-aws-observability-accelerator.git
+```
+
+2. Install the AWS CDK Toolkit globally on your machine using
+
+```bash
+npm install -g aws-cdk
+```
+
+3. Install project dependencies by running `npm install` in the main folder of this cloned repository
+
+4. Once all prerequisites are set, you are ready to deploy. Run the following commands from the root of this repository to deploy the stack:
+
+```bash
+make build
+make pattern single-new-eks-awsnative-automode-observability deploy
+```
+
+## Verify the resources
+
+Run the update-kubeconfig command. You should be able to get the command from the CDK output message.
+
+```bash
+aws eks update-kubeconfig --name single-new-eks-awsnative-automode-observability-accelerator --region <Your AWS Region> --role-arn arn:aws:iam::xxxxxxxxx:role/single-new-eks-awsnative--singleneweksawsnativeauto-JN3QM2KMBNCO
+```
+
+Let's verify the resources created by the steps above.
+
+```bash
+kubectl get nodes -o wide
+```
+
+Output:
+
+```console
+NAME                  STATUS   ROLES    AGE   VERSION               INTERNAL-IP   EXTERNAL-IP    OS-IMAGE                                                      KERNEL-VERSION   CONTAINER-RUNTIME
+i-08431fdb27179b448   Ready    <none>   26m   v1.33.4-eks-e386d34   10.0.18.245   3.17.164.102   Bottlerocket (EKS Auto, Standard) 2025.11.8 (aws-k8s-1.33-standard)   6.12.53   containerd://1.7.28+bottlerocket
+```
+
+Next, let's verify the namespaces in the cluster:
+
+```bash
+kubectl get ns # Output shows all namespaces
+```
+
+Output:
+
+```console
+NAME                       STATUS   AGE
+amazon-cloudwatch          Active   5h36m
+cert-manager               Active   5h36m
+default                    Active   5h46m
+kube-node-lease            Active   5h46m
+kube-public                Active   5h46m
+kube-system                Active   5h46m
+prometheus-node-exporter   Active   5h36m
+```
+
+## Visualization
+
+Navigate to CloudWatch and go to "Container Insights".
+
+View the Container Map:
+
+![Container_Map](../images/automode-containermap.png)
+
+View the Performance Monitoring Dashboard:
+
+![Perf_Dashboard](../images/automode-perfmonitoring.png)
+
+## Viewing Logs
+
+Refer to the "Using CloudWatch Logs Insights to Query Logs" section in [Logging](../../logs.md).
+
+## Enabling Application Signals for your services
+
+Amazon CloudWatch Application Signals is an integrated, native APM experience
+in AWS. CloudWatch Application Signals supports **Java**, **Python**, **.NET**, and **Node.js** applications
+running on your Amazon EKS Auto Mode cluster.
+
+If you haven't enabled Application Signals in this account yet, follow steps 1-4 in the [AWS documentation](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Application-Monitoring-Sections.html).
+
+Next, you have to update your application to
+configure application metrics and trace sampling. For this, you must add an
+annotation to a manifest YAML in your cluster. Adding this annotation
+auto-instruments the application to send metrics, traces, and logs to
+Application Signals. You have two options for the annotation:
+
+1. **Annotate Workload** auto-instruments a single workload in the cluster.
+    - Paste the snippet below into the pod template section of the workload manifest, keeping only the annotations for your application's language.
+
+    ```
+    apiVersion: apps/v1
+    kind: Deployment
+    spec:
+      template:
+        metadata:
+          # add these annotations under the pod template metadata of the service's deployment YAML you want to monitor
+          annotations:
+            instrumentation.opentelemetry.io/inject-java: "true"
+            instrumentation.opentelemetry.io/inject-python: "true"
+            instrumentation.opentelemetry.io/inject-dotnet: "true"
+            # for .NET, set ONE runtime value below:
+            instrumentation.opentelemetry.io/otel-dotnet-auto-runtime: "linux-x64" # for generic Linux glibc based images; this is the default value and can be omitted
+            # instrumentation.opentelemetry.io/otel-dotnet-auto-runtime: "linux-musl-x64" # for Alpine Linux (linux-musl-x64) based images
+            instrumentation.opentelemetry.io/inject-nodejs: "true"
+    ...
+    ```
+
+    - In your terminal, enter `kubectl apply -f your_deployment_yaml` to apply the change.
+
+2. **Annotate Namespace** auto-instruments all workloads deployed in the selected namespace.
+    - Paste the snippet below into the metadata section of the namespace manifest.
+
+    ```
+    apiVersion: v1
+    kind: Namespace
+    metadata:
+      name: <your-namespace>
+      # add these annotations under metadata of the namespace manifest you want to monitor
+      annotations:
+        instrumentation.opentelemetry.io/inject-java: "true"
+        instrumentation.opentelemetry.io/inject-python: "true"
+        instrumentation.opentelemetry.io/inject-dotnet: "true"
+        instrumentation.opentelemetry.io/inject-nodejs: "true"
+    ...
+    ```
+
+    - In your terminal, enter `kubectl apply -f your_namespace_yaml` to apply the change.
+    - In your terminal, enter a command to restart all pods in the namespace. An example command to restart deployment workloads is `kubectl rollout restart deployment -n namespace_name`
+
+## Visualization of CloudWatch Application Signals data
+
+After enabling your application to pass metrics and traces by following
+[the steps provided above](#enabling-application-signals-for-your-services),
+open your Amazon CloudWatch console in the same region as your EKS cluster,
+then from the left-hand side choose `Application Signals -> Services` and you
+will see the metrics shown on the sample dashboard below:
+
+![APP_Signals_Services](../images/App-signals/app-signal-services.png)
+
+![APP_Signals_Dependencies](../images/App-signals/app-signal-ops-deps.png)
+
+## Teardown
+
+You can teardown the whole CDK stack with the following command:
+
+```bash
+make pattern single-new-eks-awsnative-automode-observability destroy
+```
diff --git a/lib/common/resources/otel-collector-config.yml b/lib/common/resources/otel-collector-config.yml
index 33ce3953..acbae790 100644
--- a/lib/common/resources/otel-collector-config.yml
+++ b/lib/common/resources/otel-collector-config.yml
@@ -9,7 +9,7 @@ metadata:
   namespace: "{{namespace}}"
 spec:
   mode: "{{deploymentMode}}"
-  image: public.ecr.aws/aws-observability/aws-otel-collector:v0.37.0
+  image: public.ecr.aws/aws-observability/aws-otel-collector:v0.45.1
   resources:
     limits:
       cpu: "1"
@@ -1890,7 +1890,7 @@ spec:
         endpoint: "{{remoteWriteEndpoint}}"
         auth:
           authenticator: sigv4auth
-      logging:
-        loglevel: info
+      debug:
+        verbosity: normal
 {{ start enableAdotContainerLogsExporter }}
       awscloudwatchlogs:
@@ -1914,7 +1914,7 @@ spec:
     pipelines:
       metrics:
         receivers: [prometheus]
-        exporters: [logging, prometheusremotewrite]
+        exporters: [debug, prometheusremotewrite]
 {{ start enableAdotContainerLogsPipeline }}
       logs:
         receivers: [filelog]
diff --git a/lib/existing-eks-opensource-observability-pattern/grafanaoperatorsecretaddon.ts b/lib/existing-eks-opensource-observability-pattern/grafanaoperatorsecretaddon.ts
index 0c222009..a5811ecf 100644
--- a/lib/existing-eks-opensource-observability-pattern/grafanaoperatorsecretaddon.ts
+++ b/lib/existing-eks-opensource-observability-pattern/grafanaoperatorsecretaddon.ts
@@ -13,7 +13,7 @@ export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
             cluster: cluster,
             manifest: [
                 {
-                    apiVersion: "external-secrets.io/v1beta1",
+                    apiVersion: "external-secrets.io/v1",
                     kind: "ClusterSecretStore",
                     metadata: {
                         name: "ssm-parameter-store",
@@ -43,7 +43,7 @@ export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
             cluster: cluster,
             manifest: [
                 {
-                    apiVersion: "external-secrets.io/v1beta1",
+                    apiVersion: "external-secrets.io/v1",
                     kind: "ExternalSecret",
                     metadata: {
                         name: "external-grafana-admin-credentials",
@@ -72,4 +72,4 @@ export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
         externalSecret.node.addDependency(secretStore);
         return Promise.resolve(secretStore);
     }
-}
\ No newline at end of file
+}
diff --git a/lib/multi-acc-new-eks-mixed-observability-pattern/grafana-operator-secret-addon.ts b/lib/multi-acc-new-eks-mixed-observability-pattern/grafana-operator-secret-addon.ts
index a06d358e..f43a8ab8 100644
--- a/lib/multi-acc-new-eks-mixed-observability-pattern/grafana-operator-secret-addon.ts
+++ b/lib/multi-acc-new-eks-mixed-observability-pattern/grafana-operator-secret-addon.ts
@@ -13,7 +13,7 @@ export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
             cluster: cluster,
             manifest: [
                 {
-                    apiVersion: "external-secrets.io/v1beta1",
+                    apiVersion: "external-secrets.io/v1",
                     kind: "ClusterSecretStore",
                     metadata: {
                         name: "ssm-parameter-store",
@@ -43,7 +43,7 @@ export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
             cluster: cluster,
             manifest: [
                 {
-                    apiVersion: "external-secrets.io/v1beta1",
+                    apiVersion: "external-secrets.io/v1",
                     kind: "ExternalSecret",
                     metadata: {
                         name: "external-grafana-admin-credentials",
@@ -72,4 +72,4 @@ export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
         externalSecret.node.addDependency(secretStore);
         return Promise.resolve(secretStore);
     }
-}
\ No newline at end of file
+}
diff --git a/lib/single-new-eks-awsnative-automode-observability-pattern/index.ts b/lib/single-new-eks-awsnative-automode-observability-pattern/index.ts
new file mode 100644
index 00000000..2e65be85
--- /dev/null
+++ b/lib/single-new-eks-awsnative-automode-observability-pattern/index.ts
@@ -0,0 +1,28 @@
+
+import { Construct } from 'constructs';
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+import { ObservabilityBuilder } from '@aws-quickstart/eks-blueprints';
+import { KubernetesVersion } from 'aws-cdk-lib/aws-eks';
+
+export default class SingleNewEksAutoModeAWSNativeObservabilityPattern {
+    constructor(scope: Construct, id: string) {
+
+        const stackId = `${id}-observability-accelerator`;
+        const account = process.env.COA_ACCOUNT_ID! || process.env.CDK_DEFAULT_ACCOUNT!;
+        const region = process.env.COA_AWS_REGION! || process.env.CDK_DEFAULT_REGION!;
+
+        const cluster = new blueprints.AutomodeClusterProvider({
+            version: KubernetesVersion.V1_33,
+            nodePools: ['system', 'general-purpose']
+        });
+
+        ObservabilityBuilder.builder({ isAutoModeCluster: true })
+            .account(account)
+            .region(region)
+            .clusterProvider(cluster)
+            .enableNativePatternAddOns()
+            .enableControlPlaneLogging()
+            .build(scope, stackId);
+    }
+}
diff --git a/lib/single-new-eks-cost-monitoring-pattern/kubecostserviceaccountsaddon.ts b/lib/single-new-eks-cost-monitoring-pattern/kubecostserviceaccountsaddon.ts
index 301dc361..600c44c9 100644
--- a/lib/single-new-eks-cost-monitoring-pattern/kubecostserviceaccountsaddon.ts
+++ b/lib/single-new-eks-cost-monitoring-pattern/kubecostserviceaccountsaddon.ts
@@ -42,7 +42,7 @@ export class KubecostServiceAccountsAddon implements blueprints.ClusterAddOn {
             cluster: cluster,
             manifest: [
                 {
-                    apiVersion: "external-secrets.io/v1beta1",
+                    apiVersion: "external-secrets.io/v1",
                     kind: "ClusterSecretStore",
                     metadata: {
                         name: "ssm-parameter-store",
@@ -71,4 +71,4 @@ export class KubecostServiceAccountsAddon implements blueprints.ClusterAddOn {

         return Promise.resolve(secretStore);
     }
-}
\ No newline at end of file
+}
diff --git a/lib/single-new-eks-opensource-observability-pattern/automode.ts b/lib/single-new-eks-opensource-observability-pattern/automode.ts
new file mode 100644
index 00000000..321adcff
--- /dev/null
+++ b/lib/single-new-eks-opensource-observability-pattern/automode.ts
@@ -0,0 +1,198 @@
+import { Construct } from 'constructs';
+import { utils } from '@aws-quickstart/eks-blueprints';
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+import { GrafanaOperatorSecretAddon } from './grafanaoperatorsecretaddon';
+import * as amp from 'aws-cdk-lib/aws-aps';
+import * as eks from 'aws-cdk-lib/aws-eks';
+import { ObservabilityBuilder } from '@aws-quickstart/eks-blueprints';
+import * as fs from 'fs';
+
+export default class SingleNewEksAutoModeOpenSourceObservabilityPattern {
+    constructor(scope: Construct, id: string) {
+
+        const stackId = `${id}-observability-accelerator`;
+
+        const account = process.env.COA_ACCOUNT_ID! || process.env.CDK_DEFAULT_ACCOUNT!;
+        const region = process.env.COA_AWS_REGION! || process.env.CDK_DEFAULT_REGION!;
+        const ampWorkspaceName = process.env.COA_AMP_WORKSPACE_NAME! || 'observability-amp-Workspace';
+        const ampWorkspace = blueprints.getNamedResource(ampWorkspaceName) as unknown as amp.CfnWorkspace;
+        const ampEndpoint = ampWorkspace.attrPrometheusEndpoint;
+        const ampWorkspaceArn = ampWorkspace.attrArn;
+        const amgEndpointUrl = process.env.COA_AMG_ENDPOINT_URL;
+
+        // All Grafana Dashboard URLs from `cdk.json` if present
+        const fluxRepository: blueprints.FluxGitRepo = utils.valueFromContext(scope, "fluxRepository", undefined);
+        fluxRepository.values!.AMG_AWS_REGION = region;
+        fluxRepository.values!.AMP_ENDPOINT_URL = ampEndpoint;
+        fluxRepository.values!.AMG_ENDPOINT_URL = amgEndpointUrl;
+
+        const ampAddOnProps: blueprints.AmpAddOnProps = {
+            ampPrometheusEndpoint: ampEndpoint,
+
+            ampRules: {
+                ampWorkspaceArn: ampWorkspaceArn,
+                ruleFilePaths: [
+                    __dirname + '/../common/resources/amp-config/alerting-rules.yml',
+                    __dirname + '/../common/resources/amp-config/recording-rules.yml'
+                ]
+            }
+        };
+
+        const jsonString = fs.readFileSync(__dirname + '/../../cdk.json', 'utf-8');
+        const jsonStringnew = JSON.parse(jsonString);
+        let doc = utils.readYamlDocument(__dirname + '/../common/resources/otel-collector-config.yml');
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableJavaMonJob }}",
+            "{{ stop enableJavaMonJob }}",
+            jsonStringnew.context["java.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableNginxMonJob }}",
+            "{{ stop enableNginxMonJob }}",
+            jsonStringnew.context["nginx.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableIstioMonJob }}",
+            "{{ stop enableIstioMonJob }}",
+            jsonStringnew.context["istio.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAPIserverJob }}",
+            "{{ stop enableAPIserverJob }}",
+            jsonStringnew.context["apiserver.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotMetricsCollectionJob }}",
+            "{{ stop enableAdotMetricsCollectionJob }}",
+            jsonStringnew.context["adotcollectormetrics.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotMetricsCollectionTelemetry }}",
+            "{{ stop enableAdotMetricsCollectionTelemetry }}",
+            jsonStringnew.context["adotcollectormetrics.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotContainerLogsReceiver }}",
+            "{{ stop enableAdotContainerLogsReceiver }}",
+            jsonStringnew.context["adotcontainerlogs.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotContainerLogsExporter }}",
+            "{{ stop enableAdotContainerLogsExporter }}",
+            jsonStringnew.context["adotcontainerlogs.pattern.enabled"]
+        );
+        console.log(doc);
+        fs.writeFileSync(__dirname + '/../common/resources/otel-collector-config-new.yml', doc);
+
+        if (utils.valueFromContext(scope, "adotcollectormetrics.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml'
+            };
+        }
+
+        if (utils.valueFromContext(scope, "java.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml',
+                manifestParameterMap: {
+                    javaScrapeSampleLimit: 1000,
+                    javaPrometheusMetricsEndpoint: "/metrics"
+                }
+            };
+            ampAddOnProps.ampRules?.ruleFilePaths.push(
+                __dirname + '/../common/resources/amp-config/java/alerting-rules.yml',
+                __dirname + '/../common/resources/amp-config/java/recording-rules.yml'
+            );
+        }
+
+        if (utils.valueFromContext(scope, "adotcontainerlogs.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml',
+                manifestParameterMap: {
+                    logGroupName: `/aws/eks/${stackId}`,
+                    logStreamName: `/aws/eks/${stackId}`,
+                    logRetentionDays: 30,
+                    awsRegion: region
+                }
+            };
+        }
+
+        if (utils.valueFromContext(scope, "apiserver.pattern.enabled", false)) {
+            ampAddOnProps.enableAPIServerJob = true;
+            ampAddOnProps.ampRules?.ruleFilePaths.push(
+                __dirname + '/../common/resources/amp-config/apiserver/recording-rules.yml'
+            );
+        }
+
+        if (utils.valueFromContext(scope, "nginx.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml',
+                manifestParameterMap: {
+                    nginxScrapeSampleLimit: 1000,
+                    nginxPrometheusMetricsEndpoint: "/metrics"
+                }
+            };
+            ampAddOnProps.ampRules?.ruleFilePaths.push(
+                __dirname + '/../common/resources/amp-config/nginx/alerting-rules.yml'
+            );
+        }
+
+        if (utils.valueFromContext(scope, "istio.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml'
+            };
+            ampAddOnProps.ampRules?.ruleFilePaths.push(
+                __dirname + '/../common/resources/amp-config/istio/alerting-rules.yml',
+                __dirname + '/../common/resources/amp-config/istio/recording-rules.yml'
+            );
+        }
+
+        Reflect.defineMetadata("ordered", true, blueprints.addons.GrafanaOperatorAddon);
+        const addOns: Array<blueprints.ClusterAddOn> = [
+            new blueprints.addons.XrayAdotAddOn(),
+            new blueprints.addons.FluxCDAddOn({ "repositories": [fluxRepository] }),
+            new GrafanaOperatorSecretAddon(),
+        ];
+
+        if (utils.valueFromContext(scope, "istio.pattern.enabled", false)) {
+            addOns.push(new blueprints.addons.IstioBaseAddOn({
+                version: "1.18.2"
+            }));
+            addOns.push(new blueprints.addons.IstioControlPlaneAddOn({
+                version: "1.18.2"
+            }));
+            addOns.push(new blueprints.addons.IstioIngressGatewayAddon({
+                version: "1.18.2"
+            }));
+            addOns.push(new blueprints.addons.IstioCniAddon({
+                version: "1.18.2"
+            }));
+        }
+
+        const automodeProps: blueprints.AutomodeClusterProviderProps = {
+            version: eks.KubernetesVersion.V1_33,
+            nodePools: ['system', 'general-purpose']
+        };
+
+        ObservabilityBuilder.builder({ isAutoModeCluster: true })
+            .account(account)
+            .region(region)
+            .withAmpProps(ampAddOnProps)
+            .enableOpenSourcePatternAddOns()
+            .enableControlPlaneLogging()
+            .resourceProvider(ampWorkspaceName, new blueprints.CreateAmpProvider(ampWorkspaceName, ampWorkspaceName))
+            .clusterProvider(new blueprints.AutomodeClusterProvider(automodeProps))
+            .addOns(...addOns)
+            .build(scope, stackId);
+    }
+}
diff --git a/lib/single-new-eks-opensource-observability-pattern/grafanaoperatorsecretaddon.ts b/lib/single-new-eks-opensource-observability-pattern/grafanaoperatorsecretaddon.ts
index 0c222009..a5811ecf 100644
--- a/lib/single-new-eks-opensource-observability-pattern/grafanaoperatorsecretaddon.ts
+++ b/lib/single-new-eks-opensource-observability-pattern/grafanaoperatorsecretaddon.ts
@@ -13,7 +13,7 @@ export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
             cluster: cluster,
             manifest: [
                 {
-                    apiVersion: "external-secrets.io/v1beta1",
+                    apiVersion: "external-secrets.io/v1",
                     kind: "ClusterSecretStore",
                     metadata: {
                         name: "ssm-parameter-store",
@@ -43,7 +43,7 @@ export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
             cluster: cluster,
            manifest: [
                 {
-                    apiVersion: "external-secrets.io/v1beta1",
+                    apiVersion: "external-secrets.io/v1",
                     kind: "ExternalSecret",
                     metadata: {
                         name: "external-grafana-admin-credentials",
@@ -72,4 +72,4 @@ export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
         externalSecret.node.addDependency(secretStore);
         return Promise.resolve(secretStore);
     }
-}
\ No newline at end of file
+}
diff --git a/package.json b/package.json
index 0bdea734..4f1a46ef 100644
--- a/package.json
+++ b/package.json
@@ -10,12 +10,12 @@
     "lint": "npx eslint . --ext .js,.jsx,.ts,.tsx"
   },
   "devDependencies": {
-    "@aws-quickstart/eks-blueprints": "1.17.2",
+    "@aws-quickstart/eks-blueprints": "1.17.3",
     "@types/jest": "^30.0.0",
     "@types/node": "^24.0.13",
     "@typescript-eslint/eslint-plugin": "^8.36.0",
     "@typescript-eslint/parser": "^8.36.0",
-    "aws-cdk": "2.1020.2",
+    "aws-cdk": "2.1029.2",
     "copyfiles": "^2.4.1",
     "eslint": "^8.56.0",
     "jest": "^29.7.0",
@@ -25,18 +25,17 @@
   },
   "dependencies": {
     "@kubecost/kubecost-eks-blueprints-addon": "^0.1.8",
-    "@aws-quickstart/eks-blueprints": "1.17.2",
-    "aws-cdk": "2.1020.2",
-    "aws-cdk-lib": "2.204.0",
+    "@aws-quickstart/eks-blueprints": "1.17.3",
+    "aws-cdk-lib": "2.215.0",
     "aws-sdk": "^2.1455.0",
     "constructs": "^10.3.0",
     "eks-blueprints-cdk-kubeflow-ext": "0.1.9",
     "source-map-support": "^0.5.21"
   },
   "overrides": {
-    "@aws-quickstart/eks-blueprints": "1.17.2",
-    "aws-cdk": "2.1020.2",
-    "aws-cdk-lib": "2.204.0",
+    "@aws-quickstart/eks-blueprints": "1.17.3",
+    "aws-cdk": "2.1029.2",
+    "aws-cdk-lib": "2.215.0",
     "xml2js": "0.5.0"
   }
-}
\ No newline at end of file
+}