From c99fcf91132b2d5dec169e8d7299d938b40d8c7c Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 7 Nov 2025 13:48:14 +0100 Subject: [PATCH] feat(k8s): Add topology constraints --- etl-api/src/k8s/http.rs | 33 +++++++++++++++---- ...ate_bq_replicator_stateful_set_json-2.snap | 28 ++++++++++++++-- ...ate_bq_replicator_stateful_set_json-3.snap | 28 ++++++++++++++-- ...reate_bq_replicator_stateful_set_json.snap | 28 ++++++++++++++-- ...ceberg_replicator_stateful_set_json-2.snap | 28 ++++++++++++++-- ...ceberg_replicator_stateful_set_json-3.snap | 28 ++++++++++++++-- ..._iceberg_replicator_stateful_set_json.snap | 28 ++++++++++++++-- 7 files changed, 177 insertions(+), 24 deletions(-) diff --git a/etl-api/src/k8s/http.rs b/etl-api/src/k8s/http.rs index 331580bc..2caf83a7 100644 --- a/etl-api/src/k8s/http.rs +++ b/etl-api/src/k8s/http.rs @@ -65,8 +65,6 @@ const LOGS_VOLUME_NAME: &str = "logs"; pub const TRUSTED_ROOT_CERT_CONFIG_MAP_NAME: &str = "trusted-root-certs-config"; /// Key inside the trusted root certificates ConfigMap. pub const TRUSTED_ROOT_CERT_KEY_NAME: &str = "trusted_root_certs"; -/// Pod template annotation used to trigger rolling restarts. -const RESTARTED_AT_ANNOTATION_KEY: &str = "etl.supabase.com/restarted-at"; /// Label used to identify replicator pods. const REPLICATOR_APP_LABEL: &str = "etl-replicator-app"; @@ -868,18 +866,18 @@ fn create_replicator_stateful_set_json( "replicas": 1, "selector": { "matchLabels": { - "app-name": replicator_app_name, + "etl.supabase.com/app-name": replicator_app_name, } }, "template": { "metadata": { "labels": { - "app-name": replicator_app_name, - "app": REPLICATOR_APP_LABEL + "etl.supabase.com/app-name": replicator_app_name, + "etl.supabase.com/app-type": REPLICATOR_APP_LABEL }, "annotations": { // Attach template annotations (e.g., restart checksum) to trigger a rolling restart - RESTARTED_AT_ANNOTATION_KEY: restarted_at_annotation, + "etl.supabase.com/restarted-at": restarted_at_annotation, } }, "spec": { @@ -899,6 +897,29 @@ fn create_replicator_stateful_set_json( "effect": "NoSchedule" } ], + // Distribute pods evenly across nodes and availability zones. + "topologySpreadConstraints": [ + { + "maxSkew": 1, + "topologyKey": "kubernetes.io/hostname", + "whenUnsatisfiable": "ScheduleAnyway", + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": REPLICATOR_APP_LABEL + } + } + }, + { + "maxSkew": 1, + "topologyKey": "topology.kubernetes.io/zone", + "whenUnsatisfiable": "ScheduleAnyway", + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": REPLICATOR_APP_LABEL + } + } + } + ], "nodeSelector": node_selector, // We want to wait at most 5 minutes before K8S sends a `SIGKILL` to the containers, // this way we let the system finish any in-flight transaction, if there are any. diff --git a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json-2.snap b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json-2.snap index 388401bf..6ce4f0f7 100644 --- a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json-2.snap +++ b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json-2.snap @@ -13,7 +13,7 @@ expression: stateful_set_json "replicas": 1, "selector": { "matchLabels": { - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app" } }, "template": { @@ -22,8 +22,8 @@ expression: stateful_set_json "etl.supabase.com/restarted-at": "[timestamp]" }, "labels": { - "app": "etl-replicator-app", - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app", + "etl.supabase.com/app-type": "etl-replicator-app" } }, "spec": { @@ -169,6 +169,28 @@ expression: stateful_set_json "value": "workloads" } ], + "topologySpreadConstraints": [ + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "kubernetes.io/hostname", + "whenUnsatisfiable": "ScheduleAnyway" + }, + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "topology.kubernetes.io/zone", + "whenUnsatisfiable": "ScheduleAnyway" + } + ], "volumes": [ { "configMap": { diff --git a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json-3.snap b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json-3.snap index c2f2fd60..ff716688 100644 --- a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json-3.snap +++ b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json-3.snap @@ -13,7 +13,7 @@ expression: stateful_set_json "replicas": 1, "selector": { "matchLabels": { - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app" } }, "template": { @@ -22,8 +22,8 @@ expression: stateful_set_json "etl.supabase.com/restarted-at": "[timestamp]" }, "labels": { - "app": "etl-replicator-app", - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app", + "etl.supabase.com/app-type": "etl-replicator-app" } }, "spec": { @@ -169,6 +169,28 @@ expression: stateful_set_json "value": "workloads" } ], + "topologySpreadConstraints": [ + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "kubernetes.io/hostname", + "whenUnsatisfiable": "ScheduleAnyway" + }, + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "topology.kubernetes.io/zone", + "whenUnsatisfiable": "ScheduleAnyway" + } + ], "volumes": [ { "configMap": { diff --git a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json.snap b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json.snap index 19d5dbbd..edcecb1f 100644 --- a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json.snap +++ b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_bq_replicator_stateful_set_json.snap @@ -13,7 +13,7 @@ expression: stateful_set_json "replicas": 1, "selector": { "matchLabels": { - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app" } }, "template": { @@ -22,8 +22,8 @@ expression: stateful_set_json "etl.supabase.com/restarted-at": "[timestamp]" }, "labels": { - "app": "etl-replicator-app", - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app", + "etl.supabase.com/app-type": "etl-replicator-app" } }, "spec": { @@ -95,6 +95,28 @@ expression: stateful_set_json "value": "workloads" } ], + "topologySpreadConstraints": [ + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "kubernetes.io/hostname", + "whenUnsatisfiable": "ScheduleAnyway" + }, + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "topology.kubernetes.io/zone", + "whenUnsatisfiable": "ScheduleAnyway" + } + ], "volumes": [ { "configMap": { diff --git a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json-2.snap b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json-2.snap index 0d9d5422..f9559e7a 100644 --- a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json-2.snap +++ b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json-2.snap @@ -13,7 +13,7 @@ expression: stateful_set_json "replicas": 1, "selector": { "matchLabels": { - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app" } }, "template": { @@ -22,8 +22,8 @@ expression: stateful_set_json "etl.supabase.com/restarted-at": "[timestamp]" }, "labels": { - "app": "etl-replicator-app", - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app", + "etl.supabase.com/app-type": "etl-replicator-app" } }, "spec": { @@ -187,6 +187,28 @@ expression: stateful_set_json "value": "workloads" } ], + "topologySpreadConstraints": [ + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "kubernetes.io/hostname", + "whenUnsatisfiable": "ScheduleAnyway" + }, + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "topology.kubernetes.io/zone", + "whenUnsatisfiable": "ScheduleAnyway" + } + ], "volumes": [ { "configMap": { diff --git a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json-3.snap b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json-3.snap index 233ffbc3..7f936850 100644 --- a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json-3.snap +++ b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json-3.snap @@ -13,7 +13,7 @@ expression: stateful_set_json "replicas": 1, "selector": { "matchLabels": { - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app" } }, "template": { @@ -22,8 +22,8 @@ expression: stateful_set_json "etl.supabase.com/restarted-at": "[timestamp]" }, "labels": { - "app": "etl-replicator-app", - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app", + "etl.supabase.com/app-type": "etl-replicator-app" } }, "spec": { @@ -187,6 +187,28 @@ expression: stateful_set_json "value": "workloads" } ], + "topologySpreadConstraints": [ + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "kubernetes.io/hostname", + "whenUnsatisfiable": "ScheduleAnyway" + }, + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "topology.kubernetes.io/zone", + "whenUnsatisfiable": "ScheduleAnyway" + } + ], "volumes": [ { "configMap": { diff --git a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json.snap b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json.snap index 9b7dc600..2bbf20f7 100644 --- a/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json.snap +++ b/etl-api/src/k8s/snapshots/etl_api__k8s__http__tests__create_iceberg_replicator_stateful_set_json.snap @@ -13,7 +13,7 @@ expression: stateful_set_json "replicas": 1, "selector": { "matchLabels": { - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app" } }, "template": { @@ -22,8 +22,8 @@ expression: stateful_set_json "etl.supabase.com/restarted-at": "[timestamp]" }, "labels": { - "app": "etl-replicator-app", - "app-name": "abcdefghijklmnopqrst-42-replicator-app" + "etl.supabase.com/app-name": "abcdefghijklmnopqrst-42-replicator-app", + "etl.supabase.com/app-type": "etl-replicator-app" } }, "spec": { @@ -113,6 +113,28 @@ expression: stateful_set_json "value": "workloads" } ], + "topologySpreadConstraints": [ + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "kubernetes.io/hostname", + "whenUnsatisfiable": "ScheduleAnyway" + }, + { + "labelSelector": { + "matchLabels": { + "etl.supabase.com/app-type": "etl-replicator-app" + } + }, + "maxSkew": 1, + "topologyKey": "topology.kubernetes.io/zone", + "whenUnsatisfiable": "ScheduleAnyway" + } + ], "volumes": [ { "configMap": {