From 8f8c678698913c6338bb55bb5a9f0e66aa978422 Mon Sep 17 00:00:00 2001
From: Karina Ranadive
Date: Fri, 17 Oct 2025 15:52:59 -0400
Subject: [PATCH 1/5] test: expand LRP test to include lifecycle events

---
 test/integration/lrp/lrp_fqdn_test.go        |   2 +-
 test/integration/lrp/lrp_test.go             | 336 +++++++++++++++++-
 .../v1.13/cilium-config/cilium-config.yaml   |   2 +-
 3 files changed, 330 insertions(+), 10 deletions(-)

diff --git a/test/integration/lrp/lrp_fqdn_test.go b/test/integration/lrp/lrp_fqdn_test.go
index 93bca2439b..7bfc8f3514 100644
--- a/test/integration/lrp/lrp_fqdn_test.go
+++ b/test/integration/lrp/lrp_fqdn_test.go
@@ -102,7 +102,7 @@ func TestLRPFQDN(t *testing.T) {
 	for _, tt := range tests {
 		tt := tt
 		t.Run(tt.name, func(t *testing.T) {
-			testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases)
+			testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases, promAddress)
 		})
 	}
 }
diff --git a/test/integration/lrp/lrp_test.go b/test/integration/lrp/lrp_test.go
index 59fd974114..0500fda921 100644
--- a/test/integration/lrp/lrp_test.go
+++ b/test/integration/lrp/lrp_test.go
@@ -4,6 +4,7 @@ package lrp

 import (
 	"context"
+	"fmt"
 	"os"
 	"strings"
 	"testing"
@@ -13,11 +14,16 @@ import (
 	"github.com/Azure/azure-container-networking/test/integration/prometheus"
 	"github.com/Azure/azure-container-networking/test/internal/kubernetes"
 	"github.com/Azure/azure-container-networking/test/internal/retry"
+	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
 	ciliumClientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
 	"github.com/pkg/errors"
 	"github.com/stretchr/testify/require"
 	"golang.org/x/exp/rand"
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sclient "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"sigs.k8s.io/yaml"
 )

 const (
@@ -154,7 +160,7 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
 }

 func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, clientCmd []string, expectResponse, expectErrMsg string,
-	shouldError, countShouldIncrease bool) {
+	shouldError, countShouldIncrease bool, prometheusAddress string) {
 	config := kubernetes.MustGetRestConfig()
 	cs := kubernetes.MustGetClientset()
@@ -167,9 +173,11 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
 		"zone": ".",
 	}

-	// curl localhost:9253/metrics
-	beforeMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+	// curl to the specified prometheus address
+	beforeMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
 	require.NoError(t, err)
+	beforeValue := beforeMetric.GetCounter().GetValue()
+	t.Logf("Before DNS request - metric count: %.0f", beforeValue)

 	t.Log("calling command from client")
@@ -187,13 +195,15 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
 	time.Sleep(500 * time.Millisecond)

 	// curl again and see count diff
-	afterMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+	afterMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
 	require.NoError(t, err)
+	afterValue := afterMetric.GetCounter().GetValue()
+	t.Logf("After DNS request - metric count: %.0f (diff: %.0f)", afterValue, afterValue-beforeValue)

 	if countShouldIncrease {
-		require.Greater(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count did not increase after command")
+		require.Greater(t, afterValue, beforeValue, "dns metric count did not increase after command - before: %.0f, after: %.0f", beforeValue, afterValue)
 	} else {
-		require.Equal(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count increased after command")
+		require.Equal(t, afterValue, beforeValue, "dns metric count increased after command - before: %.0f, after: %.0f", beforeValue, afterValue)
 	}
 }
@@ -210,9 +220,319 @@ func TestLRP(t *testing.T) {
 	defer cleanupFn()
 	require.NotNil(t, selectedPod)

+	// Get the kube-dns service IP for DNS requests
+	cs := kubernetes.MustGetClientset()
+	svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
+	require.NoError(t, err)
+	kubeDNS := svc.Spec.ClusterIP
+
+	t.Logf("LRP Test Starting...")
+
+	// Basic LRP test
 	testLRPCase(t, ctx, *selectedPod, []string{
-		"nslookup", "google.com", "10.0.0.10",
-	}, "", "", false, true)
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, promAddress)
+
+	t.Logf("LRP Test Completed")
+
+	t.Logf("LRP Lifecycle Test Starting")
+
+	// Run LRP Lifecycle test
+	testLRPLifecycle(t, ctx, *selectedPod, kubeDNS)
+
+	t.Logf("LRP Lifecycle Test Completed")
+}
+
+// testLRPLifecycle performs testing of Local Redirect Policy functionality
+// including pod restarts, resource recreation, and cilium command validation
+func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, kubeDNS string) {
+	config := kubernetes.MustGetRestConfig()
+	cs := kubernetes.MustGetClientset()
+
+	// Step 1: Initial DNS test to verify LRP is working
+	t.Log("Step 1: Initial DNS test - verifying LRP functionality")
+	testLRPCase(t, ctx, clientPod, []string{
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, promAddress)
+
+	// Step 2: Validate LRP using cilium commands
+	t.Log("Step 2: Validating LRP using cilium commands")
+	validateCiliumLRP(t, ctx, cs, config)
+
+	// Step 3: Restart busybox pods and verify LRP still works
+	t.Log("Step 3: Restarting client pods to test persistence")
+	restartedPod := restartClientPodsAndGetPod(t, ctx, cs, clientPod)
+
+	// Step 4: Verify metrics after restart
+	t.Log("Step 4: Verifying LRP functionality after pod restart")
+	testLRPCase(t, ctx, restartedPod, []string{
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, promAddress)
+
+	// Step 5: Validate cilium commands still show LRP
+	t.Log("Step 5: Re-validating cilium LRP after restart")
+	validateCiliumLRP(t, ctx, cs, config)
+
+	// Step 6: Delete and recreate resources & restart nodelocaldns daemonset
+	t.Log("Step 6: Testing resource deletion and recreation")
+	recreatedPod := deleteAndRecreateResources(t, ctx, cs, clientPod)
+
+	// Step 7: Final verification after recreation
+	t.Log("Step 7: Final verification after resource recreation - skipping basic DNS test, will validate with metrics in Step 8")
+
+	// Step 8: Re-establish port forward to new node-local-dns pod and validate metrics
+	t.Log("Step 8: Re-establishing port forward to new node-local-dns pod for metrics validation")
+
+	// Get the new node-local-dns pod on the same node as our recreated client pod
+	nodeName := recreatedPod.Spec.NodeName
+	newNodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, newNodeLocalDNSPods.Items, "No node-local-dns pod found on node %s after restart", nodeName)
found on node %s after restart", nodeName) + + newNodeLocalDNSPod := TakeOne(newNodeLocalDNSPods.Items) + t.Logf("Setting up port forward to new node-local-dns pod: %s", newNodeLocalDNSPod.Name) + + // Setup new port forward to the new node-local-dns pod + newPf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{ + Namespace: newNodeLocalDNSPod.Namespace, + PodName: newNodeLocalDNSPod.Name, + LocalPort: 9254, // Use different port to avoid conflicts + DestPort: 9253, + }) + require.NoError(t, err) + + newPortForwardCtx, newCancel := context.WithTimeout(ctx, (retryAttempts+1)*retryDelay) + defer newCancel() + + err = defaultRetrier.Do(newPortForwardCtx, func() error { + t.Logf("attempting port forward to new node-local-dns pod %s...", newNodeLocalDNSPod.Name) + return errors.Wrap(newPf.Forward(newPortForwardCtx), "could not start port forward to new pod") + }) + require.NoError(t, err, "could not start port forward to new node-local-dns pod") + defer newPf.Stop() + + t.Log("Port forward to new node-local-dns pod established") + + // Now test metrics with the new port forward using port 9254 + newPromAddress := "http://localhost:9254/metrics" + + // Use testLRPCase function with the new prometheus address + t.Log("Validating metrics with new node-local-dns pod") + testLRPCase(t, ctx, recreatedPod, []string{ + "nslookup", "github.com", kubeDNS, + }, "", "", false, true, newPromAddress) + + t.Logf("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s", newNodeLocalDNSPod.Name) + + // Step 9: Final cilium validation after node-local-dns restart + t.Log("Step 9: Final cilium validation - ensuring LRP is still active after node-local-dns restart") + validateCiliumLRP(t, ctx, cs, config) + +} + +// validateCiliumLRP checks that LRP is properly configured in cilium +func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, config *rest.Config) { + ciliumPods, err := cs.CoreV1().Pods(kubeSystemNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: "k8s-app=cilium", + }) + require.NoError(t, err) + require.NotEmpty(t, ciliumPods.Items) + ciliumPod := TakeOne(ciliumPods.Items) + + // Get Kubernetes version to determine validation approach + serverVersion, err := cs.Discovery().ServerVersion() + require.NoError(t, err) + t.Logf("Detected Kubernetes version: %s", serverVersion.String()) + + // Parse version to determine if we should use modern or legacy validation + // K8s 1.32.0+ should use modern Cilium format (v1.17+) + useModernFormat := false + if serverVersion.Major == "1" { + // Parse minor version + var minorVersion int + _, err := fmt.Sscanf(serverVersion.Minor, "%d", &minorVersion) + if err == nil && minorVersion >= 32 { + useModernFormat = true + } + } + + if useModernFormat { + t.Log("Using modern validation approach based on Kubernetes version >= 1.32.0") + } else { + t.Log("Using legacy validation approach based on Kubernetes version < 1.32.0") + } + + // Get kube-dns service IP for validation + svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService) + require.NoError(t, err) + kubeDNSIP := svc.Spec.ClusterIP + + // IMPORTANT: Get node-local-dns pod IP on the SAME node as the cilium pod we're using + selectedNode := ciliumPod.Spec.NodeName + t.Logf("Using cilium pod %s on node %s for validation", ciliumPod.Name, selectedNode) + + // Get node-local-dns pod specifically on the same node as our cilium pod + nodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, 
nodeLocalDNSLabelSelector, selectedNode) + require.NoError(t, err) + require.NotEmpty(t, nodeLocalDNSPods.Items, "No node-local-dns pod found on node %s", selectedNode) + + // Use the first (and should be only) node-local-dns pod on this node + nodeLocalDNSPod := nodeLocalDNSPods.Items[0] + nodeLocalDNSIP := nodeLocalDNSPod.Status.PodIP + require.NotEmpty(t, nodeLocalDNSIP, "node-local-dns pod %s has no IP address", nodeLocalDNSPod.Name) + + t.Logf("Validating LRP: kubeDNS IP=%s, nodeLocalDNS IP=%s (pod: %s), node=%s", + kubeDNSIP, nodeLocalDNSIP, nodeLocalDNSPod.Name, selectedNode) + + // Check cilium lrp list + lrpListCmd := []string{"cilium", "lrp", "list"} + lrpOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", lrpListCmd, config, false) + require.NoError(t, err) + + // Validate the LRP output structure more thoroughly + lrpOutputStr := string(lrpOutput) + require.Contains(t, lrpOutputStr, "nodelocaldns", "LRP not found in cilium lrp list") + + // Parse LRP list output to validate structure + lrpLines := strings.Split(lrpOutputStr, "\n") + nodelocaldnsFound := false + + for _, line := range lrpLines { + line = strings.TrimSpace(line) + if strings.Contains(line, "nodelocaldns") && strings.Contains(line, "kube-system") { + // Validate that the line contains expected components + require.Contains(t, line, "kube-system", "LRP line should contain kube-system namespace") + require.Contains(t, line, "nodelocaldns", "LRP line should contain nodelocaldns name") + require.Contains(t, line, "kube-dns", "LRP line should reference kube-dns service") + nodelocaldnsFound = true + t.Logf("Found nodelocaldns LRP entry: %s", line) + break + } + } + + require.True(t, nodelocaldnsFound, "nodelocaldns LRP entry not found with expected structure in output: %s", lrpOutputStr) + + // Check cilium service list for localredirect + serviceListCmd := []string{"cilium", "service", "list"} + serviceOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", serviceListCmd, config, false) + require.NoError(t, err) + require.Contains(t, string(serviceOutput), "LocalRedirect", "LocalRedirect not found in cilium service list") + + // Validate LocalRedirect entries + serviceLines := strings.Split(string(serviceOutput), "\n") + tcpFound := false + udpFound := false + + for _, line := range serviceLines { + if strings.Contains(line, "LocalRedirect") && strings.Contains(line, kubeDNSIP) { + // Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs + if strings.Contains(line, nodeLocalDNSIP) { + if useModernFormat { + // Modern format (K8s 1.32.0+/Cilium v1.17+): Check for explicit protocol + if strings.Contains(line, "/TCP") { + tcpFound = true + t.Logf("Found TCP LocalRedirect: %s", strings.TrimSpace(line)) + } else if strings.Contains(line, "/UDP") { + udpFound = true + t.Logf("Found UDP LocalRedirect: %s", strings.TrimSpace(line)) + } + } else { + // Legacy format (K8s < 1.32.0/Cilium < v1.17): No protocol specified + t.Logf("Found legacy LocalRedirect: %s", strings.TrimSpace(line)) + } + } + } + } + + // Validate based on determined format + if useModernFormat { + // Modern format (K8s 1.32.0+/Cilium v1.17+): Separate TCP and UDP entries + t.Log("Validating modern Cilium format - expecting separate TCP and UDP LocalRedirect entries") + require.True(t, tcpFound, "TCP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode) 
+		require.True(t, udpFound, "UDP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)
+	} else {
+		// Legacy format (K8s < 1.32.0/Cilium < v1.17): Just one LocalRedirect entry without protocol
+		t.Log("Validating legacy Cilium format - expecting single LocalRedirect entry without protocol")
+		require.False(t, useModernFormat, "Legacy LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)
+	}
+
+	t.Logf("Cilium LRP List Output:\n%s", string(lrpOutput))
+	t.Logf("Cilium Service List Output:\n%s", string(serviceOutput))
+}
+
+// restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference
+func restartClientPodsAndGetPod(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
+	// Find the daemonset name by looking up the pod's owner
+	podDetails, err := cs.CoreV1().Pods(originalPod.Namespace).Get(ctx, originalPod.Name, metav1.GetOptions{})
+	require.NoError(t, err)
+
+	// Get the node name for consistent testing
+	nodeName := podDetails.Spec.NodeName
+
+	// Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
+	err = kubernetes.MustRestartDaemonset(ctx, cs, originalPod.Namespace, "lrp-test")
+	require.NoError(t, err)
+
+	// Wait for the daemonset to be ready
+	kubernetes.WaitForPodDaemonset(ctx, cs, originalPod.Namespace, "lrp-test", clientLabelSelector)
+
+	// Get the new pod on the same node
+	clientPods, err := kubernetes.GetPodsByNode(ctx, cs, originalPod.Namespace, clientLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, clientPods.Items)
+
+	return TakeOne(clientPods.Items)
+}
+
+// deleteAndRecreateResources deletes and recreates client pods and LRP, returning new pod
+func deleteAndRecreateResources(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
+	config := kubernetes.MustGetRestConfig()
+	ciliumCS, err := ciliumClientset.NewForConfig(config)
+	require.NoError(t, err)
+
+	nodeName := originalPod.Spec.NodeName
+
+	// Delete client daemonset
+	dsClient := cs.AppsV1().DaemonSets(originalPod.Namespace)
+	clientDS := kubernetes.MustParseDaemonSet(clientPath)
+	kubernetes.MustDeleteDaemonset(ctx, dsClient, clientDS)
+
+	// Delete LRP
+	lrpContent, err := os.ReadFile(lrpPath)
+	require.NoError(t, err)
+	var lrp ciliumv2.CiliumLocalRedirectPolicy
+	err = yaml.Unmarshal(lrpContent, &lrp)
+	require.NoError(t, err)
+
+	lrpClient := ciliumCS.CiliumV2().CiliumLocalRedirectPolicies(lrp.Namespace)
+	kubernetes.MustDeleteCiliumLocalRedirectPolicy(ctx, lrpClient, lrp)
+
+	// Wait for deletion to complete
+	time.Sleep(10 * time.Second)
+
+	// Recreate LRP
+	_, cleanupLRP := kubernetes.MustSetupLRP(ctx, ciliumCS, lrpPath)
+	t.Cleanup(cleanupLRP)
+
+	// Restart node-local-dns pods to pick up new LRP configuration
+	t.Log("Restarting node-local-dns pods after LRP recreation")
+	err = kubernetes.MustRestartDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns")
+	require.NoError(t, err)
+	kubernetes.WaitForPodDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns", nodeLocalDNSLabelSelector)
+
+	// Recreate client daemonset
+	_, cleanupClient := kubernetes.MustSetupDaemonset(ctx, cs, clientPath)
+	t.Cleanup(cleanupClient)
+
+	// Wait for pods to be ready
+	kubernetes.WaitForPodDaemonset(ctx, cs, clientDS.Namespace, clientDS.Name, clientLabelSelector)
+
+	// Get new pod on the same node
+	clientPods, err := kubernetes.GetPodsByNode(ctx, cs, clientDS.Namespace, clientLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, clientPods.Items)
+
+	return TakeOne(clientPods.Items)
+}

 // TakeOne takes one item from the slice randomly; if empty, it returns the empty value for the type
diff --git a/test/integration/manifests/cilium/v1.13/cilium-config/cilium-config.yaml b/test/integration/manifests/cilium/v1.13/cilium-config/cilium-config.yaml
index 198074750b..add4b386b7 100644
--- a/test/integration/manifests/cilium/v1.13/cilium-config/cilium-config.yaml
+++ b/test/integration/manifests/cilium/v1.13/cilium-config/cilium-config.yaml
@@ -34,7 +34,7 @@ data:
   enable-l2-neigh-discovery: "true"
   enable-l7-proxy: "false"
   enable-local-node-route: "false"
-  enable-local-redirect-policy: "true" # set to true for lrp test
+  enable-local-redirect-policy: "false"
   enable-metrics: "true"
   enable-policy: default
   enable-remote-node-identity: "true"

From aa52f64fb8fee7bee4cced1bf67017fb7d94ae4c Mon Sep 17 00:00:00 2001
From: Karina Ranadive
Date: Tue, 11 Nov 2025 13:49:22 -0500
Subject: [PATCH 2/5] fix

---
 test/integration/lrp/lrp_test.go | 52 ++++++++------------------
 1 file changed, 12 insertions(+), 40 deletions(-)

diff --git a/test/integration/lrp/lrp_test.go b/test/integration/lrp/lrp_test.go
index 0500fda921..32d0a83ee8 100644
--- a/test/integration/lrp/lrp_test.go
+++ b/test/integration/lrp/lrp_test.go
@@ -4,7 +4,6 @@ package lrp

 import (
 	"context"
-	"fmt"
 	"os"
 	"strings"
 	"testing"
@@ -344,24 +343,6 @@ func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientse
 	require.NoError(t, err)
 	t.Logf("Detected Kubernetes version: %s", serverVersion.String())

-	// Parse version to determine if we should use modern or legacy validation
-	// K8s 1.32.0+ should use modern Cilium format (v1.17+)
-	useModernFormat := false
-	if serverVersion.Major == "1" {
-		// Parse minor version
-		var minorVersion int
-		_, err := fmt.Sscanf(serverVersion.Minor, "%d", &minorVersion)
-		if err == nil && minorVersion >= 32 {
-			useModernFormat = true
-		}
-	}
-
-	if useModernFormat {
-		t.Log("Using modern validation approach based on Kubernetes version >= 1.32.0")
-	} else {
-		t.Log("Using legacy validation approach based on Kubernetes version < 1.32.0")
-	}
-
 	// Get kube-dns service IP for validation
 	svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
 	require.NoError(t, err)
@@ -422,39 +403,30 @@ func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientse
 	serviceLines := strings.Split(string(serviceOutput), "\n")
 	tcpFound := false
 	udpFound := false
+	legacyFound := false

 	for _, line := range serviceLines {
 		if strings.Contains(line, "LocalRedirect") && strings.Contains(line, kubeDNSIP) {
 			// Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs
 			if strings.Contains(line, nodeLocalDNSIP) {
-				if useModernFormat {
-					// Modern format (K8s 1.32.0+/Cilium v1.17+): Check for explicit protocol
-					if strings.Contains(line, "/TCP") {
-						tcpFound = true
-						t.Logf("Found TCP LocalRedirect: %s", strings.TrimSpace(line))
-					} else if strings.Contains(line, "/UDP") {
-						udpFound = true
-						t.Logf("Found UDP LocalRedirect: %s", strings.TrimSpace(line))
-					}
+				// Check for both modern format (with /TCP or /UDP) and legacy format (without protocol)
+				if strings.Contains(line, "/TCP") {
+					tcpFound = true
+					t.Logf("Found TCP LocalRedirect: %s", strings.TrimSpace(line))
+				} else if strings.Contains(line, "/UDP") {
+					udpFound = true
+					t.Logf("Found UDP LocalRedirect: %s", strings.TrimSpace(line))
LocalRedirect: %s", strings.TrimSpace(line)) } else { - // Legacy format (K8s < 1.32.0/Cilium < v1.17): No protocol specified + legacyFound = true t.Logf("Found legacy LocalRedirect: %s", strings.TrimSpace(line)) } } } } - // Validate based on determined format - if useModernFormat { - // Modern format (K8s 1.32.0+/Cilium v1.17+): Separate TCP and UDP entries - t.Log("Validating modern Cilium format - expecting separate TCP and UDP LocalRedirect entries") - require.True(t, tcpFound, "TCP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode) - require.True(t, udpFound, "UDP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode) - } else { - // Legacy format (K8s < 1.32.0/Cilium < v1.17): Just one LocalRedirect entry without protocol - t.Log("Validating legacy Cilium format - expecting single LocalRedirect entry without protocol") - require.False(t, useModernFormat, "Legacy LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode) - } + // Validate that we found either legacy format or modern format entries + t.Log("Validating LocalRedirect entries - accepting either legacy format or modern TCP/UDP format") + require.True(t, legacyFound || (tcpFound && udpFound), "Either legacy LocalRedirect entry OR both TCP and UDP entries must be found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode) t.Logf("Cilium LRP List Output:\n%s", string(lrpOutput)) t.Logf("Cilium Service List Output:\n%s", string(serviceOutput)) From 8ee37d5c2a9c7ec2fef69ce0a4c72b8f5f843791 Mon Sep 17 00:00:00 2001 From: Karina Ranadive Date: Fri, 14 Nov 2025 12:47:41 -0500 Subject: [PATCH 3/5] adjustments --- test/integration/lrp/lrp_fqdn_test.go | 2 +- test/integration/lrp/lrp_test.go | 63 ++++++++++++++------------- test/internal/kubernetes/utils.go | 18 ++++++++ 3 files changed, 52 insertions(+), 31 deletions(-) diff --git a/test/integration/lrp/lrp_fqdn_test.go b/test/integration/lrp/lrp_fqdn_test.go index 7bfc8f3514..18ddc1caba 100644 --- a/test/integration/lrp/lrp_fqdn_test.go +++ b/test/integration/lrp/lrp_fqdn_test.go @@ -102,7 +102,7 @@ func TestLRPFQDN(t *testing.T) { for _, tt := range tests { tt := tt t.Run(tt.name, func(t *testing.T) { - testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases, promAddress) + testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases, getPrometheusAddress(initialPrometheusPort)) }) } } diff --git a/test/integration/lrp/lrp_test.go b/test/integration/lrp/lrp_test.go index 32d0a83ee8..5126881b12 100644 --- a/test/integration/lrp/lrp_test.go +++ b/test/integration/lrp/lrp_test.go @@ -4,6 +4,7 @@ package lrp import ( "context" + "fmt" "os" "strings" "testing" @@ -33,11 +34,13 @@ const ( dnsService = "kube-dns" retryAttempts = 10 retryDelay = 5 * time.Second - promAddress = "http://localhost:9253/metrics" nodeLocalDNSLabelSelector = "k8s-app=node-local-dns" clientLabelSelector = "lrp-test=true" coreDNSRequestCountTotal = "coredns_dns_request_count_total" clientContainer = "no-op" + // Port constants for prometheus endpoints + initialPrometheusPort = 9253 + recreatedPrometheusPort = 9254 ) var ( @@ -52,6 +55,11 @@ var ( clientPath = ciliumManifestsDir + "client-ds.yaml" ) +// getPrometheusAddress returns the 
prometheus metrics URL for the given port +func getPrometheusAddress(port int) string { + return fmt.Sprintf("http://localhost:%d/metrics", port) +} + func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) { var cleanUpFns []func() success := false @@ -137,8 +145,8 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) { pf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{ Namespace: nodeLocalDNSDS.Namespace, PodName: selectedLocalDNSPod, - LocalPort: 9253, - DestPort: 9253, + LocalPort: initialPrometheusPort, + DestPort: initialPrometheusPort, }) require.NoError(t, err) pctx := context.Background() @@ -227,10 +235,10 @@ func TestLRP(t *testing.T) { t.Logf("LRP Test Starting...") - // Basic LRP test + // Basic LRP test - using initial port from setupLRP testLRPCase(t, ctx, *selectedPod, []string{ "nslookup", "google.com", kubeDNS, - }, "", "", false, true, promAddress) + }, "", "", false, true, getPrometheusAddress(initialPrometheusPort)) t.Logf("LRP Test Completed") @@ -248,11 +256,11 @@ func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, k config := kubernetes.MustGetRestConfig() cs := kubernetes.MustGetClientset() - // Step 1: Initial DNS test to verify LRP is working + // Step 1: Initial Basic LRP test to verify LRP is working t.Log("Step 1: Initial DNS test - verifying LRP functionality") testLRPCase(t, ctx, clientPod, []string{ "nslookup", "google.com", kubeDNS, - }, "", "", false, true, promAddress) + }, "", "", false, true, getPrometheusAddress(initialPrometheusPort)) // Step 2: Validate LRP using cilium commands t.Log("Step 2: Validating LRP using cilium commands") @@ -266,7 +274,7 @@ func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, k t.Log("Step 4: Verifying LRP functionality after pod restart") testLRPCase(t, ctx, restartedPod, []string{ "nslookup", "google.com", kubeDNS, - }, "", "", false, true, promAddress) + }, "", "", false, true, getPrometheusAddress(initialPrometheusPort)) // Step 5: Validate cilium commands still show LRP t.Log("Step 5: Re-validating cilium LRP after restart") @@ -276,11 +284,8 @@ func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, k t.Log("Step 6: Testing resource deletion and recreation") recreatedPod := deleteAndRecreateResources(t, ctx, cs, clientPod) - // Step 7: Final verification after recreation - t.Log("Step 7: Final verification after resource recreation - skipping basic DNS test, will validate with metrics in Step 8") - - // Step 8: Re-establish port forward to new node-local-dns pod and validate metrics - t.Log("Step 8: Re-establishing port forward to new node-local-dns pod for metrics validation") + // Step 7: Re-establish port forward to new node-local-dns pod and validate metrics + t.Log("Step 7: Re-establishing port forward to new node-local-dns pod for metrics validation") // Get the new node-local-dns pod on the same node as our recreated client pod nodeName := recreatedPod.Spec.NodeName @@ -295,8 +300,8 @@ func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, k newPf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{ Namespace: newNodeLocalDNSPod.Namespace, PodName: newNodeLocalDNSPod.Name, - LocalPort: 9254, // Use different port to avoid conflicts - DestPort: 9253, + LocalPort: recreatedPrometheusPort, // Use different port to avoid conflicts + DestPort: initialPrometheusPort, }) require.NoError(t, err) @@ -312,19 +317,16 @@ func testLRPLifecycle(t *testing.T, ctx 
 	t.Log("Port forward to new node-local-dns pod established")

-	// Now test metrics with the new port forward using port 9254
-	newPromAddress := "http://localhost:9254/metrics"
-
 	// Use testLRPCase function with the new prometheus address
 	t.Log("Validating metrics with new node-local-dns pod")
 	testLRPCase(t, ctx, recreatedPod, []string{
 		"nslookup", "github.com", kubeDNS,
-	}, "", "", false, true, newPromAddress)
+	}, "", "", false, true, getPrometheusAddress(recreatedPrometheusPort))

 	t.Logf("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s", newNodeLocalDNSPod.Name)

-	// Step 9: Final cilium validation after node-local-dns restart
-	t.Log("Step 9: Final cilium validation - ensuring LRP is still active after node-local-dns restart")
+	// Step 8: Final cilium validation after node-local-dns restart
+	t.Log("Step 8: Final cilium validation - ensuring LRP is still active after node-local-dns restart")
 	validateCiliumLRP(t, ctx, cs, config)
 }
@@ -382,8 +384,6 @@ func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientse
 		line = strings.TrimSpace(line)
 		if strings.Contains(line, "nodelocaldns") && strings.Contains(line, "kube-system") {
 			// Validate that the line contains expected components
-			require.Contains(t, line, "kube-system", "LRP line should contain kube-system namespace")
-			require.Contains(t, line, "nodelocaldns", "LRP line should contain nodelocaldns name")
 			require.Contains(t, line, "kube-dns", "LRP line should reference kube-dns service")
 			nodelocaldnsFound = true
 			t.Logf("Found nodelocaldns LRP entry: %s", line)
 			break
@@ -434,12 +434,8 @@ func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientse
 // restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference
 func restartClientPodsAndGetPod(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
-	// Find the daemonset name by looking up the pod's owner
-	podDetails, err := cs.CoreV1().Pods(originalPod.Namespace).Get(ctx, originalPod.Name, metav1.GetOptions{})
-	require.NoError(t, err)
-
 	// Get the node name for consistent testing
-	nodeName := podDetails.Spec.NodeName
+	nodeName := originalPod.Spec.NodeName

 	// Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
 	err = kubernetes.MustRestartDaemonset(ctx, cs, originalPod.Namespace, "lrp-test")
 	require.NoError(t, err)
@@ -479,8 +475,15 @@ func deleteAndRecreateResources(t *testing.T, ctx context.Context, cs *k8sclient
 	lrpClient := ciliumCS.CiliumV2().CiliumLocalRedirectPolicies(lrp.Namespace)
 	kubernetes.MustDeleteCiliumLocalRedirectPolicy(ctx, lrpClient, lrp)

-	// Wait for deletion to complete
-	time.Sleep(10 * time.Second)
+	// Wait for client pods to be deleted
+	t.Log("Waiting for client pods to be deleted...")
+	err = kubernetes.WaitForPodsDelete(ctx, cs, originalPod.Namespace, clientLabelSelector)
+	require.NoError(t, err)
+
+	// Wait for LRP to be deleted by polling
+	t.Log("Waiting for LRP to be deleted...")
+	err = kubernetes.WaitForLRPDelete(ctx, ciliumCS, lrp)
+	require.NoError(t, err)

 	// Recreate LRP
 	_, cleanupLRP := kubernetes.MustSetupLRP(ctx, ciliumCS, lrpPath)
diff --git a/test/internal/kubernetes/utils.go b/test/internal/kubernetes/utils.go
index ac35efeada..27aa58e5bd 100644
--- a/test/internal/kubernetes/utils.go
+++ b/test/internal/kubernetes/utils.go
@@ -365,6 +365,24 @@ func WaitForDeploymentToDelete(ctx context.Context, deploymentsClient typedappsv
 	return errors.Wrapf(retrier.Do(ctx, assertDeploymentNotFound), "could not assert deployment %s isNotFound", d.Name)
 }

+func WaitForLRPDelete(ctx context.Context, ciliumClientset *cilium.Clientset, lrp ciliumv2.CiliumLocalRedirectPolicy) error {
+	lrpClient := ciliumClientset.CiliumV2().CiliumLocalRedirectPolicies(lrp.Namespace)
+
+	checkLRPDeleted := func() error {
+		_, err := lrpClient.Get(ctx, lrp.Name, metav1.GetOptions{})
+		if apierrors.IsNotFound(err) {
+			return nil
+		}
+		if err != nil {
+			return errors.Wrapf(err, "could not get LRP %s", lrp.Name)
+		}
+		return errors.Errorf("LRP %s still present", lrp.Name)
+	}
+
+	retrier := retry.Retrier{Attempts: RetryAttempts, Delay: RetryDelay}
+	return errors.Wrap(retrier.Do(ctx, checkLRPDeleted), "failed to wait for LRP to delete")
+}
+
 func WaitForPodDaemonset(ctx context.Context, clientset *kubernetes.Clientset, namespace, daemonsetName, podLabelSelector string) error {
 	podsClient := clientset.CoreV1().Pods(namespace)
 	daemonsetClient := clientset.AppsV1().DaemonSets(namespace)

From 4af974c67475b68763caf29c2f17865132436b4d Mon Sep 17 00:00:00 2001
From: Karina Ranadive
Date: Fri, 14 Nov 2025 12:54:50 -0500
Subject: [PATCH 4/5] fix

---
 test/integration/lrp/lrp_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/integration/lrp/lrp_test.go b/test/integration/lrp/lrp_test.go
index 5126881b12..659d00eaa4 100644
--- a/test/integration/lrp/lrp_test.go
+++ b/test/integration/lrp/lrp_test.go
@@ -438,7 +438,7 @@ func restartClientPodsAndGetPod(t *testing.T, ctx context.Context, cs *k8sclient
 	nodeName := originalPod.Spec.NodeName

 	// Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
-	err = kubernetes.MustRestartDaemonset(ctx, cs, originalPod.Namespace, "lrp-test")
+	err := kubernetes.MustRestartDaemonset(ctx, cs, originalPod.Namespace, "lrp-test")
 	require.NoError(t, err)

From b7e8be8707c0b198e3a3f624501a6107b8a44c66 Mon Sep 17 00:00:00 2001
From: Karina Ranadive
Date: Fri, 14 Nov 2025 13:02:54 -0500
Subject: [PATCH 5/5] removed redundant basic lrp test

---
 test/integration/lrp/lrp_test.go | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/test/integration/lrp/lrp_test.go b/test/integration/lrp/lrp_test.go
index 659d00eaa4..9d473f9188 100644
--- a/test/integration/lrp/lrp_test.go
+++ b/test/integration/lrp/lrp_test.go
@@ -256,36 +256,31 @@ func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, k
 	config := kubernetes.MustGetRestConfig()
 	cs := kubernetes.MustGetClientset()

-	// Step 1: Initial Basic LRP test to verify LRP is working
-	t.Log("Step 1: Initial DNS test - verifying LRP functionality")
-	testLRPCase(t, ctx, clientPod, []string{
-		"nslookup", "google.com", kubeDNS,
-	}, "", "", false, true, getPrometheusAddress(initialPrometheusPort))
-
-	// Step 2: Validate LRP using cilium commands
-	t.Log("Step 2: Validating LRP using cilium commands")
+	// Step 1: Validate LRP using cilium commands
+	t.Log("Step 1: Validating LRP using cilium commands")
 	validateCiliumLRP(t, ctx, cs, config)

-	// Step 3: Restart busybox pods and verify LRP still works
-	t.Log("Step 3: Restarting client pods to test persistence")
+	// Step 2: Restart busybox pods and verify LRP still works
+	t.Log("Step 2: Restarting client pods to test persistence")
 	restartedPod := restartClientPodsAndGetPod(t, ctx, cs, clientPod)

-	// Step 4: Verify metrics after restart
-	t.Log("Step 4: Verifying LRP functionality after pod restart")
+	// Step 3: Verify metrics after restart
+	t.Log("Step 3: Verifying LRP functionality after pod restart")
 	testLRPCase(t, ctx, restartedPod, []string{
 		"nslookup", "google.com", kubeDNS,
 	}, "", "", false, true, getPrometheusAddress(initialPrometheusPort))

-	// Step 5: Validate cilium commands still show LRP
-	t.Log("Step 5: Re-validating cilium LRP after restart")
+	// Step 4: Validate cilium commands still show LRP
+	t.Log("Step 4: Re-validating cilium LRP after restart")
 	validateCiliumLRP(t, ctx, cs, config)

-	// Step 6: Delete and recreate resources & restart nodelocaldns daemonset
-	t.Log("Step 6: Testing resource deletion and recreation")
+	// Step 5: Delete and recreate resources & restart nodelocaldns daemonset
+	t.Log("Step 5: Testing resource deletion and recreation")
 	recreatedPod := deleteAndRecreateResources(t, ctx, cs, clientPod)

-	// Step 7: Re-establish port forward to new node-local-dns pod and validate metrics
-	t.Log("Step 7: Re-establishing port forward to new node-local-dns pod for metrics validation")
+	// Step 6: Re-establish port forward to new node-local-dns pod and validate metrics
+	t.Log("Step 6: Re-establishing port forward to new node-local-dns pod for metrics validation")

 	// Get the new node-local-dns pod on the same node as our recreated client pod
 	nodeName := recreatedPod.Spec.NodeName
@@ -325,8 +320,8 @@ func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, k

 	t.Logf("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s", newNodeLocalDNSPod.Name)

-	// Step 8: Final cilium validation after node-local-dns restart
-	t.Log("Step 8: Final cilium validation - ensuring LRP is still active after node-local-dns restart")
+	// Step 7: Final cilium validation after node-local-dns restart
+	t.Log("Step 7: Final cilium validation - ensuring LRP is still active after node-local-dns restart")
 	validateCiliumLRP(t, ctx, cs, config)
 }