Commit 4e6b260

test: expand LRP test to include lifecycle events

Author: Karina Ranadive
Parent: b9f406a

File tree: 3 files changed, +330 -10 lines

test/integration/lrp/lrp_fqdn_test.go

Lines changed: 1 addition & 1 deletion

@@ -102,7 +102,7 @@ func TestLRPFQDN(t *testing.T) {
 	for _, tt := range tests {
 		tt := tt
 		t.Run(tt.name, func(t *testing.T) {
-			testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases)
+			testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases, promAddress)
 		})
 	}
 }
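The updated call is table-driven. The tests table sits outside this hunk; for orientation, a hypothetical sketch of the shape the call site implies (field names come from the call above, while the types and the sample entry are assumptions, not code from the repo):

    // Hypothetical reconstruction of the table behind tt.
    tests := []struct {
        name                   string
        command                []string
        expectedMsgContains    string
        expectedErrMsgContains string
        shouldError            bool
        countIncreases         bool
    }{
        {
            name:           "fqdn lookup resolves and bumps the coredns counter",
            command:        []string{"nslookup", "example.com"},
            shouldError:    false,
            countIncreases: true,
        },
    }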

test/integration/lrp/lrp_test.go

Lines changed: 328 additions & 8 deletions

@@ -4,6 +4,7 @@ package lrp
 
 import (
 	"context"
+	"fmt"
 	"os"
 	"strings"
 	"testing"
@@ -13,11 +14,16 @@ import (
 	"github.com/Azure/azure-container-networking/test/integration/prometheus"
 	"github.com/Azure/azure-container-networking/test/internal/kubernetes"
 	"github.com/Azure/azure-container-networking/test/internal/retry"
+	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
 	ciliumClientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
 	"github.com/pkg/errors"
 	"github.com/stretchr/testify/require"
 	"golang.org/x/exp/rand"
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sclient "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"sigs.k8s.io/yaml"
 )
 
 const (
@@ -154,7 +160,7 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
 }
 
 func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, clientCmd []string, expectResponse, expectErrMsg string,
-	shouldError, countShouldIncrease bool) {
+	shouldError, countShouldIncrease bool, prometheusAddress string) {
 
 	config := kubernetes.MustGetRestConfig()
 	cs := kubernetes.MustGetClientset()
@@ -167,9 +173,11 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
 		"zone": ".",
 	}
 
-	// curl localhost:9253/metrics
-	beforeMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+	// curl to the specified prometheus address
+	beforeMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
 	require.NoError(t, err)
+	beforeValue := beforeMetric.GetCounter().GetValue()
+	t.Logf("Before DNS request - metric count: %.0f", beforeValue)
 
 	t.Log("calling command from client")
 
@@ -187,13 +195,15 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
 	time.Sleep(500 * time.Millisecond)
 
 	// curl again and see count diff
-	afterMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+	afterMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
 	require.NoError(t, err)
+	afterValue := afterMetric.GetCounter().GetValue()
+	t.Logf("After DNS request - metric count: %.0f (diff: %.0f)", afterValue, afterValue-beforeValue)
 
 	if countShouldIncrease {
-		require.Greater(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count did not increase after command")
+		require.Greater(t, afterValue, beforeValue, "dns metric count did not increase after command - before: %.0f, after: %.0f", beforeValue, afterValue)
 	} else {
-		require.Equal(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count increased after command")
+		require.Equal(t, afterValue, beforeValue, "dns metric count increased after command - before: %.0f, after: %.0f", beforeValue, afterValue)
 	}
 }
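testLRPCase brackets each client command with two scrapes of the node-local-dns metrics endpoint and compares the counter values. The prometheus.GetMetric helper itself is not part of this diff; as an illustration only, a sketch of what such a helper can look like when built on the standard Prometheus text-format parser (the name and signature mirror the call sites above, but the body is an assumption, not the repo's implementation):

    // Sketch of a text-format GetMetric helper; assumes the endpoint serves
    // the Prometheus exposition text format.
    package prometheus

    import (
        "fmt"
        "net/http"

        dto "github.com/prometheus/client_model/go"
        "github.com/prometheus/common/expfmt"
    )

    func GetMetric(addr, name string, labels map[string]string) (*dto.Metric, error) {
        resp, err := http.Get(addr) // e.g. "http://localhost:9253/metrics"
        if err != nil {
            return nil, err
        }
        defer resp.Body.Close()

        parser := expfmt.TextParser{}
        families, err := parser.TextToMetricFamilies(resp.Body)
        if err != nil {
            return nil, err
        }
        family, ok := families[name]
        if !ok {
            return nil, fmt.Errorf("metric family %q not found", name)
        }
        // Return the first sample whose labels include every requested pair.
        for _, m := range family.GetMetric() {
            matches := 0
            for _, lp := range m.GetLabel() {
                if v, ok := labels[lp.GetName()]; ok && v == lp.GetValue() {
                    matches++
                }
            }
            if matches == len(labels) {
                return m, nil
            }
        }
        return nil, fmt.Errorf("no sample of %q matched labels %v", name, labels)
    }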

@@ -210,9 +220,319 @@ func TestLRP(t *testing.T) {
 	defer cleanupFn()
 	require.NotNil(t, selectedPod)
 
+	// Get the kube-dns service IP for DNS requests
+	cs := kubernetes.MustGetClientset()
+	svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
+	require.NoError(t, err)
+	kubeDNS := svc.Spec.ClusterIP
+
+	t.Logf("LRP Test Starting...")
+
+	// Basic LRP test
 	testLRPCase(t, ctx, *selectedPod, []string{
-		"nslookup", "google.com", "10.0.0.10",
-	}, "", "", false, true)
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, promAddress)
+
+	t.Logf("LRP Test Completed")
+
+	t.Logf("LRP Lifecycle Test Starting")
+
+	// Run LRP Lifecycle test
+	testLRPLifecycle(t, ctx, *selectedPod, kubeDNS)
+
+	t.Logf("LRP Lifecycle Test Completed")
+}
+
+// testLRPLifecycle performs testing of Local Redirect Policy functionality
+// including pod restarts, resource recreation, and cilium command validation
+func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, kubeDNS string) {
+	config := kubernetes.MustGetRestConfig()
+	cs := kubernetes.MustGetClientset()
+
+	// Step 1: Initial DNS test to verify LRP is working
+	t.Log("Step 1: Initial DNS test - verifying LRP functionality")
+	testLRPCase(t, ctx, clientPod, []string{
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, promAddress)
+
+	// Step 2: Validate LRP using cilium commands
+	t.Log("Step 2: Validating LRP using cilium commands")
+	validateCiliumLRP(t, ctx, cs, config)
+
+	// Step 3: Restart busybox pods and verify LRP still works
+	t.Log("Step 3: Restarting client pods to test persistence")
+	restartedPod := restartClientPodsAndGetPod(t, ctx, cs, clientPod)
+
+	// Step 4: Verify metrics after restart
+	t.Log("Step 4: Verifying LRP functionality after pod restart")
+	testLRPCase(t, ctx, restartedPod, []string{
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, promAddress)
+
+	// Step 5: Validate cilium commands still show LRP
+	t.Log("Step 5: Re-validating cilium LRP after restart")
+	validateCiliumLRP(t, ctx, cs, config)
+
+	// Step 6: Delete and recreate resources & restart nodelocaldns daemonset
+	t.Log("Step 6: Testing resource deletion and recreation")
+	recreatedPod := deleteAndRecreateResources(t, ctx, cs, clientPod)
+
+	// Step 7: Final verification after recreation
+	t.Log("Step 7: Final verification after resource recreation - skipping basic DNS test, will validate with metrics in Step 8")
+
+	// Step 8: Re-establish port forward to new node-local-dns pod and validate metrics
+	t.Log("Step 8: Re-establishing port forward to new node-local-dns pod for metrics validation")
+
+	// Get the new node-local-dns pod on the same node as our recreated client pod
+	nodeName := recreatedPod.Spec.NodeName
+	newNodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, newNodeLocalDNSPods.Items, "No node-local-dns pod found on node %s after restart", nodeName)
+
+	newNodeLocalDNSPod := TakeOne(newNodeLocalDNSPods.Items)
+	t.Logf("Setting up port forward to new node-local-dns pod: %s", newNodeLocalDNSPod.Name)
+
+	// Setup new port forward to the new node-local-dns pod
+	newPf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{
+		Namespace: newNodeLocalDNSPod.Namespace,
+		PodName:   newNodeLocalDNSPod.Name,
+		LocalPort: 9254, // Use different port to avoid conflicts
+		DestPort:  9253,
+	})
+	require.NoError(t, err)
+
+	newPortForwardCtx, newCancel := context.WithTimeout(ctx, (retryAttempts+1)*retryDelay)
+	defer newCancel()
+
+	err = defaultRetrier.Do(newPortForwardCtx, func() error {
+		t.Logf("attempting port forward to new node-local-dns pod %s...", newNodeLocalDNSPod.Name)
+		return errors.Wrap(newPf.Forward(newPortForwardCtx), "could not start port forward to new pod")
+	})
+	require.NoError(t, err, "could not start port forward to new node-local-dns pod")
+	defer newPf.Stop()
+
+	t.Log("Port forward to new node-local-dns pod established")
+
+	// Now test metrics with the new port forward using port 9254
+	newPromAddress := "http://localhost:9254/metrics"
+
+	// Use testLRPCase function with the new prometheus address
+	t.Log("Validating metrics with new node-local-dns pod")
+	testLRPCase(t, ctx, recreatedPod, []string{
+		"nslookup", "github.com", kubeDNS,
+	}, "", "", false, true, newPromAddress)
+
+	t.Logf("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s", newNodeLocalDNSPod.Name)
+
+	// Step 9: Final cilium validation after node-local-dns restart
+	t.Log("Step 9: Final cilium validation - ensuring LRP is still active after node-local-dns restart")
+	validateCiliumLRP(t, ctx, cs, config)
+}
+
+// validateCiliumLRP checks that LRP is properly configured in cilium
+func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, config *rest.Config) {
+	ciliumPods, err := cs.CoreV1().Pods(kubeSystemNamespace).List(ctx, metav1.ListOptions{
+		LabelSelector: "k8s-app=cilium",
+	})
+	require.NoError(t, err)
+	require.NotEmpty(t, ciliumPods.Items)
+	ciliumPod := TakeOne(ciliumPods.Items)
+
+	// Get Kubernetes version to determine validation approach
+	serverVersion, err := cs.Discovery().ServerVersion()
+	require.NoError(t, err)
+	t.Logf("Detected Kubernetes version: %s", serverVersion.String())
+
+	// Parse version to determine if we should use modern or legacy validation
+	// K8s 1.32.0+ should use modern Cilium format (v1.17+)
+	useModernFormat := false
+	if serverVersion.Major == "1" {
+		// Parse minor version
+		var minorVersion int
+		_, err := fmt.Sscanf(serverVersion.Minor, "%d", &minorVersion)
+		if err == nil && minorVersion >= 32 {
+			useModernFormat = true
+		}
+	}
+
+	if useModernFormat {
+		t.Log("Using modern validation approach based on Kubernetes version >= 1.32.0")
+	} else {
+		t.Log("Using legacy validation approach based on Kubernetes version < 1.32.0")
+	}
+
+	// Get kube-dns service IP for validation
+	svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
+	require.NoError(t, err)
+	kubeDNSIP := svc.Spec.ClusterIP
+
+	// IMPORTANT: Get node-local-dns pod IP on the SAME node as the cilium pod we're using
+	selectedNode := ciliumPod.Spec.NodeName
+	t.Logf("Using cilium pod %s on node %s for validation", ciliumPod.Name, selectedNode)
+
+	// Get node-local-dns pod specifically on the same node as our cilium pod
+	nodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, selectedNode)
+	require.NoError(t, err)
+	require.NotEmpty(t, nodeLocalDNSPods.Items, "No node-local-dns pod found on node %s", selectedNode)
+
+	// Use the first (and should be only) node-local-dns pod on this node
+	nodeLocalDNSPod := nodeLocalDNSPods.Items[0]
+	nodeLocalDNSIP := nodeLocalDNSPod.Status.PodIP
+	require.NotEmpty(t, nodeLocalDNSIP, "node-local-dns pod %s has no IP address", nodeLocalDNSPod.Name)
+
+	t.Logf("Validating LRP: kubeDNS IP=%s, nodeLocalDNS IP=%s (pod: %s), node=%s",
+		kubeDNSIP, nodeLocalDNSIP, nodeLocalDNSPod.Name, selectedNode)
+
+	// Check cilium lrp list
+	lrpListCmd := []string{"cilium", "lrp", "list"}
+	lrpOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", lrpListCmd, config, false)
+	require.NoError(t, err)
+
+	// Validate the LRP output structure more thoroughly
+	lrpOutputStr := string(lrpOutput)
+	require.Contains(t, lrpOutputStr, "nodelocaldns", "LRP not found in cilium lrp list")
+
+	// Parse LRP list output to validate structure
+	lrpLines := strings.Split(lrpOutputStr, "\n")
+	nodelocaldnsFound := false
+
+	for _, line := range lrpLines {
+		line = strings.TrimSpace(line)
+		if strings.Contains(line, "nodelocaldns") && strings.Contains(line, "kube-system") {
+			// Validate that the line contains expected components
+			require.Contains(t, line, "kube-system", "LRP line should contain kube-system namespace")
+			require.Contains(t, line, "nodelocaldns", "LRP line should contain nodelocaldns name")
+			require.Contains(t, line, "kube-dns", "LRP line should reference kube-dns service")
+			nodelocaldnsFound = true
+			t.Logf("Found nodelocaldns LRP entry: %s", line)
+			break
+		}
+	}
+
+	require.True(t, nodelocaldnsFound, "nodelocaldns LRP entry not found with expected structure in output: %s", lrpOutputStr)
+
+	// Check cilium service list for localredirect
+	serviceListCmd := []string{"cilium", "service", "list"}
+	serviceOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", serviceListCmd, config, false)
+	require.NoError(t, err)
+	require.Contains(t, string(serviceOutput), "LocalRedirect", "LocalRedirect not found in cilium service list")
+
+	// Validate LocalRedirect entries
+	serviceLines := strings.Split(string(serviceOutput), "\n")
+	tcpFound := false
+	udpFound := false
+	legacyFound := false
+
+	for _, line := range serviceLines {
+		if strings.Contains(line, "LocalRedirect") && strings.Contains(line, kubeDNSIP) {
+			// Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs
+			if strings.Contains(line, nodeLocalDNSIP) {
+				if useModernFormat {
+					// Modern format (K8s 1.32.0+/Cilium v1.17+): Check for explicit protocol
+					if strings.Contains(line, "/TCP") {
+						tcpFound = true
+						t.Logf("Found TCP LocalRedirect: %s", strings.TrimSpace(line))
+					} else if strings.Contains(line, "/UDP") {
+						udpFound = true
+						t.Logf("Found UDP LocalRedirect: %s", strings.TrimSpace(line))
+					}
+				} else {
+					// Legacy format (K8s < 1.32.0/Cilium < v1.17): No protocol specified
+					legacyFound = true
+					t.Logf("Found legacy LocalRedirect: %s", strings.TrimSpace(line))
+				}
+			}
+		}
+	}
+
+	// Validate based on determined format
+	if useModernFormat {
+		// Modern format (K8s 1.32.0+/Cilium v1.17+): Separate TCP and UDP entries
+		t.Log("Validating modern Cilium format - expecting separate TCP and UDP LocalRedirect entries")
+		require.True(t, tcpFound, "TCP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)
+		require.True(t, udpFound, "UDP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)
+	} else {
+		// Legacy format (K8s < 1.32.0/Cilium < v1.17): Just one LocalRedirect entry without protocol
+		t.Log("Validating legacy Cilium format - expecting single LocalRedirect entry without protocol")
+		require.True(t, legacyFound, "Legacy LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)
+	}
+
+	t.Logf("Cilium LRP List Output:\n%s", string(lrpOutput))
+	t.Logf("Cilium Service List Output:\n%s", string(serviceOutput))
+}
+
+// restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference
+func restartClientPodsAndGetPod(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
+	// Find the daemonset name by looking up the pod's owner
+	podDetails, err := cs.CoreV1().Pods(originalPod.Namespace).Get(ctx, originalPod.Name, metav1.GetOptions{})
+	require.NoError(t, err)
+
+	// Get the node name for consistent testing
+	nodeName := podDetails.Spec.NodeName
+
+	// Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
+	err = kubernetes.MustRestartDaemonset(ctx, cs, originalPod.Namespace, "lrp-test")
+	require.NoError(t, err)
+
+	// Wait for the daemonset to be ready
+	kubernetes.WaitForPodDaemonset(ctx, cs, originalPod.Namespace, "lrp-test", clientLabelSelector)
+
+	// Get the new pod on the same node
+	clientPods, err := kubernetes.GetPodsByNode(ctx, cs, originalPod.Namespace, clientLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, clientPods.Items)
+
+	return TakeOne(clientPods.Items)
+}
+
+// deleteAndRecreateResources deletes and recreates client pods and LRP, returning new pod
+func deleteAndRecreateResources(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
+	config := kubernetes.MustGetRestConfig()
+	ciliumCS, err := ciliumClientset.NewForConfig(config)
+	require.NoError(t, err)
+
+	nodeName := originalPod.Spec.NodeName
+
+	// Delete client daemonset
+	dsClient := cs.AppsV1().DaemonSets(originalPod.Namespace)
+	clientDS := kubernetes.MustParseDaemonSet(clientPath)
+	kubernetes.MustDeleteDaemonset(ctx, dsClient, clientDS)
+
+	// Delete LRP
+	lrpContent, err := os.ReadFile(lrpPath)
+	require.NoError(t, err)
+	var lrp ciliumv2.CiliumLocalRedirectPolicy
+	err = yaml.Unmarshal(lrpContent, &lrp)
+	require.NoError(t, err)
+
+	lrpClient := ciliumCS.CiliumV2().CiliumLocalRedirectPolicies(lrp.Namespace)
+	kubernetes.MustDeleteCiliumLocalRedirectPolicy(ctx, lrpClient, lrp)
+
+	// Wait for deletion to complete
+	time.Sleep(10 * time.Second)
+
+	// Recreate LRP
+	_, cleanupLRP := kubernetes.MustSetupLRP(ctx, ciliumCS, lrpPath)
+	t.Cleanup(cleanupLRP)
+
+	// Restart node-local-dns pods to pick up new LRP configuration
+	t.Log("Restarting node-local-dns pods after LRP recreation")
+	err = kubernetes.MustRestartDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns")
+	require.NoError(t, err)
+	kubernetes.WaitForPodDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns", nodeLocalDNSLabelSelector)
+
+	// Recreate client daemonset
+	_, cleanupClient := kubernetes.MustSetupDaemonset(ctx, cs, clientPath)
+	t.Cleanup(cleanupClient)
+
+	// Wait for pods to be ready
+	kubernetes.WaitForPodDaemonset(ctx, cs, clientDS.Namespace, clientDS.Name, clientLabelSelector)
+
+	// Get new pod on the same node
+	clientPods, err := kubernetes.GetPodsByNode(ctx, cs, clientDS.Namespace, clientLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, clientPods.Items)
+
+	return TakeOne(clientPods.Items)
 }
 
 // TakeOne takes one item from the slice randomly; if empty, it returns the empty value for the type
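Step 8 of testLRPLifecycle above reaches the new node-local-dns pod's metrics port through the repo's internal k8s.NewPortForwarder. As a rough sketch of what such a forwarder typically wraps, here is the stock client-go SPDY port-forward flow; the helper name and wiring are assumptions, only the client-go APIs are real:

    // Sketch of a client-go based port forward, comparable to what
    // k8s.NewPortForwarder appears to wrap.
    package main

    import (
        "fmt"
        "net/http"
        "net/url"
        "os"
        "strings"

        "k8s.io/client-go/rest"
        "k8s.io/client-go/tools/portforward"
        "k8s.io/client-go/transport/spdy"
    )

    // forwardPort forwards localPort on this host to destPort on the target pod,
    // blocking until stopCh is closed or the connection fails.
    func forwardPort(config *rest.Config, namespace, pod string, localPort, destPort int,
        stopCh <-chan struct{}, readyCh chan struct{}) error {
        // Port-forward subresource endpoint for the target pod.
        path := fmt.Sprintf("/api/v1/namespaces/%s/pods/%s/portforward", namespace, pod)
        rt, upgrader, err := spdy.RoundTripperFor(config)
        if err != nil {
            return err
        }
        dialer := spdy.NewDialer(upgrader, &http.Client{Transport: rt}, http.MethodPost, &url.URL{
            Scheme: "https",
            Host:   strings.TrimPrefix(config.Host, "https://"), // simplification: assumes an https host
            Path:   path,
        })
        fw, err := portforward.New(dialer, []string{fmt.Sprintf("%d:%d", localPort, destPort)},
            stopCh, readyCh, os.Stdout, os.Stderr)
        if err != nil {
            return err
        }
        return fw.ForwardPorts()
    }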

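deleteAndRecreateResources above waits out deletion with a fixed time.Sleep(10 * time.Second). A sketch of a polling alternative, assuming the cilium typed client follows standard client-gen conventions; the helper name and the 30-second budget are ours, not the commit's:

    // waitForLRPDeletion polls until the policy is actually gone instead of
    // sleeping for a fixed interval.
    package lrp

    import (
        "context"
        "time"

        ciliumv2typed "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned/typed/cilium.io/v2"
        apierrors "k8s.io/apimachinery/pkg/api/errors"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/apimachinery/pkg/util/wait"
    )

    func waitForLRPDeletion(ctx context.Context, lrpClient ciliumv2typed.CiliumLocalRedirectPolicyInterface, name string) error {
        return wait.PollUntilContextTimeout(ctx, time.Second, 30*time.Second, true,
            func(ctx context.Context) (bool, error) {
                _, err := lrpClient.Get(ctx, name, metav1.GetOptions{})
                if apierrors.IsNotFound(err) {
                    return true, nil // deletion observed; safe to recreate
                }
                return false, nil // still present (or transient error); keep polling
            })
    }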