
Commit 1bfc513

Author: Karina Ranadive (committed)

lrp comprehensive test

1 parent a3f179b, commit 1bfc513

File tree: 2 files changed (+259, -7 lines)


test/integration/lrp/lrp_fqdn_test.go

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ func TestLRPFQDN(t *testing.T) {
 	for _, tt := range tests {
 		tt := tt
 		t.Run(tt.name, func(t *testing.T) {
-			testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases)
+			testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases, promAddress)
 		})
 	}
 }
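The only change in this file is that the shared testLRPCase helper now takes the Prometheus metrics endpoint explicitly instead of always using the package-level promAddress. As a hedged sketch (not part of the commit), a caller can now point the same assertion at either the default forward or an alternate one; tt, selectedPod, and promAddress are names taken from the surrounding tests, and the 9254 address mirrors the alternate port forward set up later in lrp_test.go:

	// Default metrics endpoint (the existing package-level promAddress).
	testLRPCase(t, ctx, *selectedPod, tt.command,
		tt.expectedMsgContains, tt.expectedErrMsgContains,
		tt.shouldError, tt.countIncreases, promAddress)

	// Alternate endpoint, e.g. after re-forwarding to a recreated node-local-dns pod.
	testLRPCase(t, ctx, *selectedPod, tt.command,
		tt.expectedMsgContains, tt.expectedErrMsgContains,
		tt.shouldError, tt.countIncreases, "http://localhost:9254/metrics")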

test/integration/lrp/lrp_test.go

Lines changed: 258 additions & 6 deletions
@@ -13,11 +13,16 @@ import (
 	"github.com/Azure/azure-container-networking/test/integration/prometheus"
 	"github.com/Azure/azure-container-networking/test/internal/kubernetes"
 	"github.com/Azure/azure-container-networking/test/internal/retry"
+	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
 	ciliumClientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
 	"github.com/pkg/errors"
 	"github.com/stretchr/testify/require"
 	"golang.org/x/exp/rand"
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sclient "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"sigs.k8s.io/yaml"
 )

 const (
@@ -154,7 +159,7 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
 }

 func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, clientCmd []string, expectResponse, expectErrMsg string,
-	shouldError, countShouldIncrease bool) {
+	shouldError, countShouldIncrease bool, prometheusAddress string) {

 	config := kubernetes.MustGetRestConfig()
 	cs := kubernetes.MustGetClientset()
@@ -167,8 +172,8 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
 		"zone": ".",
 	}

-	// curl localhost:9253/metrics
-	beforeMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+	// curl to the specified prometheus address
+	beforeMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
 	require.NoError(t, err)

 	t.Log("calling command from client")
@@ -187,7 +192,7 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
 	time.Sleep(500 * time.Millisecond)

 	// curl again and see count diff
-	afterMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+	afterMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
 	require.NoError(t, err)

 	if countShouldIncrease {
@@ -210,9 +215,256 @@ func TestLRP(t *testing.T) {
 	defer cleanupFn()
 	require.NotNil(t, selectedPod)

+	// Get the kube-dns service IP for DNS requests
+	cs := kubernetes.MustGetClientset()
+	svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
+	require.NoError(t, err)
+	kubeDNS := svc.Spec.ClusterIP
+
+	t.Logf("Using kube-dns service IP: %s", kubeDNS)
+
+	// Basic LRP test
 	testLRPCase(t, ctx, *selectedPod, []string{
-		"nslookup", "google.com", "10.0.0.10",
-	}, "", "", false, true)
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, promAddress)
+
+	// Run comprehensive test
+	testLRPComprehensive(t, ctx, *selectedPod, kubeDNS)
+}
+
+// testLRPComprehensive performs a comprehensive test of Local Redirect Policy functionality
+// including pod restarts, resource recreation, and cilium command validation
+func testLRPComprehensive(t *testing.T, ctx context.Context, clientPod corev1.Pod, kubeDNS string) {
+	config := kubernetes.MustGetRestConfig()
+	cs := kubernetes.MustGetClientset()
+
+	// Step 1: Initial DNS test to verify LRP is working
+	t.Log("Step 1: Initial DNS test - verifying LRP functionality")
+	testLRPCase(t, ctx, clientPod, []string{
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, promAddress)
+
+	// Step 2: Validate LRP using cilium commands
+	t.Log("Step 2: Validating LRP using cilium commands")
+	validateCiliumLRP(t, ctx, cs, config)
+
+	// Step 3: Restart busybox pods and verify LRP still works
+	t.Log("Step 3: Restarting client pods to test persistence")
+	restartedPod := restartClientPodsAndGetPod(t, ctx, cs, clientPod)
+
+	// Step 4: Verify metrics after restart
+	t.Log("Step 4: Verifying LRP functionality after pod restart")
+	testLRPCase(t, ctx, restartedPod, []string{
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, promAddress)
+
+	// Step 5: Validate cilium commands still show LRP
+	t.Log("Step 5: Re-validating cilium LRP after restart")
+	validateCiliumLRP(t, ctx, cs, config)
+
+	// Step 6: Delete and recreate resources & restart nodelocaldns daemonset
+	t.Log("Step 6: Testing resource deletion and recreation")
+	recreatedPod := deleteAndRecreateResources(t, ctx, cs, clientPod)
+
+	// Step 7: Final verification after recreation
+	t.Log("Step 7: Final verification after resource recreation - skipping basic DNS test, will validate with metrics in Step 8")
+
+	// Step 8: Re-establish port forward to new node-local-dns pod and validate metrics
+	t.Log("Step 8: Re-establishing port forward to new node-local-dns pod for metrics validation")
+
+	// Get the new node-local-dns pod on the same node as our recreated client pod
+	nodeName := recreatedPod.Spec.NodeName
+	newNodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, newNodeLocalDNSPods.Items, "No node-local-dns pod found on node %s after restart", nodeName)
+
+	newNodeLocalDNSPod := TakeOne(newNodeLocalDNSPods.Items)
+	t.Logf("Setting up port forward to new node-local-dns pod: %s", newNodeLocalDNSPod.Name)
+
+	// Setup new port forward to the new node-local-dns pod
+	newPf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{
+		Namespace: newNodeLocalDNSPod.Namespace,
+		PodName:   newNodeLocalDNSPod.Name,
+		LocalPort: 9254, // Use different port to avoid conflicts
+		DestPort:  9253,
+	})
+	require.NoError(t, err)
+
+	newPortForwardCtx, newCancel := context.WithTimeout(ctx, (retryAttempts+1)*retryDelay)
+	defer newCancel()
+
+	err = defaultRetrier.Do(newPortForwardCtx, func() error {
+		t.Logf("attempting port forward to new node-local-dns pod %s...", newNodeLocalDNSPod.Name)
+		return errors.Wrap(newPf.Forward(newPortForwardCtx), "could not start port forward to new pod")
+	})
+	require.NoError(t, err, "could not start port forward to new node-local-dns pod")
+	defer newPf.Stop()
+
+	t.Log("Port forward to new node-local-dns pod established")
+
+	// Now test metrics with the new port forward using port 9254
+	newPromAddress := "http://localhost:9254/metrics"
+
+	// Use testLRPCase function with the new prometheus address
+	t.Log("Validating metrics with new node-local-dns pod")
+	testLRPCase(t, ctx, recreatedPod, []string{
+		"nslookup", "github.com", kubeDNS,
+	}, "", "", false, true, newPromAddress)
+
+	t.Logf("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s", newNodeLocalDNSPod.Name)
+
+	// Step 9: Final cilium validation after node-local-dns restart
+	t.Log("Step 9: Final cilium validation - ensuring LRP is still active after node-local-dns restart")
+	validateCiliumLRP(t, ctx, cs, config)
+
+	t.Log("Comprehensive LRP test completed successfully")
+}
+
+// validateCiliumLRP checks that LRP is properly configured in cilium
+func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, config *rest.Config) {
+	ciliumPods, err := cs.CoreV1().Pods(kubeSystemNamespace).List(ctx, metav1.ListOptions{
+		LabelSelector: "k8s-app=cilium",
+	})
+	require.NoError(t, err)
+	require.NotEmpty(t, ciliumPods.Items)
+	ciliumPod := TakeOne(ciliumPods.Items)
+
+	// Get kube-dns service IP for validation
+	svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
+	require.NoError(t, err)
+	kubeDNSIP := svc.Spec.ClusterIP
+
+	// IMPORTANT: Get node-local-dns pod IP on the SAME node as the cilium pod we're using
+	selectedNode := ciliumPod.Spec.NodeName
+	t.Logf("Using cilium pod %s on node %s for validation", ciliumPod.Name, selectedNode)
+
+	// Get node-local-dns pod specifically on the same node as our cilium pod
+	nodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, selectedNode)
+	require.NoError(t, err)
+	require.NotEmpty(t, nodeLocalDNSPods.Items, "No node-local-dns pod found on node %s", selectedNode)
+
+	// Use the first (and should be only) node-local-dns pod on this node
+	nodeLocalDNSPod := nodeLocalDNSPods.Items[0]
+	nodeLocalDNSIP := nodeLocalDNSPod.Status.PodIP
+	require.NotEmpty(t, nodeLocalDNSIP, "node-local-dns pod %s has no IP address", nodeLocalDNSPod.Name)
+
+	t.Logf("Validating LRP: kubeDNS IP=%s, nodeLocalDNS IP=%s (pod: %s), node=%s",
+		kubeDNSIP, nodeLocalDNSIP, nodeLocalDNSPod.Name, selectedNode)
+
+	// Check cilium lrp list
+	lrpListCmd := []string{"cilium", "lrp", "list"}
+	lrpOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", lrpListCmd, config, false)
+	require.NoError(t, err)
+	require.Contains(t, string(lrpOutput), "nodelocaldns", "LRP not found in cilium lrp list")
+
+	// Check cilium service list for localredirect
+	serviceListCmd := []string{"cilium", "service", "list"}
+	serviceOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", serviceListCmd, config, false)
+	require.NoError(t, err)
+	require.Contains(t, string(serviceOutput), "LocalRedirect", "LocalRedirect not found in cilium service list")
+
+	// Validate LocalRedirect entries
+	serviceLines := strings.Split(string(serviceOutput), "\n")
+	tcpFound := false
+	udpFound := false
+
+	for _, line := range serviceLines {
+		if strings.Contains(line, "LocalRedirect") && strings.Contains(line, kubeDNSIP) {
+			// Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs
+			if strings.Contains(line, nodeLocalDNSIP) {
+				if strings.Contains(line, "/TCP") {
+					tcpFound = true
+					t.Logf("Found TCP LocalRedirect: %s", strings.TrimSpace(line))
+				}
+				if strings.Contains(line, "/UDP") {
+					udpFound = true
+					t.Logf("Found UDP LocalRedirect: %s", strings.TrimSpace(line))
+				}
+			}
+		}
+	}
+
+	// Verify both TCP and UDP LocalRedirect entries exist
+	require.True(t, tcpFound, "TCP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)
+	require.True(t, udpFound, "UDP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)
+
+	t.Logf("Cilium LRP List Output:\n%s", string(lrpOutput))
+	t.Logf("Cilium Service List Output:\n%s", string(serviceOutput))
+}
+
+// restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference
+func restartClientPodsAndGetPod(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
+	// Find the daemonset name by looking up the pod's owner
+	podDetails, err := cs.CoreV1().Pods(originalPod.Namespace).Get(ctx, originalPod.Name, metav1.GetOptions{})
+	require.NoError(t, err)
+
+	// Get the node name for consistent testing
+	nodeName := podDetails.Spec.NodeName
+
+	// Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
+	err = kubernetes.MustRestartDaemonset(ctx, cs, originalPod.Namespace, "lrp-test")
+	require.NoError(t, err)
+
+	// Wait for the daemonset to be ready
+	kubernetes.WaitForPodDaemonset(ctx, cs, originalPod.Namespace, "lrp-test", clientLabelSelector)
+
+	// Get the new pod on the same node
+	clientPods, err := kubernetes.GetPodsByNode(ctx, cs, originalPod.Namespace, clientLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, clientPods.Items)
+
+	return TakeOne(clientPods.Items)
+}
+
+// deleteAndRecreateResources deletes and recreates client pods and LRP, returning new pod
+func deleteAndRecreateResources(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
+	config := kubernetes.MustGetRestConfig()
+	ciliumCS, err := ciliumClientset.NewForConfig(config)
+	require.NoError(t, err)
+
+	nodeName := originalPod.Spec.NodeName
+
+	// Delete client daemonset
+	dsClient := cs.AppsV1().DaemonSets(originalPod.Namespace)
+	clientDS := kubernetes.MustParseDaemonSet(clientPath)
+	kubernetes.MustDeleteDaemonset(ctx, dsClient, clientDS)
+
+	// Delete LRP
+	lrpContent, err := os.ReadFile(lrpPath)
+	require.NoError(t, err)
+	var lrp ciliumv2.CiliumLocalRedirectPolicy
+	err = yaml.Unmarshal(lrpContent, &lrp)
+	require.NoError(t, err)
+
+	lrpClient := ciliumCS.CiliumV2().CiliumLocalRedirectPolicies(lrp.Namespace)
+	kubernetes.MustDeleteCiliumLocalRedirectPolicy(ctx, lrpClient, lrp)
+
+	// Wait for deletion to complete
+	time.Sleep(10 * time.Second)
+
+	// Recreate LRP
+	_, cleanupLRP := kubernetes.MustSetupLRP(ctx, ciliumCS, lrpPath)
+	t.Cleanup(cleanupLRP)
+
+	// Restart node-local-dns pods to pick up new LRP configuration
+	t.Log("Restarting node-local-dns pods after LRP recreation")
+	err = kubernetes.MustRestartDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns")
+	require.NoError(t, err)
+	kubernetes.WaitForPodDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns", nodeLocalDNSLabelSelector)
+
+	// Recreate client daemonset
+	_, cleanupClient := kubernetes.MustSetupDaemonset(ctx, cs, clientPath)
+	t.Cleanup(cleanupClient)
+
+	// Wait for pods to be ready
+	kubernetes.WaitForPodDaemonset(ctx, cs, clientDS.Namespace, clientDS.Name, clientLabelSelector)
+
+	// Get new pod on the same node
+	clientPods, err := kubernetes.GetPodsByNode(ctx, cs, clientDS.Namespace, clientLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, clientPods.Items)
+
+	return TakeOne(clientPods.Items)
 }

 // TakeOne takes one item from the slice randomly; if empty, it returns the empty value for the type
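For readers unfamiliar with the metric check that testLRPCase performs against the forwarded address, the sketch below shows roughly what scraping the CoreDNS request counter from a node-local-dns metrics endpoint involves. It is a self-contained illustration, not the repository's prometheus.GetMetric helper: the endpoint address matches the port forwards above, while the literal metric name and the zone="." label are assumptions standing in for the coreDNSRequestCountTotal constant and metricLabels map used in the tests.

	package main

	import (
		"bufio"
		"fmt"
		"net/http"
		"strconv"
		"strings"
	)

	// scrapeCounter fetches a Prometheus text-format endpoint and returns the value of the
	// first sample whose name and label substrings all match. Simplified parser: it skips
	// HELP/TYPE lines (they start with '#') and assumes samples have no trailing timestamp.
	func scrapeCounter(addr, name string, labels []string) (float64, error) {
		resp, err := http.Get(addr)
		if err != nil {
			return 0, err
		}
		defer resp.Body.Close()

		scanner := bufio.NewScanner(resp.Body)
	scan:
		for scanner.Scan() {
			line := scanner.Text()
			if !strings.HasPrefix(line, name) {
				continue
			}
			for _, l := range labels {
				if !strings.Contains(line, l) {
					continue scan
				}
			}
			fields := strings.Fields(line)
			return strconv.ParseFloat(fields[len(fields)-1], 64)
		}
		return 0, fmt.Errorf("metric %s not found at %s", name, addr)
	}

	func main() {
		// Assumed: a port forward to the node-local-dns metrics port, as in the tests above.
		addr := "http://localhost:9253/metrics"
		// Assumed metric name and label; the tests reference them via the
		// coreDNSRequestCountTotal constant and a metricLabels map that includes zone=".".
		before, err := scrapeCounter(addr, "coredns_dns_requests_total", []string{`zone="."`})
		if err != nil {
			fmt.Println("scrape failed:", err)
			return
		}
		fmt.Println("request count before DNS query:", before)
		// Run an nslookup through the client pod here, scrape again, and expect the counter
		// to increase if the LocalRedirectPolicy is steering DNS traffic to node-local-dns.
	}

The test's countShouldIncrease assertion follows the same before/after pattern: scrape the counter, run the DNS command from the client pod, scrape again, and require the value to have increased, which only happens if the pod's DNS traffic is actually being redirected to the local node-local-dns instance.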
