@@ -4,6 +4,7 @@ package lrp
44
55import (
66 "context"
7+ "fmt"
78 "os"
89 "strings"
910 "testing"
@@ -13,11 +14,16 @@ import (
1314 "github.com/Azure/azure-container-networking/test/integration/prometheus"
1415 "github.com/Azure/azure-container-networking/test/internal/kubernetes"
1516 "github.com/Azure/azure-container-networking/test/internal/retry"
17+ ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
1618 ciliumClientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
1719 "github.com/pkg/errors"
1820 "github.com/stretchr/testify/require"
1921 "golang.org/x/exp/rand"
2022 corev1 "k8s.io/api/core/v1"
23+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24+ k8sclient "k8s.io/client-go/kubernetes"
25+ "k8s.io/client-go/rest"
26+ "sigs.k8s.io/yaml"
2127)
2228
2329const (
@@ -154,7 +160,7 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
154160}
155161
156162func testLRPCase (t * testing.T , ctx context.Context , clientPod corev1.Pod , clientCmd []string , expectResponse , expectErrMsg string ,
157- shouldError , countShouldIncrease bool ) {
163+ shouldError , countShouldIncrease bool , prometheusAddress string ) {
158164
159165 config := kubernetes .MustGetRestConfig ()
160166 cs := kubernetes .MustGetClientset ()
@@ -167,9 +173,11 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
167173 "zone" : "." ,
168174 }
169175
170- // curl localhost:9253/metrics
171- beforeMetric , err := prometheus .GetMetric (promAddress , coreDNSRequestCountTotal , metricLabels )
176+ // curl to the specified prometheus address
177+ beforeMetric , err := prometheus .GetMetric (prometheusAddress , coreDNSRequestCountTotal , metricLabels )
172178 require .NoError (t , err )
179+ beforeValue := beforeMetric .GetCounter ().GetValue ()
180+ t .Logf ("Before DNS request - metric count: %.0f" , beforeValue )
173181
174182 t .Log ("calling command from client" )
175183
@@ -187,13 +195,15 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
187195 time .Sleep (500 * time .Millisecond )
188196
189197 // curl again and see count diff
190- afterMetric , err := prometheus .GetMetric (promAddress , coreDNSRequestCountTotal , metricLabels )
198+ afterMetric , err := prometheus .GetMetric (prometheusAddress , coreDNSRequestCountTotal , metricLabels )
191199 require .NoError (t , err )
200+ afterValue := afterMetric .GetCounter ().GetValue ()
201+ t .Logf ("After DNS request - metric count: %.0f (diff: %.0f)" , afterValue , afterValue - beforeValue )
192202
193203 if countShouldIncrease {
194- require .Greater (t , afterMetric . GetCounter (). GetValue (), beforeMetric . GetCounter (). GetValue () , "dns metric count did not increase after command" )
204+ require .Greater (t , afterValue , beforeValue , "dns metric count did not increase after command - before: %.0f, after: %.0f" , beforeValue , afterValue )
195205 } else {
196- require .Equal (t , afterMetric . GetCounter (). GetValue (), beforeMetric . GetCounter (). GetValue () , "dns metric count increased after command" )
206+ require .Equal (t , afterValue , beforeValue , "dns metric count increased after command - before: %.0f, after: %.0f" , beforeValue , afterValue )
197207 }
198208}
199209
@@ -210,9 +220,319 @@ func TestLRP(t *testing.T) {
210220 defer cleanupFn ()
211221 require .NotNil (t , selectedPod )
212222
223+ // Get the kube-dns service IP for DNS requests
224+ cs := kubernetes .MustGetClientset ()
225+ svc , err := kubernetes .GetService (ctx , cs , kubeSystemNamespace , dnsService )
226+ require .NoError (t , err )
227+ kubeDNS := svc .Spec .ClusterIP
228+
229+ t .Logf ("LRP Test Starting..." )
230+
231+ // Basic LRP test
213232 testLRPCase (t , ctx , * selectedPod , []string {
214- "nslookup" , "google.com" , "10.0.0.10" ,
215- }, "" , "" , false , true )
233+ "nslookup" , "google.com" , kubeDNS ,
234+ }, "" , "" , false , true , promAddress )
235+
236+ t .Logf ("LRP Test Completed" )
237+
238+ t .Logf ("LRP Lifecycle Test Starting" )
239+
240+ // Run LRP Lifecycle test
241+ testLRPLifecycle (t , ctx , * selectedPod , kubeDNS )
242+
243+ t .Logf ("LRP Lifecycle Test Completed" )
244+ }
245+
246+ // testLRPLifecycle performs testing of Local Redirect Policy functionality
247+ // including pod restarts, resource recreation, and cilium command validation
248+ func testLRPLifecycle (t * testing.T , ctx context.Context , clientPod corev1.Pod , kubeDNS string ) {
249+ config := kubernetes .MustGetRestConfig ()
250+ cs := kubernetes .MustGetClientset ()
251+
252+ // Step 1: Initial DNS test to verify LRP is working
253+ t .Log ("Step 1: Initial DNS test - verifying LRP functionality" )
254+ testLRPCase (t , ctx , clientPod , []string {
255+ "nslookup" , "google.com" , kubeDNS ,
256+ }, "" , "" , false , true , promAddress )
257+
258+ // Step 2: Validate LRP using cilium commands
259+ t .Log ("Step 2: Validating LRP using cilium commands" )
260+ validateCiliumLRP (t , ctx , cs , config )
261+
262+ // Step 3: Restart busybox pods and verify LRP still works
263+ t .Log ("Step 3: Restarting client pods to test persistence" )
264+ restartedPod := restartClientPodsAndGetPod (t , ctx , cs , clientPod )
265+
266+ // Step 4: Verify metrics after restart
267+ t .Log ("Step 4: Verifying LRP functionality after pod restart" )
268+ testLRPCase (t , ctx , restartedPod , []string {
269+ "nslookup" , "google.com" , kubeDNS ,
270+ }, "" , "" , false , true , promAddress )
271+
272+ // Step 5: Validate cilium commands still show LRP
273+ t .Log ("Step 5: Re-validating cilium LRP after restart" )
274+ validateCiliumLRP (t , ctx , cs , config )
275+
276+ // Step 6: Delete and recreate resources & restart nodelocaldns daemonset
277+ t .Log ("Step 6: Testing resource deletion and recreation" )
278+ recreatedPod := deleteAndRecreateResources (t , ctx , cs , clientPod )
279+
280+ // Step 7: Final verification after recreation
281+ t .Log ("Step 7: Final verification after resource recreation - skipping basic DNS test, will validate with metrics in Step 8" )
282+
283+ // Step 8: Re-establish port forward to new node-local-dns pod and validate metrics
284+ t .Log ("Step 8: Re-establishing port forward to new node-local-dns pod for metrics validation" )
285+
286+ // Get the new node-local-dns pod on the same node as our recreated client pod
287+ nodeName := recreatedPod .Spec .NodeName
288+ newNodeLocalDNSPods , err := kubernetes .GetPodsByNode (ctx , cs , kubeSystemNamespace , nodeLocalDNSLabelSelector , nodeName )
289+ require .NoError (t , err )
290+ require .NotEmpty (t , newNodeLocalDNSPods .Items , "No node-local-dns pod found on node %s after restart" , nodeName )
291+
292+ newNodeLocalDNSPod := TakeOne (newNodeLocalDNSPods .Items )
293+ t .Logf ("Setting up port forward to new node-local-dns pod: %s" , newNodeLocalDNSPod .Name )
294+
295+ // Setup new port forward to the new node-local-dns pod
296+ newPf , err := k8s .NewPortForwarder (config , k8s.PortForwardingOpts {
297+ Namespace : newNodeLocalDNSPod .Namespace ,
298+ PodName : newNodeLocalDNSPod .Name ,
299+ LocalPort : 9254 , // Use different port to avoid conflicts
300+ DestPort : 9253 ,
301+ })
302+ require .NoError (t , err )
303+
304+ newPortForwardCtx , newCancel := context .WithTimeout (ctx , (retryAttempts + 1 )* retryDelay )
305+ defer newCancel ()
306+
307+ err = defaultRetrier .Do (newPortForwardCtx , func () error {
308+ t .Logf ("attempting port forward to new node-local-dns pod %s..." , newNodeLocalDNSPod .Name )
309+ return errors .Wrap (newPf .Forward (newPortForwardCtx ), "could not start port forward to new pod" )
310+ })
311+ require .NoError (t , err , "could not start port forward to new node-local-dns pod" )
312+ defer newPf .Stop ()
313+
314+ t .Log ("Port forward to new node-local-dns pod established" )
315+
316+ // Now test metrics with the new port forward using port 9254
317+ newPromAddress := "http://localhost:9254/metrics"
318+
319+ // Use testLRPCase function with the new prometheus address
320+ t .Log ("Validating metrics with new node-local-dns pod" )
321+ testLRPCase (t , ctx , recreatedPod , []string {
322+ "nslookup" , "github.com" , kubeDNS ,
323+ }, "" , "" , false , true , newPromAddress )
324+
325+ t .Logf ("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s" , newNodeLocalDNSPod .Name )
326+
327+ // Step 9: Final cilium validation after node-local-dns restart
328+ t .Log ("Step 9: Final cilium validation - ensuring LRP is still active after node-local-dns restart" )
329+ validateCiliumLRP (t , ctx , cs , config )
330+
331+ }
332+
333+ // validateCiliumLRP checks that LRP is properly configured in cilium
334+ func validateCiliumLRP (t * testing.T , ctx context.Context , cs * k8sclient.Clientset , config * rest.Config ) {
335+ ciliumPods , err := cs .CoreV1 ().Pods (kubeSystemNamespace ).List (ctx , metav1.ListOptions {
336+ LabelSelector : "k8s-app=cilium" ,
337+ })
338+ require .NoError (t , err )
339+ require .NotEmpty (t , ciliumPods .Items )
340+ ciliumPod := TakeOne (ciliumPods .Items )
341+
342+ // Get Kubernetes version to determine validation approach
343+ serverVersion , err := cs .Discovery ().ServerVersion ()
344+ require .NoError (t , err )
345+ t .Logf ("Detected Kubernetes version: %s" , serverVersion .String ())
346+
347+ // Parse version to determine if we should use modern or legacy validation
348+ // K8s 1.32.0+ should use modern Cilium format (v1.17+)
349+ useModernFormat := false
350+ if serverVersion .Major == "1" {
351+ // Parse minor version
352+ var minorVersion int
353+ _ , err := fmt .Sscanf (serverVersion .Minor , "%d" , & minorVersion )
354+ if err == nil && minorVersion >= 32 {
355+ useModernFormat = true
356+ }
357+ }
358+
359+ if useModernFormat {
360+ t .Log ("Using modern validation approach based on Kubernetes version >= 1.32.0" )
361+ } else {
362+ t .Log ("Using legacy validation approach based on Kubernetes version < 1.32.0" )
363+ }
364+
365+ // Get kube-dns service IP for validation
366+ svc , err := kubernetes .GetService (ctx , cs , kubeSystemNamespace , dnsService )
367+ require .NoError (t , err )
368+ kubeDNSIP := svc .Spec .ClusterIP
369+
370+ // IMPORTANT: Get node-local-dns pod IP on the SAME node as the cilium pod we're using
371+ selectedNode := ciliumPod .Spec .NodeName
372+ t .Logf ("Using cilium pod %s on node %s for validation" , ciliumPod .Name , selectedNode )
373+
374+ // Get node-local-dns pod specifically on the same node as our cilium pod
375+ nodeLocalDNSPods , err := kubernetes .GetPodsByNode (ctx , cs , kubeSystemNamespace , nodeLocalDNSLabelSelector , selectedNode )
376+ require .NoError (t , err )
377+ require .NotEmpty (t , nodeLocalDNSPods .Items , "No node-local-dns pod found on node %s" , selectedNode )
378+
379+ // Use the first (and should be only) node-local-dns pod on this node
380+ nodeLocalDNSPod := nodeLocalDNSPods .Items [0 ]
381+ nodeLocalDNSIP := nodeLocalDNSPod .Status .PodIP
382+ require .NotEmpty (t , nodeLocalDNSIP , "node-local-dns pod %s has no IP address" , nodeLocalDNSPod .Name )
383+
384+ t .Logf ("Validating LRP: kubeDNS IP=%s, nodeLocalDNS IP=%s (pod: %s), node=%s" ,
385+ kubeDNSIP , nodeLocalDNSIP , nodeLocalDNSPod .Name , selectedNode )
386+
387+ // Check cilium lrp list
388+ lrpListCmd := []string {"cilium" , "lrp" , "list" }
389+ lrpOutput , _ , err := kubernetes .ExecCmdOnPod (ctx , cs , ciliumPod .Namespace , ciliumPod .Name , "cilium-agent" , lrpListCmd , config , false )
390+ require .NoError (t , err )
391+
392+ // Validate the LRP output structure more thoroughly
393+ lrpOutputStr := string (lrpOutput )
394+ require .Contains (t , lrpOutputStr , "nodelocaldns" , "LRP not found in cilium lrp list" )
395+
396+ // Parse LRP list output to validate structure
397+ lrpLines := strings .Split (lrpOutputStr , "\n " )
398+ nodelocaldnsFound := false
399+
400+ for _ , line := range lrpLines {
401+ line = strings .TrimSpace (line )
402+ if strings .Contains (line , "nodelocaldns" ) && strings .Contains (line , "kube-system" ) {
403+ // Validate that the line contains expected components
404+ require .Contains (t , line , "kube-system" , "LRP line should contain kube-system namespace" )
405+ require .Contains (t , line , "nodelocaldns" , "LRP line should contain nodelocaldns name" )
406+ require .Contains (t , line , "kube-dns" , "LRP line should reference kube-dns service" )
407+ nodelocaldnsFound = true
408+ t .Logf ("Found nodelocaldns LRP entry: %s" , line )
409+ break
410+ }
411+ }
412+
413+ require .True (t , nodelocaldnsFound , "nodelocaldns LRP entry not found with expected structure in output: %s" , lrpOutputStr )
414+
415+ // Check cilium service list for localredirect
416+ serviceListCmd := []string {"cilium" , "service" , "list" }
417+ serviceOutput , _ , err := kubernetes .ExecCmdOnPod (ctx , cs , ciliumPod .Namespace , ciliumPod .Name , "cilium-agent" , serviceListCmd , config , false )
418+ require .NoError (t , err )
419+ require .Contains (t , string (serviceOutput ), "LocalRedirect" , "LocalRedirect not found in cilium service list" )
420+
421+ // Validate LocalRedirect entries
422+ serviceLines := strings .Split (string (serviceOutput ), "\n " )
423+ tcpFound := false
424+ udpFound := false
425+
426+ for _ , line := range serviceLines {
427+ if strings .Contains (line , "LocalRedirect" ) && strings .Contains (line , kubeDNSIP ) {
428+ // Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs
429+ if strings .Contains (line , nodeLocalDNSIP ) {
430+ if useModernFormat {
431+ // Modern format (K8s 1.32.0+/Cilium v1.17+): Check for explicit protocol
432+ if strings .Contains (line , "/TCP" ) {
433+ tcpFound = true
434+ t .Logf ("Found TCP LocalRedirect: %s" , strings .TrimSpace (line ))
435+ } else if strings .Contains (line , "/UDP" ) {
436+ udpFound = true
437+ t .Logf ("Found UDP LocalRedirect: %s" , strings .TrimSpace (line ))
438+ }
439+ } else {
440+ // Legacy format (K8s < 1.32.0/Cilium < v1.17): No protocol specified
441+ t .Logf ("Found legacy LocalRedirect: %s" , strings .TrimSpace (line ))
442+ }
443+ }
444+ }
445+ }
446+
447+ // Validate based on determined format
448+ if useModernFormat {
449+ // Modern format (K8s 1.32.0+/Cilium v1.17+): Separate TCP and UDP entries
450+ t .Log ("Validating modern Cilium format - expecting separate TCP and UDP LocalRedirect entries" )
451+ require .True (t , tcpFound , "TCP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s" , kubeDNSIP , nodeLocalDNSIP , selectedNode )
452+ require .True (t , udpFound , "UDP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s" , kubeDNSIP , nodeLocalDNSIP , selectedNode )
453+ } else {
454+ // Legacy format (K8s < 1.32.0/Cilium < v1.17): Just one LocalRedirect entry without protocol
455+ t .Log ("Validating legacy Cilium format - expecting single LocalRedirect entry without protocol" )
456+ require .False (t , useModernFormat , "Legacy LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s" , kubeDNSIP , nodeLocalDNSIP , selectedNode )
457+ }
458+
459+ t .Logf ("Cilium LRP List Output:\n %s" , string (lrpOutput ))
460+ t .Logf ("Cilium Service List Output:\n %s" , string (serviceOutput ))
461+ }
462+
463+ // restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference
464+ func restartClientPodsAndGetPod (t * testing.T , ctx context.Context , cs * k8sclient.Clientset , originalPod corev1.Pod ) corev1.Pod {
465+ // Find the daemonset name by looking up the pod's owner
466+ podDetails , err := cs .CoreV1 ().Pods (originalPod .Namespace ).Get (ctx , originalPod .Name , metav1.GetOptions {})
467+ require .NoError (t , err )
468+
469+ // Get the node name for consistent testing
470+ nodeName := podDetails .Spec .NodeName
471+
472+ // Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
473+ err = kubernetes .MustRestartDaemonset (ctx , cs , originalPod .Namespace , "lrp-test" )
474+ require .NoError (t , err )
475+
476+ // Wait for the daemonset to be ready
477+ kubernetes .WaitForPodDaemonset (ctx , cs , originalPod .Namespace , "lrp-test" , clientLabelSelector )
478+
479+ // Get the new pod on the same node
480+ clientPods , err := kubernetes .GetPodsByNode (ctx , cs , originalPod .Namespace , clientLabelSelector , nodeName )
481+ require .NoError (t , err )
482+ require .NotEmpty (t , clientPods .Items )
483+
484+ return TakeOne (clientPods .Items )
485+ }
486+
487+ // deleteAndRecreateResources deletes and recreates client pods and LRP, returning new pod
488+ func deleteAndRecreateResources (t * testing.T , ctx context.Context , cs * k8sclient.Clientset , originalPod corev1.Pod ) corev1.Pod {
489+ config := kubernetes .MustGetRestConfig ()
490+ ciliumCS , err := ciliumClientset .NewForConfig (config )
491+ require .NoError (t , err )
492+
493+ nodeName := originalPod .Spec .NodeName
494+
495+ // Delete client daemonset
496+ dsClient := cs .AppsV1 ().DaemonSets (originalPod .Namespace )
497+ clientDS := kubernetes .MustParseDaemonSet (clientPath )
498+ kubernetes .MustDeleteDaemonset (ctx , dsClient , clientDS )
499+
500+ // Delete LRP
501+ lrpContent , err := os .ReadFile (lrpPath )
502+ require .NoError (t , err )
503+ var lrp ciliumv2.CiliumLocalRedirectPolicy
504+ err = yaml .Unmarshal (lrpContent , & lrp )
505+ require .NoError (t , err )
506+
507+ lrpClient := ciliumCS .CiliumV2 ().CiliumLocalRedirectPolicies (lrp .Namespace )
508+ kubernetes .MustDeleteCiliumLocalRedirectPolicy (ctx , lrpClient , lrp )
509+
510+ // Wait for deletion to complete
511+ time .Sleep (10 * time .Second )
512+
513+ // Recreate LRP
514+ _ , cleanupLRP := kubernetes .MustSetupLRP (ctx , ciliumCS , lrpPath )
515+ t .Cleanup (cleanupLRP )
516+
517+ // Restart node-local-dns pods to pick up new LRP configuration
518+ t .Log ("Restarting node-local-dns pods after LRP recreation" )
519+ err = kubernetes .MustRestartDaemonset (ctx , cs , kubeSystemNamespace , "node-local-dns" )
520+ require .NoError (t , err )
521+ kubernetes .WaitForPodDaemonset (ctx , cs , kubeSystemNamespace , "node-local-dns" , nodeLocalDNSLabelSelector )
522+
523+ // Recreate client daemonset
524+ _ , cleanupClient := kubernetes .MustSetupDaemonset (ctx , cs , clientPath )
525+ t .Cleanup (cleanupClient )
526+
527+ // Wait for pods to be ready
528+ kubernetes .WaitForPodDaemonset (ctx , cs , clientDS .Namespace , clientDS .Name , clientLabelSelector )
529+
530+ // Get new pod on the same node
531+ clientPods , err := kubernetes .GetPodsByNode (ctx , cs , clientDS .Namespace , clientLabelSelector , nodeName )
532+ require .NoError (t , err )
533+ require .NotEmpty (t , clientPods .Items )
534+
535+ return TakeOne (clientPods .Items )
216536}
217537
218538// TakeOne takes one item from the slice randomly; if empty, it returns the empty value for the type
0 commit comments