@@ -13,11 +13,16 @@ import (
1313 "github.com/Azure/azure-container-networking/test/integration/prometheus"
1414 "github.com/Azure/azure-container-networking/test/internal/kubernetes"
1515 "github.com/Azure/azure-container-networking/test/internal/retry"
16+ ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
1617 ciliumClientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
1718 "github.com/pkg/errors"
1819 "github.com/stretchr/testify/require"
1920 "golang.org/x/exp/rand"
2021 corev1 "k8s.io/api/core/v1"
22+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23+ k8sclient "k8s.io/client-go/kubernetes"
24+ "k8s.io/client-go/rest"
25+ "sigs.k8s.io/yaml"
2126)
2227
2328const (
@@ -154,7 +159,7 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
154159}
155160
156161func testLRPCase (t * testing.T , ctx context.Context , clientPod corev1.Pod , clientCmd []string , expectResponse , expectErrMsg string ,
157- shouldError , countShouldIncrease bool ) {
162+ shouldError , countShouldIncrease bool , prometheusAddress string ) {
158163
159164 config := kubernetes .MustGetRestConfig ()
160165 cs := kubernetes .MustGetClientset ()
@@ -167,8 +172,8 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
167172 "zone" : "." ,
168173 }
169174
170- // curl localhost:9253/metrics
171- beforeMetric , err := prometheus .GetMetric (promAddress , coreDNSRequestCountTotal , metricLabels )
175+ // curl to the specified prometheus address
176+ beforeMetric , err := prometheus .GetMetric (prometheusAddress , coreDNSRequestCountTotal , metricLabels )
172177 require .NoError (t , err )
173178
174179 t .Log ("calling command from client" )
@@ -187,7 +192,7 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
187192 time .Sleep (500 * time .Millisecond )
188193
189194 // curl again and see count diff
190- afterMetric , err := prometheus .GetMetric (promAddress , coreDNSRequestCountTotal , metricLabels )
195+ afterMetric , err := prometheus .GetMetric (prometheusAddress , coreDNSRequestCountTotal , metricLabels )
191196 require .NoError (t , err )
192197
193198 if countShouldIncrease {
@@ -210,9 +215,256 @@ func TestLRP(t *testing.T) {
210215 defer cleanupFn ()
211216 require .NotNil (t , selectedPod )
212217
218+ // Get the kube-dns service IP for DNS requests
219+ cs := kubernetes .MustGetClientset ()
220+ svc , err := kubernetes .GetService (ctx , cs , kubeSystemNamespace , dnsService )
221+ require .NoError (t , err )
222+ kubeDNS := svc .Spec .ClusterIP
223+
224+ t .Logf ("Using kube-dns service IP: %s" , kubeDNS )
225+
226+ // Basic LRP test
213227 testLRPCase (t , ctx , * selectedPod , []string {
214- "nslookup" , "google.com" , "10.0.0.10" ,
215- }, "" , "" , false , true )
228+ "nslookup" , "google.com" , kubeDNS ,
229+ }, "" , "" , false , true , promAddress )
230+
231+ // Run comprehensive test
232+ testLRPComprehensive (t , ctx , * selectedPod , kubeDNS )
233+ }
234+
235+ // testLRPComprehensive performs a comprehensive test of Local Redirect Policy functionality
236+ // including pod restarts, resource recreation, and cilium command validation
237+ func testLRPComprehensive (t * testing.T , ctx context.Context , clientPod corev1.Pod , kubeDNS string ) {
238+ config := kubernetes .MustGetRestConfig ()
239+ cs := kubernetes .MustGetClientset ()
240+
241+ // Step 1: Initial DNS test to verify LRP is working
242+ t .Log ("Step 1: Initial DNS test - verifying LRP functionality" )
243+ testLRPCase (t , ctx , clientPod , []string {
244+ "nslookup" , "google.com" , kubeDNS ,
245+ }, "" , "" , false , true , promAddress )
246+
247+ // Step 2: Validate LRP using cilium commands
248+ t .Log ("Step 2: Validating LRP using cilium commands" )
249+ validateCiliumLRP (t , ctx , cs , config )
250+
251+ // Step 3: Restart busybox pods and verify LRP still works
252+ t .Log ("Step 3: Restarting client pods to test persistence" )
253+ restartedPod := restartClientPodsAndGetPod (t , ctx , cs , clientPod )
254+
255+ // Step 4: Verify metrics after restart
256+ t .Log ("Step 4: Verifying LRP functionality after pod restart" )
257+ testLRPCase (t , ctx , restartedPod , []string {
258+ "nslookup" , "google.com" , kubeDNS ,
259+ }, "" , "" , false , true , promAddress )
260+
261+ // Step 5: Validate cilium commands still show LRP
262+ t .Log ("Step 5: Re-validating cilium LRP after restart" )
263+ validateCiliumLRP (t , ctx , cs , config )
264+
265+ // Step 6: Delete and recreate resources & restart nodelocaldns daemonset
266+ t .Log ("Step 6: Testing resource deletion and recreation" )
267+ recreatedPod := deleteAndRecreateResources (t , ctx , cs , clientPod )
268+
269+ // Step 7: Final verification after recreation
270+ t .Log ("Step 7: Final verification after resource recreation - skipping basic DNS test, will validate with metrics in Step 8" )
271+
272+ // Step 8: Re-establish port forward to new node-local-dns pod and validate metrics
273+ t .Log ("Step 8: Re-establishing port forward to new node-local-dns pod for metrics validation" )
274+
275+ // Get the new node-local-dns pod on the same node as our recreated client pod
276+ nodeName := recreatedPod .Spec .NodeName
277+ newNodeLocalDNSPods , err := kubernetes .GetPodsByNode (ctx , cs , kubeSystemNamespace , nodeLocalDNSLabelSelector , nodeName )
278+ require .NoError (t , err )
279+ require .NotEmpty (t , newNodeLocalDNSPods .Items , "No node-local-dns pod found on node %s after restart" , nodeName )
280+
281+ newNodeLocalDNSPod := TakeOne (newNodeLocalDNSPods .Items )
282+ t .Logf ("Setting up port forward to new node-local-dns pod: %s" , newNodeLocalDNSPod .Name )
283+
284+ // Setup new port forward to the new node-local-dns pod
285+ newPf , err := k8s .NewPortForwarder (config , k8s.PortForwardingOpts {
286+ Namespace : newNodeLocalDNSPod .Namespace ,
287+ PodName : newNodeLocalDNSPod .Name ,
288+ LocalPort : 9254 , // Use different port to avoid conflicts
289+ DestPort : 9253 ,
290+ })
291+ require .NoError (t , err )
292+
293+ newPortForwardCtx , newCancel := context .WithTimeout (ctx , (retryAttempts + 1 )* retryDelay )
294+ defer newCancel ()
295+
296+ err = defaultRetrier .Do (newPortForwardCtx , func () error {
297+ t .Logf ("attempting port forward to new node-local-dns pod %s..." , newNodeLocalDNSPod .Name )
298+ return errors .Wrap (newPf .Forward (newPortForwardCtx ), "could not start port forward to new pod" )
299+ })
300+ require .NoError (t , err , "could not start port forward to new node-local-dns pod" )
301+ defer newPf .Stop ()
302+
303+ t .Log ("Port forward to new node-local-dns pod established" )
304+
305+ // Now test metrics with the new port forward using port 9254
306+ newPromAddress := "http://localhost:9254/metrics"
307+
308+ // Use testLRPCase function with the new prometheus address
309+ t .Log ("Validating metrics with new node-local-dns pod" )
310+ testLRPCase (t , ctx , recreatedPod , []string {
311+ "nslookup" , "github.com" , kubeDNS ,
312+ }, "" , "" , false , true , newPromAddress )
313+
314+ t .Logf ("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s" , newNodeLocalDNSPod .Name )
315+
316+ // Step 9: Final cilium validation after node-local-dns restart
317+ t .Log ("Step 9: Final cilium validation - ensuring LRP is still active after node-local-dns restart" )
318+ validateCiliumLRP (t , ctx , cs , config )
319+
320+ t .Log ("Comprehensive LRP test completed successfully" )
321+ }
322+
323+ // validateCiliumLRP checks that LRP is properly configured in cilium
324+ func validateCiliumLRP (t * testing.T , ctx context.Context , cs * k8sclient.Clientset , config * rest.Config ) {
325+ ciliumPods , err := cs .CoreV1 ().Pods (kubeSystemNamespace ).List (ctx , metav1.ListOptions {
326+ LabelSelector : "k8s-app=cilium" ,
327+ })
328+ require .NoError (t , err )
329+ require .NotEmpty (t , ciliumPods .Items )
330+ ciliumPod := TakeOne (ciliumPods .Items )
331+
332+ // Get kube-dns service IP for validation
333+ svc , err := kubernetes .GetService (ctx , cs , kubeSystemNamespace , dnsService )
334+ require .NoError (t , err )
335+ kubeDNSIP := svc .Spec .ClusterIP
336+
337+ // IMPORTANT: Get node-local-dns pod IP on the SAME node as the cilium pod we're using
338+ selectedNode := ciliumPod .Spec .NodeName
339+ t .Logf ("Using cilium pod %s on node %s for validation" , ciliumPod .Name , selectedNode )
340+
341+ // Get node-local-dns pod specifically on the same node as our cilium pod
342+ nodeLocalDNSPods , err := kubernetes .GetPodsByNode (ctx , cs , kubeSystemNamespace , nodeLocalDNSLabelSelector , selectedNode )
343+ require .NoError (t , err )
344+ require .NotEmpty (t , nodeLocalDNSPods .Items , "No node-local-dns pod found on node %s" , selectedNode )
345+
346+ // Use the first (and should be only) node-local-dns pod on this node
347+ nodeLocalDNSPod := nodeLocalDNSPods .Items [0 ]
348+ nodeLocalDNSIP := nodeLocalDNSPod .Status .PodIP
349+ require .NotEmpty (t , nodeLocalDNSIP , "node-local-dns pod %s has no IP address" , nodeLocalDNSPod .Name )
350+
351+ t .Logf ("Validating LRP: kubeDNS IP=%s, nodeLocalDNS IP=%s (pod: %s), node=%s" ,
352+ kubeDNSIP , nodeLocalDNSIP , nodeLocalDNSPod .Name , selectedNode )
353+
354+ // Check cilium lrp list
355+ lrpListCmd := []string {"cilium" , "lrp" , "list" }
356+ lrpOutput , _ , err := kubernetes .ExecCmdOnPod (ctx , cs , ciliumPod .Namespace , ciliumPod .Name , "cilium-agent" , lrpListCmd , config , false )
357+ require .NoError (t , err )
358+ require .Contains (t , string (lrpOutput ), "nodelocaldns" , "LRP not found in cilium lrp list" )
359+
360+ // Check cilium service list for localredirect
361+ serviceListCmd := []string {"cilium" , "service" , "list" }
362+ serviceOutput , _ , err := kubernetes .ExecCmdOnPod (ctx , cs , ciliumPod .Namespace , ciliumPod .Name , "cilium-agent" , serviceListCmd , config , false )
363+ require .NoError (t , err )
364+ require .Contains (t , string (serviceOutput ), "LocalRedirect" , "LocalRedirect not found in cilium service list" )
365+
366+ // Validate LocalRedirect entries
367+ serviceLines := strings .Split (string (serviceOutput ), "\n " )
368+ tcpFound := false
369+ udpFound := false
370+
371+ for _ , line := range serviceLines {
372+ if strings .Contains (line , "LocalRedirect" ) && strings .Contains (line , kubeDNSIP ) {
373+ // Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs
374+ if strings .Contains (line , nodeLocalDNSIP ) {
375+ if strings .Contains (line , "/TCP" ) {
376+ tcpFound = true
377+ t .Logf ("Found TCP LocalRedirect: %s" , strings .TrimSpace (line ))
378+ }
379+ if strings .Contains (line , "/UDP" ) {
380+ udpFound = true
381+ t .Logf ("Found UDP LocalRedirect: %s" , strings .TrimSpace (line ))
382+ }
383+ }
384+ }
385+ }
386+
387+ // Verify both TCP and UDP LocalRedirect entries exist
388+ require .True (t , tcpFound , "TCP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s" , kubeDNSIP , nodeLocalDNSIP , selectedNode )
389+ require .True (t , udpFound , "UDP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s" , kubeDNSIP , nodeLocalDNSIP , selectedNode )
390+
391+ t .Logf ("Cilium LRP List Output:\n %s" , string (lrpOutput ))
392+ t .Logf ("Cilium Service List Output:\n %s" , string (serviceOutput ))
393+ }
394+
395+ // restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference
396+ func restartClientPodsAndGetPod (t * testing.T , ctx context.Context , cs * k8sclient.Clientset , originalPod corev1.Pod ) corev1.Pod {
397+ // Find the daemonset name by looking up the pod's owner
398+ podDetails , err := cs .CoreV1 ().Pods (originalPod .Namespace ).Get (ctx , originalPod .Name , metav1.GetOptions {})
399+ require .NoError (t , err )
400+
401+ // Get the node name for consistent testing
402+ nodeName := podDetails .Spec .NodeName
403+
404+ // Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
405+ err = kubernetes .MustRestartDaemonset (ctx , cs , originalPod .Namespace , "lrp-test" )
406+ require .NoError (t , err )
407+
408+ // Wait for the daemonset to be ready
409+ kubernetes .WaitForPodDaemonset (ctx , cs , originalPod .Namespace , "lrp-test" , clientLabelSelector )
410+
411+ // Get the new pod on the same node
412+ clientPods , err := kubernetes .GetPodsByNode (ctx , cs , originalPod .Namespace , clientLabelSelector , nodeName )
413+ require .NoError (t , err )
414+ require .NotEmpty (t , clientPods .Items )
415+
416+ return TakeOne (clientPods .Items )
417+ }
418+
419+ // deleteAndRecreateResources deletes and recreates client pods and LRP, returning new pod
420+ func deleteAndRecreateResources (t * testing.T , ctx context.Context , cs * k8sclient.Clientset , originalPod corev1.Pod ) corev1.Pod {
421+ config := kubernetes .MustGetRestConfig ()
422+ ciliumCS , err := ciliumClientset .NewForConfig (config )
423+ require .NoError (t , err )
424+
425+ nodeName := originalPod .Spec .NodeName
426+
427+ // Delete client daemonset
428+ dsClient := cs .AppsV1 ().DaemonSets (originalPod .Namespace )
429+ clientDS := kubernetes .MustParseDaemonSet (clientPath )
430+ kubernetes .MustDeleteDaemonset (ctx , dsClient , clientDS )
431+
432+ // Delete LRP
433+ lrpContent , err := os .ReadFile (lrpPath )
434+ require .NoError (t , err )
435+ var lrp ciliumv2.CiliumLocalRedirectPolicy
436+ err = yaml .Unmarshal (lrpContent , & lrp )
437+ require .NoError (t , err )
438+
439+ lrpClient := ciliumCS .CiliumV2 ().CiliumLocalRedirectPolicies (lrp .Namespace )
440+ kubernetes .MustDeleteCiliumLocalRedirectPolicy (ctx , lrpClient , lrp )
441+
442+ // Wait for deletion to complete
443+ time .Sleep (10 * time .Second )
444+
445+ // Recreate LRP
446+ _ , cleanupLRP := kubernetes .MustSetupLRP (ctx , ciliumCS , lrpPath )
447+ t .Cleanup (cleanupLRP )
448+
449+ // Restart node-local-dns pods to pick up new LRP configuration
450+ t .Log ("Restarting node-local-dns pods after LRP recreation" )
451+ err = kubernetes .MustRestartDaemonset (ctx , cs , kubeSystemNamespace , "node-local-dns" )
452+ require .NoError (t , err )
453+ kubernetes .WaitForPodDaemonset (ctx , cs , kubeSystemNamespace , "node-local-dns" , nodeLocalDNSLabelSelector )
454+
455+ // Recreate client daemonset
456+ _ , cleanupClient := kubernetes .MustSetupDaemonset (ctx , cs , clientPath )
457+ t .Cleanup (cleanupClient )
458+
459+ // Wait for pods to be ready
460+ kubernetes .WaitForPodDaemonset (ctx , cs , clientDS .Namespace , clientDS .Name , clientLabelSelector )
461+
462+ // Get new pod on the same node
463+ clientPods , err := kubernetes .GetPodsByNode (ctx , cs , clientDS .Namespace , clientLabelSelector , nodeName )
464+ require .NoError (t , err )
465+ require .NotEmpty (t , clientPods .Items )
466+
467+ return TakeOne (clientPods .Items )
216468}
217469
218470// TakeOne takes one item from the slice randomly; if empty, it returns the empty value for the type
0 commit comments