@@ -279,6 +279,9 @@ static const struct option_wrapper long_options[] = {
279279 {{"metainfo" , no_argument , NULL , 'm' },
280280 "Print XDP metadata info output mode (debug)" },
281281
282+ {{"timedebug" , no_argument , NULL , 't' },
283+ "Print timestamps info for wakeup accuracy (debug)" },
284+
282285 {{"debug" , no_argument , NULL , 'D' },
283286 "Debug info output mode (debug)" },
284287
@@ -288,6 +291,12 @@ static const struct option_wrapper long_options[] = {
288291 {{"progsec" , required_argument , NULL , 2 },
289292 "Load program in <section> of the ELF file" , "<section>" },
290293
294+ {{"src-ip" , required_argument , NULL , 4 },
295+ "Change IPv4 source address in generated packets" , "<ip>" },
296+
297+ {{"dst-ip" , required_argument , NULL , 5 },
298+ "Change IPv4 destination address in generated packets" , "<ip>" },
299+
291300 {{"busy-poll" , no_argument , NULL , 'B' },
292301 "Enable socket prefer NAPI busy-poll mode (remember adjust sysctl too)" },
293302
@@ -297,6 +306,12 @@ static const struct option_wrapper long_options[] = {
297306 {{"tx-smac" , required_argument , NULL , 'H' },
298307 "Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format" , "aa:bb:cc:dd:ee:ff" },
299308
309+ {{"interval" , required_argument , NULL , 'i' },
310+ "Periodic TX-cyclic interval wakeup period in usec" , "<usec>" },
311+
312+ {{"batch-pkts" , required_argument , NULL , 'b' },
313+ "Periodic TX-cyclic batch send pkts" , "<pkts>" },
314+
300315 {{0 , 0 , NULL , 0 }, NULL , false}
301316};
302317
@@ -521,16 +536,35 @@ static struct xsk_socket_info *xsk_configure_socket(struct config *cfg,
521536 return NULL ;
522537}
523538
524- static void complete_tx (struct xsk_socket_info * xsk )
539+ static int kick_tx (struct xsk_socket_info * xsk )
540+ {
541+ int err = 0 ;
542+ int ret ;
543+
544+ ret = sendto (xsk_socket__fd (xsk -> xsk ), NULL , 0 , MSG_DONTWAIT , NULL , 0 );
545+ if (ret < 0 ) { /* On error, -1 is returned, and errno is set */
546+ fprintf (stderr , "WARN: %s() sendto() failed with errno:%d\n" ,
547+ __func__ , errno );
548+ err = errno ;
549+ }
550+ /* Kernel samples/bpf/ xdp_sock_user.c kick_tx variant doesn't
551+ * treat the following errno values as errors:
552+ * ENOBUFS , EAGAIN , EBUSY , ENETDOWN
553+ */
554+ return err ;
555+ }
556+
557+ static int complete_tx (struct xsk_socket_info * xsk )
525558{
526559 unsigned int completed ;
527560 uint32_t idx_cq ;
561+ int err ;
528562
529563 if (!xsk -> outstanding_tx )
530564 return ;
531565
532- sendto ( xsk_socket__fd ( xsk -> xsk ), NULL , 0 , MSG_DONTWAIT , NULL , 0 );
533-
566+ /* Notify kernel via sendto syscall that TX packet are avail */
567+ err = kick_tx ( xsk );
534568
535569 /* Collect/free completed TX buffers */
536570 completed = xsk_ring_cons__peek (& xsk -> cq ,
@@ -547,9 +581,17 @@ static void complete_tx(struct xsk_socket_info *xsk)
547581 }
548582
549583 xsk_ring_cons__release (& xsk -> cq , completed );
584+ if (completed > xsk -> outstanding_tx ) {
585+ fprintf (stderr , "WARN: %s() "
586+ "reset outstanding_tx(%d) as completed(%d)"
587+ "more than outstanding TX pakcets\n" ,
588+ __func__ , xsk -> outstanding_tx , completed );
589+ }
550590 xsk -> outstanding_tx -= completed < xsk -> outstanding_tx ?
551591 completed : xsk -> outstanding_tx ;
552592 }
593+
594+ return err ;
553595}
554596
555597static inline __sum16 csum16_add (__sum16 csum , __be16 addend )
@@ -601,32 +643,17 @@ static void gen_eth_hdr(struct config *cfg, struct ethhdr *eth_hdr)
601643 eth_hdr -> h_proto = htons (ETH_P_IP );
602644}
603645
604- static bool get_ipv4_u32 (char * ip_str , uint32_t * ip_addr )
605- {
606- int res ;
607646
608- res = inet_pton (AF_INET , ip_str , ip_addr );
609- if (res <= 0 ) {
610- if (res == 0 )
611- fprintf (stderr , "ERROR: IP%s \"%s\" not in presentation format\n" ,
612- "v4" , ip_str );
613- else
614- perror ("inet_pton" );
615- return false;
616- }
617- return true;
618- }
619-
620- static char * opt_ip_str_src = "192.168.44.2" ;
647+ static char * opt_ip_str_src = "192.168.44.1" ;
621648static char * opt_ip_str_dst = "192.168.44.3" ;
622649
623- static void gen_ip_hdr (struct iphdr * ip_hdr )
650+ static void gen_ip_hdr (struct config * cfg , struct iphdr * ip_hdr )
624651{
625- uint32_t saddr ;
626- uint32_t daddr ;
652+ if ( cfg -> opt_ip_src == 0 )
653+ get_ipv4_u32 ( opt_ip_str_src , & cfg -> opt_ip_src ) ;
627654
628- get_ipv4_u32 ( opt_ip_str_src , & saddr );
629- get_ipv4_u32 (opt_ip_str_dst , & daddr );
655+ if ( cfg -> opt_ip_dst == 0 )
656+ get_ipv4_u32 (opt_ip_str_dst , & cfg -> opt_ip_dst );
630657
631658 /* IP header */
632659 ip_hdr -> version = IPVERSION ;
@@ -637,8 +664,8 @@ static void gen_ip_hdr(struct iphdr *ip_hdr)
637664 ip_hdr -> frag_off = 0 ;
638665 ip_hdr -> ttl = IPDEFTTL ;
639666 ip_hdr -> protocol = IPPROTO_UDP ;
640- ip_hdr -> saddr = saddr ;
641- ip_hdr -> daddr = daddr ;
667+ ip_hdr -> saddr = cfg -> opt_ip_src ;
668+ ip_hdr -> daddr = cfg -> opt_ip_dst ;
642669
643670 /* IP header checksum */
644671 ip_hdr -> check = 0 ;
@@ -675,7 +702,7 @@ static void gen_base_pkt(struct config *cfg, uint8_t *pkt_ptr)
675702 sizeof (struct iphdr ));
676703
677704 gen_eth_hdr (cfg , eth_hdr );
678- gen_ip_hdr (ip_hdr );
705+ gen_ip_hdr (cfg , ip_hdr );
679706 gen_udp_hdr (udp_hdr , ip_hdr );
680707}
681708
@@ -836,7 +863,7 @@ static void print_pkt_info(uint8_t *pkt, uint32_t len)
836863 }
837864}
838865
839- static void tx_pkt (struct config * cfg , struct xsk_socket_info * xsk )
866+ static int tx_pkt (struct config * cfg , struct xsk_socket_info * xsk )
840867{
841868 struct xsk_umem_info * umem = xsk -> umem ;
842869 uint64_t pkt_addr = mem_alloc_umem_frame (& umem -> mem );
@@ -857,14 +884,17 @@ static void tx_pkt(struct config *cfg, struct xsk_socket_info *xsk)
857884 if (ret != 1 ) {
858885 /* No more transmit slots, drop the packet */
859886 mem_free_umem_frame (& umem -> mem , pkt_addr );
887+ fprintf (stderr , "ERR - %s() failed transmit\n" ,
888+ __func__ );
860889 }
861890
862891 xsk_ring_prod__tx_desc (& xsk -> tx , tx_idx )-> addr = pkt_addr ;
863892 xsk_ring_prod__tx_desc (& xsk -> tx , tx_idx )-> len = 64 ;
864893 xsk_ring_prod__submit (& xsk -> tx , 1 );
865894 xsk -> outstanding_tx ++ ;
866895 }
867- //complete_tx(xsk);
896+
897+ return complete_tx (xsk );
868898}
869899
870900/* Generate some fake packets (in umem area). Real system will deliver TX
@@ -1133,7 +1163,7 @@ static void rx_avail_packets(struct xsk_container *xsks)
11331163}
11341164
11351165/* Default interval in usec */
1136- #define DEFAULT_INTERVAL 1000000
1166+ #define DEFAULT_INTERVAL 1000000
11371167
11381168#define USEC_PER_SEC 1000000
11391169#define NSEC_PER_SEC 1000000000
@@ -1146,6 +1176,17 @@ static inline void tsnorm(struct timespec *ts)
11461176 }
11471177}
11481178
1179+ static inline uint64_t timespec2ns (struct timespec * ts )
1180+ {
1181+ return (uint64_t ) ts -> tv_sec * NANOSEC_PER_SEC + ts -> tv_nsec ;
1182+ }
1183+
1184+ static inline void ns2timespec (uint64_t ns , struct timespec * ts )
1185+ {
1186+ ts -> tv_sec = ns / NANOSEC_PER_SEC ;
1187+ ts -> tv_nsec = ns % NANOSEC_PER_SEC ;
1188+ }
1189+
11491190static inline int64_t calcdiff (struct timespec t1 , struct timespec t2 )
11501191{
11511192 int64_t diff ;
@@ -1154,10 +1195,24 @@ static inline int64_t calcdiff(struct timespec t1, struct timespec t2)
11541195 return diff ;
11551196}
11561197
1198+ static inline int64_t calcdiff_ns (struct timespec t1 , struct timespec t2 )
1199+ {
1200+ int64_t diff ;
1201+ diff = NSEC_PER_SEC * (long long )((int ) t1 .tv_sec - (int ) t2 .tv_sec );
1202+ diff += ((int ) t1 .tv_nsec - (int ) t2 .tv_nsec );
1203+ return diff ;
1204+ }
1205+
/*
 * Print a timestamp on stdout as "Time: <sec>.<nsec> - <msg>".
 *
 * @ts:  timestamp to print
 * @msg: label appended after the timestamp
 *
 * The nanosecond part is zero-padded to 9 digits so the output reads as
 * a decimal fraction of a second (1s + 5ns prints as 1.000000005, not
 * 1.5).  Both fields are signed, so they are printed with signed
 * conversions.
 */
static void print_timespec(struct timespec *ts, const char *msg)
{
	printf("Time: %lld.%09ld - %s\n",
	       (long long) ts->tv_sec, (long) ts->tv_nsec, msg);
}
1210+
11571211struct wakeup_stat {
11581212 long min ;
11591213 long max ;
1160- long act ;
1214+ long curr ;
1215+ long prev ;
11611216 double avg ;
11621217 unsigned long events ;
11631218};
@@ -1173,13 +1228,15 @@ struct wakeup_stat {
11731228static void tx_cyclic_and_rx_process (struct config * cfg ,
11741229 struct xsk_container * xsks )
11751230{
1176- struct timespec now , next , interval ;
1177- struct wakeup_stat stat = { .min = DEFAULT_INTERVAL };
1178- int batch_nr = 4 ;
1179- struct xdp_desc tx_pkts [batch_nr ];
1231+ struct timespec now , next , next_adj , interval , now_prev ;
1232+ struct wakeup_stat stat = { .min = DEFAULT_INTERVAL , .max = -0xFFFF };
1233+ struct wakeup_stat stat_adj = { .min = DEFAULT_INTERVAL , .max = -0xFFFF };
1234+ struct xdp_desc tx_pkts [BATCH_PKTS_MAX ];
1235+ int batch_nr = cfg -> batch_pkts ;
11801236 int tx_nr ;
1237+ bool first = true;
11811238
1182- int period = DEFAULT_INTERVAL ; // TODO: Add to cfg
1239+ int period = cfg -> interval ;
11831240 int timermode = TIMER_ABSTIME ;
11841241 int clock = CLOCK_MONOTONIC ;
11851242
@@ -1198,13 +1255,15 @@ static void tx_cyclic_and_rx_process(struct config *cfg,
11981255 next .tv_sec += interval .tv_sec ;
11991256 next .tv_nsec += interval .tv_nsec ;
12001257 tsnorm (& next );
1258+ next_adj = next ; /* Not adjusted yet */
12011259
12021260 while (!global_exit ) {
1203- int64_t diff ;
1261+ int64_t diff , diff2adj , diff_interval ;
1262+ int64_t avg , avg2adj ;
12041263 int err , n ;
12051264
1206- /* Wait for next period */
1207- err = clock_nanosleep (clock , timermode , & next , NULL );
1265+ /* Wait for next period, but adjusted for measured inaccuracy */
1266+ err = clock_nanosleep (clock , timermode , & next_adj , NULL );
12081267 /* Took case MODE_CLOCK_NANOSLEEP from cyclictest */
12091268 if (err ) {
12101269 if (err != EINTR )
@@ -1214,6 +1273,7 @@ static void tx_cyclic_and_rx_process(struct config *cfg,
12141273 }
12151274
12161275 /* Expecting to wakeup at "next" get systime "now" to check */
1276+ now_prev = now ;
12171277 err = clock_gettime (clock , & now );
12181278 if (err ) {
12191279 if (err != EINTR )
@@ -1222,30 +1282,62 @@ static void tx_cyclic_and_rx_process(struct config *cfg,
12221282 goto out ;
12231283 }
12241284
1225- /* Detect inaccuracy diff */
1226- diff = calcdiff (now , next );
1227- if (diff < stat .min )
1228- stat .min = diff ;
1229- if (diff > stat .max )
1230- stat .max = diff ;
1285+ /* How close is wakeup time to our actual target */
1286+ diff = calcdiff_ns (now , next ); /* Positive num = wokeup after */
1287+ /* Exclude first measurement as no next_adj happened */
1288+ if (!first ) {
1289+ if (diff < stat .min )
1290+ stat .min = diff ;
1291+ if (diff > stat .max )
1292+ stat .max = diff ;
1293+ }
1294+ first = false;
12311295 stat .avg += (double ) diff ;
1232- stat .act = diff ;
1233-
1296+ stat .prev = stat . curr ;
1297+ stat . curr = diff ;
12341298 stat .events ++ ;
1299+ avg = (stat .avg / stat .events );
1300+
1301+ /* Measure inaccuracy of clock_nanosleep */
1302+ diff2adj = calcdiff_ns (now , next_adj ); /* Positive num = wokeup after */
1303+ stat_adj .avg += (double ) diff2adj ;
1304+ stat_adj .events ++ ;
1305+ avg2adj = (stat_adj .avg / stat_adj .events );
1306+
1307+ // IDEA: Spin until exact time occurs (if diff negative)
12351308
12361309 /* Send batch of packets */
12371310 n = tx_batch_pkts (xsk , tx_nr , tx_pkts );
12381311
1312+ diff_interval = calcdiff_ns (now , now_prev );
1313+
12391314 if (verbose >=1 )
12401315 printf ("TX pkts:%d event:%lu"
1241- " inaccurate(usec) wakeup min:%ld cur:%ld max:%ld\n" ,
1242- n , stat .events , stat .min , stat .act , stat .max );
1316+ " inaccurate wakeup(nanosec) curr:%ld"
1317+ "(min:%ld max:%ld avg:%ld avg2adj:%ld)"
1318+ " variance(n-1):%ld interval-ns:%ld\n" ,
1319+ n , stat .events , stat .curr ,
1320+ stat .min , stat .max , avg , avg2adj ,
1321+ stat .curr - stat .prev ,
1322+ diff_interval );
1323+
1324+ if (debug_time ) {
1325+ print_timespec (& now , "now" );
1326+ print_timespec (& next_adj , "next_adj" );
1327+ print_timespec (& next , "next" );
1328+ }
12431329
12441330 /* Calculate next time to wakeup */
12451331 next .tv_sec += interval .tv_sec ;
12461332 next .tv_nsec += interval .tv_nsec ;
12471333 tsnorm (& next );
12481334
1335+ /* Adjust for inaccuracy of clock_nanosleep wakeup */
1336+ uint64_t next_adj_ns = timespec2ns (& next );
1337+ next_adj_ns = next_adj_ns - avg2adj ;
1338+ ns2timespec (next_adj_ns , & next_adj );
1339+ tsnorm (& next_adj );
1340+
12491341 /* Get packets for *next* iteration */
12501342 tx_nr = invent_tx_pkts (cfg , xsk -> umem , batch_nr , tx_pkts );
12511343
@@ -1381,6 +1473,8 @@ int main(int argc, char **argv)
13811473 .xsk_if_queue = -1 ,
13821474 .opt_tx_dmac = default_tx_dmac ,
13831475 .opt_tx_smac = default_tx_smac ,
1476+ .interval = DEFAULT_INTERVAL ,
1477+ .batch_pkts = BATCH_PKTS_DEFAULT ,
13841478 };
13851479 pthread_t stats_poll_thread ;
13861480 struct xsk_umem_info * umem ;
@@ -1553,9 +1647,15 @@ int main(int argc, char **argv)
15531647 * It seems related with XDP attachment causing link down/up event for
15541648 * some drivers. Q: What is the right method/API that waits for link to
15551649 * be initialized correctly?
1650+ *
1651+ * This workaround keeps trying to send a single packet, and
1652+ * check return value seen from sendto() syscall, until it
1653+ * doesn't return an error.
15561654 */
1557- //sleep(3);
1558- // tx_pkt(&cfg, xsks.sockets[0]);
1655+ while (err = tx_pkt (& cfg , xsks .sockets [0 ])) {
1656+ fprintf (stderr , "WARN(%d): Failed to Tx pkt, will retry\n" , err );
1657+ sleep (1 );
1658+ }
15591659
15601660 /* Receive and count packets then drop them */
15611661 // rx_and_process(&cfg, &xsks);
0 commit comments