Skip to content

Commit 2135ae1

Browse files
authored
Merge pull request #36 from xdp-project/vestas04_AF_XDP_example
AF_XDP example: Improvements to wake-up accuracy
2 parents e80da14 + 0c2ec8a commit 2135ae1

File tree

4 files changed

+209
-52
lines changed

4 files changed

+209
-52
lines changed

AF_XDP-interaction/af_xdp_user.c

Lines changed: 151 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,9 @@ static const struct option_wrapper long_options[] = {
279279
{{"metainfo", no_argument, NULL, 'm' },
280280
"Print XDP metadata info output mode (debug)"},
281281

282+
{{"timedebug", no_argument, NULL, 't' },
283+
"Print timestamps info for wakeup accuracy (debug)"},
284+
282285
{{"debug", no_argument, NULL, 'D' },
283286
"Debug info output mode (debug)"},
284287

@@ -288,6 +291,12 @@ static const struct option_wrapper long_options[] = {
288291
{{"progsec", required_argument, NULL, 2 },
289292
"Load program in <section> of the ELF file", "<section>"},
290293

294+
{{"src-ip", required_argument, NULL, 4 },
295+
"Change IPv4 source address in generated packets", "<ip>"},
296+
297+
{{"dst-ip", required_argument, NULL, 5 },
298+
"Change IPv4 destination address in generated packets", "<ip>"},
299+
291300
{{"busy-poll", no_argument, NULL, 'B' },
292301
"Enable socket prefer NAPI busy-poll mode (remember adjust sysctl too)"},
293302

@@ -297,6 +306,12 @@ static const struct option_wrapper long_options[] = {
297306
{{"tx-smac", required_argument, NULL, 'H' },
298307
"Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format", "aa:bb:cc:dd:ee:ff"},
299308

309+
{{"interval", required_argument, NULL, 'i' },
310+
"Periodic TX-cyclic interval wakeup period in usec", "<usec>"},
311+
312+
{{"batch-pkts", required_argument, NULL, 'b' },
313+
"Periodic TX-cyclic batch send pkts", "<pkts>"},
314+
300315
{{0, 0, NULL, 0 }, NULL, false}
301316
};
302317

@@ -521,16 +536,35 @@ static struct xsk_socket_info *xsk_configure_socket(struct config *cfg,
521536
return NULL;
522537
}
523538

524-
static void complete_tx(struct xsk_socket_info *xsk)
539+
static int kick_tx(struct xsk_socket_info *xsk)
540+
{
541+
int err = 0;
542+
int ret;
543+
544+
ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
545+
if (ret < 0) { /* On error, -1 is returned, and errno is set */
546+
fprintf(stderr, "WARN: %s() sendto() failed with errno:%d\n",
547+
__func__, errno);
548+
err = errno;
549+
}
550+
/* Kernel samples/bpf/ xdp_sock_user.c kick_tx variant doesn't
551+
* treat the following errno values as errors:
552+
* ENOBUFS , EAGAIN , EBUSY , ENETDOWN
553+
*/
554+
return err;
555+
}
556+
557+
static int complete_tx(struct xsk_socket_info *xsk)
525558
{
526559
unsigned int completed;
527560
uint32_t idx_cq;
561+
int err;
528562

529563
if (!xsk->outstanding_tx)
530564
return;
531565

532-
sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
533-
566+
/* Notify kernel via sendto syscall that TX packets are available */
567+
err = kick_tx(xsk);
534568

535569
/* Collect/free completed TX buffers */
536570
completed = xsk_ring_cons__peek(&xsk->cq,
@@ -547,9 +581,17 @@ static void complete_tx(struct xsk_socket_info *xsk)
547581
}
548582

549583
xsk_ring_cons__release(&xsk->cq, completed);
584+
if (completed > xsk->outstanding_tx) {
585+
fprintf(stderr, "WARN: %s() "
586+
"reset outstanding_tx(%d) as completed(%d)"
587+
"more than outstanding TX pakcets\n",
588+
__func__, xsk->outstanding_tx, completed);
589+
}
550590
xsk->outstanding_tx -= completed < xsk->outstanding_tx ?
551591
completed : xsk->outstanding_tx;
552592
}
593+
594+
return err;
553595
}
554596

555597
static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
@@ -601,32 +643,17 @@ static void gen_eth_hdr(struct config *cfg, struct ethhdr *eth_hdr)
601643
eth_hdr->h_proto = htons(ETH_P_IP);
602644
}
603645

604-
/* Parse a dotted-quad IPv4 string into *ip_addr using inet_pton().
 *
 * Returns true when the conversion succeeded. On failure a diagnostic
 * is written to stderr and false is returned.
 */
static bool get_ipv4_u32(char *ip_str, uint32_t *ip_addr)
{
	int rc = inet_pton(AF_INET, ip_str, ip_addr);

	if (rc > 0)
		return true;

	if (rc < 0)
		perror("inet_pton");
	else /* rc == 0: string could not be parsed as an address */
		fprintf(stderr, "ERROR: IP%s \"%s\" not in presentation format\n",
			"v4", ip_str);

	return false;
}
619-
620-
static char *opt_ip_str_src = "192.168.44.2";
647+
static char *opt_ip_str_src = "192.168.44.1";
621648
static char *opt_ip_str_dst = "192.168.44.3";
622649

623-
static void gen_ip_hdr(struct iphdr *ip_hdr)
650+
static void gen_ip_hdr(struct config *cfg, struct iphdr *ip_hdr)
624651
{
625-
uint32_t saddr;
626-
uint32_t daddr;
652+
if (cfg->opt_ip_src == 0)
653+
get_ipv4_u32(opt_ip_str_src, &cfg->opt_ip_src);
627654

628-
get_ipv4_u32(opt_ip_str_src, &saddr);
629-
get_ipv4_u32(opt_ip_str_dst, &daddr);
655+
if (cfg->opt_ip_dst == 0)
656+
get_ipv4_u32(opt_ip_str_dst, &cfg->opt_ip_dst);
630657

631658
/* IP header */
632659
ip_hdr->version = IPVERSION;
@@ -637,8 +664,8 @@ static void gen_ip_hdr(struct iphdr *ip_hdr)
637664
ip_hdr->frag_off = 0;
638665
ip_hdr->ttl = IPDEFTTL;
639666
ip_hdr->protocol = IPPROTO_UDP;
640-
ip_hdr->saddr = saddr;
641-
ip_hdr->daddr = daddr;
667+
ip_hdr->saddr = cfg->opt_ip_src;
668+
ip_hdr->daddr = cfg->opt_ip_dst;
642669

643670
/* IP header checksum */
644671
ip_hdr->check = 0;
@@ -675,7 +702,7 @@ static void gen_base_pkt(struct config *cfg, uint8_t *pkt_ptr)
675702
sizeof(struct iphdr));
676703

677704
gen_eth_hdr(cfg, eth_hdr);
678-
gen_ip_hdr(ip_hdr);
705+
gen_ip_hdr(cfg, ip_hdr);
679706
gen_udp_hdr(udp_hdr, ip_hdr);
680707
}
681708

@@ -836,7 +863,7 @@ static void print_pkt_info(uint8_t *pkt, uint32_t len)
836863
}
837864
}
838865

839-
static void tx_pkt(struct config *cfg, struct xsk_socket_info *xsk)
866+
static int tx_pkt(struct config *cfg, struct xsk_socket_info *xsk)
840867
{
841868
struct xsk_umem_info *umem = xsk->umem;
842869
uint64_t pkt_addr = mem_alloc_umem_frame(&umem->mem);
@@ -857,14 +884,17 @@ static void tx_pkt(struct config *cfg, struct xsk_socket_info *xsk)
857884
if (ret != 1) {
858885
/* No more transmit slots, drop the packet */
859886
mem_free_umem_frame(&umem->mem, pkt_addr);
887+
fprintf(stderr, "ERR - %s() failed transmit\n",
888+
__func__);
860889
}
861890

862891
xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->addr = pkt_addr;
863892
xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->len = 64;
864893
xsk_ring_prod__submit(&xsk->tx, 1);
865894
xsk->outstanding_tx++;
866895
}
867-
//complete_tx(xsk);
896+
897+
return complete_tx(xsk);
868898
}
869899

870900
/* Generate some fake packets (in umem area). Real system will deliver TX
@@ -1133,7 +1163,7 @@ static void rx_avail_packets(struct xsk_container *xsks)
11331163
}
11341164

11351165
/* Default interval in usec */
1136-
#define DEFAULT_INTERVAL 1000000
1166+
#define DEFAULT_INTERVAL 1000000
11371167

11381168
#define USEC_PER_SEC 1000000
11391169
#define NSEC_PER_SEC 1000000000
@@ -1146,6 +1176,17 @@ static inline void tsnorm(struct timespec *ts)
11461176
}
11471177
}
11481178

1179+
static inline uint64_t timespec2ns(struct timespec *ts)
1180+
{
1181+
return (uint64_t) ts->tv_sec * NANOSEC_PER_SEC + ts->tv_nsec;
1182+
}
1183+
1184+
static inline void ns2timespec(uint64_t ns, struct timespec *ts)
1185+
{
1186+
ts->tv_sec = ns / NANOSEC_PER_SEC;
1187+
ts->tv_nsec = ns % NANOSEC_PER_SEC;
1188+
}
1189+
11491190
static inline int64_t calcdiff(struct timespec t1, struct timespec t2)
11501191
{
11511192
int64_t diff;
@@ -1154,10 +1195,24 @@ static inline int64_t calcdiff(struct timespec t1, struct timespec t2)
11541195
return diff;
11551196
}
11561197

1198+
/* Return t1 - t2 in nanoseconds (positive when t1 is later than t2).
 *
 * The subtraction is done in 64-bit arithmetic; narrowing tv_sec to int
 * first would truncate a 64-bit time_t and could overflow a signed int.
 */
static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2)
{
	int64_t diff;

	diff = ((int64_t) t1.tv_sec - (int64_t) t2.tv_sec) * NSEC_PER_SEC;
	diff += (int64_t) t1.tv_nsec - (int64_t) t2.tv_nsec;
	return diff;
}
1205+
1206+
/* Print a timespec to stdout as "<sec>.<nsec>" followed by a label.
 *
 * The nanosecond field is zero-padded to nine digits so the output reads
 * as a decimal number of seconds (1 s + 5 ns prints as 1.000000005, not
 * 1.5). The explicit casts make the arguments match the conversion
 * specifiers regardless of how time_t is defined.
 */
static void print_timespec(struct timespec *ts, char *msg)
{
	printf("Time: %lld.%09ld - %s\n",
	       (long long) ts->tv_sec, (long) ts->tv_nsec, msg);
}
1210+
11571211
struct wakeup_stat {
11581212
long min;
11591213
long max;
1160-
long act;
1214+
long curr;
1215+
long prev;
11611216
double avg;
11621217
unsigned long events;
11631218
};
@@ -1173,13 +1228,15 @@ struct wakeup_stat {
11731228
static void tx_cyclic_and_rx_process(struct config *cfg,
11741229
struct xsk_container *xsks)
11751230
{
1176-
struct timespec now, next, interval;
1177-
struct wakeup_stat stat = { .min = DEFAULT_INTERVAL};
1178-
int batch_nr = 4;
1179-
struct xdp_desc tx_pkts[batch_nr];
1231+
struct timespec now, next, next_adj, interval, now_prev;
1232+
struct wakeup_stat stat = { .min = DEFAULT_INTERVAL, .max = -0xFFFF };
1233+
struct wakeup_stat stat_adj = { .min = DEFAULT_INTERVAL, .max = -0xFFFF };
1234+
struct xdp_desc tx_pkts[BATCH_PKTS_MAX];
1235+
int batch_nr = cfg->batch_pkts;
11801236
int tx_nr;
1237+
bool first = true;
11811238

1182-
int period = DEFAULT_INTERVAL; // TODO: Add to cfg
1239+
int period = cfg->interval;
11831240
int timermode = TIMER_ABSTIME;
11841241
int clock = CLOCK_MONOTONIC;
11851242

@@ -1198,13 +1255,15 @@ static void tx_cyclic_and_rx_process(struct config *cfg,
11981255
next.tv_sec += interval.tv_sec;
11991256
next.tv_nsec += interval.tv_nsec;
12001257
tsnorm(&next);
1258+
next_adj = next; /* Not adjusted yet */
12011259

12021260
while (!global_exit) {
1203-
int64_t diff;
1261+
int64_t diff, diff2adj, diff_interval;
1262+
int64_t avg, avg2adj;
12041263
int err, n;
12051264

1206-
/* Wait for next period */
1207-
err = clock_nanosleep(clock, timermode, &next, NULL);
1265+
/* Wait for next period, but adjusted for measured inaccuracy */
1266+
err = clock_nanosleep(clock, timermode, &next_adj, NULL);
12081267
/* Took case MODE_CLOCK_NANOSLEEP from cyclictest */
12091268
if (err) {
12101269
if (err != EINTR)
@@ -1214,6 +1273,7 @@ static void tx_cyclic_and_rx_process(struct config *cfg,
12141273
}
12151274

12161275
/* Expecting to wakeup at "next" get systime "now" to check */
1276+
now_prev = now;
12171277
err = clock_gettime(clock, &now);
12181278
if (err) {
12191279
if (err != EINTR)
@@ -1222,30 +1282,62 @@ static void tx_cyclic_and_rx_process(struct config *cfg,
12221282
goto out;
12231283
}
12241284

1225-
/* Detect inaccuracy diff */
1226-
diff = calcdiff(now, next);
1227-
if (diff < stat.min)
1228-
stat.min = diff;
1229-
if (diff > stat.max)
1230-
stat.max = diff;
1285+
/* How close is wakeup time to our actual target */
1286+
diff = calcdiff_ns(now, next); /* Positive num = wokeup after */
1287+
/* Exclude first measurement as no next_adj happened */
1288+
if (!first) {
1289+
if (diff < stat.min)
1290+
stat.min = diff;
1291+
if (diff > stat.max)
1292+
stat.max = diff;
1293+
}
1294+
first = false;
12311295
stat.avg += (double) diff;
1232-
stat.act = diff;
1233-
1296+
stat.prev = stat.curr;
1297+
stat.curr = diff;
12341298
stat.events++;
1299+
avg = (stat.avg / stat.events);
1300+
1301+
/* Measure inaccuracy of clock_nanosleep */
1302+
diff2adj = calcdiff_ns(now, next_adj); /* Positive num = wokeup after */
1303+
stat_adj.avg += (double) diff2adj;
1304+
stat_adj.events++;
1305+
avg2adj = (stat_adj.avg / stat_adj.events);
1306+
1307+
// IDEA: Spin until exact time occurs (if diff negative)
12351308

12361309
/* Send batch of packets */
12371310
n = tx_batch_pkts(xsk, tx_nr, tx_pkts);
12381311

1312+
diff_interval = calcdiff_ns(now, now_prev);
1313+
12391314
if (verbose >=1 )
12401315
printf("TX pkts:%d event:%lu"
1241-
" inaccurate(usec) wakeup min:%ld cur:%ld max:%ld\n",
1242-
n, stat.events, stat.min, stat.act, stat.max);
1316+
" inaccurate wakeup(nanosec) curr:%ld"
1317+
"(min:%ld max:%ld avg:%ld avg2adj:%ld)"
1318+
" variance(n-1):%ld interval-ns:%ld\n",
1319+
n, stat.events, stat.curr,
1320+
stat.min, stat.max, avg, avg2adj,
1321+
stat.curr - stat.prev,
1322+
diff_interval);
1323+
1324+
if (debug_time) {
1325+
print_timespec(&now, "now");
1326+
print_timespec(&next_adj, "next_adj");
1327+
print_timespec(&next, "next");
1328+
}
12431329

12441330
/* Calculate next time to wakeup */
12451331
next.tv_sec += interval.tv_sec;
12461332
next.tv_nsec += interval.tv_nsec;
12471333
tsnorm(&next);
12481334

1335+
/* Adjust for inaccuracy of clock_nanosleep wakeup */
1336+
uint64_t next_adj_ns = timespec2ns(&next);
1337+
next_adj_ns = next_adj_ns - avg2adj;
1338+
ns2timespec(next_adj_ns, &next_adj);
1339+
tsnorm(&next_adj);
1340+
12491341
/* Get packets for *next* iteration */
12501342
tx_nr = invent_tx_pkts(cfg, xsk->umem, batch_nr, tx_pkts);
12511343

@@ -1381,6 +1473,8 @@ int main(int argc, char **argv)
13811473
.xsk_if_queue = -1,
13821474
.opt_tx_dmac = default_tx_dmac,
13831475
.opt_tx_smac = default_tx_smac,
1476+
.interval = DEFAULT_INTERVAL,
1477+
.batch_pkts = BATCH_PKTS_DEFAULT,
13841478
};
13851479
pthread_t stats_poll_thread;
13861480
struct xsk_umem_info *umem;
@@ -1553,9 +1647,15 @@ int main(int argc, char **argv)
15531647
* It seems related with XDP attachment causing link down/up event for
15541648
* some drivers. Q: What is the right method/API that waits for link to
15551649
* be initialized correctly?
1650+
*
1651+
* This workaround keeps trying to send a single packet, and
1652+
* check return value seen from sendto() syscall, until it
1653+
* doesn't return an error.
15561654
*/
1557-
//sleep(3);
1558-
// tx_pkt(&cfg, xsks.sockets[0]);
1655+
while (err = tx_pkt(&cfg, xsks.sockets[0])) {
1656+
fprintf(stderr, "WARN(%d): Failed to Tx pkt, will retry\n", err);
1657+
sleep(1);
1658+
}
15591659

15601660
/* Receive and count packets, then drop them */
15611661
// rx_and_process(&cfg, &xsks);

0 commit comments

Comments
 (0)