1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2019 Microsoft Corporation 3 */ 4 5 #include <errno.h> 6 #include <net/if.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/uio.h> 11 #include <sys/utsname.h> 12 #include <time.h> 13 #include <unistd.h> 14 15 #include <rte_common.h> 16 #include <rte_cycles.h> 17 #include <rte_dev.h> 18 #include <rte_errno.h> 19 #include <rte_ethdev.h> 20 #include <rte_ether.h> 21 #include <rte_mbuf.h> 22 #include <rte_pcapng.h> 23 #include <rte_time.h> 24 25 #include "pcapng_proto.h" 26 27 /* conversion from DPDK speed to PCAPNG */ 28 #define PCAPNG_MBPS_SPEED 1000000ull 29 30 /* Format of the capture file handle */ 31 struct rte_pcapng { 32 int outfd; /* output file */ 33 /* DPDK port id to interface index in file */ 34 uint32_t port_index[RTE_MAX_ETHPORTS]; 35 }; 36 37 /* For converting TSC cycles to PCAPNG ns format */ 38 struct pcapng_time { 39 uint64_t ns; 40 uint64_t cycles; 41 } pcapng_time; 42 43 RTE_INIT(pcapng_init) 44 { 45 struct timespec ts; 46 47 pcapng_time.cycles = rte_get_tsc_cycles(); 48 clock_gettime(CLOCK_REALTIME, &ts); 49 pcapng_time.ns = rte_timespec_to_ns(&ts); 50 } 51 52 /* PCAPNG timestamps are in nanoseconds */ 53 static uint64_t pcapng_tsc_to_ns(uint64_t cycles) 54 { 55 uint64_t delta; 56 57 delta = cycles - pcapng_time.cycles; 58 return pcapng_time.ns + (delta * NSEC_PER_SEC) / rte_get_tsc_hz(); 59 } 60 61 /* length of option including padding */ 62 static uint16_t pcapng_optlen(uint16_t len) 63 { 64 return RTE_ALIGN(sizeof(struct pcapng_option) + len, 65 sizeof(uint32_t)); 66 } 67 68 /* build TLV option and return location of next */ 69 static struct pcapng_option * 70 pcapng_add_option(struct pcapng_option *popt, uint16_t code, 71 const void *data, uint16_t len) 72 { 73 popt->code = code; 74 popt->length = len; 75 memcpy(popt->data, data, len); 76 77 return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len)); 78 } 79 80 /* 81 * Write required initial section header describing the capture 82 */ 83 static int 84 pcapng_section_block(rte_pcapng_t *self, 85 const char *os, const char *hw, 86 const char *app, const char *comment) 87 { 88 struct pcapng_section_header *hdr; 89 struct pcapng_option *opt; 90 void *buf; 91 uint32_t len; 92 ssize_t cc; 93 94 len = sizeof(*hdr); 95 if (hw) 96 len += pcapng_optlen(strlen(hw)); 97 if (os) 98 len += pcapng_optlen(strlen(os)); 99 if (app) 100 len += pcapng_optlen(strlen(app)); 101 if (comment) 102 len += pcapng_optlen(strlen(comment)); 103 104 /* reserve space for OPT_END */ 105 len += pcapng_optlen(0); 106 len += sizeof(uint32_t); 107 108 buf = calloc(1, len); 109 if (!buf) 110 return -1; 111 112 hdr = (struct pcapng_section_header *)buf; 113 *hdr = (struct pcapng_section_header) { 114 .block_type = PCAPNG_SECTION_BLOCK, 115 .block_length = len, 116 .byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC, 117 .major_version = PCAPNG_MAJOR_VERS, 118 .minor_version = PCAPNG_MINOR_VERS, 119 .section_length = UINT64_MAX, 120 }; 121 122 /* After the section header insert variable length options. */ 123 opt = (struct pcapng_option *)(hdr + 1); 124 if (comment) 125 opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, 126 comment, strlen(comment)); 127 if (hw) 128 opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE, 129 hw, strlen(hw)); 130 if (os) 131 opt = pcapng_add_option(opt, PCAPNG_SHB_OS, 132 os, strlen(os)); 133 if (app) 134 opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL, 135 app, strlen(app)); 136 137 /* The standard requires last option to be OPT_END */ 138 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); 139 140 /* clone block_length after option */ 141 memcpy(opt, &hdr->block_length, sizeof(uint32_t)); 142 143 cc = write(self->outfd, buf, len); 144 free(buf); 145 146 return cc; 147 } 148 149 /* Write an interface block for a DPDK port */ 150 static int 151 pcapng_add_interface(rte_pcapng_t *self, uint16_t port) 152 { 153 struct pcapng_interface_block *hdr; 154 struct rte_eth_dev_info dev_info; 155 struct rte_ether_addr *ea, macaddr; 156 const struct rte_device *dev; 157 struct rte_eth_link link; 158 struct pcapng_option *opt; 159 const uint8_t tsresol = 9; /* nanosecond resolution */ 160 uint32_t len; 161 void *buf; 162 char ifname[IF_NAMESIZE]; 163 char ifhw[256]; 164 uint64_t speed = 0; 165 166 if (rte_eth_dev_info_get(port, &dev_info) < 0) 167 return -1; 168 169 /* make something like an interface name */ 170 if (if_indextoname(dev_info.if_index, ifname) == NULL) 171 snprintf(ifname, IF_NAMESIZE, "dpdk:%u", port); 172 173 /* make a useful device hardware string */ 174 dev = dev_info.device; 175 if (dev) 176 snprintf(ifhw, sizeof(ifhw), 177 "%s-%s", dev->bus->name, dev->name); 178 179 /* DPDK reports in units of Mbps */ 180 rte_eth_link_get(port, &link); 181 if (link.link_status == RTE_ETH_LINK_UP) 182 speed = link.link_speed * PCAPNG_MBPS_SPEED; 183 184 if (rte_eth_macaddr_get(port, &macaddr) < 0) 185 ea = NULL; 186 else 187 ea = &macaddr; 188 189 /* Compute length of interface block options */ 190 len = sizeof(*hdr); 191 192 len += pcapng_optlen(sizeof(tsresol)); /* timestamp */ 193 len += pcapng_optlen(strlen(ifname)); /* ifname */ 194 195 if (ea) 196 len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */ 197 if (speed != 0) 198 len += pcapng_optlen(sizeof(uint64_t)); 199 if (dev) 200 len += pcapng_optlen(strlen(ifhw)); 201 202 len += pcapng_optlen(0); 203 len += sizeof(uint32_t); 204 205 buf = alloca(len); 206 if (!buf) 207 return -1; 208 209 hdr = (struct pcapng_interface_block *)buf; 210 *hdr = (struct pcapng_interface_block) { 211 .block_type = PCAPNG_INTERFACE_BLOCK, 212 .link_type = 1, /* DLT_EN10MB - Ethernet */ 213 .block_length = len, 214 }; 215 216 opt = (struct pcapng_option *)(hdr + 1); 217 opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL, 218 &tsresol, sizeof(tsresol)); 219 opt = pcapng_add_option(opt, PCAPNG_IFB_NAME, 220 ifname, strlen(ifname)); 221 if (ea) 222 opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR, 223 ea, RTE_ETHER_ADDR_LEN); 224 if (speed != 0) 225 opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED, 226 &speed, sizeof(uint64_t)); 227 if (dev) 228 opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE, 229 ifhw, strlen(ifhw)); 230 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); 231 232 /* clone block_length after optionsa */ 233 memcpy(opt, &hdr->block_length, sizeof(uint32_t)); 234 235 return write(self->outfd, buf, len); 236 } 237 238 /* 239 * Write the list of possible interfaces at the start 240 * of the file. 241 */ 242 static int 243 pcapng_interfaces(rte_pcapng_t *self) 244 { 245 uint16_t port_id; 246 uint16_t index = 0; 247 248 RTE_ETH_FOREACH_DEV(port_id) { 249 /* The list if ports in pcapng needs to be contiguous */ 250 self->port_index[port_id] = index++; 251 if (pcapng_add_interface(self, port_id) < 0) 252 return -1; 253 } 254 return 0; 255 } 256 257 /* 258 * Write an Interface statistics block at the end of capture. 259 */ 260 ssize_t 261 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, 262 const char *comment, 263 uint64_t start_time, uint64_t end_time, 264 uint64_t ifrecv, uint64_t ifdrop) 265 { 266 struct pcapng_statistics *hdr; 267 struct pcapng_option *opt; 268 uint32_t optlen, len; 269 uint8_t *buf; 270 uint64_t ns; 271 272 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); 273 274 optlen = 0; 275 276 if (ifrecv != UINT64_MAX) 277 optlen += pcapng_optlen(sizeof(ifrecv)); 278 if (ifdrop != UINT64_MAX) 279 optlen += pcapng_optlen(sizeof(ifdrop)); 280 if (start_time != 0) 281 optlen += pcapng_optlen(sizeof(start_time)); 282 if (end_time != 0) 283 optlen += pcapng_optlen(sizeof(end_time)); 284 if (comment) 285 optlen += pcapng_optlen(strlen(comment)); 286 if (optlen != 0) 287 optlen += pcapng_optlen(0); 288 289 len = sizeof(*hdr) + optlen + sizeof(uint32_t); 290 buf = alloca(len); 291 if (buf == NULL) 292 return -1; 293 294 hdr = (struct pcapng_statistics *)buf; 295 opt = (struct pcapng_option *)(hdr + 1); 296 297 if (comment) 298 opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, 299 comment, strlen(comment)); 300 if (start_time != 0) 301 opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME, 302 &start_time, sizeof(start_time)); 303 if (end_time != 0) 304 opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME, 305 &end_time, sizeof(end_time)); 306 if (ifrecv != UINT64_MAX) 307 opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV, 308 &ifrecv, sizeof(ifrecv)); 309 if (ifdrop != UINT64_MAX) 310 opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP, 311 &ifdrop, sizeof(ifdrop)); 312 if (optlen != 0) 313 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); 314 315 hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK; 316 hdr->block_length = len; 317 hdr->interface_id = self->port_index[port_id]; 318 319 ns = pcapng_tsc_to_ns(rte_get_tsc_cycles()); 320 hdr->timestamp_hi = ns >> 32; 321 hdr->timestamp_lo = (uint32_t)ns; 322 323 /* clone block_length after option */ 324 memcpy(opt, &len, sizeof(uint32_t)); 325 326 return write(self->outfd, buf, len); 327 } 328 329 uint32_t 330 rte_pcapng_mbuf_size(uint32_t length) 331 { 332 /* The VLAN and EPB header must fit in the mbuf headroom. */ 333 RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) + 334 sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM); 335 336 /* The flags and queue information are added at the end. */ 337 return sizeof(struct rte_mbuf) 338 + RTE_ALIGN(length, sizeof(uint32_t)) 339 + pcapng_optlen(sizeof(uint32_t)) /* flag option */ 340 + pcapng_optlen(sizeof(uint32_t)) /* queue option */ 341 + sizeof(uint32_t); /* length */ 342 } 343 344 /* More generalized version rte_vlan_insert() */ 345 static int 346 pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci) 347 { 348 struct rte_ether_hdr *nh, *oh; 349 struct rte_vlan_hdr *vh; 350 351 if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1) 352 return -EINVAL; 353 354 if (rte_pktmbuf_data_len(m) < sizeof(*oh)) 355 return -EINVAL; 356 357 oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 358 nh = (struct rte_ether_hdr *) 359 rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr)); 360 if (nh == NULL) 361 return -ENOSPC; 362 363 memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN); 364 nh->ether_type = rte_cpu_to_be_16(ether_type); 365 366 vh = (struct rte_vlan_hdr *) (nh + 1); 367 vh->vlan_tci = rte_cpu_to_be_16(tci); 368 369 return 0; 370 } 371 372 /* 373 * The mbufs created use the Pcapng standard enhanced packet block. 374 * 375 * 1 2 3 376 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 377 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 378 * 0 | Block Type = 0x00000006 | 379 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 380 * 4 | Block Total Length | 381 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 382 * 8 | Interface ID | 383 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 384 * 12 | Timestamp (High) | 385 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 386 * 16 | Timestamp (Low) | 387 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 388 * 20 | Captured Packet Length | 389 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 390 * 24 | Original Packet Length | 391 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 392 * 28 / / 393 * / Packet Data / 394 * / variable length, padded to 32 bits / 395 * / / 396 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 397 * | Option Code = 0x0002 | Option Length = 0x004 | 398 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 399 * | Flags (direction) | 400 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 401 * | Option Code = 0x0006 | Option Length = 0x002 | 402 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 403 * | Queue id | 404 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 405 * | Block Total Length | 406 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 407 */ 408 409 /* Make a copy of original mbuf with pcapng header and options */ 410 struct rte_mbuf * 411 rte_pcapng_copy(uint16_t port_id, uint32_t queue, 412 const struct rte_mbuf *md, 413 struct rte_mempool *mp, 414 uint32_t length, uint64_t cycles, 415 enum rte_pcapng_direction direction) 416 { 417 struct pcapng_enhance_packet_block *epb; 418 uint32_t orig_len, data_len, padding, flags; 419 struct pcapng_option *opt; 420 const uint16_t optlen = pcapng_optlen(sizeof(flags)) + pcapng_optlen(sizeof(queue)); 421 struct rte_mbuf *mc; 422 uint64_t ns; 423 424 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 425 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); 426 #endif 427 ns = pcapng_tsc_to_ns(cycles); 428 429 orig_len = rte_pktmbuf_pkt_len(md); 430 431 /* Take snapshot of the data */ 432 mc = rte_pktmbuf_copy(md, mp, 0, length); 433 if (unlikely(mc == NULL)) 434 return NULL; 435 436 /* Expand any offloaded VLAN information */ 437 if ((direction == RTE_PCAPNG_DIRECTION_IN && 438 (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) || 439 (direction == RTE_PCAPNG_DIRECTION_OUT && 440 (md->ol_flags & RTE_MBUF_F_TX_VLAN))) { 441 if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN, 442 md->vlan_tci) != 0) 443 goto fail; 444 } 445 446 if ((direction == RTE_PCAPNG_DIRECTION_IN && 447 (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) || 448 (direction == RTE_PCAPNG_DIRECTION_OUT && 449 (md->ol_flags & RTE_MBUF_F_TX_QINQ))) { 450 if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ, 451 md->vlan_tci_outer) != 0) 452 goto fail; 453 } 454 455 /* pad the packet to 32 bit boundary */ 456 data_len = rte_pktmbuf_data_len(mc); 457 padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len; 458 if (padding > 0) { 459 void *tail = rte_pktmbuf_append(mc, padding); 460 461 if (tail == NULL) 462 goto fail; 463 memset(tail, 0, padding); 464 } 465 466 /* reserve trailing options and block length */ 467 opt = (struct pcapng_option *) 468 rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); 469 if (unlikely(opt == NULL)) 470 goto fail; 471 472 switch (direction) { 473 case RTE_PCAPNG_DIRECTION_IN: 474 flags = PCAPNG_IFB_INBOUND; 475 break; 476 case RTE_PCAPNG_DIRECTION_OUT: 477 flags = PCAPNG_IFB_OUTBOUND; 478 break; 479 default: 480 flags = 0; 481 } 482 483 opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS, 484 &flags, sizeof(flags)); 485 486 opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE, 487 &queue, sizeof(queue)); 488 489 /* Note: END_OPT necessary here. Wireshark doesn't do it. */ 490 491 /* Add PCAPNG packet header */ 492 epb = (struct pcapng_enhance_packet_block *) 493 rte_pktmbuf_prepend(mc, sizeof(*epb)); 494 if (unlikely(epb == NULL)) 495 goto fail; 496 497 epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK; 498 epb->block_length = rte_pktmbuf_data_len(mc); 499 500 /* Interface index is filled in later during write */ 501 mc->port = port_id; 502 503 epb->timestamp_hi = ns >> 32; 504 epb->timestamp_lo = (uint32_t)ns; 505 epb->capture_length = data_len; 506 epb->original_length = orig_len; 507 508 /* set trailer of block length */ 509 *(uint32_t *)opt = epb->block_length; 510 511 return mc; 512 513 fail: 514 rte_pktmbuf_free(mc); 515 return NULL; 516 } 517 518 /* Count how many segments are in this array of mbufs */ 519 static unsigned int 520 mbuf_burst_segs(struct rte_mbuf *pkts[], unsigned int n) 521 { 522 unsigned int i, iovcnt; 523 524 for (iovcnt = 0, i = 0; i < n; i++) { 525 const struct rte_mbuf *m = pkts[i]; 526 527 __rte_mbuf_sanity_check(m, 1); 528 529 iovcnt += m->nb_segs; 530 } 531 return iovcnt; 532 } 533 534 /* Write pre-formatted packets to file. */ 535 ssize_t 536 rte_pcapng_write_packets(rte_pcapng_t *self, 537 struct rte_mbuf *pkts[], uint16_t nb_pkts) 538 { 539 int iovcnt = mbuf_burst_segs(pkts, nb_pkts); 540 struct iovec iov[iovcnt]; 541 unsigned int i, cnt; 542 ssize_t ret; 543 544 for (i = cnt = 0; i < nb_pkts; i++) { 545 struct rte_mbuf *m = pkts[i]; 546 struct pcapng_enhance_packet_block *epb; 547 548 /* sanity check that is really a pcapng mbuf */ 549 epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *); 550 if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK || 551 epb->block_length != rte_pktmbuf_data_len(m))) { 552 rte_errno = EINVAL; 553 return -1; 554 } 555 556 /* 557 * The DPDK port is recorded during pcapng_copy. 558 * Map that to PCAPNG interface in file. 559 */ 560 epb->interface_id = self->port_index[m->port]; 561 do { 562 iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *); 563 iov[cnt].iov_len = rte_pktmbuf_data_len(m); 564 ++cnt; 565 } while ((m = m->next)); 566 } 567 568 ret = writev(self->outfd, iov, iovcnt); 569 if (unlikely(ret < 0)) 570 rte_errno = errno; 571 return ret; 572 } 573 574 /* Create new pcapng writer handle */ 575 rte_pcapng_t * 576 rte_pcapng_fdopen(int fd, 577 const char *osname, const char *hardware, 578 const char *appname, const char *comment) 579 { 580 rte_pcapng_t *self; 581 582 self = malloc(sizeof(*self)); 583 if (!self) { 584 rte_errno = ENOMEM; 585 return NULL; 586 } 587 588 self->outfd = fd; 589 590 if (pcapng_section_block(self, osname, hardware, appname, comment) < 0) 591 goto fail; 592 593 if (pcapng_interfaces(self) < 0) 594 goto fail; 595 596 return self; 597 fail: 598 free(self); 599 return NULL; 600 } 601 602 void 603 rte_pcapng_close(rte_pcapng_t *self) 604 { 605 close(self->outfd); 606 free(self); 607 } 608