1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2019 Microsoft Corporation 3 */ 4 5 #include <errno.h> 6 #include <net/if.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/uio.h> 11 #include <time.h> 12 #include <unistd.h> 13 14 #include <rte_common.h> 15 #include <rte_cycles.h> 16 #include <rte_dev.h> 17 #include <rte_errno.h> 18 #include <rte_ethdev.h> 19 #include <rte_ether.h> 20 #include <rte_mbuf.h> 21 #include <rte_pcapng.h> 22 #include <rte_time.h> 23 24 #include "pcapng_proto.h" 25 26 /* conversion from DPDK speed to PCAPNG */ 27 #define PCAPNG_MBPS_SPEED 1000000ull 28 29 /* Format of the capture file handle */ 30 struct rte_pcapng { 31 int outfd; /* output file */ 32 /* DPDK port id to interface index in file */ 33 uint32_t port_index[RTE_MAX_ETHPORTS]; 34 }; 35 36 /* For converting TSC cycles to PCAPNG ns format */ 37 struct pcapng_time { 38 uint64_t ns; 39 uint64_t cycles; 40 } pcapng_time; 41 42 RTE_INIT(pcapng_init) 43 { 44 struct timespec ts; 45 46 pcapng_time.cycles = rte_get_tsc_cycles(); 47 clock_gettime(CLOCK_REALTIME, &ts); 48 pcapng_time.ns = rte_timespec_to_ns(&ts); 49 } 50 51 /* PCAPNG timestamps are in nanoseconds */ 52 static uint64_t pcapng_tsc_to_ns(uint64_t cycles) 53 { 54 uint64_t delta; 55 56 delta = cycles - pcapng_time.cycles; 57 return pcapng_time.ns + (delta * NSEC_PER_SEC) / rte_get_tsc_hz(); 58 } 59 60 /* length of option including padding */ 61 static uint16_t pcapng_optlen(uint16_t len) 62 { 63 return RTE_ALIGN(sizeof(struct pcapng_option) + len, 64 sizeof(uint32_t)); 65 } 66 67 /* build TLV option and return location of next */ 68 static struct pcapng_option * 69 pcapng_add_option(struct pcapng_option *popt, uint16_t code, 70 const void *data, uint16_t len) 71 { 72 popt->code = code; 73 popt->length = len; 74 memcpy(popt->data, data, len); 75 76 return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len)); 77 } 78 79 /* 80 * Write required initial section header describing the capture 81 */ 82 static int 83 pcapng_section_block(rte_pcapng_t *self, 84 const char *os, const char *hw, 85 const char *app, const char *comment) 86 { 87 struct pcapng_section_header *hdr; 88 struct pcapng_option *opt; 89 void *buf; 90 uint32_t len; 91 ssize_t cc; 92 93 len = sizeof(*hdr); 94 if (hw) 95 len += pcapng_optlen(strlen(hw)); 96 if (os) 97 len += pcapng_optlen(strlen(os)); 98 if (app) 99 len += pcapng_optlen(strlen(app)); 100 if (comment) 101 len += pcapng_optlen(strlen(comment)); 102 103 /* reserve space for OPT_END */ 104 len += pcapng_optlen(0); 105 len += sizeof(uint32_t); 106 107 buf = calloc(1, len); 108 if (!buf) 109 return -1; 110 111 hdr = (struct pcapng_section_header *)buf; 112 *hdr = (struct pcapng_section_header) { 113 .block_type = PCAPNG_SECTION_BLOCK, 114 .block_length = len, 115 .byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC, 116 .major_version = PCAPNG_MAJOR_VERS, 117 .minor_version = PCAPNG_MINOR_VERS, 118 .section_length = UINT64_MAX, 119 }; 120 121 /* After the section header insert variable length options. */ 122 opt = (struct pcapng_option *)(hdr + 1); 123 if (comment) 124 opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, 125 comment, strlen(comment)); 126 if (hw) 127 opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE, 128 hw, strlen(hw)); 129 if (os) 130 opt = pcapng_add_option(opt, PCAPNG_SHB_OS, 131 os, strlen(os)); 132 if (app) 133 opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL, 134 app, strlen(app)); 135 136 /* The standard requires last option to be OPT_END */ 137 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); 138 139 /* clone block_length after option */ 140 memcpy(opt, &hdr->block_length, sizeof(uint32_t)); 141 142 cc = write(self->outfd, buf, len); 143 free(buf); 144 145 return cc; 146 } 147 148 /* Write an interface block for a DPDK port */ 149 static int 150 pcapng_add_interface(rte_pcapng_t *self, uint16_t port) 151 { 152 struct pcapng_interface_block *hdr; 153 struct rte_eth_dev_info dev_info; 154 struct rte_ether_addr *ea, macaddr; 155 const struct rte_device *dev; 156 struct rte_eth_link link; 157 struct pcapng_option *opt; 158 const uint8_t tsresol = 9; /* nanosecond resolution */ 159 uint32_t len; 160 void *buf; 161 char ifname[IF_NAMESIZE]; 162 char ifhw[256]; 163 uint64_t speed = 0; 164 165 if (rte_eth_dev_info_get(port, &dev_info) < 0) 166 return -1; 167 168 /* make something like an interface name */ 169 if (if_indextoname(dev_info.if_index, ifname) == NULL) 170 snprintf(ifname, IF_NAMESIZE, "dpdk:%u", port); 171 172 /* make a useful device hardware string */ 173 dev = dev_info.device; 174 if (dev) 175 snprintf(ifhw, sizeof(ifhw), 176 "%s-%s", dev->bus->name, dev->name); 177 178 /* DPDK reports in units of Mbps */ 179 rte_eth_link_get(port, &link); 180 if (link.link_status == RTE_ETH_LINK_UP) 181 speed = link.link_speed * PCAPNG_MBPS_SPEED; 182 183 if (rte_eth_macaddr_get(port, &macaddr) < 0) 184 ea = NULL; 185 else 186 ea = &macaddr; 187 188 /* Compute length of interface block options */ 189 len = sizeof(*hdr); 190 191 len += pcapng_optlen(sizeof(tsresol)); /* timestamp */ 192 len += pcapng_optlen(strlen(ifname)); /* ifname */ 193 194 if (ea) 195 len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */ 196 if (speed != 0) 197 len += pcapng_optlen(sizeof(uint64_t)); 198 if (dev) 199 len += pcapng_optlen(strlen(ifhw)); 200 201 len += pcapng_optlen(0); 202 len += sizeof(uint32_t); 203 204 buf = alloca(len); 205 if (!buf) 206 return -1; 207 208 hdr = (struct pcapng_interface_block *)buf; 209 *hdr = (struct pcapng_interface_block) { 210 .block_type = PCAPNG_INTERFACE_BLOCK, 211 .link_type = 1, /* DLT_EN10MB - Ethernet */ 212 .block_length = len, 213 }; 214 215 opt = (struct pcapng_option *)(hdr + 1); 216 opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL, 217 &tsresol, sizeof(tsresol)); 218 opt = pcapng_add_option(opt, PCAPNG_IFB_NAME, 219 ifname, strlen(ifname)); 220 if (ea) 221 opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR, 222 ea, RTE_ETHER_ADDR_LEN); 223 if (speed != 0) 224 opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED, 225 &speed, sizeof(uint64_t)); 226 if (dev) 227 opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE, 228 ifhw, strlen(ifhw)); 229 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); 230 231 /* clone block_length after optionsa */ 232 memcpy(opt, &hdr->block_length, sizeof(uint32_t)); 233 234 return write(self->outfd, buf, len); 235 } 236 237 /* 238 * Write the list of possible interfaces at the start 239 * of the file. 240 */ 241 static int 242 pcapng_interfaces(rte_pcapng_t *self) 243 { 244 uint16_t port_id; 245 uint16_t index = 0; 246 247 RTE_ETH_FOREACH_DEV(port_id) { 248 /* The list if ports in pcapng needs to be contiguous */ 249 self->port_index[port_id] = index++; 250 if (pcapng_add_interface(self, port_id) < 0) 251 return -1; 252 } 253 return 0; 254 } 255 256 /* 257 * Write an Interface statistics block at the end of capture. 258 */ 259 ssize_t 260 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, 261 const char *comment, 262 uint64_t start_time, uint64_t end_time, 263 uint64_t ifrecv, uint64_t ifdrop) 264 { 265 struct pcapng_statistics *hdr; 266 struct pcapng_option *opt; 267 uint32_t optlen, len; 268 uint8_t *buf; 269 uint64_t ns; 270 271 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); 272 273 optlen = 0; 274 275 if (ifrecv != UINT64_MAX) 276 optlen += pcapng_optlen(sizeof(ifrecv)); 277 if (ifdrop != UINT64_MAX) 278 optlen += pcapng_optlen(sizeof(ifdrop)); 279 if (start_time != 0) 280 optlen += pcapng_optlen(sizeof(start_time)); 281 if (end_time != 0) 282 optlen += pcapng_optlen(sizeof(end_time)); 283 if (comment) 284 optlen += pcapng_optlen(strlen(comment)); 285 if (optlen != 0) 286 optlen += pcapng_optlen(0); 287 288 len = sizeof(*hdr) + optlen + sizeof(uint32_t); 289 buf = alloca(len); 290 if (buf == NULL) 291 return -1; 292 293 hdr = (struct pcapng_statistics *)buf; 294 opt = (struct pcapng_option *)(hdr + 1); 295 296 if (comment) 297 opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, 298 comment, strlen(comment)); 299 if (start_time != 0) 300 opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME, 301 &start_time, sizeof(start_time)); 302 if (end_time != 0) 303 opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME, 304 &end_time, sizeof(end_time)); 305 if (ifrecv != UINT64_MAX) 306 opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV, 307 &ifrecv, sizeof(ifrecv)); 308 if (ifdrop != UINT64_MAX) 309 opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP, 310 &ifdrop, sizeof(ifdrop)); 311 if (optlen != 0) 312 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); 313 314 hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK; 315 hdr->block_length = len; 316 hdr->interface_id = self->port_index[port_id]; 317 318 ns = pcapng_tsc_to_ns(rte_get_tsc_cycles()); 319 hdr->timestamp_hi = ns >> 32; 320 hdr->timestamp_lo = (uint32_t)ns; 321 322 /* clone block_length after option */ 323 memcpy(opt, &len, sizeof(uint32_t)); 324 325 return write(self->outfd, buf, len); 326 } 327 328 uint32_t 329 rte_pcapng_mbuf_size(uint32_t length) 330 { 331 /* The VLAN and EPB header must fit in the mbuf headroom. */ 332 RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) + 333 sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM); 334 335 /* The flags and queue information are added at the end. */ 336 return sizeof(struct rte_mbuf) 337 + RTE_ALIGN(length, sizeof(uint32_t)) 338 + pcapng_optlen(sizeof(uint32_t)) /* flag option */ 339 + pcapng_optlen(sizeof(uint32_t)) /* queue option */ 340 + sizeof(uint32_t); /* length */ 341 } 342 343 /* More generalized version rte_vlan_insert() */ 344 static int 345 pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci) 346 { 347 struct rte_ether_hdr *nh, *oh; 348 struct rte_vlan_hdr *vh; 349 350 if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1) 351 return -EINVAL; 352 353 if (rte_pktmbuf_data_len(m) < sizeof(*oh)) 354 return -EINVAL; 355 356 oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 357 nh = (struct rte_ether_hdr *) 358 rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr)); 359 if (nh == NULL) 360 return -ENOSPC; 361 362 memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN); 363 nh->ether_type = rte_cpu_to_be_16(ether_type); 364 365 vh = (struct rte_vlan_hdr *) (nh + 1); 366 vh->vlan_tci = rte_cpu_to_be_16(tci); 367 368 return 0; 369 } 370 371 /* 372 * The mbufs created use the Pcapng standard enhanced packet block. 373 * 374 * 1 2 3 375 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 376 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 377 * 0 | Block Type = 0x00000006 | 378 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 379 * 4 | Block Total Length | 380 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 381 * 8 | Interface ID | 382 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 383 * 12 | Timestamp (High) | 384 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 385 * 16 | Timestamp (Low) | 386 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 387 * 20 | Captured Packet Length | 388 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 389 * 24 | Original Packet Length | 390 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 391 * 28 / / 392 * / Packet Data / 393 * / variable length, padded to 32 bits / 394 * / / 395 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 396 * | Option Code = 0x0002 | Option Length = 0x004 | 397 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 398 * | Flags (direction) | 399 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 400 * | Option Code = 0x0006 | Option Length = 0x002 | 401 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 402 * | Queue id | 403 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 404 * | Block Total Length | 405 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 406 */ 407 408 /* Make a copy of original mbuf with pcapng header and options */ 409 struct rte_mbuf * 410 rte_pcapng_copy(uint16_t port_id, uint32_t queue, 411 const struct rte_mbuf *md, 412 struct rte_mempool *mp, 413 uint32_t length, uint64_t cycles, 414 enum rte_pcapng_direction direction) 415 { 416 struct pcapng_enhance_packet_block *epb; 417 uint32_t orig_len, data_len, padding, flags; 418 struct pcapng_option *opt; 419 const uint16_t optlen = pcapng_optlen(sizeof(flags)) + pcapng_optlen(sizeof(queue)); 420 struct rte_mbuf *mc; 421 uint64_t ns; 422 423 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 424 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); 425 #endif 426 ns = pcapng_tsc_to_ns(cycles); 427 428 orig_len = rte_pktmbuf_pkt_len(md); 429 430 /* Take snapshot of the data */ 431 mc = rte_pktmbuf_copy(md, mp, 0, length); 432 if (unlikely(mc == NULL)) 433 return NULL; 434 435 /* Expand any offloaded VLAN information */ 436 if ((direction == RTE_PCAPNG_DIRECTION_IN && 437 (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) || 438 (direction == RTE_PCAPNG_DIRECTION_OUT && 439 (md->ol_flags & RTE_MBUF_F_TX_VLAN))) { 440 if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN, 441 md->vlan_tci) != 0) 442 goto fail; 443 } 444 445 if ((direction == RTE_PCAPNG_DIRECTION_IN && 446 (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) || 447 (direction == RTE_PCAPNG_DIRECTION_OUT && 448 (md->ol_flags & RTE_MBUF_F_TX_QINQ))) { 449 if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ, 450 md->vlan_tci_outer) != 0) 451 goto fail; 452 } 453 454 /* pad the packet to 32 bit boundary */ 455 data_len = rte_pktmbuf_data_len(mc); 456 padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len; 457 if (padding > 0) { 458 void *tail = rte_pktmbuf_append(mc, padding); 459 460 if (tail == NULL) 461 goto fail; 462 memset(tail, 0, padding); 463 } 464 465 /* reserve trailing options and block length */ 466 opt = (struct pcapng_option *) 467 rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); 468 if (unlikely(opt == NULL)) 469 goto fail; 470 471 switch (direction) { 472 case RTE_PCAPNG_DIRECTION_IN: 473 flags = PCAPNG_IFB_INBOUND; 474 break; 475 case RTE_PCAPNG_DIRECTION_OUT: 476 flags = PCAPNG_IFB_OUTBOUND; 477 break; 478 default: 479 flags = 0; 480 } 481 482 opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS, 483 &flags, sizeof(flags)); 484 485 opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE, 486 &queue, sizeof(queue)); 487 488 /* Note: END_OPT necessary here. Wireshark doesn't do it. */ 489 490 /* Add PCAPNG packet header */ 491 epb = (struct pcapng_enhance_packet_block *) 492 rte_pktmbuf_prepend(mc, sizeof(*epb)); 493 if (unlikely(epb == NULL)) 494 goto fail; 495 496 epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK; 497 epb->block_length = rte_pktmbuf_data_len(mc); 498 499 /* Interface index is filled in later during write */ 500 mc->port = port_id; 501 502 epb->timestamp_hi = ns >> 32; 503 epb->timestamp_lo = (uint32_t)ns; 504 epb->capture_length = data_len; 505 epb->original_length = orig_len; 506 507 /* set trailer of block length */ 508 *(uint32_t *)opt = epb->block_length; 509 510 return mc; 511 512 fail: 513 rte_pktmbuf_free(mc); 514 return NULL; 515 } 516 517 /* Count how many segments are in this array of mbufs */ 518 static unsigned int 519 mbuf_burst_segs(struct rte_mbuf *pkts[], unsigned int n) 520 { 521 unsigned int i, iovcnt; 522 523 for (iovcnt = 0, i = 0; i < n; i++) { 524 const struct rte_mbuf *m = pkts[i]; 525 526 __rte_mbuf_sanity_check(m, 1); 527 528 iovcnt += m->nb_segs; 529 } 530 return iovcnt; 531 } 532 533 /* Write pre-formatted packets to file. */ 534 ssize_t 535 rte_pcapng_write_packets(rte_pcapng_t *self, 536 struct rte_mbuf *pkts[], uint16_t nb_pkts) 537 { 538 int iovcnt = mbuf_burst_segs(pkts, nb_pkts); 539 struct iovec iov[iovcnt]; 540 unsigned int i, cnt; 541 ssize_t ret; 542 543 for (i = cnt = 0; i < nb_pkts; i++) { 544 struct rte_mbuf *m = pkts[i]; 545 struct pcapng_enhance_packet_block *epb; 546 547 /* sanity check that is really a pcapng mbuf */ 548 epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *); 549 if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK || 550 epb->block_length != rte_pktmbuf_data_len(m))) { 551 rte_errno = EINVAL; 552 return -1; 553 } 554 555 /* 556 * The DPDK port is recorded during pcapng_copy. 557 * Map that to PCAPNG interface in file. 558 */ 559 epb->interface_id = self->port_index[m->port]; 560 do { 561 iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *); 562 iov[cnt].iov_len = rte_pktmbuf_data_len(m); 563 ++cnt; 564 } while ((m = m->next)); 565 } 566 567 ret = writev(self->outfd, iov, iovcnt); 568 if (unlikely(ret < 0)) 569 rte_errno = errno; 570 return ret; 571 } 572 573 /* Create new pcapng writer handle */ 574 rte_pcapng_t * 575 rte_pcapng_fdopen(int fd, 576 const char *osname, const char *hardware, 577 const char *appname, const char *comment) 578 { 579 rte_pcapng_t *self; 580 581 self = malloc(sizeof(*self)); 582 if (!self) { 583 rte_errno = ENOMEM; 584 return NULL; 585 } 586 587 self->outfd = fd; 588 589 if (pcapng_section_block(self, osname, hardware, appname, comment) < 0) 590 goto fail; 591 592 if (pcapng_interfaces(self) < 0) 593 goto fail; 594 595 return self; 596 fail: 597 free(self); 598 return NULL; 599 } 600 601 void 602 rte_pcapng_close(rte_pcapng_t *self) 603 { 604 close(self->outfd); 605 free(self); 606 } 607