1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2019 Microsoft Corporation 3 */ 4 5 #include <errno.h> 6 #include <stdbool.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <time.h> 11 #include <unistd.h> 12 13 #ifndef RTE_EXEC_ENV_WINDOWS 14 #include <net/if.h> 15 #include <sys/uio.h> 16 #endif 17 18 #include <bus_driver.h> 19 #include <rte_common.h> 20 #include <rte_cycles.h> 21 #include <dev_driver.h> 22 #include <rte_errno.h> 23 #include <rte_ethdev.h> 24 #include <rte_ether.h> 25 #include <rte_mbuf.h> 26 #include <rte_os_shim.h> 27 #include <rte_pcapng.h> 28 #include <rte_reciprocal.h> 29 #include <rte_time.h> 30 31 #include "pcapng_proto.h" 32 33 /* conversion from DPDK speed to PCAPNG */ 34 #define PCAPNG_MBPS_SPEED 1000000ull 35 36 /* Format of the capture file handle */ 37 struct rte_pcapng { 38 int outfd; /* output file */ 39 40 unsigned int ports; /* number of interfaces added */ 41 42 /* DPDK port id to interface index in file */ 43 uint32_t port_index[RTE_MAX_ETHPORTS]; 44 }; 45 46 /* For converting TSC cycles to PCAPNG ns format */ 47 static struct pcapng_time { 48 uint64_t ns; 49 uint64_t cycles; 50 uint64_t tsc_hz; 51 struct rte_reciprocal_u64 tsc_hz_inverse; 52 } pcapng_time; 53 54 55 #ifdef RTE_EXEC_ENV_WINDOWS 56 /* 57 * Windows does not have writev() call. 58 * Emulate this by copying to a new buffer. 59 * The copy is necessary since pcapng needs to be thread-safe 60 * and do atomic write operations. 61 */ 62 63 #define IOV_MAX 128 64 struct iovec { 65 void *iov_base; 66 size_t iov_len; 67 }; 68 69 static ssize_t writev(int fd, const struct iovec *iov, int iovcnt) 70 { 71 size_t bytes = 0; 72 uint8_t *ptr; 73 void *tmp_buf; 74 ssize_t ret; 75 int i; 76 77 for (i = 0; i < iovcnt; i++) 78 bytes += iov[i].iov_len; 79 80 if (unlikely(bytes == 0)) 81 return 0; 82 83 tmp_buf = malloc(bytes); 84 if (unlikely(tmp_buf == NULL)) { 85 errno = ENOMEM; 86 return -1; 87 } 88 89 ptr = tmp_buf; 90 for (i = 0; i < iovcnt; i++) { 91 rte_memcpy(ptr, iov[i].iov_base, iov[i].iov_len); 92 ptr += iov[i].iov_len; 93 } 94 95 ret = write(fd, tmp_buf, bytes); 96 free(tmp_buf); 97 return ret; 98 } 99 100 #define IF_NAMESIZE 16 101 /* compatibility wrapper because name is optional */ 102 #define if_indextoname(ifindex, ifname) NULL 103 #endif 104 105 static inline void 106 pcapng_init(void) 107 { 108 struct timespec ts; 109 110 pcapng_time.cycles = rte_get_tsc_cycles(); 111 clock_gettime(CLOCK_REALTIME, &ts); 112 pcapng_time.cycles = (pcapng_time.cycles + rte_get_tsc_cycles()) / 2; 113 pcapng_time.ns = rte_timespec_to_ns(&ts); 114 115 pcapng_time.tsc_hz = rte_get_tsc_hz(); 116 pcapng_time.tsc_hz_inverse = rte_reciprocal_value_u64(pcapng_time.tsc_hz); 117 } 118 119 /* PCAPNG timestamps are in nanoseconds */ 120 static uint64_t pcapng_tsc_to_ns(uint64_t cycles) 121 { 122 uint64_t delta, secs; 123 124 if (!pcapng_time.tsc_hz) 125 pcapng_init(); 126 127 /* In essence the calculation is: 128 * delta = (cycles - pcapng_time.cycles) * NSEC_PRE_SEC / rte_get_tsc_hz() 129 * but this overflows within 4 to 8 seconds depending on TSC frequency. 130 * Instead, if delta >= pcapng_time.tsc_hz: 131 * Increase pcapng_time.ns and pcapng_time.cycles by the number of 132 * whole seconds in delta and reduce delta accordingly. 133 * delta will therefore always lie in the interval [0, pcapng_time.tsc_hz), 134 * which will not overflow when multiplied by NSEC_PER_SEC provided the 135 * TSC frequency < approx 18.4GHz. 136 * 137 * Currently all TSCs operate below 5GHz. 138 */ 139 delta = cycles - pcapng_time.cycles; 140 if (unlikely(delta >= pcapng_time.tsc_hz)) { 141 if (likely(delta < pcapng_time.tsc_hz * 2)) { 142 delta -= pcapng_time.tsc_hz; 143 pcapng_time.cycles += pcapng_time.tsc_hz; 144 pcapng_time.ns += NSEC_PER_SEC; 145 } else { 146 secs = rte_reciprocal_divide_u64(delta, &pcapng_time.tsc_hz_inverse); 147 delta -= secs * pcapng_time.tsc_hz; 148 pcapng_time.cycles += secs * pcapng_time.tsc_hz; 149 pcapng_time.ns += secs * NSEC_PER_SEC; 150 } 151 } 152 153 return pcapng_time.ns + rte_reciprocal_divide_u64(delta * NSEC_PER_SEC, 154 &pcapng_time.tsc_hz_inverse); 155 } 156 157 /* length of option including padding */ 158 static uint16_t pcapng_optlen(uint16_t len) 159 { 160 return RTE_ALIGN(sizeof(struct pcapng_option) + len, 161 sizeof(uint32_t)); 162 } 163 164 /* build TLV option and return location of next */ 165 static struct pcapng_option * 166 pcapng_add_option(struct pcapng_option *popt, uint16_t code, 167 const void *data, uint16_t len) 168 { 169 popt->code = code; 170 popt->length = len; 171 memcpy(popt->data, data, len); 172 173 return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len)); 174 } 175 176 /* 177 * Write required initial section header describing the capture 178 */ 179 static int 180 pcapng_section_block(rte_pcapng_t *self, 181 const char *os, const char *hw, 182 const char *app, const char *comment) 183 { 184 struct pcapng_section_header *hdr; 185 struct pcapng_option *opt; 186 void *buf; 187 uint32_t len; 188 ssize_t cc; 189 190 len = sizeof(*hdr); 191 if (hw) 192 len += pcapng_optlen(strlen(hw)); 193 if (os) 194 len += pcapng_optlen(strlen(os)); 195 if (app) 196 len += pcapng_optlen(strlen(app)); 197 if (comment) 198 len += pcapng_optlen(strlen(comment)); 199 200 /* reserve space for OPT_END */ 201 len += pcapng_optlen(0); 202 len += sizeof(uint32_t); 203 204 buf = calloc(1, len); 205 if (!buf) 206 return -1; 207 208 hdr = (struct pcapng_section_header *)buf; 209 *hdr = (struct pcapng_section_header) { 210 .block_type = PCAPNG_SECTION_BLOCK, 211 .block_length = len, 212 .byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC, 213 .major_version = PCAPNG_MAJOR_VERS, 214 .minor_version = PCAPNG_MINOR_VERS, 215 .section_length = UINT64_MAX, 216 }; 217 218 /* After the section header insert variable length options. */ 219 opt = (struct pcapng_option *)(hdr + 1); 220 if (comment) 221 opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, 222 comment, strlen(comment)); 223 if (hw) 224 opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE, 225 hw, strlen(hw)); 226 if (os) 227 opt = pcapng_add_option(opt, PCAPNG_SHB_OS, 228 os, strlen(os)); 229 if (app) 230 opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL, 231 app, strlen(app)); 232 233 /* The standard requires last option to be OPT_END */ 234 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); 235 236 /* clone block_length after option */ 237 memcpy(opt, &hdr->block_length, sizeof(uint32_t)); 238 239 cc = write(self->outfd, buf, len); 240 free(buf); 241 242 return cc; 243 } 244 245 /* Write an interface block for a DPDK port */ 246 int 247 rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, 248 const char *ifname, const char *ifdescr, 249 const char *filter) 250 { 251 struct pcapng_interface_block *hdr; 252 struct rte_eth_dev_info dev_info; 253 struct rte_ether_addr *ea, macaddr; 254 const struct rte_device *dev; 255 struct rte_eth_link link; 256 struct pcapng_option *opt; 257 const uint8_t tsresol = 9; /* nanosecond resolution */ 258 uint32_t len; 259 void *buf; 260 char ifname_buf[IF_NAMESIZE]; 261 char ifhw[256]; 262 uint64_t speed = 0; 263 264 if (rte_eth_dev_info_get(port, &dev_info) < 0) 265 return -1; 266 267 /* make something like an interface name */ 268 if (ifname == NULL) { 269 /* Use kernel name if available */ 270 ifname = if_indextoname(dev_info.if_index, ifname_buf); 271 if (ifname == NULL) { 272 snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port); 273 ifname = ifname_buf; 274 } 275 } 276 277 /* make a useful device hardware string */ 278 dev = dev_info.device; 279 if (dev) 280 snprintf(ifhw, sizeof(ifhw), 281 "%s-%s", dev->bus->name, dev->name); 282 283 /* DPDK reports in units of Mbps */ 284 if (rte_eth_link_get(port, &link) == 0 && 285 link.link_status == RTE_ETH_LINK_UP) 286 speed = link.link_speed * PCAPNG_MBPS_SPEED; 287 288 if (rte_eth_macaddr_get(port, &macaddr) < 0) 289 ea = NULL; 290 else 291 ea = &macaddr; 292 293 /* Compute length of interface block options */ 294 len = sizeof(*hdr); 295 296 len += pcapng_optlen(sizeof(tsresol)); /* timestamp */ 297 len += pcapng_optlen(strlen(ifname)); /* ifname */ 298 299 if (ifdescr) 300 len += pcapng_optlen(strlen(ifdescr)); 301 if (ea) 302 len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */ 303 if (speed != 0) 304 len += pcapng_optlen(sizeof(uint64_t)); 305 if (filter) 306 len += pcapng_optlen(strlen(filter) + 1); 307 if (dev) 308 len += pcapng_optlen(strlen(ifhw)); 309 310 len += pcapng_optlen(0); 311 len += sizeof(uint32_t); 312 313 buf = alloca(len); 314 if (!buf) 315 return -1; 316 317 hdr = (struct pcapng_interface_block *)buf; 318 *hdr = (struct pcapng_interface_block) { 319 .block_type = PCAPNG_INTERFACE_BLOCK, 320 .link_type = 1, /* DLT_EN10MB - Ethernet */ 321 .block_length = len, 322 }; 323 324 opt = (struct pcapng_option *)(hdr + 1); 325 opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL, 326 &tsresol, sizeof(tsresol)); 327 opt = pcapng_add_option(opt, PCAPNG_IFB_NAME, 328 ifname, strlen(ifname)); 329 if (ifdescr) 330 opt = pcapng_add_option(opt, PCAPNG_IFB_DESCRIPTION, 331 ifdescr, strlen(ifdescr)); 332 if (ea) 333 opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR, 334 ea, RTE_ETHER_ADDR_LEN); 335 if (speed != 0) 336 opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED, 337 &speed, sizeof(uint64_t)); 338 if (dev) 339 opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE, 340 ifhw, strlen(ifhw)); 341 if (filter) { 342 /* Encoding is that the first octet indicates string vs BPF */ 343 size_t len; 344 char *buf; 345 346 len = strlen(filter) + 1; 347 buf = alloca(len); 348 *buf = '\0'; 349 memcpy(buf + 1, filter, len); 350 351 opt = pcapng_add_option(opt, PCAPNG_IFB_FILTER, 352 buf, len); 353 } 354 355 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); 356 357 /* clone block_length after optionsa */ 358 memcpy(opt, &hdr->block_length, sizeof(uint32_t)); 359 360 /* remember the file index */ 361 self->port_index[port] = self->ports++; 362 363 return write(self->outfd, buf, len); 364 } 365 366 /* 367 * Write an Interface statistics block at the end of capture. 368 */ 369 ssize_t 370 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, 371 const char *comment, 372 uint64_t start_time, uint64_t end_time, 373 uint64_t ifrecv, uint64_t ifdrop) 374 { 375 struct pcapng_statistics *hdr; 376 struct pcapng_option *opt; 377 uint32_t optlen, len; 378 uint8_t *buf; 379 uint64_t ns; 380 381 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); 382 383 optlen = 0; 384 385 if (ifrecv != UINT64_MAX) 386 optlen += pcapng_optlen(sizeof(ifrecv)); 387 if (ifdrop != UINT64_MAX) 388 optlen += pcapng_optlen(sizeof(ifdrop)); 389 if (start_time != 0) 390 optlen += pcapng_optlen(sizeof(start_time)); 391 if (end_time != 0) 392 optlen += pcapng_optlen(sizeof(end_time)); 393 if (comment) 394 optlen += pcapng_optlen(strlen(comment)); 395 if (optlen != 0) 396 optlen += pcapng_optlen(0); 397 398 len = sizeof(*hdr) + optlen + sizeof(uint32_t); 399 buf = alloca(len); 400 if (buf == NULL) 401 return -1; 402 403 hdr = (struct pcapng_statistics *)buf; 404 opt = (struct pcapng_option *)(hdr + 1); 405 406 if (comment) 407 opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, 408 comment, strlen(comment)); 409 if (start_time != 0) 410 opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME, 411 &start_time, sizeof(start_time)); 412 if (end_time != 0) 413 opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME, 414 &end_time, sizeof(end_time)); 415 if (ifrecv != UINT64_MAX) 416 opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV, 417 &ifrecv, sizeof(ifrecv)); 418 if (ifdrop != UINT64_MAX) 419 opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP, 420 &ifdrop, sizeof(ifdrop)); 421 if (optlen != 0) 422 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); 423 424 hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK; 425 hdr->block_length = len; 426 hdr->interface_id = self->port_index[port_id]; 427 428 ns = pcapng_tsc_to_ns(rte_get_tsc_cycles()); 429 hdr->timestamp_hi = ns >> 32; 430 hdr->timestamp_lo = (uint32_t)ns; 431 432 /* clone block_length after option */ 433 memcpy(opt, &len, sizeof(uint32_t)); 434 435 return write(self->outfd, buf, len); 436 } 437 438 uint32_t 439 rte_pcapng_mbuf_size(uint32_t length) 440 { 441 /* The VLAN and EPB header must fit in the mbuf headroom. */ 442 RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) + 443 sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM); 444 445 /* The flags and queue information are added at the end. */ 446 return sizeof(struct rte_mbuf) 447 + RTE_ALIGN(length, sizeof(uint32_t)) 448 + pcapng_optlen(sizeof(uint32_t)) /* flag option */ 449 + pcapng_optlen(sizeof(uint32_t)) /* queue option */ 450 + sizeof(uint32_t); /* length */ 451 } 452 453 /* More generalized version rte_vlan_insert() */ 454 static int 455 pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci) 456 { 457 struct rte_ether_hdr *nh, *oh; 458 struct rte_vlan_hdr *vh; 459 460 if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1) 461 return -EINVAL; 462 463 if (rte_pktmbuf_data_len(m) < sizeof(*oh)) 464 return -EINVAL; 465 466 oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 467 nh = (struct rte_ether_hdr *) 468 rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr)); 469 if (nh == NULL) 470 return -ENOSPC; 471 472 memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN); 473 nh->ether_type = rte_cpu_to_be_16(ether_type); 474 475 vh = (struct rte_vlan_hdr *) (nh + 1); 476 vh->vlan_tci = rte_cpu_to_be_16(tci); 477 478 return 0; 479 } 480 481 /* 482 * The mbufs created use the Pcapng standard enhanced packet block. 483 * 484 * 1 2 3 485 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 486 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 487 * 0 | Block Type = 0x00000006 | 488 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 489 * 4 | Block Total Length | 490 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 491 * 8 | Interface ID | 492 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 493 * 12 | Timestamp (High) | 494 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 495 * 16 | Timestamp (Low) | 496 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 497 * 20 | Captured Packet Length | 498 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 499 * 24 | Original Packet Length | 500 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 501 * 28 / / 502 * / Packet Data / 503 * / variable length, padded to 32 bits / 504 * / / 505 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 506 * | Option Code = 0x0002 | Option Length = 0x004 | 507 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 508 * | Flags (direction) | 509 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 510 * | Option Code = 0x0006 | Option Length = 0x002 | 511 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 512 * | Queue id | 513 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 514 * | Block Total Length | 515 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 516 */ 517 518 /* Make a copy of original mbuf with pcapng header and options */ 519 struct rte_mbuf * 520 rte_pcapng_copy(uint16_t port_id, uint32_t queue, 521 const struct rte_mbuf *md, 522 struct rte_mempool *mp, 523 uint32_t length, uint64_t cycles, 524 enum rte_pcapng_direction direction, 525 const char *comment) 526 { 527 struct pcapng_enhance_packet_block *epb; 528 uint32_t orig_len, data_len, padding, flags; 529 struct pcapng_option *opt; 530 uint16_t optlen; 531 struct rte_mbuf *mc; 532 uint64_t ns; 533 bool rss_hash; 534 535 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 536 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); 537 #endif 538 ns = pcapng_tsc_to_ns(cycles); 539 540 orig_len = rte_pktmbuf_pkt_len(md); 541 542 /* Take snapshot of the data */ 543 mc = rte_pktmbuf_copy(md, mp, 0, length); 544 if (unlikely(mc == NULL)) 545 return NULL; 546 547 /* Expand any offloaded VLAN information */ 548 if ((direction == RTE_PCAPNG_DIRECTION_IN && 549 (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) || 550 (direction == RTE_PCAPNG_DIRECTION_OUT && 551 (md->ol_flags & RTE_MBUF_F_TX_VLAN))) { 552 if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN, 553 md->vlan_tci) != 0) 554 goto fail; 555 } 556 557 if ((direction == RTE_PCAPNG_DIRECTION_IN && 558 (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) || 559 (direction == RTE_PCAPNG_DIRECTION_OUT && 560 (md->ol_flags & RTE_MBUF_F_TX_QINQ))) { 561 if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ, 562 md->vlan_tci_outer) != 0) 563 goto fail; 564 } 565 566 /* record HASH on incoming packets */ 567 rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN && 568 (md->ol_flags & RTE_MBUF_F_RX_RSS_HASH)); 569 570 /* pad the packet to 32 bit boundary */ 571 data_len = rte_pktmbuf_data_len(mc); 572 padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len; 573 if (padding > 0) { 574 void *tail = rte_pktmbuf_append(mc, padding); 575 576 if (tail == NULL) 577 goto fail; 578 memset(tail, 0, padding); 579 } 580 581 optlen = pcapng_optlen(sizeof(flags)); 582 optlen += pcapng_optlen(sizeof(queue)); 583 if (rss_hash) 584 optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t)); 585 586 if (comment) 587 optlen += pcapng_optlen(strlen(comment)); 588 589 /* reserve trailing options and block length */ 590 opt = (struct pcapng_option *) 591 rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); 592 if (unlikely(opt == NULL)) 593 goto fail; 594 595 switch (direction) { 596 case RTE_PCAPNG_DIRECTION_IN: 597 flags = PCAPNG_IFB_INBOUND; 598 break; 599 case RTE_PCAPNG_DIRECTION_OUT: 600 flags = PCAPNG_IFB_OUTBOUND; 601 break; 602 default: 603 flags = 0; 604 } 605 606 opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS, 607 &flags, sizeof(flags)); 608 609 opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE, 610 &queue, sizeof(queue)); 611 612 if (rss_hash) { 613 uint8_t hash_opt[5]; 614 615 /* The algorithm could be something else if 616 * using rte_flow_action_rss; but the current API does not 617 * have a way for ethdev to report this on a per-packet basis. 618 */ 619 hash_opt[0] = PCAPNG_HASH_TOEPLITZ; 620 621 memcpy(&hash_opt[1], &md->hash.rss, sizeof(uint32_t)); 622 opt = pcapng_add_option(opt, PCAPNG_EPB_HASH, 623 &hash_opt, sizeof(hash_opt)); 624 } 625 626 if (comment) 627 opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, comment, 628 strlen(comment)); 629 630 /* Note: END_OPT necessary here. Wireshark doesn't do it. */ 631 632 /* Add PCAPNG packet header */ 633 epb = (struct pcapng_enhance_packet_block *) 634 rte_pktmbuf_prepend(mc, sizeof(*epb)); 635 if (unlikely(epb == NULL)) 636 goto fail; 637 638 epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK; 639 epb->block_length = rte_pktmbuf_data_len(mc); 640 641 /* Interface index is filled in later during write */ 642 mc->port = port_id; 643 644 epb->timestamp_hi = ns >> 32; 645 epb->timestamp_lo = (uint32_t)ns; 646 epb->capture_length = data_len; 647 epb->original_length = orig_len; 648 649 /* set trailer of block length */ 650 *(uint32_t *)opt = epb->block_length; 651 652 return mc; 653 654 fail: 655 rte_pktmbuf_free(mc); 656 return NULL; 657 } 658 659 /* Write pre-formatted packets to file. */ 660 ssize_t 661 rte_pcapng_write_packets(rte_pcapng_t *self, 662 struct rte_mbuf *pkts[], uint16_t nb_pkts) 663 { 664 struct iovec iov[IOV_MAX]; 665 unsigned int i, cnt = 0; 666 ssize_t ret, total = 0; 667 668 for (i = 0; i < nb_pkts; i++) { 669 struct rte_mbuf *m = pkts[i]; 670 struct pcapng_enhance_packet_block *epb; 671 672 /* sanity check that is really a pcapng mbuf */ 673 epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *); 674 if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK || 675 epb->block_length != rte_pktmbuf_data_len(m))) { 676 rte_errno = EINVAL; 677 return -1; 678 } 679 680 /* check that this interface was added. */ 681 epb->interface_id = self->port_index[m->port]; 682 if (unlikely(epb->interface_id > RTE_MAX_ETHPORTS)) { 683 rte_errno = EINVAL; 684 return -1; 685 } 686 687 /* 688 * Handle case of highly fragmented and large burst size 689 * Note: this assumes that max segments per mbuf < IOV_MAX 690 */ 691 if (unlikely(cnt + m->nb_segs >= IOV_MAX)) { 692 ret = writev(self->outfd, iov, cnt); 693 if (unlikely(ret < 0)) { 694 rte_errno = errno; 695 return -1; 696 } 697 total += ret; 698 cnt = 0; 699 } 700 701 /* 702 * The DPDK port is recorded during pcapng_copy. 703 * Map that to PCAPNG interface in file. 704 */ 705 do { 706 iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *); 707 iov[cnt].iov_len = rte_pktmbuf_data_len(m); 708 ++cnt; 709 } while ((m = m->next)); 710 } 711 712 ret = writev(self->outfd, iov, cnt); 713 if (unlikely(ret < 0)) { 714 rte_errno = errno; 715 return -1; 716 } 717 return total + ret; 718 } 719 720 /* Create new pcapng writer handle */ 721 rte_pcapng_t * 722 rte_pcapng_fdopen(int fd, 723 const char *osname, const char *hardware, 724 const char *appname, const char *comment) 725 { 726 unsigned int i; 727 rte_pcapng_t *self; 728 729 self = malloc(sizeof(*self)); 730 if (!self) { 731 rte_errno = ENOMEM; 732 return NULL; 733 } 734 735 self->outfd = fd; 736 self->ports = 0; 737 for (i = 0; i < RTE_MAX_ETHPORTS; i++) 738 self->port_index[i] = UINT32_MAX; 739 740 if (pcapng_section_block(self, osname, hardware, appname, comment) < 0) 741 goto fail; 742 743 return self; 744 fail: 745 free(self); 746 return NULL; 747 } 748 749 void 750 rte_pcapng_close(rte_pcapng_t *self) 751 { 752 close(self->outfd); 753 free(self); 754 } 755