1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016-2017 Intel Corporation 3 */ 4 5 #include <rte_atomic.h> 6 #include <rte_branch_prediction.h> 7 #include <rte_byteorder.h> 8 #include <rte_common.h> 9 #include <rte_mbuf.h> 10 #include <ethdev_driver.h> 11 #include <ethdev_vdev.h> 12 #include <rte_malloc.h> 13 #include <rte_bus_vdev.h> 14 #include <rte_kvargs.h> 15 #include <rte_net.h> 16 #include <rte_debug.h> 17 #include <rte_ip.h> 18 #include <rte_string_fns.h> 19 #include <rte_ethdev.h> 20 #include <rte_errno.h> 21 #include <rte_cycles.h> 22 23 #include <sys/types.h> 24 #include <sys/stat.h> 25 #include <sys/socket.h> 26 #include <sys/ioctl.h> 27 #include <sys/utsname.h> 28 #include <sys/mman.h> 29 #include <errno.h> 30 #include <signal.h> 31 #include <stdbool.h> 32 #include <stdint.h> 33 #include <sys/uio.h> 34 #include <unistd.h> 35 #include <arpa/inet.h> 36 #include <net/if.h> 37 #include <linux/if_tun.h> 38 #include <linux/if_ether.h> 39 #include <fcntl.h> 40 #include <ctype.h> 41 42 #include <tap_rss.h> 43 #include <rte_eth_tap.h> 44 #include <tap_flow.h> 45 #include <tap_netlink.h> 46 #include <tap_tcmsgs.h> 47 48 /* Linux based path to the TUN device */ 49 #define TUN_TAP_DEV_PATH "/dev/net/tun" 50 #define DEFAULT_TAP_NAME "dtap" 51 #define DEFAULT_TUN_NAME "dtun" 52 53 #define ETH_TAP_IFACE_ARG "iface" 54 #define ETH_TAP_REMOTE_ARG "remote" 55 #define ETH_TAP_MAC_ARG "mac" 56 #define ETH_TAP_MAC_FIXED "fixed" 57 58 #define ETH_TAP_USR_MAC_FMT "xx:xx:xx:xx:xx:xx" 59 #define ETH_TAP_CMP_MAC_FMT "0123456789ABCDEFabcdef" 60 #define ETH_TAP_MAC_ARG_FMT ETH_TAP_MAC_FIXED "|" ETH_TAP_USR_MAC_FMT 61 62 #define TAP_GSO_MBUFS_PER_CORE 128 63 #define TAP_GSO_MBUF_SEG_SIZE 128 64 #define TAP_GSO_MBUF_CACHE_SIZE 4 65 #define TAP_GSO_MBUFS_NUM \ 66 (TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE) 67 68 /* IPC key for queue fds sync */ 69 #define TAP_MP_KEY "tap_mp_sync_queues" 70 71 #define TAP_IOV_DEFAULT_MAX 1024 72 73 static int tap_devices_count; 74 75 static const char *tuntap_types[ETH_TUNTAP_TYPE_MAX] = { 76 "UNKNOWN", "TUN", "TAP" 77 }; 78 79 static const char *valid_arguments[] = { 80 ETH_TAP_IFACE_ARG, 81 ETH_TAP_REMOTE_ARG, 82 ETH_TAP_MAC_ARG, 83 NULL 84 }; 85 86 static volatile uint32_t tap_trigger; /* Rx trigger */ 87 88 static struct rte_eth_link pmd_link = { 89 .link_speed = ETH_SPEED_NUM_10G, 90 .link_duplex = ETH_LINK_FULL_DUPLEX, 91 .link_status = ETH_LINK_DOWN, 92 .link_autoneg = ETH_LINK_FIXED, 93 }; 94 95 static void 96 tap_trigger_cb(int sig __rte_unused) 97 { 98 /* Valid trigger values are nonzero */ 99 tap_trigger = (tap_trigger + 1) | 0x80000000; 100 } 101 102 /* Specifies on what netdevices the ioctl should be applied */ 103 enum ioctl_mode { 104 LOCAL_AND_REMOTE, 105 LOCAL_ONLY, 106 REMOTE_ONLY, 107 }; 108 109 /* Message header to synchronize queues via IPC */ 110 struct ipc_queues { 111 char port_name[RTE_DEV_NAME_MAX_LEN]; 112 int rxq_count; 113 int txq_count; 114 /* 115 * The file descriptors are in the dedicated part 116 * of the Unix message to be translated by the kernel. 117 */ 118 }; 119 120 static int tap_intr_handle_set(struct rte_eth_dev *dev, int set); 121 122 /** 123 * Tun/Tap allocation routine 124 * 125 * @param[in] pmd 126 * Pointer to private structure. 
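 *   (pmd->type selects IFF_TAP vs. IFF_TUN, and pmd->name may hold a
 *   "%d" wildcard such as dtun%d; the name the kernel actually picks is
 *   written back into pmd->name.)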
127 * 128 * @param[in] is_keepalive 129 * Keepalive flag 130 * 131 * @return 132 * -1 on failure, fd on success 133 */ 134 static int 135 tun_alloc(struct pmd_internals *pmd, int is_keepalive) 136 { 137 struct ifreq ifr; 138 #ifdef IFF_MULTI_QUEUE 139 unsigned int features; 140 #endif 141 int fd, signo, flags; 142 143 memset(&ifr, 0, sizeof(struct ifreq)); 144 145 /* 146 * Do not set IFF_NO_PI as packet information header will be needed 147 * to check if a received packet has been truncated. 148 */ 149 ifr.ifr_flags = (pmd->type == ETH_TUNTAP_TYPE_TAP) ? 150 IFF_TAP : IFF_TUN | IFF_POINTOPOINT; 151 strlcpy(ifr.ifr_name, pmd->name, IFNAMSIZ); 152 153 fd = open(TUN_TAP_DEV_PATH, O_RDWR); 154 if (fd < 0) { 155 TAP_LOG(ERR, "Unable to open %s interface", TUN_TAP_DEV_PATH); 156 goto error; 157 } 158 159 #ifdef IFF_MULTI_QUEUE 160 /* Grab the TUN features to verify we can work multi-queue */ 161 if (ioctl(fd, TUNGETFEATURES, &features) < 0) { 162 TAP_LOG(ERR, "unable to get TUN/TAP features"); 163 goto error; 164 } 165 TAP_LOG(DEBUG, "%s Features %08x", TUN_TAP_DEV_PATH, features); 166 167 if (features & IFF_MULTI_QUEUE) { 168 TAP_LOG(DEBUG, " Multi-queue support for %d queues", 169 RTE_PMD_TAP_MAX_QUEUES); 170 ifr.ifr_flags |= IFF_MULTI_QUEUE; 171 } else 172 #endif 173 { 174 ifr.ifr_flags |= IFF_ONE_QUEUE; 175 TAP_LOG(DEBUG, " Single queue only support"); 176 } 177 178 /* Set the TUN/TAP configuration and set the name if needed */ 179 if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) { 180 TAP_LOG(WARNING, "Unable to set TUNSETIFF for %s: %s", 181 ifr.ifr_name, strerror(errno)); 182 goto error; 183 } 184 185 /* 186 * Name passed to kernel might be wildcard like dtun%d 187 * and need to find the resulting device. 188 */ 189 TAP_LOG(DEBUG, "Device name is '%s'", ifr.ifr_name); 190 strlcpy(pmd->name, ifr.ifr_name, RTE_ETH_NAME_MAX_LEN); 191 192 if (is_keepalive) { 193 /* 194 * Detach the TUN/TAP keep-alive queue 195 * to avoid traffic through it 196 */ 197 ifr.ifr_flags = IFF_DETACH_QUEUE; 198 if (ioctl(fd, TUNSETQUEUE, (void *)&ifr) < 0) { 199 TAP_LOG(WARNING, 200 "Unable to detach keep-alive queue for %s: %s", 201 ifr.ifr_name, strerror(errno)); 202 goto error; 203 } 204 } 205 206 flags = fcntl(fd, F_GETFL); 207 if (flags == -1) { 208 TAP_LOG(WARNING, 209 "Unable to get %s current flags\n", 210 ifr.ifr_name); 211 goto error; 212 } 213 214 /* Always set the file descriptor to non-blocking */ 215 flags |= O_NONBLOCK; 216 if (fcntl(fd, F_SETFL, flags) < 0) { 217 TAP_LOG(WARNING, 218 "Unable to set %s to nonblocking: %s", 219 ifr.ifr_name, strerror(errno)); 220 goto error; 221 } 222 223 /* Find a free realtime signal */ 224 for (signo = SIGRTMIN + 1; signo < SIGRTMAX; signo++) { 225 struct sigaction sa; 226 227 if (sigaction(signo, NULL, &sa) == -1) { 228 TAP_LOG(WARNING, 229 "Unable to get current rt-signal %d handler", 230 signo); 231 goto error; 232 } 233 234 /* Already have the handler we want on this signal */ 235 if (sa.sa_handler == tap_trigger_cb) 236 break; 237 238 /* Is handler in use by application */ 239 if (sa.sa_handler != SIG_DFL) { 240 TAP_LOG(DEBUG, 241 "Skipping used rt-signal %d", signo); 242 continue; 243 } 244 245 sa = (struct sigaction) { 246 .sa_flags = SA_RESTART, 247 .sa_handler = tap_trigger_cb, 248 }; 249 250 if (sigaction(signo, &sa, NULL) == -1) { 251 TAP_LOG(WARNING, 252 "Unable to set rt-signal %d handler\n", signo); 253 goto error; 254 } 255 256 /* Found a good signal to use */ 257 TAP_LOG(DEBUG, 258 "Using rt-signal %d", signo); 259 break; 260 } 261 262 if (signo == 
SIGRTMAX) { 263 TAP_LOG(WARNING, "All rt-signals are in use\n"); 264 265 /* Disable trigger globally in case of error */ 266 tap_trigger = 0; 267 TAP_LOG(NOTICE, "No Rx trigger signal available\n"); 268 } else { 269 /* Enable signal on file descriptor */ 270 if (fcntl(fd, F_SETSIG, signo) < 0) { 271 TAP_LOG(WARNING, "Unable to set signo %d for fd %d: %s", 272 signo, fd, strerror(errno)); 273 goto error; 274 } 275 if (fcntl(fd, F_SETFL, flags | O_ASYNC) < 0) { 276 TAP_LOG(WARNING, "Unable to set fcntl flags: %s", 277 strerror(errno)); 278 goto error; 279 } 280 281 if (fcntl(fd, F_SETOWN, getpid()) < 0) { 282 TAP_LOG(WARNING, "Unable to set fcntl owner: %s", 283 strerror(errno)); 284 goto error; 285 } 286 } 287 return fd; 288 289 error: 290 if (fd >= 0) 291 close(fd); 292 return -1; 293 } 294 295 static void 296 tap_verify_csum(struct rte_mbuf *mbuf) 297 { 298 uint32_t l2 = mbuf->packet_type & RTE_PTYPE_L2_MASK; 299 uint32_t l3 = mbuf->packet_type & RTE_PTYPE_L3_MASK; 300 uint32_t l4 = mbuf->packet_type & RTE_PTYPE_L4_MASK; 301 unsigned int l2_len = sizeof(struct rte_ether_hdr); 302 unsigned int l3_len; 303 uint16_t cksum = 0; 304 void *l3_hdr; 305 void *l4_hdr; 306 struct rte_udp_hdr *udp_hdr; 307 308 if (l2 == RTE_PTYPE_L2_ETHER_VLAN) 309 l2_len += 4; 310 else if (l2 == RTE_PTYPE_L2_ETHER_QINQ) 311 l2_len += 8; 312 /* Don't verify checksum for packets with discontinuous L2 header */ 313 if (unlikely(l2_len + sizeof(struct rte_ipv4_hdr) > 314 rte_pktmbuf_data_len(mbuf))) 315 return; 316 l3_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len); 317 if (l3 == RTE_PTYPE_L3_IPV4 || l3 == RTE_PTYPE_L3_IPV4_EXT) { 318 struct rte_ipv4_hdr *iph = l3_hdr; 319 320 l3_len = rte_ipv4_hdr_len(iph); 321 if (unlikely(l2_len + l3_len > rte_pktmbuf_data_len(mbuf))) 322 return; 323 /* check that the total length reported by header is not 324 * greater than the total received size 325 */ 326 if (l2_len + rte_be_to_cpu_16(iph->total_length) > 327 rte_pktmbuf_data_len(mbuf)) 328 return; 329 330 cksum = ~rte_raw_cksum(iph, l3_len); 331 mbuf->ol_flags |= cksum ? 332 PKT_RX_IP_CKSUM_BAD : 333 PKT_RX_IP_CKSUM_GOOD; 334 } else if (l3 == RTE_PTYPE_L3_IPV6) { 335 struct rte_ipv6_hdr *iph = l3_hdr; 336 337 l3_len = sizeof(struct rte_ipv6_hdr); 338 /* check that the total length reported by header is not 339 * greater than the total received size 340 */ 341 if (l2_len + l3_len + rte_be_to_cpu_16(iph->payload_len) > 342 rte_pktmbuf_data_len(mbuf)) 343 return; 344 } else { 345 /* IPv6 extensions are not supported */ 346 return; 347 } 348 if (l4 == RTE_PTYPE_L4_UDP || l4 == RTE_PTYPE_L4_TCP) { 349 l4_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len + l3_len); 350 /* Don't verify checksum for multi-segment packets. */ 351 if (mbuf->nb_segs > 1) 352 return; 353 if (l3 == RTE_PTYPE_L3_IPV4) { 354 if (l4 == RTE_PTYPE_L4_UDP) { 355 udp_hdr = (struct rte_udp_hdr *)l4_hdr; 356 if (udp_hdr->dgram_cksum == 0) { 357 /* 358 * For IPv4, a zero UDP checksum 359 * indicates that the sender did not 360 * generate one [RFC 768]. 361 */ 362 mbuf->ol_flags |= PKT_RX_L4_CKSUM_NONE; 363 return; 364 } 365 } 366 cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); 367 } else if (l3 == RTE_PTYPE_L3_IPV6) { 368 cksum = ~rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr); 369 } 370 mbuf->ol_flags |= cksum ? 371 PKT_RX_L4_CKSUM_BAD : 372 PKT_RX_L4_CKSUM_GOOD; 373 } 374 } 375 376 static uint64_t 377 tap_rx_offload_get_port_capa(void) 378 { 379 /* 380 * No specific port Rx offload capabilities. 
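	 * All Rx offloads are exposed per queue instead; tap_dev_info()
	 * reports rx_offload_capa as this (empty) set OR-ed with
	 * tap_rx_offload_get_queue_capa().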
381 */ 382 return 0; 383 } 384 385 static uint64_t 386 tap_rx_offload_get_queue_capa(void) 387 { 388 return DEV_RX_OFFLOAD_SCATTER | 389 DEV_RX_OFFLOAD_IPV4_CKSUM | 390 DEV_RX_OFFLOAD_UDP_CKSUM | 391 DEV_RX_OFFLOAD_TCP_CKSUM; 392 } 393 394 static void 395 tap_rxq_pool_free(struct rte_mbuf *pool) 396 { 397 struct rte_mbuf *mbuf = pool; 398 uint16_t nb_segs = 1; 399 400 if (mbuf == NULL) 401 return; 402 403 while (mbuf->next) { 404 mbuf = mbuf->next; 405 nb_segs++; 406 } 407 pool->nb_segs = nb_segs; 408 rte_pktmbuf_free(pool); 409 } 410 411 /* Callback to handle the rx burst of packets to the correct interface and 412 * file descriptor(s) in a multi-queue setup. 413 */ 414 static uint16_t 415 pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 416 { 417 struct rx_queue *rxq = queue; 418 struct pmd_process_private *process_private; 419 uint16_t num_rx; 420 unsigned long num_rx_bytes = 0; 421 uint32_t trigger = tap_trigger; 422 423 if (trigger == rxq->trigger_seen) 424 return 0; 425 426 process_private = rte_eth_devices[rxq->in_port].process_private; 427 for (num_rx = 0; num_rx < nb_pkts; ) { 428 struct rte_mbuf *mbuf = rxq->pool; 429 struct rte_mbuf *seg = NULL; 430 struct rte_mbuf *new_tail = NULL; 431 uint16_t data_off = rte_pktmbuf_headroom(mbuf); 432 int len; 433 434 len = readv(process_private->rxq_fds[rxq->queue_id], 435 *rxq->iovecs, 436 1 + (rxq->rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ? 437 rxq->nb_rx_desc : 1)); 438 if (len < (int)sizeof(struct tun_pi)) 439 break; 440 441 /* Packet couldn't fit in the provided mbuf */ 442 if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) { 443 rxq->stats.ierrors++; 444 continue; 445 } 446 447 len -= sizeof(struct tun_pi); 448 449 mbuf->pkt_len = len; 450 mbuf->port = rxq->in_port; 451 while (1) { 452 struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp); 453 454 if (unlikely(!buf)) { 455 rxq->stats.rx_nombuf++; 456 /* No new buf has been allocated: do nothing */ 457 if (!new_tail || !seg) 458 goto end; 459 460 seg->next = NULL; 461 tap_rxq_pool_free(mbuf); 462 463 goto end; 464 } 465 seg = seg ? seg->next : mbuf; 466 if (rxq->pool == mbuf) 467 rxq->pool = buf; 468 if (new_tail) 469 new_tail->next = buf; 470 new_tail = buf; 471 new_tail->next = seg->next; 472 473 /* iovecs[0] is reserved for packet info (pi) */ 474 (*rxq->iovecs)[mbuf->nb_segs].iov_len = 475 buf->buf_len - data_off; 476 (*rxq->iovecs)[mbuf->nb_segs].iov_base = 477 (char *)buf->buf_addr + data_off; 478 479 seg->data_len = RTE_MIN(seg->buf_len - data_off, len); 480 seg->data_off = data_off; 481 482 len -= seg->data_len; 483 if (len <= 0) 484 break; 485 mbuf->nb_segs++; 486 /* First segment has headroom, not the others */ 487 data_off = 0; 488 } 489 seg->next = NULL; 490 mbuf->packet_type = rte_net_get_ptype(mbuf, NULL, 491 RTE_PTYPE_ALL_MASK); 492 if (rxq->rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM) 493 tap_verify_csum(mbuf); 494 495 /* account for the receive frame */ 496 bufs[num_rx++] = mbuf; 497 num_rx_bytes += mbuf->pkt_len; 498 } 499 end: 500 rxq->stats.ipackets += num_rx; 501 rxq->stats.ibytes += num_rx_bytes; 502 503 if (trigger && num_rx < nb_pkts) 504 rxq->trigger_seen = trigger; 505 506 return num_rx; 507 } 508 509 static uint64_t 510 tap_tx_offload_get_port_capa(void) 511 { 512 /* 513 * No specific port Tx offload capabilities. 
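	 * As on the Rx side, every supported Tx offload is reported per
	 * queue; see tap_tx_offload_get_queue_capa() below.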
	 */
	return 0;
}

static uint64_t
tap_tx_offload_get_queue_capa(void)
{
	return DEV_TX_OFFLOAD_MULTI_SEGS |
	       DEV_TX_OFFLOAD_IPV4_CKSUM |
	       DEV_TX_OFFLOAD_UDP_CKSUM |
	       DEV_TX_OFFLOAD_TCP_CKSUM |
	       DEV_TX_OFFLOAD_TCP_TSO;
}

/* Finalize l4 checksum calculation */
static void
tap_tx_l4_cksum(uint16_t *l4_cksum, uint16_t l4_phdr_cksum,
		uint32_t l4_raw_cksum)
{
	if (l4_cksum) {
		uint32_t cksum;

		cksum = __rte_raw_cksum_reduce(l4_raw_cksum);
		cksum += l4_phdr_cksum;

		cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
		cksum = (~cksum) & 0xffff;
		if (cksum == 0)
			cksum = 0xffff;
		*l4_cksum = cksum;
	}
}

/* Accumulate L4 raw checksums */
static void
tap_tx_l4_add_rcksum(char *l4_data, unsigned int l4_len, uint16_t *l4_cksum,
		     uint32_t *l4_raw_cksum)
{
	if (l4_cksum == NULL)
		return;

	*l4_raw_cksum = __rte_raw_cksum(l4_data, l4_len, *l4_raw_cksum);
}

/* L3 and L4 pseudo headers checksum offloads */
static void
tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len,
		unsigned int l3_len, unsigned int l4_len, uint16_t **l4_cksum,
		uint16_t *l4_phdr_cksum, uint32_t *l4_raw_cksum)
{
	void *l3_hdr = packet + l2_len;

	if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4)) {
		struct rte_ipv4_hdr *iph = l3_hdr;
		uint16_t cksum;

		iph->hdr_checksum = 0;
		cksum = rte_raw_cksum(iph, l3_len);
		iph->hdr_checksum = (cksum == 0xffff) ? cksum : ~cksum;
	}
	if (ol_flags & PKT_TX_L4_MASK) {
		void *l4_hdr;

		l4_hdr = packet + l2_len + l3_len;
		if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
			*l4_cksum = &((struct rte_udp_hdr *)l4_hdr)->dgram_cksum;
		else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM)
			*l4_cksum = &((struct rte_tcp_hdr *)l4_hdr)->cksum;
		else
			return;
		**l4_cksum = 0;
		if (ol_flags & PKT_TX_IPV4)
			*l4_phdr_cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
		else
			*l4_phdr_cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
		*l4_raw_cksum = __rte_raw_cksum(l4_hdr, l4_len, 0);
	}
}

static inline int
tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
			struct rte_mbuf **pmbufs,
			uint16_t *num_packets, unsigned long *num_tx_bytes)
{
	int i;
	uint16_t l234_hlen;
	struct pmd_process_private *process_private;

	process_private = rte_eth_devices[txq->out_port].process_private;

	for (i = 0; i < num_mbufs; i++) {
		struct rte_mbuf *mbuf = pmbufs[i];
		struct iovec iovecs[mbuf->nb_segs + 2];
		struct tun_pi pi = { .flags = 0, .proto = 0x00 };
		struct rte_mbuf *seg = mbuf;
		char m_copy[mbuf->data_len];
		int proto;
		int n;
		int j;
		int k; /* current index in iovecs for copying segments */
		uint16_t seg_len; /* length of first segment */
		uint16_t nb_segs;
		uint16_t *l4_cksum; /* l4 checksum (pseudo header + payload) */
		uint32_t l4_raw_cksum = 0; /* TCP/UDP payload raw checksum */
		uint16_t l4_phdr_cksum = 0; /* TCP/UDP pseudo header checksum */
		uint16_t is_cksum = 0; /* in case cksum should be offloaded */

		l4_cksum = NULL;
		if (txq->type == ETH_TUNTAP_TYPE_TUN) {
			/*
			 * TUN and TAP are created with IFF_NO_PI disabled.
			 * For the TUN PMD this is mandatory, as the kernel
			 * tun.c uses these fields to determine whether the
			 * packet is IP or non-IP.
			 *
			 * The logic fetches the first byte of data from the
			 * mbuf and checks whether it is v4 or v6.
			 * If the first byte is 4 or 6, the protocol field is
			 * updated accordingly.
			 */
			char *buff_data = rte_pktmbuf_mtod(seg, void *);
			proto = (*buff_data & 0xf0);
			pi.proto = (proto == 0x40) ?
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) :
				((proto == 0x60) ?
					rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) :
					0x00);
		}

		k = 0;
		iovecs[k].iov_base = &pi;
		iovecs[k].iov_len = sizeof(pi);
		k++;

		nb_segs = mbuf->nb_segs;
		if (txq->csum &&
		    ((mbuf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4) ||
		     (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM ||
		     (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM))) {
			is_cksum = 1;

			/* Support only packets with at least layer 4
			 * header included in the first segment
			 */
			seg_len = rte_pktmbuf_data_len(mbuf);
			l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
			if (seg_len < l234_hlen)
				return -1;

			/* To change checksums, work on a copy of the l2, l3
			 * headers + l4 pseudo header
			 */
			rte_memcpy(m_copy, rte_pktmbuf_mtod(mbuf, void *),
					l234_hlen);
			tap_tx_l3_cksum(m_copy, mbuf->ol_flags,
					mbuf->l2_len, mbuf->l3_len, mbuf->l4_len,
					&l4_cksum, &l4_phdr_cksum,
					&l4_raw_cksum);
			iovecs[k].iov_base = m_copy;
			iovecs[k].iov_len = l234_hlen;
			k++;

			/* Update next iovecs[] beyond l2, l3, l4 headers */
			if (seg_len > l234_hlen) {
				iovecs[k].iov_len = seg_len - l234_hlen;
				iovecs[k].iov_base =
					rte_pktmbuf_mtod(seg, char *) +
					l234_hlen;
				tap_tx_l4_add_rcksum(iovecs[k].iov_base,
					iovecs[k].iov_len, l4_cksum,
					&l4_raw_cksum);
				k++;
				nb_segs++;
			}
			seg = seg->next;
		}

		for (j = k; j <= nb_segs; j++) {
			iovecs[j].iov_len = rte_pktmbuf_data_len(seg);
			iovecs[j].iov_base = rte_pktmbuf_mtod(seg, void *);
			if (is_cksum)
				tap_tx_l4_add_rcksum(iovecs[j].iov_base,
					iovecs[j].iov_len, l4_cksum,
					&l4_raw_cksum);
			seg = seg->next;
		}

		if (is_cksum)
			tap_tx_l4_cksum(l4_cksum, l4_phdr_cksum, l4_raw_cksum);

		/* copy the tx frame data */
		n = writev(process_private->txq_fds[txq->queue_id], iovecs, j);
		if (n <= 0)
			return -1;

		(*num_packets)++;
		(*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
	}
	return 0;
}

/* Callback to handle sending packets from the tap interface
 */
static uint16_t
pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct tx_queue *txq = queue;
	uint16_t num_tx = 0;
	uint16_t num_packets = 0;
	unsigned long num_tx_bytes = 0;
	uint32_t max_size;
	int i;

	if (unlikely(nb_pkts == 0))
		return 0;

	struct rte_mbuf *gso_mbufs[MAX_GSO_MBUFS];
	max_size = *txq->mtu + (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + 4);
	for (i = 0; i < nb_pkts; i++) {
		struct rte_mbuf *mbuf_in = bufs[num_tx];
		struct rte_mbuf **mbuf;
		uint16_t num_mbufs = 0;
		uint16_t tso_segsz = 0;
		int ret;
		int num_tso_mbufs;
		uint16_t hdrs_len;
		uint64_t tso;

		tso = mbuf_in->ol_flags & PKT_TX_TCP_SEG;
		if (tso) {
			struct rte_gso_ctx *gso_ctx = &txq->gso_ctx;

			/* TCP segmentation implies TCP checksum offload */
			mbuf_in->ol_flags |= PKT_TX_TCP_CKSUM;

			/* gso size is calculated without RTE_ETHER_CRC_LEN */
			hdrs_len = mbuf_in->l2_len + mbuf_in->l3_len +
					mbuf_in->l4_len;
			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
			if (unlikely(tso_segsz == hdrs_len) ||
					tso_segsz > *txq->mtu) {
				txq->stats.errs++;
				break;
			}
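			/*
			 * Hand the oversized frame to the GSO library: the
			 * gso_size programmed below includes the L2/L3/L4
			 * headers (hdrs_len), and rte_gso_segment() fills
			 * gso_mbufs[] with the resulting segments, returning
			 * their count, 0 if no segmentation was needed, or a
			 * negative value on error.
			 */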
gso_ctx->gso_size = tso_segsz; 758 /* 'mbuf_in' packet to segment */ 759 num_tso_mbufs = rte_gso_segment(mbuf_in, 760 gso_ctx, /* gso control block */ 761 (struct rte_mbuf **)&gso_mbufs, /* out mbufs */ 762 RTE_DIM(gso_mbufs)); /* max tso mbufs */ 763 764 /* ret contains the number of new created mbufs */ 765 if (num_tso_mbufs < 0) 766 break; 767 768 if (num_tso_mbufs >= 1) { 769 mbuf = gso_mbufs; 770 num_mbufs = num_tso_mbufs; 771 } else { 772 /* 0 means it can be transmitted directly 773 * without gso. 774 */ 775 mbuf = &mbuf_in; 776 num_mbufs = 1; 777 } 778 } else { 779 /* stats.errs will be incremented */ 780 if (rte_pktmbuf_pkt_len(mbuf_in) > max_size) 781 break; 782 783 /* ret 0 indicates no new mbufs were created */ 784 num_tso_mbufs = 0; 785 mbuf = &mbuf_in; 786 num_mbufs = 1; 787 } 788 789 ret = tap_write_mbufs(txq, num_mbufs, mbuf, 790 &num_packets, &num_tx_bytes); 791 if (ret == -1) { 792 txq->stats.errs++; 793 /* free tso mbufs */ 794 if (num_tso_mbufs > 0) 795 rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs); 796 break; 797 } 798 num_tx++; 799 /* free original mbuf */ 800 rte_pktmbuf_free(mbuf_in); 801 /* free tso mbufs */ 802 if (num_tso_mbufs > 0) 803 rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs); 804 } 805 806 txq->stats.opackets += num_packets; 807 txq->stats.errs += nb_pkts - num_tx; 808 txq->stats.obytes += num_tx_bytes; 809 810 return num_tx; 811 } 812 813 static const char * 814 tap_ioctl_req2str(unsigned long request) 815 { 816 switch (request) { 817 case SIOCSIFFLAGS: 818 return "SIOCSIFFLAGS"; 819 case SIOCGIFFLAGS: 820 return "SIOCGIFFLAGS"; 821 case SIOCGIFHWADDR: 822 return "SIOCGIFHWADDR"; 823 case SIOCSIFHWADDR: 824 return "SIOCSIFHWADDR"; 825 case SIOCSIFMTU: 826 return "SIOCSIFMTU"; 827 } 828 return "UNKNOWN"; 829 } 830 831 static int 832 tap_ioctl(struct pmd_internals *pmd, unsigned long request, 833 struct ifreq *ifr, int set, enum ioctl_mode mode) 834 { 835 short req_flags = ifr->ifr_flags; 836 int remote = pmd->remote_if_index && 837 (mode == REMOTE_ONLY || mode == LOCAL_AND_REMOTE); 838 839 if (!pmd->remote_if_index && mode == REMOTE_ONLY) 840 return 0; 841 /* 842 * If there is a remote netdevice, apply ioctl on it, then apply it on 843 * the tap netdevice. 
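	 * The "apply" label and the remote-- test below implement this:
	 * the first pass targets the remote interface, then the same
	 * request is replayed once more on the local tap netdevice.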
844 */ 845 apply: 846 if (remote) 847 strlcpy(ifr->ifr_name, pmd->remote_iface, IFNAMSIZ); 848 else if (mode == LOCAL_ONLY || mode == LOCAL_AND_REMOTE) 849 strlcpy(ifr->ifr_name, pmd->name, IFNAMSIZ); 850 switch (request) { 851 case SIOCSIFFLAGS: 852 /* fetch current flags to leave other flags untouched */ 853 if (ioctl(pmd->ioctl_sock, SIOCGIFFLAGS, ifr) < 0) 854 goto error; 855 if (set) 856 ifr->ifr_flags |= req_flags; 857 else 858 ifr->ifr_flags &= ~req_flags; 859 break; 860 case SIOCGIFFLAGS: 861 case SIOCGIFHWADDR: 862 case SIOCSIFHWADDR: 863 case SIOCSIFMTU: 864 break; 865 default: 866 TAP_LOG(WARNING, "%s: ioctl() called with wrong arg", 867 pmd->name); 868 return -EINVAL; 869 } 870 if (ioctl(pmd->ioctl_sock, request, ifr) < 0) 871 goto error; 872 if (remote-- && mode == LOCAL_AND_REMOTE) 873 goto apply; 874 return 0; 875 876 error: 877 TAP_LOG(DEBUG, "%s(%s) failed: %s(%d)", ifr->ifr_name, 878 tap_ioctl_req2str(request), strerror(errno), errno); 879 return -errno; 880 } 881 882 static int 883 tap_link_set_down(struct rte_eth_dev *dev) 884 { 885 struct pmd_internals *pmd = dev->data->dev_private; 886 struct ifreq ifr = { .ifr_flags = IFF_UP }; 887 888 dev->data->dev_link.link_status = ETH_LINK_DOWN; 889 return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_ONLY); 890 } 891 892 static int 893 tap_link_set_up(struct rte_eth_dev *dev) 894 { 895 struct pmd_internals *pmd = dev->data->dev_private; 896 struct ifreq ifr = { .ifr_flags = IFF_UP }; 897 898 dev->data->dev_link.link_status = ETH_LINK_UP; 899 return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); 900 } 901 902 static int 903 tap_dev_start(struct rte_eth_dev *dev) 904 { 905 int err, i; 906 907 err = tap_intr_handle_set(dev, 1); 908 if (err) 909 return err; 910 911 err = tap_link_set_up(dev); 912 if (err) 913 return err; 914 915 for (i = 0; i < dev->data->nb_tx_queues; i++) 916 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 917 for (i = 0; i < dev->data->nb_rx_queues; i++) 918 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 919 920 return err; 921 } 922 923 /* This function gets called when the current port gets stopped. 
 */
static int
tap_dev_stop(struct rte_eth_dev *dev)
{
	int i;

	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

	tap_intr_handle_set(dev, 0);
	tap_link_set_down(dev);

	return 0;
}

static int
tap_dev_configure(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	if (dev->data->nb_rx_queues > RTE_PMD_TAP_MAX_QUEUES) {
		TAP_LOG(ERR,
			"%s: number of rx queues %d exceeds max num of queues %d",
			dev->device->name,
			dev->data->nb_rx_queues,
			RTE_PMD_TAP_MAX_QUEUES);
		return -1;
	}
	if (dev->data->nb_tx_queues > RTE_PMD_TAP_MAX_QUEUES) {
		TAP_LOG(ERR,
			"%s: number of tx queues %d exceeds max num of queues %d",
			dev->device->name,
			dev->data->nb_tx_queues,
			RTE_PMD_TAP_MAX_QUEUES);
		return -1;
	}

	TAP_LOG(INFO, "%s: %s: TX configured queues number: %u",
		dev->device->name, pmd->name, dev->data->nb_tx_queues);

	TAP_LOG(INFO, "%s: %s: RX configured queues number: %u",
		dev->device->name, pmd->name, dev->data->nb_rx_queues);

	return 0;
}

static uint32_t
tap_dev_speed_capa(void)
{
	uint32_t speed = pmd_link.link_speed;
	uint32_t capa = 0;

	if (speed >= ETH_SPEED_NUM_10M)
		capa |= ETH_LINK_SPEED_10M;
	if (speed >= ETH_SPEED_NUM_100M)
		capa |= ETH_LINK_SPEED_100M;
	if (speed >= ETH_SPEED_NUM_1G)
		capa |= ETH_LINK_SPEED_1G;
	if (speed >= ETH_SPEED_NUM_2_5G)
		capa |= ETH_LINK_SPEED_2_5G;
	if (speed >= ETH_SPEED_NUM_5G)
		capa |= ETH_LINK_SPEED_5G;
	if (speed >= ETH_SPEED_NUM_10G)
		capa |= ETH_LINK_SPEED_10G;
	if (speed >= ETH_SPEED_NUM_20G)
		capa |= ETH_LINK_SPEED_20G;
	if (speed >= ETH_SPEED_NUM_25G)
		capa |= ETH_LINK_SPEED_25G;
	if (speed >= ETH_SPEED_NUM_40G)
		capa |= ETH_LINK_SPEED_40G;
	if (speed >= ETH_SPEED_NUM_50G)
		capa |= ETH_LINK_SPEED_50G;
	if (speed >= ETH_SPEED_NUM_56G)
		capa |= ETH_LINK_SPEED_56G;
	if (speed >= ETH_SPEED_NUM_100G)
		capa |= ETH_LINK_SPEED_100G;

	return capa;
}

static int
tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev_info->if_index = internals->if_index;
	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)RTE_ETHER_MAX_VLAN_FRAME_LEN;
	dev_info->max_rx_queues = RTE_PMD_TAP_MAX_QUEUES;
	dev_info->max_tx_queues = RTE_PMD_TAP_MAX_QUEUES;
	dev_info->min_rx_bufsize = 0;
	dev_info->speed_capa = tap_dev_speed_capa();
	dev_info->rx_queue_offload_capa = tap_rx_offload_get_queue_capa();
	dev_info->rx_offload_capa = tap_rx_offload_get_port_capa() |
				    dev_info->rx_queue_offload_capa;
	dev_info->tx_queue_offload_capa = tap_tx_offload_get_queue_capa();
	dev_info->tx_offload_capa = tap_tx_offload_get_port_capa() |
				    dev_info->tx_queue_offload_capa;
	dev_info->hash_key_size = TAP_RSS_HASH_KEY_SIZE;
	/*
	 * limitation: TAP supports all of IP, UDP and TCP hash
	 * functions together and not in partial combinations
	 */
	dev_info->flow_type_rss_offloads = ~TAP_RSS_HF_MASK;

	return 0;
}

static int
tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
{
	unsigned int i, imax;
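	/*
	 * Per-queue counters (and the totals accumulated from them below)
	 * only cover the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues.
	 */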
unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0; 1039 unsigned long rx_bytes_total = 0, tx_bytes_total = 0; 1040 unsigned long rx_nombuf = 0, ierrors = 0; 1041 const struct pmd_internals *pmd = dev->data->dev_private; 1042 1043 /* rx queue statistics */ 1044 imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? 1045 dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS; 1046 for (i = 0; i < imax; i++) { 1047 tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets; 1048 tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes; 1049 rx_total += tap_stats->q_ipackets[i]; 1050 rx_bytes_total += tap_stats->q_ibytes[i]; 1051 rx_nombuf += pmd->rxq[i].stats.rx_nombuf; 1052 ierrors += pmd->rxq[i].stats.ierrors; 1053 } 1054 1055 /* tx queue statistics */ 1056 imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? 1057 dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS; 1058 1059 for (i = 0; i < imax; i++) { 1060 tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets; 1061 tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes; 1062 tx_total += tap_stats->q_opackets[i]; 1063 tx_err_total += pmd->txq[i].stats.errs; 1064 tx_bytes_total += tap_stats->q_obytes[i]; 1065 } 1066 1067 tap_stats->ipackets = rx_total; 1068 tap_stats->ibytes = rx_bytes_total; 1069 tap_stats->ierrors = ierrors; 1070 tap_stats->rx_nombuf = rx_nombuf; 1071 tap_stats->opackets = tx_total; 1072 tap_stats->oerrors = tx_err_total; 1073 tap_stats->obytes = tx_bytes_total; 1074 return 0; 1075 } 1076 1077 static int 1078 tap_stats_reset(struct rte_eth_dev *dev) 1079 { 1080 int i; 1081 struct pmd_internals *pmd = dev->data->dev_private; 1082 1083 for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { 1084 pmd->rxq[i].stats.ipackets = 0; 1085 pmd->rxq[i].stats.ibytes = 0; 1086 pmd->rxq[i].stats.ierrors = 0; 1087 pmd->rxq[i].stats.rx_nombuf = 0; 1088 1089 pmd->txq[i].stats.opackets = 0; 1090 pmd->txq[i].stats.errs = 0; 1091 pmd->txq[i].stats.obytes = 0; 1092 } 1093 1094 return 0; 1095 } 1096 1097 static int 1098 tap_dev_close(struct rte_eth_dev *dev) 1099 { 1100 int i; 1101 struct pmd_internals *internals = dev->data->dev_private; 1102 struct pmd_process_private *process_private = dev->process_private; 1103 struct rx_queue *rxq; 1104 1105 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1106 rte_free(dev->process_private); 1107 return 0; 1108 } 1109 1110 tap_link_set_down(dev); 1111 if (internals->nlsk_fd != -1) { 1112 tap_flow_flush(dev, NULL); 1113 tap_flow_implicit_flush(internals, NULL); 1114 tap_nl_final(internals->nlsk_fd); 1115 internals->nlsk_fd = -1; 1116 } 1117 1118 for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { 1119 if (process_private->rxq_fds[i] != -1) { 1120 rxq = &internals->rxq[i]; 1121 close(process_private->rxq_fds[i]); 1122 process_private->rxq_fds[i] = -1; 1123 tap_rxq_pool_free(rxq->pool); 1124 rte_free(rxq->iovecs); 1125 rxq->pool = NULL; 1126 rxq->iovecs = NULL; 1127 } 1128 if (process_private->txq_fds[i] != -1) { 1129 close(process_private->txq_fds[i]); 1130 process_private->txq_fds[i] = -1; 1131 } 1132 } 1133 1134 if (internals->remote_if_index) { 1135 /* Restore initial remote state */ 1136 ioctl(internals->ioctl_sock, SIOCSIFFLAGS, 1137 &internals->remote_initial_flags); 1138 } 1139 1140 rte_mempool_free(internals->gso_ctx_mp); 1141 internals->gso_ctx_mp = NULL; 1142 1143 if (internals->ka_fd != -1) { 1144 close(internals->ka_fd); 1145 internals->ka_fd = -1; 1146 } 1147 1148 /* mac_addrs must not be freed alone because part of dev_private */ 1149 dev->data->mac_addrs = NULL; 1150 1151 internals = 
dev->data->dev_private; 1152 TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u", 1153 tuntap_types[internals->type], rte_socket_id()); 1154 1155 if (internals->ioctl_sock != -1) { 1156 close(internals->ioctl_sock); 1157 internals->ioctl_sock = -1; 1158 } 1159 rte_free(dev->process_private); 1160 if (tap_devices_count == 1) 1161 rte_mp_action_unregister(TAP_MP_KEY); 1162 tap_devices_count--; 1163 /* 1164 * Since TUN device has no more opened file descriptors 1165 * it will be removed from kernel 1166 */ 1167 1168 return 0; 1169 } 1170 1171 static void 1172 tap_rx_queue_release(void *queue) 1173 { 1174 struct rx_queue *rxq = queue; 1175 struct pmd_process_private *process_private; 1176 1177 if (!rxq) 1178 return; 1179 process_private = rte_eth_devices[rxq->in_port].process_private; 1180 if (process_private->rxq_fds[rxq->queue_id] != -1) { 1181 close(process_private->rxq_fds[rxq->queue_id]); 1182 process_private->rxq_fds[rxq->queue_id] = -1; 1183 tap_rxq_pool_free(rxq->pool); 1184 rte_free(rxq->iovecs); 1185 rxq->pool = NULL; 1186 rxq->iovecs = NULL; 1187 } 1188 } 1189 1190 static void 1191 tap_tx_queue_release(void *queue) 1192 { 1193 struct tx_queue *txq = queue; 1194 struct pmd_process_private *process_private; 1195 1196 if (!txq) 1197 return; 1198 process_private = rte_eth_devices[txq->out_port].process_private; 1199 1200 if (process_private->txq_fds[txq->queue_id] != -1) { 1201 close(process_private->txq_fds[txq->queue_id]); 1202 process_private->txq_fds[txq->queue_id] = -1; 1203 } 1204 } 1205 1206 static int 1207 tap_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused) 1208 { 1209 struct rte_eth_link *dev_link = &dev->data->dev_link; 1210 struct pmd_internals *pmd = dev->data->dev_private; 1211 struct ifreq ifr = { .ifr_flags = 0 }; 1212 1213 if (pmd->remote_if_index) { 1214 tap_ioctl(pmd, SIOCGIFFLAGS, &ifr, 0, REMOTE_ONLY); 1215 if (!(ifr.ifr_flags & IFF_UP) || 1216 !(ifr.ifr_flags & IFF_RUNNING)) { 1217 dev_link->link_status = ETH_LINK_DOWN; 1218 return 0; 1219 } 1220 } 1221 tap_ioctl(pmd, SIOCGIFFLAGS, &ifr, 0, LOCAL_ONLY); 1222 dev_link->link_status = 1223 ((ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING) ? 1224 ETH_LINK_UP : 1225 ETH_LINK_DOWN); 1226 return 0; 1227 } 1228 1229 static int 1230 tap_promisc_enable(struct rte_eth_dev *dev) 1231 { 1232 struct pmd_internals *pmd = dev->data->dev_private; 1233 struct ifreq ifr = { .ifr_flags = IFF_PROMISC }; 1234 int ret; 1235 1236 ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); 1237 if (ret != 0) 1238 return ret; 1239 1240 if (pmd->remote_if_index && !pmd->flow_isolate) { 1241 dev->data->promiscuous = 1; 1242 ret = tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC); 1243 if (ret != 0) { 1244 /* Rollback promisc flag */ 1245 tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); 1246 /* 1247 * rte_eth_dev_promiscuous_enable() rollback 1248 * dev->data->promiscuous in the case of failure. 
1249 */ 1250 return ret; 1251 } 1252 } 1253 1254 return 0; 1255 } 1256 1257 static int 1258 tap_promisc_disable(struct rte_eth_dev *dev) 1259 { 1260 struct pmd_internals *pmd = dev->data->dev_private; 1261 struct ifreq ifr = { .ifr_flags = IFF_PROMISC }; 1262 int ret; 1263 1264 ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); 1265 if (ret != 0) 1266 return ret; 1267 1268 if (pmd->remote_if_index && !pmd->flow_isolate) { 1269 dev->data->promiscuous = 0; 1270 ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC); 1271 if (ret != 0) { 1272 /* Rollback promisc flag */ 1273 tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); 1274 /* 1275 * rte_eth_dev_promiscuous_disable() rollback 1276 * dev->data->promiscuous in the case of failure. 1277 */ 1278 return ret; 1279 } 1280 } 1281 1282 return 0; 1283 } 1284 1285 static int 1286 tap_allmulti_enable(struct rte_eth_dev *dev) 1287 { 1288 struct pmd_internals *pmd = dev->data->dev_private; 1289 struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI }; 1290 int ret; 1291 1292 ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); 1293 if (ret != 0) 1294 return ret; 1295 1296 if (pmd->remote_if_index && !pmd->flow_isolate) { 1297 dev->data->all_multicast = 1; 1298 ret = tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI); 1299 if (ret != 0) { 1300 /* Rollback allmulti flag */ 1301 tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); 1302 /* 1303 * rte_eth_dev_allmulticast_enable() rollback 1304 * dev->data->all_multicast in the case of failure. 1305 */ 1306 return ret; 1307 } 1308 } 1309 1310 return 0; 1311 } 1312 1313 static int 1314 tap_allmulti_disable(struct rte_eth_dev *dev) 1315 { 1316 struct pmd_internals *pmd = dev->data->dev_private; 1317 struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI }; 1318 int ret; 1319 1320 ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); 1321 if (ret != 0) 1322 return ret; 1323 1324 if (pmd->remote_if_index && !pmd->flow_isolate) { 1325 dev->data->all_multicast = 0; 1326 ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI); 1327 if (ret != 0) { 1328 /* Rollback allmulti flag */ 1329 tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); 1330 /* 1331 * rte_eth_dev_allmulticast_disable() rollback 1332 * dev->data->all_multicast in the case of failure. 
			 */
			return ret;
		}
	}

	return 0;
}

static int
tap_mac_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	enum ioctl_mode mode = LOCAL_ONLY;
	struct ifreq ifr;
	int ret;

	if (pmd->type == ETH_TUNTAP_TYPE_TUN) {
		TAP_LOG(ERR, "%s: can't set MAC address for TUN",
			dev->device->name);
		return -ENOTSUP;
	}

	if (rte_is_zero_ether_addr(mac_addr)) {
		TAP_LOG(ERR, "%s: can't set an empty MAC address",
			dev->device->name);
		return -EINVAL;
	}
	/* Check the actual current MAC address on the tap netdevice */
	ret = tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, LOCAL_ONLY);
	if (ret < 0)
		return ret;
	if (rte_is_same_ether_addr(
			(struct rte_ether_addr *)&ifr.ifr_hwaddr.sa_data,
			mac_addr))
		return 0;
	/* Check the current MAC address on the remote */
	ret = tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, REMOTE_ONLY);
	if (ret < 0)
		return ret;
	if (!rte_is_same_ether_addr(
			(struct rte_ether_addr *)&ifr.ifr_hwaddr.sa_data,
			mac_addr))
		mode = LOCAL_AND_REMOTE;
	ifr.ifr_hwaddr.sa_family = AF_LOCAL;
	rte_memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, RTE_ETHER_ADDR_LEN);
	ret = tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 1, mode);
	if (ret < 0)
		return ret;
	rte_memcpy(&pmd->eth_addr, mac_addr, RTE_ETHER_ADDR_LEN);
	if (pmd->remote_if_index && !pmd->flow_isolate) {
		/* Replace MAC redirection rule after a MAC change */
		ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_LOCAL_MAC);
		if (ret < 0) {
			TAP_LOG(ERR,
				"%s: Couldn't delete MAC redirection rule",
				dev->device->name);
			return ret;
		}
		ret = tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC);
		if (ret < 0) {
			TAP_LOG(ERR,
				"%s: Couldn't add MAC redirection rule",
				dev->device->name);
			return ret;
		}
	}

	return 0;
}

static int
tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev)
{
	uint32_t gso_types;
	char pool_name[64];
	struct pmd_internals *pmd = dev->data->dev_private;
	int ret;

	/* initialize GSO context */
	gso_types = DEV_TX_OFFLOAD_TCP_TSO;
	if (!pmd->gso_ctx_mp) {
		/*
		 * Create a private mbuf pool with TAP_GSO_MBUF_SEG_SIZE
		 * bytes per mbuf; use this pool for both direct and
		 * indirect mbufs.
		 */
		ret = snprintf(pool_name, sizeof(pool_name), "mp_%s",
				dev->device->name);
		if (ret < 0 || ret >= (int)sizeof(pool_name)) {
			TAP_LOG(ERR,
				"%s: failed to create mbuf pool name for device %s,"
				"device name too long or output error, ret: %d\n",
				pmd->name, dev->device->name, ret);
			return -ENAMETOOLONG;
		}
		pmd->gso_ctx_mp = rte_pktmbuf_pool_create(pool_name,
				TAP_GSO_MBUFS_NUM, TAP_GSO_MBUF_CACHE_SIZE, 0,
				RTE_PKTMBUF_HEADROOM + TAP_GSO_MBUF_SEG_SIZE,
				SOCKET_ID_ANY);
		if (!pmd->gso_ctx_mp) {
			TAP_LOG(ERR,
				"%s: failed to create mbuf pool for device %s\n",
				pmd->name, dev->device->name);
			return -1;
		}
	}

	gso_ctx->direct_pool = pmd->gso_ctx_mp;
	gso_ctx->indirect_pool = pmd->gso_ctx_mp;
	gso_ctx->gso_types = gso_types;
	gso_ctx->gso_size = 0; /* gso_size is set in tx_burst() per packet */
	gso_ctx->flag = 0;

	return 0;
}

static int
tap_setup_queue(struct rte_eth_dev *dev,
		struct pmd_internals
*internals, 1452 uint16_t qid, 1453 int is_rx) 1454 { 1455 int ret; 1456 int *fd; 1457 int *other_fd; 1458 const char *dir; 1459 struct pmd_internals *pmd = dev->data->dev_private; 1460 struct pmd_process_private *process_private = dev->process_private; 1461 struct rx_queue *rx = &internals->rxq[qid]; 1462 struct tx_queue *tx = &internals->txq[qid]; 1463 struct rte_gso_ctx *gso_ctx; 1464 1465 if (is_rx) { 1466 fd = &process_private->rxq_fds[qid]; 1467 other_fd = &process_private->txq_fds[qid]; 1468 dir = "rx"; 1469 gso_ctx = NULL; 1470 } else { 1471 fd = &process_private->txq_fds[qid]; 1472 other_fd = &process_private->rxq_fds[qid]; 1473 dir = "tx"; 1474 gso_ctx = &tx->gso_ctx; 1475 } 1476 if (*fd != -1) { 1477 /* fd for this queue already exists */ 1478 TAP_LOG(DEBUG, "%s: fd %d for %s queue qid %d exists", 1479 pmd->name, *fd, dir, qid); 1480 gso_ctx = NULL; 1481 } else if (*other_fd != -1) { 1482 /* Only other_fd exists. dup it */ 1483 *fd = dup(*other_fd); 1484 if (*fd < 0) { 1485 *fd = -1; 1486 TAP_LOG(ERR, "%s: dup() failed.", pmd->name); 1487 return -1; 1488 } 1489 TAP_LOG(DEBUG, "%s: dup fd %d for %s queue qid %d (%d)", 1490 pmd->name, *other_fd, dir, qid, *fd); 1491 } else { 1492 /* Both RX and TX fds do not exist (equal -1). Create fd */ 1493 *fd = tun_alloc(pmd, 0); 1494 if (*fd < 0) { 1495 *fd = -1; /* restore original value */ 1496 TAP_LOG(ERR, "%s: tun_alloc() failed.", pmd->name); 1497 return -1; 1498 } 1499 TAP_LOG(DEBUG, "%s: add %s queue for qid %d fd %d", 1500 pmd->name, dir, qid, *fd); 1501 } 1502 1503 tx->mtu = &dev->data->mtu; 1504 rx->rxmode = &dev->data->dev_conf.rxmode; 1505 if (gso_ctx) { 1506 ret = tap_gso_ctx_setup(gso_ctx, dev); 1507 if (ret) 1508 return -1; 1509 } 1510 1511 tx->type = pmd->type; 1512 1513 return *fd; 1514 } 1515 1516 static int 1517 tap_rx_queue_setup(struct rte_eth_dev *dev, 1518 uint16_t rx_queue_id, 1519 uint16_t nb_rx_desc, 1520 unsigned int socket_id, 1521 const struct rte_eth_rxconf *rx_conf __rte_unused, 1522 struct rte_mempool *mp) 1523 { 1524 struct pmd_internals *internals = dev->data->dev_private; 1525 struct pmd_process_private *process_private = dev->process_private; 1526 struct rx_queue *rxq = &internals->rxq[rx_queue_id]; 1527 struct rte_mbuf **tmp = &rxq->pool; 1528 long iov_max = sysconf(_SC_IOV_MAX); 1529 1530 if (iov_max <= 0) { 1531 TAP_LOG(WARNING, 1532 "_SC_IOV_MAX is not defined. 
Using %d as default", 1533 TAP_IOV_DEFAULT_MAX); 1534 iov_max = TAP_IOV_DEFAULT_MAX; 1535 } 1536 uint16_t nb_desc = RTE_MIN(nb_rx_desc, iov_max - 1); 1537 struct iovec (*iovecs)[nb_desc + 1]; 1538 int data_off = RTE_PKTMBUF_HEADROOM; 1539 int ret = 0; 1540 int fd; 1541 int i; 1542 1543 if (rx_queue_id >= dev->data->nb_rx_queues || !mp) { 1544 TAP_LOG(WARNING, 1545 "nb_rx_queues %d too small or mempool NULL", 1546 dev->data->nb_rx_queues); 1547 return -1; 1548 } 1549 1550 rxq->mp = mp; 1551 rxq->trigger_seen = 1; /* force initial burst */ 1552 rxq->in_port = dev->data->port_id; 1553 rxq->queue_id = rx_queue_id; 1554 rxq->nb_rx_desc = nb_desc; 1555 iovecs = rte_zmalloc_socket(dev->device->name, sizeof(*iovecs), 0, 1556 socket_id); 1557 if (!iovecs) { 1558 TAP_LOG(WARNING, 1559 "%s: Couldn't allocate %d RX descriptors", 1560 dev->device->name, nb_desc); 1561 return -ENOMEM; 1562 } 1563 rxq->iovecs = iovecs; 1564 1565 dev->data->rx_queues[rx_queue_id] = rxq; 1566 fd = tap_setup_queue(dev, internals, rx_queue_id, 1); 1567 if (fd == -1) { 1568 ret = fd; 1569 goto error; 1570 } 1571 1572 (*rxq->iovecs)[0].iov_len = sizeof(struct tun_pi); 1573 (*rxq->iovecs)[0].iov_base = &rxq->pi; 1574 1575 for (i = 1; i <= nb_desc; i++) { 1576 *tmp = rte_pktmbuf_alloc(rxq->mp); 1577 if (!*tmp) { 1578 TAP_LOG(WARNING, 1579 "%s: couldn't allocate memory for queue %d", 1580 dev->device->name, rx_queue_id); 1581 ret = -ENOMEM; 1582 goto error; 1583 } 1584 (*rxq->iovecs)[i].iov_len = (*tmp)->buf_len - data_off; 1585 (*rxq->iovecs)[i].iov_base = 1586 (char *)(*tmp)->buf_addr + data_off; 1587 data_off = 0; 1588 tmp = &(*tmp)->next; 1589 } 1590 1591 TAP_LOG(DEBUG, " RX TUNTAP device name %s, qid %d on fd %d", 1592 internals->name, rx_queue_id, 1593 process_private->rxq_fds[rx_queue_id]); 1594 1595 return 0; 1596 1597 error: 1598 tap_rxq_pool_free(rxq->pool); 1599 rxq->pool = NULL; 1600 rte_free(rxq->iovecs); 1601 rxq->iovecs = NULL; 1602 return ret; 1603 } 1604 1605 static int 1606 tap_tx_queue_setup(struct rte_eth_dev *dev, 1607 uint16_t tx_queue_id, 1608 uint16_t nb_tx_desc __rte_unused, 1609 unsigned int socket_id __rte_unused, 1610 const struct rte_eth_txconf *tx_conf) 1611 { 1612 struct pmd_internals *internals = dev->data->dev_private; 1613 struct pmd_process_private *process_private = dev->process_private; 1614 struct tx_queue *txq; 1615 int ret; 1616 uint64_t offloads; 1617 1618 if (tx_queue_id >= dev->data->nb_tx_queues) 1619 return -1; 1620 dev->data->tx_queues[tx_queue_id] = &internals->txq[tx_queue_id]; 1621 txq = dev->data->tx_queues[tx_queue_id]; 1622 txq->out_port = dev->data->port_id; 1623 txq->queue_id = tx_queue_id; 1624 1625 offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1626 txq->csum = !!(offloads & 1627 (DEV_TX_OFFLOAD_IPV4_CKSUM | 1628 DEV_TX_OFFLOAD_UDP_CKSUM | 1629 DEV_TX_OFFLOAD_TCP_CKSUM)); 1630 1631 ret = tap_setup_queue(dev, internals, tx_queue_id, 0); 1632 if (ret == -1) 1633 return -1; 1634 TAP_LOG(DEBUG, 1635 " TX TUNTAP device name %s, qid %d on fd %d csum %s", 1636 internals->name, tx_queue_id, 1637 process_private->txq_fds[tx_queue_id], 1638 txq->csum ? 
"on" : "off"); 1639 1640 return 0; 1641 } 1642 1643 static int 1644 tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 1645 { 1646 struct pmd_internals *pmd = dev->data->dev_private; 1647 struct ifreq ifr = { .ifr_mtu = mtu }; 1648 int err = 0; 1649 1650 err = tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE); 1651 if (!err) 1652 dev->data->mtu = mtu; 1653 1654 return err; 1655 } 1656 1657 static int 1658 tap_set_mc_addr_list(struct rte_eth_dev *dev __rte_unused, 1659 struct rte_ether_addr *mc_addr_set __rte_unused, 1660 uint32_t nb_mc_addr __rte_unused) 1661 { 1662 /* 1663 * Nothing to do actually: the tap has no filtering whatsoever, every 1664 * packet is received. 1665 */ 1666 return 0; 1667 } 1668 1669 static int 1670 tap_nl_msg_handler(struct nlmsghdr *nh, void *arg) 1671 { 1672 struct rte_eth_dev *dev = arg; 1673 struct pmd_internals *pmd = dev->data->dev_private; 1674 struct ifinfomsg *info = NLMSG_DATA(nh); 1675 1676 if (nh->nlmsg_type != RTM_NEWLINK || 1677 (info->ifi_index != pmd->if_index && 1678 info->ifi_index != pmd->remote_if_index)) 1679 return 0; 1680 return tap_link_update(dev, 0); 1681 } 1682 1683 static void 1684 tap_dev_intr_handler(void *cb_arg) 1685 { 1686 struct rte_eth_dev *dev = cb_arg; 1687 struct pmd_internals *pmd = dev->data->dev_private; 1688 1689 tap_nl_recv(pmd->intr_handle.fd, tap_nl_msg_handler, dev); 1690 } 1691 1692 static int 1693 tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) 1694 { 1695 struct pmd_internals *pmd = dev->data->dev_private; 1696 int ret; 1697 1698 /* In any case, disable interrupt if the conf is no longer there. */ 1699 if (!dev->data->dev_conf.intr_conf.lsc) { 1700 if (pmd->intr_handle.fd != -1) { 1701 goto clean; 1702 } 1703 return 0; 1704 } 1705 if (set) { 1706 pmd->intr_handle.fd = tap_nl_init(RTMGRP_LINK); 1707 if (unlikely(pmd->intr_handle.fd == -1)) 1708 return -EBADF; 1709 return rte_intr_callback_register( 1710 &pmd->intr_handle, tap_dev_intr_handler, dev); 1711 } 1712 1713 clean: 1714 do { 1715 ret = rte_intr_callback_unregister(&pmd->intr_handle, 1716 tap_dev_intr_handler, dev); 1717 if (ret >= 0) { 1718 break; 1719 } else if (ret == -EAGAIN) { 1720 rte_delay_ms(100); 1721 } else { 1722 TAP_LOG(ERR, "intr callback unregister failed: %d", 1723 ret); 1724 break; 1725 } 1726 } while (true); 1727 1728 tap_nl_final(pmd->intr_handle.fd); 1729 pmd->intr_handle.fd = -1; 1730 1731 return 0; 1732 } 1733 1734 static int 1735 tap_intr_handle_set(struct rte_eth_dev *dev, int set) 1736 { 1737 int err; 1738 1739 err = tap_lsc_intr_handle_set(dev, set); 1740 if (err < 0) { 1741 if (!set) 1742 tap_rx_intr_vec_set(dev, 0); 1743 return err; 1744 } 1745 err = tap_rx_intr_vec_set(dev, set); 1746 if (err && set) 1747 tap_lsc_intr_handle_set(dev, 0); 1748 return err; 1749 } 1750 1751 static const uint32_t* 1752 tap_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused) 1753 { 1754 static const uint32_t ptypes[] = { 1755 RTE_PTYPE_INNER_L2_ETHER, 1756 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1757 RTE_PTYPE_INNER_L2_ETHER_QINQ, 1758 RTE_PTYPE_INNER_L3_IPV4, 1759 RTE_PTYPE_INNER_L3_IPV4_EXT, 1760 RTE_PTYPE_INNER_L3_IPV6, 1761 RTE_PTYPE_INNER_L3_IPV6_EXT, 1762 RTE_PTYPE_INNER_L4_FRAG, 1763 RTE_PTYPE_INNER_L4_UDP, 1764 RTE_PTYPE_INNER_L4_TCP, 1765 RTE_PTYPE_INNER_L4_SCTP, 1766 RTE_PTYPE_L2_ETHER, 1767 RTE_PTYPE_L2_ETHER_VLAN, 1768 RTE_PTYPE_L2_ETHER_QINQ, 1769 RTE_PTYPE_L3_IPV4, 1770 RTE_PTYPE_L3_IPV4_EXT, 1771 RTE_PTYPE_L3_IPV6_EXT, 1772 RTE_PTYPE_L3_IPV6, 1773 RTE_PTYPE_L4_FRAG, 1774 RTE_PTYPE_L4_UDP, 1775 RTE_PTYPE_L4_TCP, 1776 
RTE_PTYPE_L4_SCTP, 1777 }; 1778 1779 return ptypes; 1780 } 1781 1782 static int 1783 tap_flow_ctrl_get(struct rte_eth_dev *dev __rte_unused, 1784 struct rte_eth_fc_conf *fc_conf) 1785 { 1786 fc_conf->mode = RTE_FC_NONE; 1787 return 0; 1788 } 1789 1790 static int 1791 tap_flow_ctrl_set(struct rte_eth_dev *dev __rte_unused, 1792 struct rte_eth_fc_conf *fc_conf) 1793 { 1794 if (fc_conf->mode != RTE_FC_NONE) 1795 return -ENOTSUP; 1796 return 0; 1797 } 1798 1799 /** 1800 * DPDK callback to update the RSS hash configuration. 1801 * 1802 * @param dev 1803 * Pointer to Ethernet device structure. 1804 * @param[in] rss_conf 1805 * RSS configuration data. 1806 * 1807 * @return 1808 * 0 on success, a negative errno value otherwise and rte_errno is set. 1809 */ 1810 static int 1811 tap_rss_hash_update(struct rte_eth_dev *dev, 1812 struct rte_eth_rss_conf *rss_conf) 1813 { 1814 if (rss_conf->rss_hf & TAP_RSS_HF_MASK) { 1815 rte_errno = EINVAL; 1816 return -rte_errno; 1817 } 1818 if (rss_conf->rss_key && rss_conf->rss_key_len) { 1819 /* 1820 * Currently TAP RSS key is hard coded 1821 * and cannot be updated 1822 */ 1823 TAP_LOG(ERR, 1824 "port %u RSS key cannot be updated", 1825 dev->data->port_id); 1826 rte_errno = EINVAL; 1827 return -rte_errno; 1828 } 1829 return 0; 1830 } 1831 1832 static int 1833 tap_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1834 { 1835 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; 1836 1837 return 0; 1838 } 1839 1840 static int 1841 tap_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id) 1842 { 1843 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; 1844 1845 return 0; 1846 } 1847 1848 static int 1849 tap_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1850 { 1851 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; 1852 1853 return 0; 1854 } 1855 1856 static int 1857 tap_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) 1858 { 1859 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; 1860 1861 return 0; 1862 } 1863 static const struct eth_dev_ops ops = { 1864 .dev_start = tap_dev_start, 1865 .dev_stop = tap_dev_stop, 1866 .dev_close = tap_dev_close, 1867 .dev_configure = tap_dev_configure, 1868 .dev_infos_get = tap_dev_info, 1869 .rx_queue_setup = tap_rx_queue_setup, 1870 .tx_queue_setup = tap_tx_queue_setup, 1871 .rx_queue_start = tap_rx_queue_start, 1872 .tx_queue_start = tap_tx_queue_start, 1873 .rx_queue_stop = tap_rx_queue_stop, 1874 .tx_queue_stop = tap_tx_queue_stop, 1875 .rx_queue_release = tap_rx_queue_release, 1876 .tx_queue_release = tap_tx_queue_release, 1877 .flow_ctrl_get = tap_flow_ctrl_get, 1878 .flow_ctrl_set = tap_flow_ctrl_set, 1879 .link_update = tap_link_update, 1880 .dev_set_link_up = tap_link_set_up, 1881 .dev_set_link_down = tap_link_set_down, 1882 .promiscuous_enable = tap_promisc_enable, 1883 .promiscuous_disable = tap_promisc_disable, 1884 .allmulticast_enable = tap_allmulti_enable, 1885 .allmulticast_disable = tap_allmulti_disable, 1886 .mac_addr_set = tap_mac_set, 1887 .mtu_set = tap_mtu_set, 1888 .set_mc_addr_list = tap_set_mc_addr_list, 1889 .stats_get = tap_stats_get, 1890 .stats_reset = tap_stats_reset, 1891 .dev_supported_ptypes_get = tap_dev_supported_ptypes_get, 1892 .rss_hash_update = tap_rss_hash_update, 1893 .flow_ops_get = tap_dev_flow_ops_get, 1894 }; 1895 1896 static int 1897 eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, 1898 char *remote_iface, struct rte_ether_addr *mac_addr, 1899 
enum rte_tuntap_type type) 1900 { 1901 int numa_node = rte_socket_id(); 1902 struct rte_eth_dev *dev; 1903 struct pmd_internals *pmd; 1904 struct pmd_process_private *process_private; 1905 const char *tuntap_name = tuntap_types[type]; 1906 struct rte_eth_dev_data *data; 1907 struct ifreq ifr; 1908 int i; 1909 1910 TAP_LOG(DEBUG, "%s device on numa %u", tuntap_name, rte_socket_id()); 1911 1912 dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd)); 1913 if (!dev) { 1914 TAP_LOG(ERR, "%s Unable to allocate device struct", 1915 tuntap_name); 1916 goto error_exit_nodev; 1917 } 1918 1919 process_private = (struct pmd_process_private *) 1920 rte_zmalloc_socket(tap_name, sizeof(struct pmd_process_private), 1921 RTE_CACHE_LINE_SIZE, dev->device->numa_node); 1922 1923 if (process_private == NULL) { 1924 TAP_LOG(ERR, "Failed to alloc memory for process private"); 1925 return -1; 1926 } 1927 pmd = dev->data->dev_private; 1928 dev->process_private = process_private; 1929 pmd->dev = dev; 1930 strlcpy(pmd->name, tap_name, sizeof(pmd->name)); 1931 pmd->type = type; 1932 pmd->ka_fd = -1; 1933 pmd->nlsk_fd = -1; 1934 pmd->gso_ctx_mp = NULL; 1935 1936 pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0); 1937 if (pmd->ioctl_sock == -1) { 1938 TAP_LOG(ERR, 1939 "%s Unable to get a socket for management: %s", 1940 tuntap_name, strerror(errno)); 1941 goto error_exit; 1942 } 1943 1944 /* Setup some default values */ 1945 data = dev->data; 1946 data->dev_private = pmd; 1947 data->dev_flags = RTE_ETH_DEV_INTR_LSC | 1948 RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1949 data->numa_node = numa_node; 1950 1951 data->dev_link = pmd_link; 1952 data->mac_addrs = &pmd->eth_addr; 1953 /* Set the number of RX and TX queues */ 1954 data->nb_rx_queues = 0; 1955 data->nb_tx_queues = 0; 1956 1957 dev->dev_ops = &ops; 1958 dev->rx_pkt_burst = pmd_rx_burst; 1959 dev->tx_pkt_burst = pmd_tx_burst; 1960 1961 pmd->intr_handle.type = RTE_INTR_HANDLE_EXT; 1962 pmd->intr_handle.fd = -1; 1963 dev->intr_handle = &pmd->intr_handle; 1964 1965 /* Presetup the fds to -1 as being not valid */ 1966 for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { 1967 process_private->rxq_fds[i] = -1; 1968 process_private->txq_fds[i] = -1; 1969 } 1970 1971 if (pmd->type == ETH_TUNTAP_TYPE_TAP) { 1972 if (rte_is_zero_ether_addr(mac_addr)) 1973 rte_eth_random_addr((uint8_t *)&pmd->eth_addr); 1974 else 1975 rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(*mac_addr)); 1976 } 1977 1978 /* 1979 * Allocate a TUN device keep-alive file descriptor that will only be 1980 * closed when the TUN device itself is closed or removed. 
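	 * (tun_alloc() is called with is_keepalive set, so this extra queue
	 * is immediately detached with TUNSETQUEUE/IFF_DETACH_QUEUE and
	 * never carries traffic.)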
1981 * This keep-alive file descriptor will guarantee that the TUN device 1982 * exists even when all of its queues are closed 1983 */ 1984 pmd->ka_fd = tun_alloc(pmd, 1); 1985 if (pmd->ka_fd == -1) { 1986 TAP_LOG(ERR, "Unable to create %s interface", tuntap_name); 1987 goto error_exit; 1988 } 1989 TAP_LOG(DEBUG, "allocated %s", pmd->name); 1990 1991 ifr.ifr_mtu = dev->data->mtu; 1992 if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0) 1993 goto error_exit; 1994 1995 if (pmd->type == ETH_TUNTAP_TYPE_TAP) { 1996 memset(&ifr, 0, sizeof(struct ifreq)); 1997 ifr.ifr_hwaddr.sa_family = AF_LOCAL; 1998 rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, 1999 RTE_ETHER_ADDR_LEN); 2000 if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0) 2001 goto error_exit; 2002 } 2003 2004 /* 2005 * Set up everything related to rte_flow: 2006 * - netlink socket 2007 * - tap / remote if_index 2008 * - mandatory QDISCs 2009 * - rte_flow actual/implicit lists 2010 * - implicit rules 2011 */ 2012 pmd->nlsk_fd = tap_nl_init(0); 2013 if (pmd->nlsk_fd == -1) { 2014 TAP_LOG(WARNING, "%s: failed to create netlink socket.", 2015 pmd->name); 2016 goto disable_rte_flow; 2017 } 2018 pmd->if_index = if_nametoindex(pmd->name); 2019 if (!pmd->if_index) { 2020 TAP_LOG(ERR, "%s: failed to get if_index.", pmd->name); 2021 goto disable_rte_flow; 2022 } 2023 if (qdisc_create_multiq(pmd->nlsk_fd, pmd->if_index) < 0) { 2024 TAP_LOG(ERR, "%s: failed to create multiq qdisc.", 2025 pmd->name); 2026 goto disable_rte_flow; 2027 } 2028 if (qdisc_create_ingress(pmd->nlsk_fd, pmd->if_index) < 0) { 2029 TAP_LOG(ERR, "%s: failed to create ingress qdisc.", 2030 pmd->name); 2031 goto disable_rte_flow; 2032 } 2033 LIST_INIT(&pmd->flows); 2034 2035 if (strlen(remote_iface)) { 2036 pmd->remote_if_index = if_nametoindex(remote_iface); 2037 if (!pmd->remote_if_index) { 2038 TAP_LOG(ERR, "%s: failed to get %s if_index.", 2039 pmd->name, remote_iface); 2040 goto error_remote; 2041 } 2042 strlcpy(pmd->remote_iface, remote_iface, RTE_ETH_NAME_MAX_LEN); 2043 2044 /* Save state of remote device */ 2045 tap_ioctl(pmd, SIOCGIFFLAGS, &pmd->remote_initial_flags, 0, REMOTE_ONLY); 2046 2047 /* Replicate remote MAC address */ 2048 if (tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, REMOTE_ONLY) < 0) { 2049 TAP_LOG(ERR, "%s: failed to get %s MAC address.", 2050 pmd->name, pmd->remote_iface); 2051 goto error_remote; 2052 } 2053 rte_memcpy(&pmd->eth_addr, ifr.ifr_hwaddr.sa_data, 2054 RTE_ETHER_ADDR_LEN); 2055 /* The desired MAC is already in ifreq after SIOCGIFHWADDR. */ 2056 if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0) { 2057 TAP_LOG(ERR, "%s: failed to get %s MAC address.", 2058 pmd->name, remote_iface); 2059 goto error_remote; 2060 } 2061 2062 /* 2063 * Flush usually returns negative value because it tries to 2064 * delete every QDISC (and on a running device, one QDISC at 2065 * least is needed). Ignore negative return value. 
		 */
		qdisc_flush(pmd->nlsk_fd, pmd->remote_if_index);
		if (qdisc_create_ingress(pmd->nlsk_fd,
				pmd->remote_if_index) < 0) {
			TAP_LOG(ERR, "%s: failed to create ingress qdisc.",
				pmd->remote_iface);
			goto error_remote;
		}
		LIST_INIT(&pmd->implicit_flows);
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_TX) < 0 ||
		    tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC) < 0 ||
		    tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCAST) < 0 ||
		    tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCASTV6) < 0) {
			TAP_LOG(ERR,
				"%s: failed to create implicit rules.",
				pmd->name);
			goto error_remote;
		}
	}

	rte_eth_dev_probing_finish(dev);
	return 0;

disable_rte_flow:
	TAP_LOG(ERR, " Disabling rte flow support: %s(%d)",
		strerror(errno), errno);
	if (strlen(remote_iface)) {
		TAP_LOG(ERR, "Remote feature requires flow support.");
		goto error_exit;
	}
	rte_eth_dev_probing_finish(dev);
	return 0;

error_remote:
	TAP_LOG(ERR, " Can't set up remote feature: %s(%d)",
		strerror(errno), errno);
	tap_flow_implicit_flush(pmd, NULL);

error_exit:
	if (pmd->nlsk_fd != -1)
		close(pmd->nlsk_fd);
	if (pmd->ka_fd != -1)
		close(pmd->ka_fd);
	if (pmd->ioctl_sock != -1)
		close(pmd->ioctl_sock);
	/* mac_addrs must not be freed alone because it is part of dev_private */
	dev->data->mac_addrs = NULL;
	rte_eth_dev_release_port(dev);

error_exit_nodev:
	TAP_LOG(ERR, "%s Unable to initialize %s",
		tuntap_name, rte_vdev_device_name(vdev));

	return -EINVAL;
}

/* Make sure the name is a possible Linux network device name. */
static bool
is_valid_iface(const char *name)
{
	if (*name == '\0')
		return false;

	if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
		return false;

	while (*name) {
		if (*name == '/' || *name == ':' || isspace(*name))
			return false;
		name++;
	}
	return true;
}

static int
set_interface_name(const char *key __rte_unused,
		   const char *value,
		   void *extra_args)
{
	char *name = (char *)extra_args;

	if (value) {
		if (!is_valid_iface(value)) {
			TAP_LOG(ERR, "TAP invalid interface name (%s)",
				value);
			return -1;
		}
		strlcpy(name, value, RTE_ETH_NAME_MAX_LEN);
	} else {
		/* use tap%d which causes kernel to choose next available */
		strlcpy(name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN);
	}
	return 0;
}

static int
set_remote_iface(const char *key __rte_unused,
		 const char *value,
		 void *extra_args)
{
	char *name = (char *)extra_args;

	if (value) {
		if (!is_valid_iface(value)) {
			TAP_LOG(ERR, "TAP invalid remote interface name (%s)",
				value);
			return -1;
		}
		strlcpy(name, value, RTE_ETH_NAME_MAX_LEN);
	}

	return 0;
}

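/*
 * Parse a user supplied MAC address string of the form ETH_TAP_USR_MAC_FMT
 * ("xx:xx:xx:xx:xx:xx") into @user_mac.
 * Returns the number of address bytes parsed; callers expect 6 on success.
 */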
static int parse_user_mac(struct rte_ether_addr *user_mac,
			  const char *value)
{
	unsigned int index = 0;
	char mac_temp[strlen(ETH_TAP_USR_MAC_FMT) + 1], *mac_byte = NULL;

	if (user_mac == NULL || value == NULL)
		return 0;

	strlcpy(mac_temp, value, sizeof(mac_temp));
	mac_byte = strtok(mac_temp, ":");

	while ((mac_byte != NULL) &&
			(strlen(mac_byte) <= 2) &&
			(strlen(mac_byte) == strspn(mac_byte,
				ETH_TAP_CMP_MAC_FMT))) {
		user_mac->addr_bytes[index++] = strtoul(mac_byte, NULL, 16);
		mac_byte = strtok(NULL, ":");
	}

	return index;
}

static int
set_mac_type(const char *key __rte_unused,
	     const char *value,
	     void *extra_args)
{
	struct rte_ether_addr *user_mac = extra_args;

	if (!value)
		return 0;

	if (!strncasecmp(ETH_TAP_MAC_FIXED, value, strlen(ETH_TAP_MAC_FIXED))) {
		static int iface_idx;

		/* fixed mac = 00:64:74:61:70:<iface_idx> */
		memcpy((char *)user_mac->addr_bytes, "\0dtap",
			RTE_ETHER_ADDR_LEN);
		user_mac->addr_bytes[RTE_ETHER_ADDR_LEN - 1] =
			iface_idx++ + '0';
		goto success;
	}

	if (parse_user_mac(user_mac, value) != 6)
		goto error;
success:
	TAP_LOG(DEBUG, "TAP user MAC param (%s)", value);
	return 0;

error:
	TAP_LOG(ERR, "TAP user MAC (%s) is not in format (%s|%s)",
		value, ETH_TAP_MAC_FIXED, ETH_TAP_USR_MAC_FMT);
	return -1;
}

/*
 * Open a TUN interface device. The TUN PMD:
 * 1) creates a TUN (not TAP) type device,
 * 2) takes the interface name from the "iface" argument,
 * 3) reports a fixed 10G link speed, as the interface is virtual.
 */
static int
rte_pmd_tun_probe(struct rte_vdev_device *dev)
{
	const char *name, *params;
	int ret;
	struct rte_kvargs *kvlist = NULL;
	char tun_name[RTE_ETH_NAME_MAX_LEN];
	char remote_iface[RTE_ETH_NAME_MAX_LEN];
	struct rte_eth_dev *eth_dev;

	name = rte_vdev_device_name(dev);
	params = rte_vdev_device_args(dev);
	memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
	    strlen(params) == 0) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			TAP_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		eth_dev->dev_ops = &ops;
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	/* use tun%d which causes kernel to choose next available */
	strlcpy(tun_name, DEFAULT_TUN_NAME "%d", RTE_ETH_NAME_MAX_LEN);

	if (params && (params[0] != '\0')) {
		TAP_LOG(DEBUG, "parameters (%s)", params);

		kvlist = rte_kvargs_parse(params, valid_arguments);
		if (kvlist) {
			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
				ret = rte_kvargs_process(kvlist,
					ETH_TAP_IFACE_ARG,
					&set_interface_name,
					tun_name);

				if (ret == -1)
					goto leave;
			}
		}
	}
	pmd_link.link_speed = ETH_SPEED_NUM_10G;

	TAP_LOG(DEBUG, "Initializing pmd_tun for %s", name);

	ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0,
				 ETH_TUNTAP_TYPE_TUN);

leave:
	if (ret == -1) {
		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
			name, tun_name);
	}
	rte_kvargs_free(kvlist);

	return ret;
}

/* Request queue file descriptors from secondary to primary. */
static int
tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
{
	int ret;
	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
	struct rte_mp_msg request, *reply;
	struct rte_mp_reply replies;
	struct ipc_queues *request_param = (struct ipc_queues *)request.param;
	struct ipc_queues *reply_param;
	struct pmd_process_private *process_private = dev->process_private;
	int queue, fd_iterator;

	/* Prepare the request */
	memset(&request, 0, sizeof(request));
	strlcpy(request.name, TAP_MP_KEY, sizeof(request.name));
	strlcpy(request_param->port_name, port_name,
		sizeof(request_param->port_name));
	request.len_param = sizeof(*request_param);
	/* Send request and receive reply */
	ret = rte_mp_request_sync(&request, &replies, &timeout);
	if (ret < 0 || replies.nb_received != 1) {
		TAP_LOG(ERR, "Failed to request queues from primary: %d",
			rte_errno);
		return -1;
	}
	reply = &replies.msgs[0];
	reply_param = (struct ipc_queues *)reply->param;
	TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);

	/* Attach the queues from received file descriptors */
	if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
		TAP_LOG(ERR, "Unexpected number of fds received");
		/* Free the reply buffer allocated by rte_mp_request_sync() */
		free(reply);
		return -1;
	}

	dev->data->nb_rx_queues = reply_param->rxq_count;
	dev->data->nb_tx_queues = reply_param->txq_count;
	fd_iterator = 0;
	for (queue = 0; queue < reply_param->rxq_count; queue++)
		process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
	for (queue = 0; queue < reply_param->txq_count; queue++)
		process_private->txq_fds[queue] = reply->fds[fd_iterator++];
	free(reply);
	return 0;
}

/* Send the queue file descriptors from the primary process to secondary. */
static int
tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
{
	struct rte_eth_dev *dev;
	struct pmd_process_private *process_private;
	struct rte_mp_msg reply;
	const struct ipc_queues *request_param =
		(const struct ipc_queues *)request->param;
	struct ipc_queues *reply_param =
		(struct ipc_queues *)reply.param;
	uint16_t port_id;
	int queue;
	int ret;

	/* Get requested port */
	TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
	if (ret) {
		TAP_LOG(ERR, "Failed to get port id for %s",
			request_param->port_name);
		return -1;
	}
	dev = &rte_eth_devices[port_id];
	process_private = dev->process_private;

	/* Fill file descriptors for all queues */
	reply.num_fds = 0;
	reply_param->rxq_count = 0;
	if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
			RTE_MP_MAX_FD_NUM) {
		TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
		return -1;
	}

	for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
		reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
		reply_param->rxq_count++;
	}
	RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);

	reply_param->txq_count = 0;
	for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
		reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
		reply_param->txq_count++;
	}
	RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);

	/* Send reply */
	strlcpy(reply.name, request->name, sizeof(reply.name));
	strlcpy(reply_param->port_name, request_param->port_name,
		sizeof(reply_param->port_name));
	reply.len_param = sizeof(*reply_param);
	if (rte_mp_reply(&reply, peer) < 0) {
		TAP_LOG(ERR, "Failed to reply to an IPC request to sync queues");
		return -1;
	}
	return 0;
}

/* Open a TAP interface device. */
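/*
 * Recognized devargs (see also RTE_PMD_REGISTER_PARAM_STRING below):
 *   iface=<string>   name of the tap netdevice to create (defaults to dtap%d)
 *   remote=<string>  existing kernel netdevice to pair with; its MAC address
 *                    is replicated and traffic is redirected to/from it
 *                    through implicit flow rules
 *   mac=fixed|xx:xx:xx:xx:xx:xx  MAC address to assign to the device
 */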
static int
rte_pmd_tap_probe(struct rte_vdev_device *dev)
{
	const char *name, *params;
	int ret;
	struct rte_kvargs *kvlist = NULL;
	int speed;
	char tap_name[RTE_ETH_NAME_MAX_LEN];
	char remote_iface[RTE_ETH_NAME_MAX_LEN];
	struct rte_ether_addr user_mac = { .addr_bytes = {0} };
	struct rte_eth_dev *eth_dev;
	int tap_devices_count_increased = 0;

	name = rte_vdev_device_name(dev);
	params = rte_vdev_device_args(dev);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			TAP_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		eth_dev->dev_ops = &ops;
		eth_dev->device = &dev->device;
		eth_dev->rx_pkt_burst = pmd_rx_burst;
		eth_dev->tx_pkt_burst = pmd_tx_burst;
		if (!rte_eal_primary_proc_alive(NULL)) {
			TAP_LOG(ERR, "Primary process is missing");
			return -1;
		}
		eth_dev->process_private = (struct pmd_process_private *)
			rte_zmalloc_socket(name,
				sizeof(struct pmd_process_private),
				RTE_CACHE_LINE_SIZE,
				eth_dev->device->numa_node);
		if (eth_dev->process_private == NULL) {
			TAP_LOG(ERR,
				"Failed to alloc memory for process private");
			return -1;
		}

		ret = tap_mp_attach_queues(name, eth_dev);
		if (ret != 0)
			return -1;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	speed = ETH_SPEED_NUM_10G;

	/* use tap%d which causes kernel to choose next available */
	strlcpy(tap_name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN);
	memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);

	if (params && (params[0] != '\0')) {
		TAP_LOG(DEBUG, "parameters (%s)", params);

		kvlist = rte_kvargs_parse(params, valid_arguments);
		if (kvlist) {
			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
				ret = rte_kvargs_process(kvlist,
					ETH_TAP_IFACE_ARG,
					&set_interface_name,
					tap_name);
				if (ret == -1)
					goto leave;
			}

			if (rte_kvargs_count(kvlist, ETH_TAP_REMOTE_ARG) == 1) {
				ret = rte_kvargs_process(kvlist,
					ETH_TAP_REMOTE_ARG,
					&set_remote_iface,
					remote_iface);
				if (ret == -1)
					goto leave;
			}

			if (rte_kvargs_count(kvlist, ETH_TAP_MAC_ARG) == 1) {
				ret = rte_kvargs_process(kvlist,
					ETH_TAP_MAC_ARG,
					&set_mac_type,
					&user_mac);
				if (ret == -1)
					goto leave;
			}
		}
	}
	pmd_link.link_speed = speed;

	TAP_LOG(DEBUG, "Initializing pmd_tap for %s", name);

	/* Register the IPC callback used to sync queue fds with secondaries */
	if (!tap_devices_count) {
		ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
		if (ret < 0 && rte_errno != ENOTSUP) {
			TAP_LOG(ERR, "tap: Failed to register IPC callback: %s",
				strerror(rte_errno));
			goto leave;
		}
	}
	tap_devices_count++;
	tap_devices_count_increased = 1;
	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
				 ETH_TUNTAP_TYPE_TAP);

leave:
	if (ret == -1) {
		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
			name, tap_name);
		if (tap_devices_count_increased == 1) {
			if (tap_devices_count == 1)
				rte_mp_action_unregister(TAP_MP_KEY);
			tap_devices_count--;
		}
	}
	rte_kvargs_free(kvlist);

	return ret;
}

/* Detach a TUN/TAP device. */
static int
rte_pmd_tap_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev = NULL;

	/* find the ethdev entry */
	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
	if (!eth_dev)
		return 0;

	tap_dev_close(eth_dev);
	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_tun_drv = {
	.probe = rte_pmd_tun_probe,
	.remove = rte_pmd_tap_remove,
};

static struct rte_vdev_driver pmd_tap_drv = {
	.probe = rte_pmd_tap_probe,
	.remove = rte_pmd_tap_remove,
};

RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
RTE_PMD_REGISTER_VDEV(net_tun, pmd_tun_drv);
RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
RTE_PMD_REGISTER_PARAM_STRING(net_tun,
			      ETH_TAP_IFACE_ARG "=<string> ");
RTE_PMD_REGISTER_PARAM_STRING(net_tap,
			      ETH_TAP_IFACE_ARG "=<string> "
			      ETH_TAP_MAC_ARG "=" ETH_TAP_MAC_ARG_FMT " "
			      ETH_TAP_REMOTE_ARG "=<string>");
RTE_LOG_REGISTER(tap_logtype, pmd.net.tap, NOTICE);
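
/*
 * Example EAL vdev strings exercising the arguments registered above, e.g.:
 *   --vdev=net_tap0,iface=tap0,mac=fixed,remote=eth0
 *   --vdev=net_tun0,iface=tun0
 * (instance names such as net_tap0/net_tun0 and the iface/remote values are
 * illustrative only)
 */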