1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2014 John W. Linville <linville@tuxdriver.com> 3 * Originally based upon librte_pmd_pcap code: 4 * Copyright(c) 2010-2015 Intel Corporation. 5 * Copyright(c) 2014 6WIND S.A. 6 * All rights reserved. 7 */ 8 9 #include <rte_string_fns.h> 10 #include <rte_mbuf.h> 11 #include <rte_ethdev_driver.h> 12 #include <rte_ethdev_vdev.h> 13 #include <rte_malloc.h> 14 #include <rte_kvargs.h> 15 #include <rte_bus_vdev.h> 16 17 #include <errno.h> 18 #include <linux/if_ether.h> 19 #include <linux/if_packet.h> 20 #include <arpa/inet.h> 21 #include <net/if.h> 22 #include <net/if_arp.h> 23 #include <sys/types.h> 24 #include <sys/socket.h> 25 #include <sys/ioctl.h> 26 #include <string.h> 27 #include <sys/mman.h> 28 #include <unistd.h> 29 #include <poll.h> 30 31 #define ETH_AF_PACKET_IFACE_ARG "iface" 32 #define ETH_AF_PACKET_NUM_Q_ARG "qpairs" 33 #define ETH_AF_PACKET_BLOCKSIZE_ARG "blocksz" 34 #define ETH_AF_PACKET_FRAMESIZE_ARG "framesz" 35 #define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt" 36 #define ETH_AF_PACKET_QDISC_BYPASS_ARG "qdisc_bypass" 37 38 #define DFLT_FRAME_SIZE (1 << 11) 39 #define DFLT_FRAME_COUNT (1 << 9) 40 41 struct pkt_rx_queue { 42 int sockfd; 43 44 struct iovec *rd; 45 uint8_t *map; 46 unsigned int framecount; 47 unsigned int framenum; 48 49 struct rte_mempool *mb_pool; 50 uint16_t in_port; 51 52 volatile unsigned long rx_pkts; 53 volatile unsigned long rx_bytes; 54 }; 55 56 struct pkt_tx_queue { 57 int sockfd; 58 unsigned int frame_data_size; 59 60 struct iovec *rd; 61 uint8_t *map; 62 unsigned int framecount; 63 unsigned int framenum; 64 65 volatile unsigned long tx_pkts; 66 volatile unsigned long err_pkts; 67 volatile unsigned long tx_bytes; 68 }; 69 70 struct pmd_internals { 71 unsigned nb_queues; 72 73 int if_index; 74 char *if_name; 75 struct rte_ether_addr eth_addr; 76 77 struct tpacket_req req; 78 79 struct pkt_rx_queue *rx_queue; 80 struct pkt_tx_queue *tx_queue; 81 }; 82 83 static const char *valid_arguments[] = { 84 ETH_AF_PACKET_IFACE_ARG, 85 ETH_AF_PACKET_NUM_Q_ARG, 86 ETH_AF_PACKET_BLOCKSIZE_ARG, 87 ETH_AF_PACKET_FRAMESIZE_ARG, 88 ETH_AF_PACKET_FRAMECOUNT_ARG, 89 ETH_AF_PACKET_QDISC_BYPASS_ARG, 90 NULL 91 }; 92 93 static struct rte_eth_link pmd_link = { 94 .link_speed = ETH_SPEED_NUM_10G, 95 .link_duplex = ETH_LINK_FULL_DUPLEX, 96 .link_status = ETH_LINK_DOWN, 97 .link_autoneg = ETH_LINK_FIXED, 98 }; 99 100 static int af_packet_logtype; 101 102 #define PMD_LOG(level, fmt, args...) \ 103 rte_log(RTE_LOG_ ## level, af_packet_logtype, \ 104 "%s(): " fmt "\n", __func__, ##args) 105 106 #define PMD_LOG_ERRNO(level, fmt, args...) \ 107 rte_log(RTE_LOG_ ## level, af_packet_logtype, \ 108 "%s(): " fmt ":%s\n", __func__, ##args, strerror(errno)) 109 110 static uint16_t 111 eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 112 { 113 unsigned i; 114 struct tpacket2_hdr *ppd; 115 struct rte_mbuf *mbuf; 116 uint8_t *pbuf; 117 struct pkt_rx_queue *pkt_q = queue; 118 uint16_t num_rx = 0; 119 unsigned long num_rx_bytes = 0; 120 unsigned int framecount, framenum; 121 122 if (unlikely(nb_pkts == 0)) 123 return 0; 124 125 /* 126 * Reads the given number of packets from the AF_PACKET socket one by 127 * one and copies the packet data into a newly allocated mbuf. 128 */ 129 framecount = pkt_q->framecount; 130 framenum = pkt_q->framenum; 131 for (i = 0; i < nb_pkts; i++) { 132 /* point at the next incoming frame */ 133 ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; 134 if ((ppd->tp_status & TP_STATUS_USER) == 0) 135 break; 136 137 /* allocate the next mbuf */ 138 mbuf = rte_pktmbuf_alloc(pkt_q->mb_pool); 139 if (unlikely(mbuf == NULL)) 140 break; 141 142 /* packet will fit in the mbuf, go ahead and receive it */ 143 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) = ppd->tp_snaplen; 144 pbuf = (uint8_t *) ppd + ppd->tp_mac; 145 memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf)); 146 147 /* check for vlan info */ 148 if (ppd->tp_status & TP_STATUS_VLAN_VALID) { 149 mbuf->vlan_tci = ppd->tp_vlan_tci; 150 mbuf->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED); 151 } 152 153 /* release incoming frame and advance ring buffer */ 154 ppd->tp_status = TP_STATUS_KERNEL; 155 if (++framenum >= framecount) 156 framenum = 0; 157 mbuf->port = pkt_q->in_port; 158 159 /* account for the receive frame */ 160 bufs[i] = mbuf; 161 num_rx++; 162 num_rx_bytes += mbuf->pkt_len; 163 } 164 pkt_q->framenum = framenum; 165 pkt_q->rx_pkts += num_rx; 166 pkt_q->rx_bytes += num_rx_bytes; 167 return num_rx; 168 } 169 170 /* 171 * Callback to handle sending packets through a real NIC. 172 */ 173 static uint16_t 174 eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 175 { 176 struct tpacket2_hdr *ppd; 177 struct rte_mbuf *mbuf; 178 uint8_t *pbuf; 179 unsigned int framecount, framenum; 180 struct pollfd pfd; 181 struct pkt_tx_queue *pkt_q = queue; 182 uint16_t num_tx = 0; 183 unsigned long num_tx_bytes = 0; 184 int i; 185 186 if (unlikely(nb_pkts == 0)) 187 return 0; 188 189 memset(&pfd, 0, sizeof(pfd)); 190 pfd.fd = pkt_q->sockfd; 191 pfd.events = POLLOUT; 192 pfd.revents = 0; 193 194 framecount = pkt_q->framecount; 195 framenum = pkt_q->framenum; 196 ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; 197 for (i = 0; i < nb_pkts; i++) { 198 mbuf = *bufs++; 199 200 /* drop oversized packets */ 201 if (mbuf->pkt_len > pkt_q->frame_data_size) { 202 rte_pktmbuf_free(mbuf); 203 continue; 204 } 205 206 /* insert vlan info if necessary */ 207 if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { 208 if (rte_vlan_insert(&mbuf)) { 209 rte_pktmbuf_free(mbuf); 210 continue; 211 } 212 } 213 214 /* point at the next incoming frame */ 215 if ((ppd->tp_status != TP_STATUS_AVAILABLE) && 216 (poll(&pfd, 1, -1) < 0)) 217 break; 218 219 /* copy the tx frame data */ 220 pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN - 221 sizeof(struct sockaddr_ll); 222 223 struct rte_mbuf *tmp_mbuf = mbuf; 224 while (tmp_mbuf) { 225 uint16_t data_len = rte_pktmbuf_data_len(tmp_mbuf); 226 memcpy(pbuf, rte_pktmbuf_mtod(tmp_mbuf, void*), data_len); 227 pbuf += data_len; 228 tmp_mbuf = tmp_mbuf->next; 229 } 230 231 ppd->tp_len = mbuf->pkt_len; 232 ppd->tp_snaplen = mbuf->pkt_len; 233 234 /* release incoming frame and advance ring buffer */ 235 ppd->tp_status = TP_STATUS_SEND_REQUEST; 236 if (++framenum >= framecount) 237 framenum = 0; 238 ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; 239 240 num_tx++; 241 num_tx_bytes += mbuf->pkt_len; 242 rte_pktmbuf_free(mbuf); 243 } 244 245 /* kick-off transmits */ 246 if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1 && 247 errno != ENOBUFS && errno != EAGAIN) { 248 /* 249 * In case of a ENOBUFS/EAGAIN error all of the enqueued 250 * packets will be considered successful even though only some 251 * are sent. 252 */ 253 254 num_tx = 0; 255 num_tx_bytes = 0; 256 } 257 258 pkt_q->framenum = framenum; 259 pkt_q->tx_pkts += num_tx; 260 pkt_q->err_pkts += i - num_tx; 261 pkt_q->tx_bytes += num_tx_bytes; 262 return i; 263 } 264 265 static int 266 eth_dev_start(struct rte_eth_dev *dev) 267 { 268 dev->data->dev_link.link_status = ETH_LINK_UP; 269 return 0; 270 } 271 272 /* 273 * This function gets called when the current port gets stopped. 274 */ 275 static void 276 eth_dev_stop(struct rte_eth_dev *dev) 277 { 278 unsigned i; 279 int sockfd; 280 struct pmd_internals *internals = dev->data->dev_private; 281 282 for (i = 0; i < internals->nb_queues; i++) { 283 sockfd = internals->rx_queue[i].sockfd; 284 if (sockfd != -1) 285 close(sockfd); 286 287 /* Prevent use after free in case tx fd == rx fd */ 288 if (sockfd != internals->tx_queue[i].sockfd) { 289 sockfd = internals->tx_queue[i].sockfd; 290 if (sockfd != -1) 291 close(sockfd); 292 } 293 294 internals->rx_queue[i].sockfd = -1; 295 internals->tx_queue[i].sockfd = -1; 296 } 297 298 dev->data->dev_link.link_status = ETH_LINK_DOWN; 299 } 300 301 static int 302 eth_dev_configure(struct rte_eth_dev *dev __rte_unused) 303 { 304 return 0; 305 } 306 307 static int 308 eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) 309 { 310 struct pmd_internals *internals = dev->data->dev_private; 311 312 dev_info->if_index = internals->if_index; 313 dev_info->max_mac_addrs = 1; 314 dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN; 315 dev_info->max_rx_queues = (uint16_t)internals->nb_queues; 316 dev_info->max_tx_queues = (uint16_t)internals->nb_queues; 317 dev_info->min_rx_bufsize = 0; 318 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS | 319 DEV_TX_OFFLOAD_VLAN_INSERT; 320 321 return 0; 322 } 323 324 static int 325 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats) 326 { 327 unsigned i, imax; 328 unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0; 329 unsigned long rx_bytes_total = 0, tx_bytes_total = 0; 330 const struct pmd_internals *internal = dev->data->dev_private; 331 332 imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ? 333 internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS); 334 for (i = 0; i < imax; i++) { 335 igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts; 336 igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes; 337 rx_total += igb_stats->q_ipackets[i]; 338 rx_bytes_total += igb_stats->q_ibytes[i]; 339 } 340 341 imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ? 342 internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS); 343 for (i = 0; i < imax; i++) { 344 igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts; 345 igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes; 346 tx_total += igb_stats->q_opackets[i]; 347 tx_err_total += internal->tx_queue[i].err_pkts; 348 tx_bytes_total += igb_stats->q_obytes[i]; 349 } 350 351 igb_stats->ipackets = rx_total; 352 igb_stats->ibytes = rx_bytes_total; 353 igb_stats->opackets = tx_total; 354 igb_stats->oerrors = tx_err_total; 355 igb_stats->obytes = tx_bytes_total; 356 return 0; 357 } 358 359 static int 360 eth_stats_reset(struct rte_eth_dev *dev) 361 { 362 unsigned i; 363 struct pmd_internals *internal = dev->data->dev_private; 364 365 for (i = 0; i < internal->nb_queues; i++) { 366 internal->rx_queue[i].rx_pkts = 0; 367 internal->rx_queue[i].rx_bytes = 0; 368 } 369 370 for (i = 0; i < internal->nb_queues; i++) { 371 internal->tx_queue[i].tx_pkts = 0; 372 internal->tx_queue[i].err_pkts = 0; 373 internal->tx_queue[i].tx_bytes = 0; 374 } 375 376 return 0; 377 } 378 379 static void 380 eth_dev_close(struct rte_eth_dev *dev __rte_unused) 381 { 382 } 383 384 static void 385 eth_queue_release(void *q __rte_unused) 386 { 387 } 388 389 static int 390 eth_link_update(struct rte_eth_dev *dev __rte_unused, 391 int wait_to_complete __rte_unused) 392 { 393 return 0; 394 } 395 396 static int 397 eth_rx_queue_setup(struct rte_eth_dev *dev, 398 uint16_t rx_queue_id, 399 uint16_t nb_rx_desc __rte_unused, 400 unsigned int socket_id __rte_unused, 401 const struct rte_eth_rxconf *rx_conf __rte_unused, 402 struct rte_mempool *mb_pool) 403 { 404 struct pmd_internals *internals = dev->data->dev_private; 405 struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id]; 406 unsigned int buf_size, data_size; 407 408 pkt_q->mb_pool = mb_pool; 409 410 /* Now get the space available for data in the mbuf */ 411 buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) - 412 RTE_PKTMBUF_HEADROOM; 413 data_size = internals->req.tp_frame_size; 414 data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); 415 416 if (data_size > buf_size) { 417 PMD_LOG(ERR, 418 "%s: %d bytes will not fit in mbuf (%d bytes)", 419 dev->device->name, data_size, buf_size); 420 return -ENOMEM; 421 } 422 423 dev->data->rx_queues[rx_queue_id] = pkt_q; 424 pkt_q->in_port = dev->data->port_id; 425 426 return 0; 427 } 428 429 static int 430 eth_tx_queue_setup(struct rte_eth_dev *dev, 431 uint16_t tx_queue_id, 432 uint16_t nb_tx_desc __rte_unused, 433 unsigned int socket_id __rte_unused, 434 const struct rte_eth_txconf *tx_conf __rte_unused) 435 { 436 437 struct pmd_internals *internals = dev->data->dev_private; 438 439 dev->data->tx_queues[tx_queue_id] = &internals->tx_queue[tx_queue_id]; 440 return 0; 441 } 442 443 static int 444 eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 445 { 446 struct pmd_internals *internals = dev->data->dev_private; 447 struct ifreq ifr = { .ifr_mtu = mtu }; 448 int ret; 449 int s; 450 unsigned int data_size = internals->req.tp_frame_size - 451 TPACKET2_HDRLEN; 452 453 if (mtu > data_size) 454 return -EINVAL; 455 456 s = socket(PF_INET, SOCK_DGRAM, 0); 457 if (s < 0) 458 return -EINVAL; 459 460 strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ); 461 ret = ioctl(s, SIOCSIFMTU, &ifr); 462 close(s); 463 464 if (ret < 0) 465 return -EINVAL; 466 467 return 0; 468 } 469 470 static int 471 eth_dev_macaddr_set(struct rte_eth_dev *dev, struct rte_ether_addr *addr) 472 { 473 struct pmd_internals *internals = dev->data->dev_private; 474 struct ifreq ifr = { }; 475 int sockfd = internals->rx_queue[0].sockfd; 476 int ret; 477 478 if (sockfd == -1) { 479 PMD_LOG(ERR, "receive socket not found"); 480 return -EINVAL; 481 } 482 483 strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ); 484 ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; 485 memcpy(ifr.ifr_hwaddr.sa_data, addr, sizeof(*addr)); 486 ret = ioctl(sockfd, SIOCSIFHWADDR, &ifr); 487 488 if (ret < 0) { 489 PMD_LOG_ERRNO(ERR, "ioctl(SIOCSIFHWADDR) failed"); 490 return -EINVAL; 491 } 492 493 return 0; 494 } 495 496 static int 497 eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask) 498 { 499 struct ifreq ifr; 500 int ret = 0; 501 int s; 502 503 s = socket(PF_INET, SOCK_DGRAM, 0); 504 if (s < 0) 505 return -errno; 506 507 strlcpy(ifr.ifr_name, if_name, IFNAMSIZ); 508 if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) { 509 ret = -errno; 510 goto out; 511 } 512 ifr.ifr_flags &= mask; 513 ifr.ifr_flags |= flags; 514 if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) { 515 ret = -errno; 516 goto out; 517 } 518 out: 519 close(s); 520 return ret; 521 } 522 523 static int 524 eth_dev_promiscuous_enable(struct rte_eth_dev *dev) 525 { 526 struct pmd_internals *internals = dev->data->dev_private; 527 528 return eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0); 529 } 530 531 static int 532 eth_dev_promiscuous_disable(struct rte_eth_dev *dev) 533 { 534 struct pmd_internals *internals = dev->data->dev_private; 535 536 return eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC); 537 } 538 539 static const struct eth_dev_ops ops = { 540 .dev_start = eth_dev_start, 541 .dev_stop = eth_dev_stop, 542 .dev_close = eth_dev_close, 543 .dev_configure = eth_dev_configure, 544 .dev_infos_get = eth_dev_info, 545 .mac_addr_set = eth_dev_macaddr_set, 546 .mtu_set = eth_dev_mtu_set, 547 .promiscuous_enable = eth_dev_promiscuous_enable, 548 .promiscuous_disable = eth_dev_promiscuous_disable, 549 .rx_queue_setup = eth_rx_queue_setup, 550 .tx_queue_setup = eth_tx_queue_setup, 551 .rx_queue_release = eth_queue_release, 552 .tx_queue_release = eth_queue_release, 553 .link_update = eth_link_update, 554 .stats_get = eth_stats_get, 555 .stats_reset = eth_stats_reset, 556 }; 557 558 /* 559 * Opens an AF_PACKET socket 560 */ 561 static int 562 open_packet_iface(const char *key __rte_unused, 563 const char *value __rte_unused, 564 void *extra_args) 565 { 566 int *sockfd = extra_args; 567 568 /* Open an AF_PACKET socket... */ 569 *sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 570 if (*sockfd == -1) { 571 PMD_LOG(ERR, "Could not open AF_PACKET socket"); 572 return -1; 573 } 574 575 return 0; 576 } 577 578 static int 579 rte_pmd_init_internals(struct rte_vdev_device *dev, 580 const int sockfd, 581 const unsigned nb_queues, 582 unsigned int blocksize, 583 unsigned int blockcnt, 584 unsigned int framesize, 585 unsigned int framecnt, 586 unsigned int qdisc_bypass, 587 struct pmd_internals **internals, 588 struct rte_eth_dev **eth_dev, 589 struct rte_kvargs *kvlist) 590 { 591 const char *name = rte_vdev_device_name(dev); 592 const unsigned int numa_node = dev->device.numa_node; 593 struct rte_eth_dev_data *data = NULL; 594 struct rte_kvargs_pair *pair = NULL; 595 struct ifreq ifr; 596 size_t ifnamelen; 597 unsigned k_idx; 598 struct sockaddr_ll sockaddr; 599 struct tpacket_req *req; 600 struct pkt_rx_queue *rx_queue; 601 struct pkt_tx_queue *tx_queue; 602 int rc, tpver, discard; 603 int qsockfd = -1; 604 unsigned int i, q, rdsize; 605 #if defined(PACKET_FANOUT) 606 int fanout_arg; 607 #endif 608 609 for (k_idx = 0; k_idx < kvlist->count; k_idx++) { 610 pair = &kvlist->pairs[k_idx]; 611 if (strstr(pair->key, ETH_AF_PACKET_IFACE_ARG) != NULL) 612 break; 613 } 614 if (pair == NULL) { 615 PMD_LOG(ERR, 616 "%s: no interface specified for AF_PACKET ethdev", 617 name); 618 return -1; 619 } 620 621 PMD_LOG(INFO, 622 "%s: creating AF_PACKET-backed ethdev on numa socket %u", 623 name, numa_node); 624 625 *internals = rte_zmalloc_socket(name, sizeof(**internals), 626 0, numa_node); 627 if (*internals == NULL) 628 return -1; 629 630 631 (*internals)->rx_queue = rte_calloc_socket("af_packet_rx", 632 nb_queues, 633 sizeof(struct pkt_rx_queue), 634 0, numa_node); 635 (*internals)->tx_queue = rte_calloc_socket("af_packet_tx", 636 nb_queues, 637 sizeof(struct pkt_tx_queue), 638 0, numa_node); 639 if (!(*internals)->rx_queue || !(*internals)->tx_queue) { 640 rte_free((*internals)->rx_queue); 641 rte_free((*internals)->tx_queue); 642 return -1; 643 } 644 645 for (q = 0; q < nb_queues; q++) { 646 (*internals)->rx_queue[q].map = MAP_FAILED; 647 (*internals)->tx_queue[q].map = MAP_FAILED; 648 } 649 650 req = &((*internals)->req); 651 652 req->tp_block_size = blocksize; 653 req->tp_block_nr = blockcnt; 654 req->tp_frame_size = framesize; 655 req->tp_frame_nr = framecnt; 656 657 ifnamelen = strlen(pair->value); 658 if (ifnamelen < sizeof(ifr.ifr_name)) { 659 memcpy(ifr.ifr_name, pair->value, ifnamelen); 660 ifr.ifr_name[ifnamelen] = '\0'; 661 } else { 662 PMD_LOG(ERR, 663 "%s: I/F name too long (%s)", 664 name, pair->value); 665 return -1; 666 } 667 if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) { 668 PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFINDEX)", name); 669 return -1; 670 } 671 (*internals)->if_name = strdup(pair->value); 672 if ((*internals)->if_name == NULL) 673 return -1; 674 (*internals)->if_index = ifr.ifr_ifindex; 675 676 if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) { 677 PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFHWADDR)", name); 678 return -1; 679 } 680 memcpy(&(*internals)->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN); 681 682 memset(&sockaddr, 0, sizeof(sockaddr)); 683 sockaddr.sll_family = AF_PACKET; 684 sockaddr.sll_protocol = htons(ETH_P_ALL); 685 sockaddr.sll_ifindex = (*internals)->if_index; 686 687 #if defined(PACKET_FANOUT) 688 fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff; 689 fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16; 690 #if defined(PACKET_FANOUT_FLAG_ROLLOVER) 691 fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16; 692 #endif 693 #endif 694 695 for (q = 0; q < nb_queues; q++) { 696 /* Open an AF_PACKET socket for this queue... */ 697 qsockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 698 if (qsockfd == -1) { 699 PMD_LOG_ERRNO(ERR, 700 "%s: could not open AF_PACKET socket", 701 name); 702 return -1; 703 } 704 705 tpver = TPACKET_V2; 706 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_VERSION, 707 &tpver, sizeof(tpver)); 708 if (rc == -1) { 709 PMD_LOG_ERRNO(ERR, 710 "%s: could not set PACKET_VERSION on AF_PACKET socket for %s", 711 name, pair->value); 712 goto error; 713 } 714 715 discard = 1; 716 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_LOSS, 717 &discard, sizeof(discard)); 718 if (rc == -1) { 719 PMD_LOG_ERRNO(ERR, 720 "%s: could not set PACKET_LOSS on AF_PACKET socket for %s", 721 name, pair->value); 722 goto error; 723 } 724 725 #if defined(PACKET_QDISC_BYPASS) 726 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_QDISC_BYPASS, 727 &qdisc_bypass, sizeof(qdisc_bypass)); 728 if (rc == -1) { 729 PMD_LOG_ERRNO(ERR, 730 "%s: could not set PACKET_QDISC_BYPASS on AF_PACKET socket for %s", 731 name, pair->value); 732 goto error; 733 } 734 #else 735 RTE_SET_USED(qdisc_bypass); 736 #endif 737 738 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req)); 739 if (rc == -1) { 740 PMD_LOG_ERRNO(ERR, 741 "%s: could not set PACKET_RX_RING on AF_PACKET socket for %s", 742 name, pair->value); 743 goto error; 744 } 745 746 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_TX_RING, req, sizeof(*req)); 747 if (rc == -1) { 748 PMD_LOG_ERRNO(ERR, 749 "%s: could not set PACKET_TX_RING on AF_PACKET " 750 "socket for %s", name, pair->value); 751 goto error; 752 } 753 754 rx_queue = &((*internals)->rx_queue[q]); 755 rx_queue->framecount = req->tp_frame_nr; 756 757 rx_queue->map = mmap(NULL, 2 * req->tp_block_size * req->tp_block_nr, 758 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, 759 qsockfd, 0); 760 if (rx_queue->map == MAP_FAILED) { 761 PMD_LOG_ERRNO(ERR, 762 "%s: call to mmap failed on AF_PACKET socket for %s", 763 name, pair->value); 764 goto error; 765 } 766 767 /* rdsize is same for both Tx and Rx */ 768 rdsize = req->tp_frame_nr * sizeof(*(rx_queue->rd)); 769 770 rx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node); 771 if (rx_queue->rd == NULL) 772 goto error; 773 for (i = 0; i < req->tp_frame_nr; ++i) { 774 rx_queue->rd[i].iov_base = rx_queue->map + (i * framesize); 775 rx_queue->rd[i].iov_len = req->tp_frame_size; 776 } 777 rx_queue->sockfd = qsockfd; 778 779 tx_queue = &((*internals)->tx_queue[q]); 780 tx_queue->framecount = req->tp_frame_nr; 781 tx_queue->frame_data_size = req->tp_frame_size; 782 tx_queue->frame_data_size -= TPACKET2_HDRLEN - 783 sizeof(struct sockaddr_ll); 784 785 tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr; 786 787 tx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node); 788 if (tx_queue->rd == NULL) 789 goto error; 790 for (i = 0; i < req->tp_frame_nr; ++i) { 791 tx_queue->rd[i].iov_base = tx_queue->map + (i * framesize); 792 tx_queue->rd[i].iov_len = req->tp_frame_size; 793 } 794 tx_queue->sockfd = qsockfd; 795 796 rc = bind(qsockfd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)); 797 if (rc == -1) { 798 PMD_LOG_ERRNO(ERR, 799 "%s: could not bind AF_PACKET socket to %s", 800 name, pair->value); 801 goto error; 802 } 803 804 #if defined(PACKET_FANOUT) 805 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT, 806 &fanout_arg, sizeof(fanout_arg)); 807 if (rc == -1) { 808 PMD_LOG_ERRNO(ERR, 809 "%s: could not set PACKET_FANOUT on AF_PACKET socket for %s", 810 name, pair->value); 811 goto error; 812 } 813 #endif 814 } 815 816 /* reserve an ethdev entry */ 817 *eth_dev = rte_eth_vdev_allocate(dev, 0); 818 if (*eth_dev == NULL) 819 goto error; 820 821 /* 822 * now put it all together 823 * - store queue data in internals, 824 * - store numa_node in eth_dev 825 * - point eth_dev_data to internals 826 * - and point eth_dev structure to new eth_dev_data structure 827 */ 828 829 (*internals)->nb_queues = nb_queues; 830 831 data = (*eth_dev)->data; 832 data->dev_private = *internals; 833 data->nb_rx_queues = (uint16_t)nb_queues; 834 data->nb_tx_queues = (uint16_t)nb_queues; 835 data->dev_link = pmd_link; 836 data->mac_addrs = &(*internals)->eth_addr; 837 838 (*eth_dev)->dev_ops = &ops; 839 840 return 0; 841 842 error: 843 if (qsockfd != -1) 844 close(qsockfd); 845 for (q = 0; q < nb_queues; q++) { 846 munmap((*internals)->rx_queue[q].map, 847 2 * req->tp_block_size * req->tp_block_nr); 848 849 rte_free((*internals)->rx_queue[q].rd); 850 rte_free((*internals)->tx_queue[q].rd); 851 if (((*internals)->rx_queue[q].sockfd != 0) && 852 ((*internals)->rx_queue[q].sockfd != qsockfd)) 853 close((*internals)->rx_queue[q].sockfd); 854 } 855 free((*internals)->if_name); 856 rte_free(*internals); 857 return -1; 858 } 859 860 static int 861 rte_eth_from_packet(struct rte_vdev_device *dev, 862 int const *sockfd, 863 struct rte_kvargs *kvlist) 864 { 865 const char *name = rte_vdev_device_name(dev); 866 struct pmd_internals *internals = NULL; 867 struct rte_eth_dev *eth_dev = NULL; 868 struct rte_kvargs_pair *pair = NULL; 869 unsigned k_idx; 870 unsigned int blockcount; 871 unsigned int blocksize; 872 unsigned int framesize = DFLT_FRAME_SIZE; 873 unsigned int framecount = DFLT_FRAME_COUNT; 874 unsigned int qpairs = 1; 875 unsigned int qdisc_bypass = 1; 876 877 /* do some parameter checking */ 878 if (*sockfd < 0) 879 return -1; 880 881 blocksize = getpagesize(); 882 883 /* 884 * Walk arguments for configurable settings 885 */ 886 for (k_idx = 0; k_idx < kvlist->count; k_idx++) { 887 pair = &kvlist->pairs[k_idx]; 888 if (strstr(pair->key, ETH_AF_PACKET_NUM_Q_ARG) != NULL) { 889 qpairs = atoi(pair->value); 890 if (qpairs < 1) { 891 PMD_LOG(ERR, 892 "%s: invalid qpairs value", 893 name); 894 return -1; 895 } 896 continue; 897 } 898 if (strstr(pair->key, ETH_AF_PACKET_BLOCKSIZE_ARG) != NULL) { 899 blocksize = atoi(pair->value); 900 if (!blocksize) { 901 PMD_LOG(ERR, 902 "%s: invalid blocksize value", 903 name); 904 return -1; 905 } 906 continue; 907 } 908 if (strstr(pair->key, ETH_AF_PACKET_FRAMESIZE_ARG) != NULL) { 909 framesize = atoi(pair->value); 910 if (!framesize) { 911 PMD_LOG(ERR, 912 "%s: invalid framesize value", 913 name); 914 return -1; 915 } 916 continue; 917 } 918 if (strstr(pair->key, ETH_AF_PACKET_FRAMECOUNT_ARG) != NULL) { 919 framecount = atoi(pair->value); 920 if (!framecount) { 921 PMD_LOG(ERR, 922 "%s: invalid framecount value", 923 name); 924 return -1; 925 } 926 continue; 927 } 928 if (strstr(pair->key, ETH_AF_PACKET_QDISC_BYPASS_ARG) != NULL) { 929 qdisc_bypass = atoi(pair->value); 930 if (qdisc_bypass > 1) { 931 PMD_LOG(ERR, 932 "%s: invalid bypass value", 933 name); 934 return -1; 935 } 936 continue; 937 } 938 } 939 940 if (framesize > blocksize) { 941 PMD_LOG(ERR, 942 "%s: AF_PACKET MMAP frame size exceeds block size!", 943 name); 944 return -1; 945 } 946 947 blockcount = framecount / (blocksize / framesize); 948 if (!blockcount) { 949 PMD_LOG(ERR, 950 "%s: invalid AF_PACKET MMAP parameters", name); 951 return -1; 952 } 953 954 PMD_LOG(INFO, "%s: AF_PACKET MMAP parameters:", name); 955 PMD_LOG(INFO, "%s:\tblock size %d", name, blocksize); 956 PMD_LOG(INFO, "%s:\tblock count %d", name, blockcount); 957 PMD_LOG(INFO, "%s:\tframe size %d", name, framesize); 958 PMD_LOG(INFO, "%s:\tframe count %d", name, framecount); 959 960 if (rte_pmd_init_internals(dev, *sockfd, qpairs, 961 blocksize, blockcount, 962 framesize, framecount, 963 qdisc_bypass, 964 &internals, ð_dev, 965 kvlist) < 0) 966 return -1; 967 968 eth_dev->rx_pkt_burst = eth_af_packet_rx; 969 eth_dev->tx_pkt_burst = eth_af_packet_tx; 970 971 rte_eth_dev_probing_finish(eth_dev); 972 return 0; 973 } 974 975 static int 976 rte_pmd_af_packet_probe(struct rte_vdev_device *dev) 977 { 978 int ret = 0; 979 struct rte_kvargs *kvlist; 980 int sockfd = -1; 981 struct rte_eth_dev *eth_dev; 982 const char *name = rte_vdev_device_name(dev); 983 984 PMD_LOG(INFO, "Initializing pmd_af_packet for %s", name); 985 986 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 987 eth_dev = rte_eth_dev_attach_secondary(name); 988 if (!eth_dev) { 989 PMD_LOG(ERR, "Failed to probe %s", name); 990 return -1; 991 } 992 /* TODO: request info from primary to set up Rx and Tx */ 993 eth_dev->dev_ops = &ops; 994 eth_dev->device = &dev->device; 995 rte_eth_dev_probing_finish(eth_dev); 996 return 0; 997 } 998 999 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); 1000 if (kvlist == NULL) { 1001 ret = -1; 1002 goto exit; 1003 } 1004 1005 /* 1006 * If iface argument is passed we open the NICs and use them for 1007 * reading / writing 1008 */ 1009 if (rte_kvargs_count(kvlist, ETH_AF_PACKET_IFACE_ARG) == 1) { 1010 1011 ret = rte_kvargs_process(kvlist, ETH_AF_PACKET_IFACE_ARG, 1012 &open_packet_iface, &sockfd); 1013 if (ret < 0) 1014 goto exit; 1015 } 1016 1017 if (dev->device.numa_node == SOCKET_ID_ANY) 1018 dev->device.numa_node = rte_socket_id(); 1019 1020 ret = rte_eth_from_packet(dev, &sockfd, kvlist); 1021 close(sockfd); /* no longer needed */ 1022 1023 exit: 1024 rte_kvargs_free(kvlist); 1025 return ret; 1026 } 1027 1028 static int 1029 rte_pmd_af_packet_remove(struct rte_vdev_device *dev) 1030 { 1031 struct rte_eth_dev *eth_dev = NULL; 1032 struct pmd_internals *internals; 1033 struct tpacket_req *req; 1034 unsigned q; 1035 1036 PMD_LOG(INFO, "Closing AF_PACKET ethdev on numa socket %u", 1037 rte_socket_id()); 1038 1039 if (dev == NULL) 1040 return -1; 1041 1042 /* find the ethdev entry */ 1043 eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev)); 1044 if (eth_dev == NULL) 1045 return -1; 1046 1047 /* mac_addrs must not be freed alone because part of dev_private */ 1048 eth_dev->data->mac_addrs = NULL; 1049 1050 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1051 return rte_eth_dev_release_port(eth_dev); 1052 1053 internals = eth_dev->data->dev_private; 1054 req = &internals->req; 1055 for (q = 0; q < internals->nb_queues; q++) { 1056 munmap(internals->rx_queue[q].map, 1057 2 * req->tp_block_size * req->tp_block_nr); 1058 rte_free(internals->rx_queue[q].rd); 1059 rte_free(internals->tx_queue[q].rd); 1060 } 1061 free(internals->if_name); 1062 rte_free(internals->rx_queue); 1063 rte_free(internals->tx_queue); 1064 1065 rte_eth_dev_release_port(eth_dev); 1066 1067 return 0; 1068 } 1069 1070 static struct rte_vdev_driver pmd_af_packet_drv = { 1071 .probe = rte_pmd_af_packet_probe, 1072 .remove = rte_pmd_af_packet_remove, 1073 }; 1074 1075 RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv); 1076 RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet); 1077 RTE_PMD_REGISTER_PARAM_STRING(net_af_packet, 1078 "iface=<string> " 1079 "qpairs=<int> " 1080 "blocksz=<int> " 1081 "framesz=<int> " 1082 "framecnt=<int> " 1083 "qdisc_bypass=<0|1>"); 1084 1085 RTE_INIT(af_packet_init_log) 1086 { 1087 af_packet_logtype = rte_log_register("pmd.net.packet"); 1088 if (af_packet_logtype >= 0) 1089 rte_log_set_level(af_packet_logtype, RTE_LOG_NOTICE); 1090 } 1091