/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2014 John W. Linville <linville@tuxdriver.com>
 * Originally based upon librte_pmd_pcap code:
 * Copyright(c) 2010-2015 Intel Corporation.
 * Copyright(c) 2014 6WIND S.A.
 * All rights reserved.
 */

#include <rte_string_fns.h>
#include <rte_mbuf.h>
#include <ethdev_driver.h>
#include <ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>

#include <errno.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <poll.h>

#define ETH_AF_PACKET_IFACE_ARG		"iface"
#define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
#define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
#define ETH_AF_PACKET_FRAMESIZE_ARG	"framesz"
#define ETH_AF_PACKET_FRAMECOUNT_ARG	"framecnt"
#define ETH_AF_PACKET_QDISC_BYPASS_ARG	"qdisc_bypass"

#define DFLT_FRAME_SIZE		(1 << 11)
#define DFLT_FRAME_COUNT	(1 << 9)

struct pkt_rx_queue {
	int sockfd;

	struct iovec *rd;
	uint8_t *map;
	unsigned int framecount;
	unsigned int framenum;

	struct rte_mempool *mb_pool;
	uint16_t in_port;

	volatile unsigned long rx_pkts;
	volatile unsigned long rx_bytes;
};

struct pkt_tx_queue {
	int sockfd;
	unsigned int frame_data_size;

	struct iovec *rd;
	uint8_t *map;
	unsigned int framecount;
	unsigned int framenum;

	volatile unsigned long tx_pkts;
	volatile unsigned long err_pkts;
	volatile unsigned long tx_bytes;
};

struct pmd_internals {
	unsigned nb_queues;

	int if_index;
	char *if_name;
	struct rte_ether_addr eth_addr;

	struct tpacket_req req;

	struct pkt_rx_queue *rx_queue;
	struct pkt_tx_queue *tx_queue;
};

static const char *valid_arguments[] = {
	ETH_AF_PACKET_IFACE_ARG,
	ETH_AF_PACKET_NUM_Q_ARG,
	ETH_AF_PACKET_BLOCKSIZE_ARG,
	ETH_AF_PACKET_FRAMESIZE_ARG,
	ETH_AF_PACKET_FRAMECOUNT_ARG,
	ETH_AF_PACKET_QDISC_BYPASS_ARG,
	NULL
};

static struct rte_eth_link pmd_link = {
	.link_speed = ETH_SPEED_NUM_10G,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN,
	.link_autoneg = ETH_LINK_FIXED,
};

RTE_LOG_REGISTER(af_packet_logtype, pmd.net.af_packet, NOTICE);

#define PMD_LOG(level, fmt, args...) \
	rte_log(RTE_LOG_ ## level, af_packet_logtype, \
		"%s(): " fmt "\n", __func__, ##args)

#define PMD_LOG_ERRNO(level, fmt, args...) \
	rte_log(RTE_LOG_ ## level, af_packet_logtype, \
		"%s(): " fmt ":%s\n", __func__, ##args, strerror(errno))
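
/*
 * The RX burst below works against a memory-mapped TPACKET_V2 ring: the
 * kernel hands a frame to user space by setting TP_STATUS_USER in
 * tp_status, and the slot is handed back by writing TP_STATUS_KERNEL
 * once the data has been copied into an mbuf.
 */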

static uint16_t
eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	unsigned i;
	struct tpacket2_hdr *ppd;
	struct rte_mbuf *mbuf;
	uint8_t *pbuf;
	struct pkt_rx_queue *pkt_q = queue;
	uint16_t num_rx = 0;
	unsigned long num_rx_bytes = 0;
	unsigned int framecount, framenum;

	if (unlikely(nb_pkts == 0))
		return 0;

	/*
	 * Reads the given number of packets from the AF_PACKET socket one by
	 * one and copies the packet data into a newly allocated mbuf.
	 */
	framecount = pkt_q->framecount;
	framenum = pkt_q->framenum;
	for (i = 0; i < nb_pkts; i++) {
		/* point at the next incoming frame */
		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
		if ((ppd->tp_status & TP_STATUS_USER) == 0)
			break;

		/* allocate the next mbuf */
		mbuf = rte_pktmbuf_alloc(pkt_q->mb_pool);
		if (unlikely(mbuf == NULL))
			break;

		/* packet will fit in the mbuf, go ahead and receive it */
		rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) = ppd->tp_snaplen;
		pbuf = (uint8_t *) ppd + ppd->tp_mac;
		memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf));

		/* check for vlan info */
		if (ppd->tp_status & TP_STATUS_VLAN_VALID) {
			mbuf->vlan_tci = ppd->tp_vlan_tci;
			mbuf->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
		}

		/* release incoming frame and advance ring buffer */
		ppd->tp_status = TP_STATUS_KERNEL;
		if (++framenum >= framecount)
			framenum = 0;
		mbuf->port = pkt_q->in_port;

		/* account for the receive frame */
		bufs[i] = mbuf;
		num_rx++;
		num_rx_bytes += mbuf->pkt_len;
	}
	pkt_q->framenum = framenum;
	pkt_q->rx_pkts += num_rx;
	pkt_q->rx_bytes += num_rx_bytes;
	return num_rx;
}
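
/*
 * On the TX ring a slot is writable once the kernel has marked it
 * TP_STATUS_AVAILABLE; the burst handler copies the mbuf chain in, sets
 * TP_STATUS_SEND_REQUEST, and a final zero-length sendto() asks the
 * kernel to transmit everything queued on the ring.
 */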

/*
 * Callback to handle sending packets through the kernel network interface.
 */
static uint16_t
eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct tpacket2_hdr *ppd;
	struct rte_mbuf *mbuf;
	uint8_t *pbuf;
	unsigned int framecount, framenum;
	struct pollfd pfd;
	struct pkt_tx_queue *pkt_q = queue;
	uint16_t num_tx = 0;
	unsigned long num_tx_bytes = 0;
	int i;

	if (unlikely(nb_pkts == 0))
		return 0;

	memset(&pfd, 0, sizeof(pfd));
	pfd.fd = pkt_q->sockfd;
	pfd.events = POLLOUT;
	pfd.revents = 0;

	framecount = pkt_q->framecount;
	framenum = pkt_q->framenum;
	ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
	for (i = 0; i < nb_pkts; i++) {
		mbuf = *bufs++;

		/* drop oversized packets */
		if (mbuf->pkt_len > pkt_q->frame_data_size) {
			rte_pktmbuf_free(mbuf);
			continue;
		}

		/* insert vlan info if necessary */
		if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
			if (rte_vlan_insert(&mbuf)) {
				rte_pktmbuf_free(mbuf);
				continue;
			}
		}

		/* wait for the next frame slot to become available */
		if ((ppd->tp_status != TP_STATUS_AVAILABLE) &&
		    (poll(&pfd, 1, -1) < 0))
			break;

		/* copy the tx frame data */
		pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN -
			sizeof(struct sockaddr_ll);

		struct rte_mbuf *tmp_mbuf = mbuf;
		while (tmp_mbuf) {
			uint16_t data_len = rte_pktmbuf_data_len(tmp_mbuf);
			memcpy(pbuf, rte_pktmbuf_mtod(tmp_mbuf, void*), data_len);
			pbuf += data_len;
			tmp_mbuf = tmp_mbuf->next;
		}

		ppd->tp_len = mbuf->pkt_len;
		ppd->tp_snaplen = mbuf->pkt_len;

		/* hand the frame to the kernel and advance ring buffer */
		ppd->tp_status = TP_STATUS_SEND_REQUEST;
		if (++framenum >= framecount)
			framenum = 0;
		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;

		num_tx++;
		num_tx_bytes += mbuf->pkt_len;
		rte_pktmbuf_free(mbuf);
	}

	/* kick-off transmits */
	if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1 &&
	    errno != ENOBUFS && errno != EAGAIN) {
		/*
		 * In case of an ENOBUFS/EAGAIN error all of the enqueued
		 * packets will be considered successful even though only some
		 * are sent.
		 */

		num_tx = 0;
		num_tx_bytes = 0;
	}

	pkt_q->framenum = framenum;
	pkt_q->tx_pkts += num_tx;
	pkt_q->err_pkts += i - num_tx;
	pkt_q->tx_bytes += num_tx_bytes;
	return i;
}

static int
eth_dev_start(struct rte_eth_dev *dev)
{
	dev->data->dev_link.link_status = ETH_LINK_UP;
	return 0;
}

/*
 * This function gets called when the current port gets stopped.
 */
static int
eth_dev_stop(struct rte_eth_dev *dev)
{
	unsigned i;
	int sockfd;
	struct pmd_internals *internals = dev->data->dev_private;

	for (i = 0; i < internals->nb_queues; i++) {
		sockfd = internals->rx_queue[i].sockfd;
		if (sockfd != -1)
			close(sockfd);

		/* Prevent use after free in case tx fd == rx fd */
		if (sockfd != internals->tx_queue[i].sockfd) {
			sockfd = internals->tx_queue[i].sockfd;
			if (sockfd != -1)
				close(sockfd);
		}

		internals->rx_queue[i].sockfd = -1;
		internals->tx_queue[i].sockfd = -1;
	}

	dev->data->dev_link.link_status = ETH_LINK_DOWN;
	return 0;
}

static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
	return 0;
}

static int
eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev_info->if_index = internals->if_index;
	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
	dev_info->max_rx_queues = (uint16_t)internals->nb_queues;
	dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
	dev_info->min_rx_bufsize = 0;
	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
		DEV_TX_OFFLOAD_VLAN_INSERT;

	return 0;
}
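
/*
 * Statistics are accumulated by the burst handlers above; note that only
 * the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues contribute to the counters
 * reported here.
 */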

static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
{
	unsigned i, imax;
	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
	const struct pmd_internals *internal = dev->data->dev_private;

	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
		internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < imax; i++) {
		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
		rx_total += igb_stats->q_ipackets[i];
		rx_bytes_total += igb_stats->q_ibytes[i];
	}

	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
		internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < imax; i++) {
		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
		tx_total += igb_stats->q_opackets[i];
		tx_err_total += internal->tx_queue[i].err_pkts;
		tx_bytes_total += igb_stats->q_obytes[i];
	}

	igb_stats->ipackets = rx_total;
	igb_stats->ibytes = rx_bytes_total;
	igb_stats->opackets = tx_total;
	igb_stats->oerrors = tx_err_total;
	igb_stats->obytes = tx_bytes_total;
	return 0;
}

static int
eth_stats_reset(struct rte_eth_dev *dev)
{
	unsigned i;
	struct pmd_internals *internal = dev->data->dev_private;

	for (i = 0; i < internal->nb_queues; i++) {
		internal->rx_queue[i].rx_pkts = 0;
		internal->rx_queue[i].rx_bytes = 0;
	}

	for (i = 0; i < internal->nb_queues; i++) {
		internal->tx_queue[i].tx_pkts = 0;
		internal->tx_queue[i].err_pkts = 0;
		internal->tx_queue[i].tx_bytes = 0;
	}

	return 0;
}

static int
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals;
	struct tpacket_req *req;
	unsigned int q;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	PMD_LOG(INFO, "Closing AF_PACKET ethdev on NUMA socket %u",
		rte_socket_id());

	internals = dev->data->dev_private;
	req = &internals->req;
	for (q = 0; q < internals->nb_queues; q++) {
		munmap(internals->rx_queue[q].map,
		       2 * req->tp_block_size * req->tp_block_nr);
		rte_free(internals->rx_queue[q].rd);
		rte_free(internals->tx_queue[q].rd);
	}
	free(internals->if_name);
	rte_free(internals->rx_queue);
	rte_free(internals->tx_queue);

	/* mac_addrs must not be freed alone because part of dev_private */
	dev->data->mac_addrs = NULL;
	return 0;
}

static void
eth_queue_release(void *q __rte_unused)
{
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev,
		   uint16_t rx_queue_id,
		   uint16_t nb_rx_desc __rte_unused,
		   unsigned int socket_id __rte_unused,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mb_pool)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id];
	unsigned int buf_size, data_size;

	pkt_q->mb_pool = mb_pool;

	/* Now get the space available for data in the mbuf */
	buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) -
		RTE_PKTMBUF_HEADROOM;
	data_size = internals->req.tp_frame_size;
	data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);

	if (data_size > buf_size) {
		PMD_LOG(ERR,
			"%s: %d bytes will not fit in mbuf (%d bytes)",
			dev->device->name, data_size, buf_size);
		return -ENOMEM;
	}

	dev->data->rx_queues[rx_queue_id] = pkt_q;
	pkt_q->in_port = dev->data->port_id;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev,
		   uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id __rte_unused,
		   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev->data->tx_queues[tx_queue_id] = &internals->tx_queue[tx_queue_id];
	return 0;
}
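
/*
 * The MTU is bounded by the data area of a TPACKET_V2 frame, since a larger
 * packet could not be copied into a single TX ring slot.
 */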

static int
eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct ifreq ifr = { .ifr_mtu = mtu };
	int ret;
	int s;
	unsigned int data_size = internals->req.tp_frame_size -
				 TPACKET2_HDRLEN;

	if (mtu > data_size)
		return -EINVAL;

	s = socket(PF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return -EINVAL;

	strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
	ret = ioctl(s, SIOCSIFMTU, &ifr);
	close(s);

	if (ret < 0)
		return -EINVAL;

	return 0;
}

static int
eth_dev_macaddr_set(struct rte_eth_dev *dev, struct rte_ether_addr *addr)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct ifreq ifr = { };
	int sockfd = internals->rx_queue[0].sockfd;
	int ret;

	if (sockfd == -1) {
		PMD_LOG(ERR, "receive socket not found");
		return -EINVAL;
	}

	strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
	ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
	memcpy(ifr.ifr_hwaddr.sa_data, addr, sizeof(*addr));
	ret = ioctl(sockfd, SIOCSIFHWADDR, &ifr);

	if (ret < 0) {
		PMD_LOG_ERRNO(ERR, "ioctl(SIOCSIFHWADDR) failed");
		return -EINVAL;
	}

	return 0;
}

static int
eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask)
{
	struct ifreq ifr;
	int ret = 0;
	int s;

	s = socket(PF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return -errno;

	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
	if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) {
		ret = -errno;
		goto out;
	}
	ifr.ifr_flags &= mask;
	ifr.ifr_flags |= flags;
	if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) {
		ret = -errno;
		goto out;
	}
out:
	close(s);
	return ret;
}

static int
eth_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;

	return eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0);
}

static int
eth_dev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;

	return eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC);
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.mac_addr_set = eth_dev_macaddr_set,
	.mtu_set = eth_dev_mtu_set,
	.promiscuous_enable = eth_dev_promiscuous_enable,
	.promiscuous_disable = eth_dev_promiscuous_disable,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
};
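
/* Device argument handling, driven by rte_kvargs_process() at probe time. */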

/*
 * Opens an AF_PACKET socket
 */
static int
open_packet_iface(const char *key __rte_unused,
		  const char *value __rte_unused,
		  void *extra_args)
{
	int *sockfd = extra_args;

	/* Open an AF_PACKET socket... */
	*sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (*sockfd == -1) {
		PMD_LOG(ERR, "Could not open AF_PACKET socket");
		return -1;
	}

	return 0;
}
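
/*
 * Set up the device state: one AF_PACKET socket per queue pair, each with
 * an RX and a TX ring described by the same tpacket_req, i.e. blockcnt
 * blocks of blocksize bytes, carved into framecnt frames of framesize
 * bytes per ring.
 */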
static int
rte_pmd_init_internals(struct rte_vdev_device *dev,
		       const int sockfd,
		       const unsigned nb_queues,
		       unsigned int blocksize,
		       unsigned int blockcnt,
		       unsigned int framesize,
		       unsigned int framecnt,
		       unsigned int qdisc_bypass,
		       struct pmd_internals **internals,
		       struct rte_eth_dev **eth_dev,
		       struct rte_kvargs *kvlist)
{
	const char *name = rte_vdev_device_name(dev);
	const unsigned int numa_node = dev->device.numa_node;
	struct rte_eth_dev_data *data = NULL;
	struct rte_kvargs_pair *pair = NULL;
	struct ifreq ifr;
	size_t ifnamelen;
	unsigned k_idx;
	struct sockaddr_ll sockaddr;
	struct tpacket_req *req;
	struct pkt_rx_queue *rx_queue;
	struct pkt_tx_queue *tx_queue;
	int rc, tpver, discard;
	int qsockfd = -1;
	unsigned int i, q, rdsize;
#if defined(PACKET_FANOUT)
	int fanout_arg;
#endif

	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
		pair = &kvlist->pairs[k_idx];
		if (strstr(pair->key, ETH_AF_PACKET_IFACE_ARG) != NULL)
			break;
	}
	if (pair == NULL) {
		PMD_LOG(ERR,
			"%s: no interface specified for AF_PACKET ethdev",
			name);
		return -1;
	}

	PMD_LOG(INFO,
		"%s: creating AF_PACKET-backed ethdev on numa socket %u",
		name, numa_node);

	*internals = rte_zmalloc_socket(name, sizeof(**internals),
					0, numa_node);
	if (*internals == NULL)
		return -1;

	(*internals)->rx_queue = rte_calloc_socket("af_packet_rx",
						   nb_queues,
						   sizeof(struct pkt_rx_queue),
						   0, numa_node);
	(*internals)->tx_queue = rte_calloc_socket("af_packet_tx",
						   nb_queues,
						   sizeof(struct pkt_tx_queue),
						   0, numa_node);
	if (!(*internals)->rx_queue || !(*internals)->tx_queue)
		goto free_internals;

	for (q = 0; q < nb_queues; q++) {
		(*internals)->rx_queue[q].map = MAP_FAILED;
		(*internals)->tx_queue[q].map = MAP_FAILED;
		(*internals)->rx_queue[q].sockfd = -1;
		(*internals)->tx_queue[q].sockfd = -1;
	}

	req = &((*internals)->req);

	req->tp_block_size = blocksize;
	req->tp_block_nr = blockcnt;
	req->tp_frame_size = framesize;
	req->tp_frame_nr = framecnt;

	ifnamelen = strlen(pair->value);
	if (ifnamelen < sizeof(ifr.ifr_name)) {
		memcpy(ifr.ifr_name, pair->value, ifnamelen);
		ifr.ifr_name[ifnamelen] = '\0';
	} else {
		PMD_LOG(ERR,
			"%s: I/F name too long (%s)",
			name, pair->value);
		goto free_internals;
	}
	if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) {
		PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFINDEX)", name);
		goto free_internals;
	}
	(*internals)->if_name = strdup(pair->value);
	if ((*internals)->if_name == NULL)
		goto free_internals;
	(*internals)->if_index = ifr.ifr_ifindex;

	if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) {
		PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFHWADDR)", name);
		goto free_internals;
	}
	memcpy(&(*internals)->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);

	memset(&sockaddr, 0, sizeof(sockaddr));
	sockaddr.sll_family = AF_PACKET;
	sockaddr.sll_protocol = htons(ETH_P_ALL);
	sockaddr.sll_ifindex = (*internals)->if_index;

#if defined(PACKET_FANOUT)
	fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff;
	fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16;
#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
	fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
#endif
#endif

	for (q = 0; q < nb_queues; q++) {
		/* Open an AF_PACKET socket for this queue... */
		qsockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
		if (qsockfd == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not open AF_PACKET socket",
				name);
			goto error;
		}

		tpver = TPACKET_V2;
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_VERSION,
				&tpver, sizeof(tpver));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_VERSION on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}

		discard = 1;
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_LOSS,
				&discard, sizeof(discard));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_LOSS on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}

#if defined(PACKET_QDISC_BYPASS)
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_QDISC_BYPASS,
				&qdisc_bypass, sizeof(qdisc_bypass));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_QDISC_BYPASS on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}
#else
		RTE_SET_USED(qdisc_bypass);
#endif

		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_RX_RING on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}

		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_TX_RING, req, sizeof(*req));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_TX_RING on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}
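
		/*
		 * Both rings were requested with the same tpacket_req, so the
		 * kernel maps them back to back: RX blocks first, then TX
		 * blocks. This is why the mapping below is twice the ring
		 * size and the TX ring starts block_size * block_nr bytes
		 * into it.
		 */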

		rx_queue = &((*internals)->rx_queue[q]);
		rx_queue->framecount = req->tp_frame_nr;

		rx_queue->map = mmap(NULL, 2 * req->tp_block_size * req->tp_block_nr,
				     PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED,
				     qsockfd, 0);
		if (rx_queue->map == MAP_FAILED) {
			PMD_LOG_ERRNO(ERR,
				"%s: call to mmap failed on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}

		/* rdsize is same for both Tx and Rx */
		rdsize = req->tp_frame_nr * sizeof(*(rx_queue->rd));

		rx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node);
		if (rx_queue->rd == NULL)
			goto error;
		for (i = 0; i < req->tp_frame_nr; ++i) {
			rx_queue->rd[i].iov_base = rx_queue->map + (i * framesize);
			rx_queue->rd[i].iov_len = req->tp_frame_size;
		}
		rx_queue->sockfd = qsockfd;

		tx_queue = &((*internals)->tx_queue[q]);
		tx_queue->framecount = req->tp_frame_nr;
		tx_queue->frame_data_size = req->tp_frame_size;
		tx_queue->frame_data_size -= TPACKET2_HDRLEN -
			sizeof(struct sockaddr_ll);

		tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr;

		tx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node);
		if (tx_queue->rd == NULL)
			goto error;
		for (i = 0; i < req->tp_frame_nr; ++i) {
			tx_queue->rd[i].iov_base = tx_queue->map + (i * framesize);
			tx_queue->rd[i].iov_len = req->tp_frame_size;
		}
		tx_queue->sockfd = qsockfd;

		rc = bind(qsockfd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not bind AF_PACKET socket to %s",
				name, pair->value);
			goto error;
		}

#if defined(PACKET_FANOUT)
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT,
				&fanout_arg, sizeof(fanout_arg));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_FANOUT on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}
#endif
	}

	/* reserve an ethdev entry */
	*eth_dev = rte_eth_vdev_allocate(dev, 0);
	if (*eth_dev == NULL)
		goto error;

	/*
	 * now put it all together
	 * - store queue data in internals,
	 * - store numa_node in eth_dev
	 * - point eth_dev_data to internals
	 * - and point eth_dev structure to new eth_dev_data structure
	 */

	(*internals)->nb_queues = nb_queues;

	data = (*eth_dev)->data;
	data->dev_private = *internals;
	data->nb_rx_queues = (uint16_t)nb_queues;
	data->nb_tx_queues = (uint16_t)nb_queues;
	data->dev_link = pmd_link;
	data->mac_addrs = &(*internals)->eth_addr;
	data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;

	(*eth_dev)->dev_ops = &ops;

	return 0;

error:
	if (qsockfd != -1)
		close(qsockfd);
	for (q = 0; q < nb_queues; q++) {
		if ((*internals)->rx_queue[q].map != MAP_FAILED)
			munmap((*internals)->rx_queue[q].map,
			       2 * req->tp_block_size * req->tp_block_nr);

		rte_free((*internals)->rx_queue[q].rd);
		rte_free((*internals)->tx_queue[q].rd);
		if (((*internals)->rx_queue[q].sockfd >= 0) &&
		    ((*internals)->rx_queue[q].sockfd != qsockfd))
			close((*internals)->rx_queue[q].sockfd);
	}
free_internals:
	rte_free((*internals)->rx_queue);
	rte_free((*internals)->tx_queue);
	free((*internals)->if_name);
	rte_free(*internals);
	return -1;
}
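
/*
 * Parse the device arguments and create the ethdev. Unspecified values
 * fall back to the defaults: framesz 2048, framecnt 512, blocksz one page,
 * one queue pair, and qdisc bypass enabled.
 */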
static int
rte_eth_from_packet(struct rte_vdev_device *dev,
		    int const *sockfd,
		    struct rte_kvargs *kvlist)
{
	const char *name = rte_vdev_device_name(dev);
	struct pmd_internals *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct rte_kvargs_pair *pair = NULL;
	unsigned k_idx;
	unsigned int blockcount;
	unsigned int blocksize;
	unsigned int framesize = DFLT_FRAME_SIZE;
	unsigned int framecount = DFLT_FRAME_COUNT;
	unsigned int qpairs = 1;
	unsigned int qdisc_bypass = 1;

	/* do some parameter checking */
	if (*sockfd < 0)
		return -1;

	blocksize = getpagesize();

	/*
	 * Walk arguments for configurable settings
	 */
	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
		pair = &kvlist->pairs[k_idx];
		if (strstr(pair->key, ETH_AF_PACKET_NUM_Q_ARG) != NULL) {
			qpairs = atoi(pair->value);
			if (qpairs < 1) {
				PMD_LOG(ERR,
					"%s: invalid qpairs value",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_BLOCKSIZE_ARG) != NULL) {
			blocksize = atoi(pair->value);
			if (!blocksize) {
				PMD_LOG(ERR,
					"%s: invalid blocksize value",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_FRAMESIZE_ARG) != NULL) {
			framesize = atoi(pair->value);
			if (!framesize) {
				PMD_LOG(ERR,
					"%s: invalid framesize value",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_FRAMECOUNT_ARG) != NULL) {
			framecount = atoi(pair->value);
			if (!framecount) {
				PMD_LOG(ERR,
					"%s: invalid framecount value",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_QDISC_BYPASS_ARG) != NULL) {
			qdisc_bypass = atoi(pair->value);
			if (qdisc_bypass > 1) {
				PMD_LOG(ERR,
					"%s: invalid bypass value",
					name);
				return -1;
			}
			continue;
		}
	}

	if (framesize > blocksize) {
		PMD_LOG(ERR,
			"%s: AF_PACKET MMAP frame size exceeds block size!",
			name);
		return -1;
	}

	blockcount = framecount / (blocksize / framesize);
	if (!blockcount) {
		PMD_LOG(ERR,
			"%s: invalid AF_PACKET MMAP parameters", name);
		return -1;
	}

	PMD_LOG(INFO, "%s: AF_PACKET MMAP parameters:", name);
	PMD_LOG(INFO, "%s:\tblock size %d", name, blocksize);
	PMD_LOG(INFO, "%s:\tblock count %d", name, blockcount);
	PMD_LOG(INFO, "%s:\tframe size %d", name, framesize);
	PMD_LOG(INFO, "%s:\tframe count %d", name, framecount);

	if (rte_pmd_init_internals(dev, *sockfd, qpairs,
				   blocksize, blockcount,
				   framesize, framecount,
				   qdisc_bypass,
				   &internals, &eth_dev,
				   kvlist) < 0)
		return -1;

	eth_dev->rx_pkt_burst = eth_af_packet_rx;
	eth_dev->tx_pkt_burst = eth_af_packet_tx;

	rte_eth_dev_probing_finish(eth_dev);
	return 0;
}

static int
rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
{
	int ret = 0;
	struct rte_kvargs *kvlist;
	int sockfd = -1;
	struct rte_eth_dev *eth_dev;
	const char *name = rte_vdev_device_name(dev);

	PMD_LOG(INFO, "Initializing pmd_af_packet for %s", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			PMD_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		/* TODO: request info from primary to set up Rx and Tx */
		eth_dev->dev_ops = &ops;
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL) {
		ret = -1;
		goto exit;
	}

	/*
	 * If the iface argument is passed, we open a raw socket on that
	 * interface and use it for reading/writing
	 */
	if (rte_kvargs_count(kvlist, ETH_AF_PACKET_IFACE_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_AF_PACKET_IFACE_ARG,
					 &open_packet_iface, &sockfd);
		if (ret < 0)
			goto exit;
	}

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

	ret = rte_eth_from_packet(dev, &sockfd, kvlist);
	close(sockfd); /* no longer needed */

exit:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;

	if (dev == NULL)
		return -1;

	/* find the ethdev entry */
	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
	if (eth_dev == NULL)
		return 0; /* port already released */

	eth_dev_close(eth_dev);
	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_af_packet_drv = {
	.probe = rte_pmd_af_packet_probe,
	.remove = rte_pmd_af_packet_remove,
};

RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv);
RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet);
RTE_PMD_REGISTER_PARAM_STRING(net_af_packet,
	"iface=<string> "
	"qpairs=<int> "
	"blocksz=<int> "
	"framesz=<int> "
	"framecnt=<int> "
	"qdisc_bypass=<0|1>");
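
/*
 * Example usage (the interface name is illustrative):
 *
 *   --vdev=net_af_packet0,iface=eth0,qpairs=2,framesz=2048,framecnt=512
 *
 * Parameters other than iface may be omitted and default as described in
 * rte_eth_from_packet() above.
 */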