/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2014 John W. Linville <linville@tuxdriver.com>
 * Originally based upon librte_pmd_pcap code:
 * Copyright(c) 2010-2015 Intel Corporation.
 * Copyright(c) 2014 6WIND S.A.
 * All rights reserved.
 */

#include <rte_string_fns.h>
#include <rte_mbuf.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>

#include <errno.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <poll.h>

#define ETH_AF_PACKET_IFACE_ARG		"iface"
#define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
#define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
#define ETH_AF_PACKET_FRAMESIZE_ARG	"framesz"
#define ETH_AF_PACKET_FRAMECOUNT_ARG	"framecnt"
#define ETH_AF_PACKET_QDISC_BYPASS_ARG	"qdisc_bypass"

#define DFLT_FRAME_SIZE		(1 << 11)
#define DFLT_FRAME_COUNT	(1 << 9)

struct pkt_rx_queue {
	int sockfd;

	struct iovec *rd;
	uint8_t *map;
	unsigned int framecount;
	unsigned int framenum;

	struct rte_mempool *mb_pool;
	uint16_t in_port;

	volatile unsigned long rx_pkts;
	volatile unsigned long rx_bytes;
};

struct pkt_tx_queue {
	int sockfd;
	unsigned int frame_data_size;

	struct iovec *rd;
	uint8_t *map;
	unsigned int framecount;
	unsigned int framenum;

	volatile unsigned long tx_pkts;
	volatile unsigned long err_pkts;
	volatile unsigned long tx_bytes;
};

struct pmd_internals {
	unsigned nb_queues;

	int if_index;
	char *if_name;
	struct rte_ether_addr eth_addr;

	struct tpacket_req req;

	struct pkt_rx_queue *rx_queue;
	struct pkt_tx_queue *tx_queue;
};

static const char *valid_arguments[] = {
	ETH_AF_PACKET_IFACE_ARG,
	ETH_AF_PACKET_NUM_Q_ARG,
	ETH_AF_PACKET_BLOCKSIZE_ARG,
	ETH_AF_PACKET_FRAMESIZE_ARG,
	ETH_AF_PACKET_FRAMECOUNT_ARG,
	ETH_AF_PACKET_QDISC_BYPASS_ARG,
	NULL
};

static struct rte_eth_link pmd_link = {
	.link_speed = ETH_SPEED_NUM_10G,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN,
	.link_autoneg = ETH_LINK_FIXED,
};

RTE_LOG_REGISTER(af_packet_logtype, pmd.net.packet, NOTICE);

#define PMD_LOG(level, fmt, args...) \
	rte_log(RTE_LOG_ ## level, af_packet_logtype, \
		"%s(): " fmt "\n", __func__, ##args)

#define PMD_LOG_ERRNO(level, fmt, args...) \
	rte_log(RTE_LOG_ ## level, af_packet_logtype, \
		"%s(): " fmt ":%s\n", __func__, ##args, strerror(errno))

static uint16_t
eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	unsigned i;
	struct tpacket2_hdr *ppd;
	struct rte_mbuf *mbuf;
	uint8_t *pbuf;
	struct pkt_rx_queue *pkt_q = queue;
	uint16_t num_rx = 0;
	unsigned long num_rx_bytes = 0;
	unsigned int framecount, framenum;

	if (unlikely(nb_pkts == 0))
		return 0;

	/*
	 * Reads the given number of packets from the AF_PACKET socket one by
	 * one and copies the packet data into a newly allocated mbuf.
	 */
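	/*
	 * Each TPACKET_V2 ring slot starts with a tpacket2_hdr: the kernel
	 * sets TP_STATUS_USER when a frame is ready for us, and we hand the
	 * slot back by writing TP_STATUS_KERNEL once the data is copied out.
	 */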
	framecount = pkt_q->framecount;
	framenum = pkt_q->framenum;
	for (i = 0; i < nb_pkts; i++) {
		/* point at the next incoming frame */
		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
		if ((ppd->tp_status & TP_STATUS_USER) == 0)
			break;

		/* allocate the next mbuf */
		mbuf = rte_pktmbuf_alloc(pkt_q->mb_pool);
		if (unlikely(mbuf == NULL))
			break;

		/* packet will fit in the mbuf, go ahead and receive it */
		rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) = ppd->tp_snaplen;
		pbuf = (uint8_t *) ppd + ppd->tp_mac;
		memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf));

		/* check for vlan info */
		if (ppd->tp_status & TP_STATUS_VLAN_VALID) {
			mbuf->vlan_tci = ppd->tp_vlan_tci;
			mbuf->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
		}

		/* release incoming frame and advance ring buffer */
		ppd->tp_status = TP_STATUS_KERNEL;
		if (++framenum >= framecount)
			framenum = 0;
		mbuf->port = pkt_q->in_port;

		/* account for the receive frame */
		bufs[i] = mbuf;
		num_rx++;
		num_rx_bytes += mbuf->pkt_len;
	}
	pkt_q->framenum = framenum;
	pkt_q->rx_pkts += num_rx;
	pkt_q->rx_bytes += num_rx_bytes;
	return num_rx;
}

/*
 * Callback to handle sending packets through a real NIC.
 */
static uint16_t
eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct tpacket2_hdr *ppd;
	struct rte_mbuf *mbuf;
	uint8_t *pbuf;
	unsigned int framecount, framenum;
	struct pollfd pfd;
	struct pkt_tx_queue *pkt_q = queue;
	uint16_t num_tx = 0;
	unsigned long num_tx_bytes = 0;
	int i;

	if (unlikely(nb_pkts == 0))
		return 0;

	memset(&pfd, 0, sizeof(pfd));
	pfd.fd = pkt_q->sockfd;
	pfd.events = POLLOUT;
	pfd.revents = 0;

	framecount = pkt_q->framecount;
	framenum = pkt_q->framenum;
	ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
	for (i = 0; i < nb_pkts; i++) {
		mbuf = *bufs++;

		/* drop oversized packets */
		if (mbuf->pkt_len > pkt_q->frame_data_size) {
			rte_pktmbuf_free(mbuf);
			continue;
		}

		/* insert vlan info if necessary */
		if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
			if (rte_vlan_insert(&mbuf)) {
				rte_pktmbuf_free(mbuf);
				continue;
			}
		}

		/* wait for the next tx frame to become available */
		if ((ppd->tp_status != TP_STATUS_AVAILABLE) &&
		    (poll(&pfd, 1, -1) < 0))
			break;

		/* copy the tx frame data */
		pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN -
			sizeof(struct sockaddr_ll);

		struct rte_mbuf *tmp_mbuf = mbuf;
		while (tmp_mbuf) {
			uint16_t data_len = rte_pktmbuf_data_len(tmp_mbuf);
			memcpy(pbuf, rte_pktmbuf_mtod(tmp_mbuf, void*), data_len);
			pbuf += data_len;
			tmp_mbuf = tmp_mbuf->next;
		}

		ppd->tp_len = mbuf->pkt_len;
		ppd->tp_snaplen = mbuf->pkt_len;

		/* hand the frame to the kernel and advance ring buffer */
		ppd->tp_status = TP_STATUS_SEND_REQUEST;
		if (++framenum >= framecount)
			framenum = 0;
		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;

		num_tx++;
		num_tx_bytes += mbuf->pkt_len;
		rte_pktmbuf_free(mbuf);
	}

	/* kick-off transmits */
	if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1 &&
	    errno != ENOBUFS && errno != EAGAIN) {
		/*
		 * In case of an ENOBUFS/EAGAIN error all of the enqueued
		 * packets will be considered successful even though only some
		 * are sent.
		 */
		num_tx = 0;
		num_tx_bytes = 0;
	}
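	/*
	 * i - num_tx covers both packets dropped above (oversized or failed
	 * VLAN insertion) and, after a sendto() failure, the entire burst.
	 */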
	pkt_q->framenum = framenum;
	pkt_q->tx_pkts += num_tx;
	pkt_q->err_pkts += i - num_tx;
	pkt_q->tx_bytes += num_tx_bytes;
	return i;
}

static int
eth_dev_start(struct rte_eth_dev *dev)
{
	dev->data->dev_link.link_status = ETH_LINK_UP;
	return 0;
}

/*
 * This function gets called when the current port gets stopped.
 */
static void
eth_dev_stop(struct rte_eth_dev *dev)
{
	unsigned i;
	int sockfd;
	struct pmd_internals *internals = dev->data->dev_private;

	for (i = 0; i < internals->nb_queues; i++) {
		sockfd = internals->rx_queue[i].sockfd;
		if (sockfd != -1)
			close(sockfd);

		/* Prevent a double close in case tx fd == rx fd */
		if (sockfd != internals->tx_queue[i].sockfd) {
			sockfd = internals->tx_queue[i].sockfd;
			if (sockfd != -1)
				close(sockfd);
		}

		internals->rx_queue[i].sockfd = -1;
		internals->tx_queue[i].sockfd = -1;
	}

	dev->data->dev_link.link_status = ETH_LINK_DOWN;
}

static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
	return 0;
}

static int
eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev_info->if_index = internals->if_index;
	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
	dev_info->max_rx_queues = (uint16_t)internals->nb_queues;
	dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
	dev_info->min_rx_bufsize = 0;
	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
		DEV_TX_OFFLOAD_VLAN_INSERT;

	return 0;
}

static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
{
	unsigned i, imax;
	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
	const struct pmd_internals *internal = dev->data->dev_private;

	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < imax; i++) {
		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
		rx_total += igb_stats->q_ipackets[i];
		rx_bytes_total += igb_stats->q_ibytes[i];
	}

	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
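	/*
	 * As above, only the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues are
	 * reported; counters on any queues beyond that cap are not included
	 * in the totals either.
	 */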
	for (i = 0; i < imax; i++) {
		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
		tx_total += igb_stats->q_opackets[i];
		tx_err_total += internal->tx_queue[i].err_pkts;
		tx_bytes_total += igb_stats->q_obytes[i];
	}

	igb_stats->ipackets = rx_total;
	igb_stats->ibytes = rx_bytes_total;
	igb_stats->opackets = tx_total;
	igb_stats->oerrors = tx_err_total;
	igb_stats->obytes = tx_bytes_total;
	return 0;
}

static int
eth_stats_reset(struct rte_eth_dev *dev)
{
	unsigned i;
	struct pmd_internals *internal = dev->data->dev_private;

	for (i = 0; i < internal->nb_queues; i++) {
		internal->rx_queue[i].rx_pkts = 0;
		internal->rx_queue[i].rx_bytes = 0;
	}

	for (i = 0; i < internal->nb_queues; i++) {
		internal->tx_queue[i].tx_pkts = 0;
		internal->tx_queue[i].err_pkts = 0;
		internal->tx_queue[i].tx_bytes = 0;
	}

	return 0;
}

static int
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals;
	struct tpacket_req *req;
	unsigned int q;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	PMD_LOG(INFO, "Closing AF_PACKET ethdev on NUMA socket %u",
		rte_socket_id());

	internals = dev->data->dev_private;
	req = &internals->req;
	for (q = 0; q < internals->nb_queues; q++) {
		munmap(internals->rx_queue[q].map,
		       2 * req->tp_block_size * req->tp_block_nr);
		rte_free(internals->rx_queue[q].rd);
		rte_free(internals->tx_queue[q].rd);
	}
	free(internals->if_name);
	rte_free(internals->rx_queue);
	rte_free(internals->tx_queue);

	/* mac_addrs must not be freed alone because part of dev_private */
	dev->data->mac_addrs = NULL;
	return 0;
}

static void
eth_queue_release(void *q __rte_unused)
{
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
                int wait_to_complete __rte_unused)
{
	return 0;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev,
                   uint16_t rx_queue_id,
                   uint16_t nb_rx_desc __rte_unused,
                   unsigned int socket_id __rte_unused,
                   const struct rte_eth_rxconf *rx_conf __rte_unused,
                   struct rte_mempool *mb_pool)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id];
	unsigned int buf_size, data_size;

	pkt_q->mb_pool = mb_pool;

	/* Now get the space available for data in the mbuf */
	buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) -
		RTE_PKTMBUF_HEADROOM;
	data_size = internals->req.tp_frame_size;
	data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);

	if (data_size > buf_size) {
		PMD_LOG(ERR,
			"%s: %d bytes will not fit in mbuf (%d bytes)",
			dev->device->name, data_size, buf_size);
		return -ENOMEM;
	}

	dev->data->rx_queues[rx_queue_id] = pkt_q;
	pkt_q->in_port = dev->data->port_id;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev,
                   uint16_t tx_queue_id,
                   uint16_t nb_tx_desc __rte_unused,
                   unsigned int socket_id __rte_unused,
                   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev->data->tx_queues[tx_queue_id] = &internals->tx_queue[tx_queue_id];
	return 0;
}
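/*
 * The MTU cannot exceed the per-frame data area of the mmap'd ring, since a
 * packet that large would no longer fit in a single TPACKET_V2 frame.
 */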
static int
eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct ifreq ifr = { .ifr_mtu = mtu };
	int ret;
	int s;
	unsigned int data_size = internals->req.tp_frame_size -
				 TPACKET2_HDRLEN;

	if (mtu > data_size)
		return -EINVAL;

	s = socket(PF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return -EINVAL;

	strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
	ret = ioctl(s, SIOCSIFMTU, &ifr);
	close(s);

	if (ret < 0)
		return -EINVAL;

	return 0;
}

static int
eth_dev_macaddr_set(struct rte_eth_dev *dev, struct rte_ether_addr *addr)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct ifreq ifr = { };
	int sockfd = internals->rx_queue[0].sockfd;
	int ret;

	if (sockfd == -1) {
		PMD_LOG(ERR, "receive socket not found");
		return -EINVAL;
	}

	strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
	ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
	memcpy(ifr.ifr_hwaddr.sa_data, addr, sizeof(*addr));
	ret = ioctl(sockfd, SIOCSIFHWADDR, &ifr);

	if (ret < 0) {
		PMD_LOG_ERRNO(ERR, "ioctl(SIOCSIFHWADDR) failed");
		return -EINVAL;
	}

	return 0;
}

static int
eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask)
{
	struct ifreq ifr;
	int ret = 0;
	int s;

	s = socket(PF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return -errno;

	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
	if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) {
		ret = -errno;
		goto out;
	}
	ifr.ifr_flags &= mask;
	ifr.ifr_flags |= flags;
	if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) {
		ret = -errno;
		goto out;
	}
out:
	close(s);
	return ret;
}

static int
eth_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;

	return eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0);
}

static int
eth_dev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;

	return eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC);
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.mac_addr_set = eth_dev_macaddr_set,
	.mtu_set = eth_dev_mtu_set,
	.promiscuous_enable = eth_dev_promiscuous_enable,
	.promiscuous_disable = eth_dev_promiscuous_disable,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
};

/*
 * Opens an AF_PACKET socket
 */
static int
open_packet_iface(const char *key __rte_unused,
                  const char *value __rte_unused,
                  void *extra_args)
{
	int *sockfd = extra_args;

	/* Open an AF_PACKET socket... */
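	/* ETH_P_ALL (in network byte order) captures every protocol */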
	*sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (*sockfd == -1) {
		PMD_LOG(ERR, "Could not open AF_PACKET socket");
		return -1;
	}

	return 0;
}

static int
rte_pmd_init_internals(struct rte_vdev_device *dev,
                       const int sockfd,
                       const unsigned nb_queues,
                       unsigned int blocksize,
                       unsigned int blockcnt,
                       unsigned int framesize,
                       unsigned int framecnt,
                       unsigned int qdisc_bypass,
                       struct pmd_internals **internals,
                       struct rte_eth_dev **eth_dev,
                       struct rte_kvargs *kvlist)
{
	const char *name = rte_vdev_device_name(dev);
	const unsigned int numa_node = dev->device.numa_node;
	struct rte_eth_dev_data *data = NULL;
	struct rte_kvargs_pair *pair = NULL;
	struct ifreq ifr;
	size_t ifnamelen;
	unsigned k_idx;
	struct sockaddr_ll sockaddr;
	struct tpacket_req *req;
	struct pkt_rx_queue *rx_queue;
	struct pkt_tx_queue *tx_queue;
	int rc, tpver, discard;
	int qsockfd = -1;
	unsigned int i, q, rdsize;
#if defined(PACKET_FANOUT)
	int fanout_arg;
#endif

	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
		pair = &kvlist->pairs[k_idx];
		if (strstr(pair->key, ETH_AF_PACKET_IFACE_ARG) != NULL)
			break;
	}
	if (pair == NULL) {
		PMD_LOG(ERR,
			"%s: no interface specified for AF_PACKET ethdev",
			name);
		return -1;
	}

	PMD_LOG(INFO,
		"%s: creating AF_PACKET-backed ethdev on numa socket %u",
		name, numa_node);

	*internals = rte_zmalloc_socket(name, sizeof(**internals),
	                                0, numa_node);
	if (*internals == NULL)
		return -1;

	(*internals)->rx_queue = rte_calloc_socket("af_packet_rx",
						nb_queues,
						sizeof(struct pkt_rx_queue),
						0, numa_node);
	(*internals)->tx_queue = rte_calloc_socket("af_packet_tx",
						nb_queues,
						sizeof(struct pkt_tx_queue),
						0, numa_node);
	if (!(*internals)->rx_queue || !(*internals)->tx_queue)
		goto free_internals;

	for (q = 0; q < nb_queues; q++) {
		(*internals)->rx_queue[q].map = MAP_FAILED;
		(*internals)->tx_queue[q].map = MAP_FAILED;
		(*internals)->rx_queue[q].sockfd = -1;
		(*internals)->tx_queue[q].sockfd = -1;
	}

	req = &((*internals)->req);

	req->tp_block_size = blocksize;
	req->tp_block_nr = blockcnt;
	req->tp_frame_size = framesize;
	req->tp_frame_nr = framecnt;

	ifnamelen = strlen(pair->value);
	if (ifnamelen < sizeof(ifr.ifr_name)) {
		memcpy(ifr.ifr_name, pair->value, ifnamelen);
		ifr.ifr_name[ifnamelen] = '\0';
	} else {
		PMD_LOG(ERR,
			"%s: I/F name too long (%s)",
			name, pair->value);
		goto free_internals;
	}
	if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) {
		PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFINDEX)", name);
		goto free_internals;
	}
	(*internals)->if_name = strdup(pair->value);
	if ((*internals)->if_name == NULL)
		goto free_internals;
	(*internals)->if_index = ifr.ifr_ifindex;

	if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) {
		PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFHWADDR)", name);
		goto free_internals;
	}
	memcpy(&(*internals)->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);

	memset(&sockaddr, 0, sizeof(sockaddr));
	sockaddr.sll_family = AF_PACKET;
	sockaddr.sll_protocol = htons(ETH_P_ALL);
	sockaddr.sll_ifindex = (*internals)->if_index;

#if defined(PACKET_FANOUT)
	fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff;
	fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16;
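	/*
	 * PACKET_FANOUT encodes the 16-bit group id in the low half of the
	 * argument and the fanout mode/flags in the high half.
	 */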
#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
	fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
#endif
#endif

	for (q = 0; q < nb_queues; q++) {
		/* Open an AF_PACKET socket for this queue... */
		qsockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
		if (qsockfd == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not open AF_PACKET socket",
				name);
			goto error;
		}

		tpver = TPACKET_V2;
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_VERSION,
				&tpver, sizeof(tpver));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_VERSION on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}

		discard = 1;
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_LOSS,
				&discard, sizeof(discard));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_LOSS on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}

#if defined(PACKET_QDISC_BYPASS)
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_QDISC_BYPASS,
				&qdisc_bypass, sizeof(qdisc_bypass));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_QDISC_BYPASS on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}
#else
		RTE_SET_USED(qdisc_bypass);
#endif

		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_RX_RING on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}

		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_TX_RING, req, sizeof(*req));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_TX_RING on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}

		rx_queue = &((*internals)->rx_queue[q]);
		rx_queue->framecount = req->tp_frame_nr;

		/*
		 * A single mapping covers both rings: the Rx ring occupies
		 * the first tp_block_size * tp_block_nr bytes and the Tx
		 * ring the second, hence the doubled length here.
		 */
		rx_queue->map = mmap(NULL, 2 * req->tp_block_size * req->tp_block_nr,
				    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED,
				    qsockfd, 0);
		if (rx_queue->map == MAP_FAILED) {
			PMD_LOG_ERRNO(ERR,
				"%s: call to mmap failed on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}

		/* rdsize is same for both Tx and Rx */
		rdsize = req->tp_frame_nr * sizeof(*(rx_queue->rd));

		rx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node);
		if (rx_queue->rd == NULL)
			goto error;
		for (i = 0; i < req->tp_frame_nr; ++i) {
			rx_queue->rd[i].iov_base = rx_queue->map + (i * framesize);
			rx_queue->rd[i].iov_len = req->tp_frame_size;
		}
		rx_queue->sockfd = qsockfd;

		tx_queue = &((*internals)->tx_queue[q]);
		tx_queue->framecount = req->tp_frame_nr;
		tx_queue->frame_data_size = req->tp_frame_size;
		tx_queue->frame_data_size -= TPACKET2_HDRLEN -
			sizeof(struct sockaddr_ll);

		tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr;

		tx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node);
		if (tx_queue->rd == NULL)
			goto error;
		for (i = 0; i < req->tp_frame_nr; ++i) {
			tx_queue->rd[i].iov_base = tx_queue->map + (i * framesize);
			tx_queue->rd[i].iov_len = req->tp_frame_size;
		}
		tx_queue->sockfd = qsockfd;

		rc = bind(qsockfd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not bind AF_PACKET socket to %s",
				name, pair->value);
			goto error;
		}

#if defined(PACKET_FANOUT)
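		/*
		 * Add this queue's socket to the fanout group so that the
		 * kernel spreads incoming flows across all queue sockets.
		 */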
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT,
				&fanout_arg, sizeof(fanout_arg));
		if (rc == -1) {
			PMD_LOG_ERRNO(ERR,
				"%s: could not set PACKET_FANOUT on AF_PACKET socket for %s",
				name, pair->value);
			goto error;
		}
#endif
	}

	/* reserve an ethdev entry */
	*eth_dev = rte_eth_vdev_allocate(dev, 0);
	if (*eth_dev == NULL)
		goto error;

	/*
	 * now put it all together
	 * - store queue data in internals,
	 * - store numa_node in eth_dev
	 * - point eth_dev_data to internals
	 * - and point eth_dev structure to new eth_dev_data structure
	 */

	(*internals)->nb_queues = nb_queues;

	data = (*eth_dev)->data;
	data->dev_private = *internals;
	data->nb_rx_queues = (uint16_t)nb_queues;
	data->nb_tx_queues = (uint16_t)nb_queues;
	data->dev_link = pmd_link;
	data->mac_addrs = &(*internals)->eth_addr;

	(*eth_dev)->dev_ops = &ops;

	return 0;

error:
	if (qsockfd != -1)
		close(qsockfd);
	for (q = 0; q < nb_queues; q++) {
		if ((*internals)->rx_queue[q].map != MAP_FAILED)
			munmap((*internals)->rx_queue[q].map,
			       2 * req->tp_block_size * req->tp_block_nr);

		rte_free((*internals)->rx_queue[q].rd);
		rte_free((*internals)->tx_queue[q].rd);
		if (((*internals)->rx_queue[q].sockfd >= 0) &&
		    ((*internals)->rx_queue[q].sockfd != qsockfd))
			close((*internals)->rx_queue[q].sockfd);
	}
free_internals:
	rte_free((*internals)->rx_queue);
	rte_free((*internals)->tx_queue);
	free((*internals)->if_name);
	rte_free(*internals);
	return -1;
}

static int
rte_eth_from_packet(struct rte_vdev_device *dev,
                    int const *sockfd,
                    struct rte_kvargs *kvlist)
{
	const char *name = rte_vdev_device_name(dev);
	struct pmd_internals *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct rte_kvargs_pair *pair = NULL;
	unsigned k_idx;
	unsigned int blockcount;
	unsigned int blocksize;
	unsigned int framesize = DFLT_FRAME_SIZE;
	unsigned int framecount = DFLT_FRAME_COUNT;
	unsigned int qpairs = 1;
	unsigned int qdisc_bypass = 1;

	/* do some parameter checking */
	if (*sockfd < 0)
		return -1;

	blocksize = getpagesize();

	/*
	 * Walk arguments for configurable settings
	 */
	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
		pair = &kvlist->pairs[k_idx];
		if (strstr(pair->key, ETH_AF_PACKET_NUM_Q_ARG) != NULL) {
			qpairs = atoi(pair->value);
			if (qpairs < 1) {
				PMD_LOG(ERR,
					"%s: invalid qpairs value",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_BLOCKSIZE_ARG) != NULL) {
			blocksize = atoi(pair->value);
			if (!blocksize) {
				PMD_LOG(ERR,
					"%s: invalid blocksize value",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_FRAMESIZE_ARG) != NULL) {
			framesize = atoi(pair->value);
			if (!framesize) {
				PMD_LOG(ERR,
					"%s: invalid framesize value",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_FRAMECOUNT_ARG) != NULL) {
			framecount = atoi(pair->value);
			if (!framecount) {
				PMD_LOG(ERR,
					"%s: invalid framecount value",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_QDISC_BYPASS_ARG) != NULL) {
			qdisc_bypass = atoi(pair->value);
			if (qdisc_bypass > 1) {
				PMD_LOG(ERR,
					"%s: invalid bypass value",
					name);
				return -1;
			}
			continue;
		}
	}

	if (framesize > blocksize) {
		PMD_LOG(ERR,
			"%s: AF_PACKET MMAP frame size exceeds block size!",
			name);
		return -1;
	}
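	/*
	 * Frames must pack exactly into blocks: the block count is derived
	 * from how many whole frames fit in a single block.
	 */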
	blockcount = framecount / (blocksize / framesize);
	if (!blockcount) {
		PMD_LOG(ERR,
			"%s: invalid AF_PACKET MMAP parameters", name);
		return -1;
	}

	PMD_LOG(INFO, "%s: AF_PACKET MMAP parameters:", name);
	PMD_LOG(INFO, "%s:\tblock size %d", name, blocksize);
	PMD_LOG(INFO, "%s:\tblock count %d", name, blockcount);
	PMD_LOG(INFO, "%s:\tframe size %d", name, framesize);
	PMD_LOG(INFO, "%s:\tframe count %d", name, framecount);

	if (rte_pmd_init_internals(dev, *sockfd, qpairs,
	                           blocksize, blockcount,
	                           framesize, framecount,
	                           qdisc_bypass,
	                           &internals, &eth_dev,
	                           kvlist) < 0)
		return -1;

	eth_dev->rx_pkt_burst = eth_af_packet_rx;
	eth_dev->tx_pkt_burst = eth_af_packet_tx;

	rte_eth_dev_probing_finish(eth_dev);
	return 0;
}

static int
rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
{
	int ret = 0;
	struct rte_kvargs *kvlist;
	int sockfd = -1;
	struct rte_eth_dev *eth_dev;
	const char *name = rte_vdev_device_name(dev);

	PMD_LOG(INFO, "Initializing pmd_af_packet for %s", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			PMD_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		/* TODO: request info from primary to set up Rx and Tx */
		eth_dev->dev_ops = &ops;
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL) {
		ret = -1;
		goto exit;
	}

	/*
	 * If iface argument is passed we open the NICs and use them for
	 * reading / writing
	 */
	if (rte_kvargs_count(kvlist, ETH_AF_PACKET_IFACE_ARG) == 1) {

		ret = rte_kvargs_process(kvlist, ETH_AF_PACKET_IFACE_ARG,
		                         &open_packet_iface, &sockfd);
		if (ret < 0)
			goto exit;
	}

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

	ret = rte_eth_from_packet(dev, &sockfd, kvlist);
	close(sockfd); /* no longer needed */

exit:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;

	if (dev == NULL)
		return -1;

	/* find the ethdev entry */
	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
	if (eth_dev == NULL)
		return 0; /* port already released */

	eth_dev_close(eth_dev);
	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_af_packet_drv = {
	.probe = rte_pmd_af_packet_probe,
	.remove = rte_pmd_af_packet_remove,
};

RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv);
RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet);
RTE_PMD_REGISTER_PARAM_STRING(net_af_packet,
	"iface=<string> "
	"qpairs=<int> "
	"blocksz=<int> "
	"framesz=<int> "
	"framecnt=<int> "
	"qdisc_bypass=<0|1>");
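/*
 * Example invocation (a minimal sketch: "eth0" and the ring geometry below
 * are placeholder values, and the testpmd binary name varies by DPDK
 * version). Note framesz must not exceed blocksz, and here the derived
 * block count is 512 / (4096 / 2048) = 256:
 *
 *   testpmd --vdev=net_af_packet0,iface=eth0,qpairs=1,blocksz=4096,\
 *           framesz=2048,framecnt=512,qdisc_bypass=0 -- -i
 */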