/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2014 John W. Linville <linville@tuxdriver.com>
 *
 *   Originally based upon librte_pmd_pcap code:
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   Copyright(c) 2014 6WIND S.A.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
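
/*
 * AF_PACKET virtual PMD: each queue pair is backed by a Linux AF_PACKET
 * raw socket, with packets exchanged through mmap'ed TPACKET_V2 RX/TX
 * rings shared with the kernel.
 */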

#include <rte_mbuf.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>

#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <poll.h>

#define ETH_AF_PACKET_IFACE_ARG		"iface"
#define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
#define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
#define ETH_AF_PACKET_FRAMESIZE_ARG	"framesz"
#define ETH_AF_PACKET_FRAMECOUNT_ARG	"framecnt"
#define ETH_AF_PACKET_QDISC_BYPASS_ARG	"qdisc_bypass"

#define DFLT_BLOCK_SIZE		(1 << 12)
#define DFLT_FRAME_SIZE		(1 << 11)
#define DFLT_FRAME_COUNT	(1 << 9)

#define RTE_PMD_AF_PACKET_MAX_RINGS 16

struct pkt_rx_queue {
	int sockfd;

	struct iovec *rd;
	uint8_t *map;
	unsigned int framecount;
	unsigned int framenum;

	struct rte_mempool *mb_pool;
	uint16_t in_port;

	volatile unsigned long rx_pkts;
	volatile unsigned long err_pkts;
	volatile unsigned long rx_bytes;
};

struct pkt_tx_queue {
	int sockfd;
	unsigned int frame_data_size;

	struct iovec *rd;
	uint8_t *map;
	unsigned int framecount;
	unsigned int framenum;

	volatile unsigned long tx_pkts;
	volatile unsigned long err_pkts;
	volatile unsigned long tx_bytes;
};

struct pmd_internals {
	unsigned nb_queues;

	int if_index;
	char *if_name;
	struct ether_addr eth_addr;

	struct tpacket_req req;

	struct pkt_rx_queue rx_queue[RTE_PMD_AF_PACKET_MAX_RINGS];
	struct pkt_tx_queue tx_queue[RTE_PMD_AF_PACKET_MAX_RINGS];
};

static const char *valid_arguments[] = {
	ETH_AF_PACKET_IFACE_ARG,
	ETH_AF_PACKET_NUM_Q_ARG,
	ETH_AF_PACKET_BLOCKSIZE_ARG,
	ETH_AF_PACKET_FRAMESIZE_ARG,
	ETH_AF_PACKET_FRAMECOUNT_ARG,
	ETH_AF_PACKET_QDISC_BYPASS_ARG,
	NULL
};

static struct rte_eth_link pmd_link = {
	.link_speed = ETH_SPEED_NUM_10G,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN,
	.link_autoneg = ETH_LINK_AUTONEG
};

static uint16_t
eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	unsigned i;
	struct tpacket2_hdr *ppd;
	struct rte_mbuf *mbuf;
	uint8_t *pbuf;
	struct pkt_rx_queue *pkt_q = queue;
	uint16_t num_rx = 0;
	unsigned long num_rx_bytes = 0;
	unsigned int framecount, framenum;

	if (unlikely(nb_pkts == 0))
		return 0;

	/*
	 * Reads the given number of packets from the AF_PACKET socket one by
	 * one and copies the packet data into a newly allocated mbuf.
	 */
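	/*
	 * TPACKET_V2 ring handshake: a frame whose status has TP_STATUS_USER
	 * set belongs to user space; once its payload has been copied out,
	 * writing TP_STATUS_KERNEL hands the slot back to the kernel.
	 */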
	framecount = pkt_q->framecount;
	framenum = pkt_q->framenum;
	for (i = 0; i < nb_pkts; i++) {
		/* point at the next incoming frame */
		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
		if ((ppd->tp_status & TP_STATUS_USER) == 0)
			break;

		/* allocate the next mbuf */
		mbuf = rte_pktmbuf_alloc(pkt_q->mb_pool);
		if (unlikely(mbuf == NULL))
			break;

		/* packet will fit in the mbuf, go ahead and receive it */
		rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) = ppd->tp_snaplen;
		pbuf = (uint8_t *) ppd + ppd->tp_mac;
		memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf));

		/* check for vlan info */
		if (ppd->tp_status & TP_STATUS_VLAN_VALID) {
			mbuf->vlan_tci = ppd->tp_vlan_tci;
			mbuf->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
		}

		/* release incoming frame and advance ring buffer */
		ppd->tp_status = TP_STATUS_KERNEL;
		if (++framenum >= framecount)
			framenum = 0;
		mbuf->port = pkt_q->in_port;

		/* account for the receive frame */
		bufs[i] = mbuf;
		num_rx++;
		num_rx_bytes += mbuf->pkt_len;
	}
	pkt_q->framenum = framenum;
	pkt_q->rx_pkts += num_rx;
	pkt_q->rx_bytes += num_rx_bytes;
	return num_rx;
}

/*
 * Callback to handle sending packets through a real NIC.
 */
static uint16_t
eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct tpacket2_hdr *ppd;
	struct rte_mbuf *mbuf;
	uint8_t *pbuf;
	unsigned int framecount, framenum;
	struct pollfd pfd;
	struct pkt_tx_queue *pkt_q = queue;
	uint16_t num_tx = 0;
	unsigned long num_tx_bytes = 0;
	int i;

	if (unlikely(nb_pkts == 0))
		return 0;

	memset(&pfd, 0, sizeof(pfd));
	pfd.fd = pkt_q->sockfd;
	pfd.events = POLLOUT;
	pfd.revents = 0;

	framecount = pkt_q->framecount;
	framenum = pkt_q->framenum;
	ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
	for (i = 0; i < nb_pkts; i++) {
		mbuf = *bufs++;

		/* drop oversized packets */
		if (mbuf->pkt_len > pkt_q->frame_data_size) {
			rte_pktmbuf_free(mbuf);
			continue;
		}

		/* insert vlan info if necessary */
		if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
			if (rte_vlan_insert(&mbuf)) {
				rte_pktmbuf_free(mbuf);
				continue;
			}
		}

		/* point at the next incoming frame */
		if ((ppd->tp_status != TP_STATUS_AVAILABLE) &&
		    (poll(&pfd, 1, -1) < 0))
			break;

		/* copy the tx frame data */
		pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN -
			sizeof(struct sockaddr_ll);

		struct rte_mbuf *tmp_mbuf = mbuf;
		while (tmp_mbuf) {
			uint16_t data_len = rte_pktmbuf_data_len(tmp_mbuf);
			memcpy(pbuf, rte_pktmbuf_mtod(tmp_mbuf, void*), data_len);
			pbuf += data_len;
			tmp_mbuf = tmp_mbuf->next;
		}

		ppd->tp_len = mbuf->pkt_len;
		ppd->tp_snaplen = mbuf->pkt_len;

		/* release incoming frame and advance ring buffer */
		ppd->tp_status = TP_STATUS_SEND_REQUEST;
		if (++framenum >= framecount)
			framenum = 0;
		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;

		num_tx++;
		num_tx_bytes += mbuf->pkt_len;
		rte_pktmbuf_free(mbuf);
	}

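	/*
	 * A zero-length send on a PACKET_TX_RING socket asks the kernel to
	 * transmit every frame marked TP_STATUS_SEND_REQUEST above;
	 * MSG_DONTWAIT keeps the kick from blocking the burst.
	 */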
	/* kick-off transmits */
	if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1) {
		/* error sending -- no packets transmitted */
		num_tx = 0;
		num_tx_bytes = 0;
	}

	pkt_q->framenum = framenum;
	pkt_q->tx_pkts += num_tx;
	pkt_q->err_pkts += i - num_tx;
	pkt_q->tx_bytes += num_tx_bytes;
	return i;
}

static int
eth_dev_start(struct rte_eth_dev *dev)
{
	dev->data->dev_link.link_status = ETH_LINK_UP;
	return 0;
}

/*
 * This function gets called when the current port gets stopped.
 */
static void
eth_dev_stop(struct rte_eth_dev *dev)
{
	unsigned i;
	int sockfd;
	struct pmd_internals *internals = dev->data->dev_private;

	for (i = 0; i < internals->nb_queues; i++) {
		sockfd = internals->rx_queue[i].sockfd;
		if (sockfd != -1)
			close(sockfd);

		/* Prevent use after free in case tx fd == rx fd */
		if (sockfd != internals->tx_queue[i].sockfd) {
			sockfd = internals->tx_queue[i].sockfd;
			if (sockfd != -1)
				close(sockfd);
		}

		internals->rx_queue[i].sockfd = -1;
		internals->tx_queue[i].sockfd = -1;
	}

	dev->data->dev_link.link_status = ETH_LINK_DOWN;
}

static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
	return 0;
}

static void
eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev_info->if_index = internals->if_index;
	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
	dev_info->max_rx_queues = (uint16_t)internals->nb_queues;
	dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
	dev_info->min_rx_bufsize = 0;
}
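
/*
 * Aggregate the per-queue counters into the device totals; per-queue
 * entries are only reported for the first RTE_ETHDEV_QUEUE_STAT_CNTRS
 * queues.
 */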
static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
{
	unsigned i, imax;
	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
	const struct pmd_internals *internal = dev->data->dev_private;

	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < imax; i++) {
		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
		rx_total += igb_stats->q_ipackets[i];
		rx_bytes_total += igb_stats->q_ibytes[i];
	}

	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < imax; i++) {
		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
		igb_stats->q_errors[i] = internal->tx_queue[i].err_pkts;
		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
		tx_total += igb_stats->q_opackets[i];
		tx_err_total += igb_stats->q_errors[i];
		tx_bytes_total += igb_stats->q_obytes[i];
	}

	igb_stats->ipackets = rx_total;
	igb_stats->ibytes = rx_bytes_total;
	igb_stats->opackets = tx_total;
	igb_stats->oerrors = tx_err_total;
	igb_stats->obytes = tx_bytes_total;
	return 0;
}

static void
eth_stats_reset(struct rte_eth_dev *dev)
{
	unsigned i;
	struct pmd_internals *internal = dev->data->dev_private;

	for (i = 0; i < internal->nb_queues; i++) {
		internal->rx_queue[i].rx_pkts = 0;
		internal->rx_queue[i].rx_bytes = 0;
	}

	for (i = 0; i < internal->nb_queues; i++) {
		internal->tx_queue[i].tx_pkts = 0;
		internal->tx_queue[i].err_pkts = 0;
		internal->tx_queue[i].tx_bytes = 0;
	}
}

static void
eth_dev_close(struct rte_eth_dev *dev __rte_unused)
{
}

static void
eth_queue_release(void *q __rte_unused)
{
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
                int wait_to_complete __rte_unused)
{
	return 0;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev,
                   uint16_t rx_queue_id,
                   uint16_t nb_rx_desc __rte_unused,
                   unsigned int socket_id __rte_unused,
                   const struct rte_eth_rxconf *rx_conf __rte_unused,
                   struct rte_mempool *mb_pool)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id];
	unsigned int buf_size, data_size;

	pkt_q->mb_pool = mb_pool;

	/* Now get the space available for data in the mbuf */
	buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) -
		RTE_PKTMBUF_HEADROOM;
	data_size = internals->req.tp_frame_size;
	data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);

	if (data_size > buf_size) {
		RTE_LOG(ERR, PMD,
			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
			dev->device->name, data_size, buf_size);
		return -ENOMEM;
	}

	dev->data->rx_queues[rx_queue_id] = pkt_q;
	pkt_q->in_port = dev->data->port_id;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev,
                   uint16_t tx_queue_id,
                   uint16_t nb_tx_desc __rte_unused,
                   unsigned int socket_id __rte_unused,
                   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev->data->tx_queues[tx_queue_id] = &internals->tx_queue[tx_queue_id];
	return 0;
}

static int
eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct ifreq ifr = { .ifr_mtu = mtu };
	int ret;
	int s;
	unsigned int data_size = internals->req.tp_frame_size -
				 TPACKET2_HDRLEN -
				 sizeof(struct sockaddr_ll);

	if (mtu > data_size)
		return -EINVAL;

	s = socket(PF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return -EINVAL;

	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", internals->if_name);
	ret = ioctl(s, SIOCSIFMTU, &ifr);
	close(s);

	if (ret < 0)
		return -EINVAL;

	return 0;
}
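
/*
 * Update the interface flags with a SIOCGIFFLAGS/SIOCSIFFLAGS
 * read-modify-write: bits cleared in 'mask' are dropped, then the bits
 * in 'flags' are set.
 */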
static void
eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask)
{
	struct ifreq ifr;
	int s;

	s = socket(PF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return;

	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", if_name);
	if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0)
		goto out;
	ifr.ifr_flags &= mask;
	ifr.ifr_flags |= flags;
	if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0)
		goto out;
out:
	close(s);
}

static void
eth_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;

	eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0);
}

static void
eth_dev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;

	eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC);
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.mtu_set = eth_dev_mtu_set,
	.promiscuous_enable = eth_dev_promiscuous_enable,
	.promiscuous_disable = eth_dev_promiscuous_disable,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
};

/*
 * Opens an AF_PACKET socket
 */
static int
open_packet_iface(const char *key __rte_unused,
                  const char *value __rte_unused,
                  void *extra_args)
{
	int *sockfd = extra_args;

	/* Open an AF_PACKET socket... */
	*sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (*sockfd == -1) {
		RTE_LOG(ERR, PMD, "Could not open AF_PACKET socket\n");
		return -1;
	}

	return 0;
}

static struct rte_vdev_driver pmd_af_packet_drv;
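
/*
 * Allocate the ethdev data and PMD internals, then open, configure,
 * mmap, and bind one AF_PACKET socket (carrying a TPACKET_V2 RX/TX
 * ring pair) per requested queue pair.
 */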
static int
rte_pmd_init_internals(struct rte_vdev_device *dev,
                       const int sockfd,
                       const unsigned nb_queues,
                       unsigned int blocksize,
                       unsigned int blockcnt,
                       unsigned int framesize,
                       unsigned int framecnt,
                       unsigned int qdisc_bypass,
                       struct pmd_internals **internals,
                       struct rte_eth_dev **eth_dev,
                       struct rte_kvargs *kvlist)
{
	const char *name = rte_vdev_device_name(dev);
	const unsigned int numa_node = dev->device.numa_node;
	struct rte_eth_dev_data *data = NULL;
	struct rte_kvargs_pair *pair = NULL;
	struct ifreq ifr;
	size_t ifnamelen;
	unsigned k_idx;
	struct sockaddr_ll sockaddr;
	struct tpacket_req *req;
	struct pkt_rx_queue *rx_queue;
	struct pkt_tx_queue *tx_queue;
	int rc, tpver, discard;
	int qsockfd = -1;
	unsigned int i, q, rdsize;
#if defined(PACKET_FANOUT)
	int fanout_arg;
#endif

	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
		pair = &kvlist->pairs[k_idx];
		if (strstr(pair->key, ETH_AF_PACKET_IFACE_ARG) != NULL)
			break;
	}
	if (pair == NULL) {
		RTE_LOG(ERR, PMD,
			"%s: no interface specified for AF_PACKET ethdev\n",
			name);
		goto error_early;
	}

	RTE_LOG(INFO, PMD,
		"%s: creating AF_PACKET-backed ethdev on numa socket %u\n",
		name, numa_node);

	/*
	 * now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */
	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
	if (data == NULL)
		goto error_early;

	*internals = rte_zmalloc_socket(name, sizeof(**internals),
	                                0, numa_node);
	if (*internals == NULL)
		goto error_early;

	for (q = 0; q < nb_queues; q++) {
		(*internals)->rx_queue[q].map = MAP_FAILED;
		(*internals)->tx_queue[q].map = MAP_FAILED;
	}

	req = &((*internals)->req);

	req->tp_block_size = blocksize;
	req->tp_block_nr = blockcnt;
	req->tp_frame_size = framesize;
	req->tp_frame_nr = framecnt;

	ifnamelen = strlen(pair->value);
	if (ifnamelen < sizeof(ifr.ifr_name)) {
		memcpy(ifr.ifr_name, pair->value, ifnamelen);
		ifr.ifr_name[ifnamelen] = '\0';
	} else {
		RTE_LOG(ERR, PMD,
			"%s: I/F name too long (%s)\n",
			name, pair->value);
		goto error_early;
	}
	if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) {
		RTE_LOG(ERR, PMD,
			"%s: ioctl failed (SIOCGIFINDEX)\n",
			name);
		goto error_early;
	}
	(*internals)->if_name = strdup(pair->value);
	if ((*internals)->if_name == NULL)
		goto error_early;
	(*internals)->if_index = ifr.ifr_ifindex;

	if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) {
		RTE_LOG(ERR, PMD,
			"%s: ioctl failed (SIOCGIFHWADDR)\n",
			name);
		goto error_early;
	}
	memcpy(&(*internals)->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);

	memset(&sockaddr, 0, sizeof(sockaddr));
	sockaddr.sll_family = AF_PACKET;
	sockaddr.sll_protocol = htons(ETH_P_ALL);
	sockaddr.sll_ifindex = (*internals)->if_index;

#if defined(PACKET_FANOUT)
	fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff;
	fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16;
#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
	fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
#endif
#endif
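
	/*
	 * Every queue socket joins the same fanout group (id derived from
	 * the pid and interface index), so the kernel hashes incoming flows
	 * across the queue sockets; FLAG_DEFRAG reassembles fragments before
	 * hashing and FLAG_ROLLOVER (where available) spills packets to
	 * another socket when one backs up.
	 */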
	for (q = 0; q < nb_queues; q++) {
		/* Open an AF_PACKET socket for this queue... */
		qsockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
		if (qsockfd == -1) {
			RTE_LOG(ERR, PMD,
				"%s: could not open AF_PACKET socket\n",
				name);
			goto error;
		}

		tpver = TPACKET_V2;
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_VERSION,
				&tpver, sizeof(tpver));
		if (rc == -1) {
			RTE_LOG(ERR, PMD,
				"%s: could not set PACKET_VERSION on AF_PACKET "
				"socket for %s\n", name, pair->value);
			goto error;
		}

		discard = 1;
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_LOSS,
				&discard, sizeof(discard));
		if (rc == -1) {
			RTE_LOG(ERR, PMD,
				"%s: could not set PACKET_LOSS on "
				"AF_PACKET socket for %s\n", name, pair->value);
			goto error;
		}

#if defined(PACKET_QDISC_BYPASS)
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_QDISC_BYPASS,
				&qdisc_bypass, sizeof(qdisc_bypass));
		if (rc == -1) {
			RTE_LOG(ERR, PMD,
				"%s: could not set PACKET_QDISC_BYPASS "
				"on AF_PACKET socket for %s\n", name,
				pair->value);
			goto error;
		}
#else
		RTE_SET_USED(qdisc_bypass);
#endif

		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req));
		if (rc == -1) {
			RTE_LOG(ERR, PMD,
				"%s: could not set PACKET_RX_RING on AF_PACKET "
				"socket for %s\n", name, pair->value);
			goto error;
		}

		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_TX_RING, req, sizeof(*req));
		if (rc == -1) {
			RTE_LOG(ERR, PMD,
				"%s: could not set PACKET_TX_RING on AF_PACKET "
				"socket for %s\n", name, pair->value);
			goto error;
		}

		rx_queue = &((*internals)->rx_queue[q]);
		rx_queue->framecount = req->tp_frame_nr;

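		/*
		 * Both PACKET_RX_RING and PACKET_TX_RING were requested, so a
		 * single mmap of twice the ring size maps the RX ring first,
		 * followed immediately by the TX ring.
		 */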
		rx_queue->map = mmap(NULL, 2 * req->tp_block_size * req->tp_block_nr,
				    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED,
				    qsockfd, 0);
		if (rx_queue->map == MAP_FAILED) {
			RTE_LOG(ERR, PMD,
				"%s: call to mmap failed on AF_PACKET socket for %s\n",
				name, pair->value);
			goto error;
		}

		/* rdsize is same for both Tx and Rx */
		rdsize = req->tp_frame_nr * sizeof(*(rx_queue->rd));

		rx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node);
		if (rx_queue->rd == NULL)
			goto error;
		for (i = 0; i < req->tp_frame_nr; ++i) {
			rx_queue->rd[i].iov_base = rx_queue->map + (i * framesize);
			rx_queue->rd[i].iov_len = req->tp_frame_size;
		}
		rx_queue->sockfd = qsockfd;

		tx_queue = &((*internals)->tx_queue[q]);
		tx_queue->framecount = req->tp_frame_nr;
		tx_queue->frame_data_size = req->tp_frame_size;
		tx_queue->frame_data_size -= TPACKET2_HDRLEN -
			sizeof(struct sockaddr_ll);

		tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr;

		tx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node);
		if (tx_queue->rd == NULL)
			goto error;
		for (i = 0; i < req->tp_frame_nr; ++i) {
			tx_queue->rd[i].iov_base = tx_queue->map + (i * framesize);
			tx_queue->rd[i].iov_len = req->tp_frame_size;
		}
		tx_queue->sockfd = qsockfd;

		rc = bind(qsockfd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr));
		if (rc == -1) {
			RTE_LOG(ERR, PMD,
				"%s: could not bind AF_PACKET socket to %s\n",
				name, pair->value);
			goto error;
		}

#if defined(PACKET_FANOUT)
		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT,
				&fanout_arg, sizeof(fanout_arg));
		if (rc == -1) {
			RTE_LOG(ERR, PMD,
				"%s: could not set PACKET_FANOUT on AF_PACKET socket "
				"for %s\n", name, pair->value);
			goto error;
		}
#endif
	}

	/* reserve an ethdev entry */
	*eth_dev = rte_eth_vdev_allocate(dev, 0);
	if (*eth_dev == NULL)
		goto error;

	/*
	 * now put it all together
	 * - store queue data in internals,
	 * - store numa_node in eth_dev
	 * - point eth_dev_data to internals
	 * - and point eth_dev structure to new eth_dev_data structure
	 */

	(*internals)->nb_queues = nb_queues;

	rte_memcpy(data, (*eth_dev)->data, sizeof(*data));
	data->dev_private = *internals;
	data->nb_rx_queues = (uint16_t)nb_queues;
	data->nb_tx_queues = (uint16_t)nb_queues;
	data->dev_link = pmd_link;
	data->mac_addrs = &(*internals)->eth_addr;

	(*eth_dev)->data = data;
	(*eth_dev)->dev_ops = &ops;

	return 0;

error:
	if (qsockfd != -1)
		close(qsockfd);
	for (q = 0; q < nb_queues; q++) {
		munmap((*internals)->rx_queue[q].map,
		       2 * req->tp_block_size * req->tp_block_nr);

		rte_free((*internals)->rx_queue[q].rd);
		rte_free((*internals)->tx_queue[q].rd);
		if (((*internals)->rx_queue[q].sockfd != 0) &&
		    ((*internals)->rx_queue[q].sockfd != qsockfd))
			close((*internals)->rx_queue[q].sockfd);
	}
	free((*internals)->if_name);
	rte_free(*internals);
error_early:
	rte_free(data);
	return -1;
}

static int
rte_eth_from_packet(struct rte_vdev_device *dev,
                    int const *sockfd,
                    struct rte_kvargs *kvlist)
{
	const char *name = rte_vdev_device_name(dev);
	struct pmd_internals *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct rte_kvargs_pair *pair = NULL;
	unsigned k_idx;
	unsigned int blockcount;
	unsigned int blocksize = DFLT_BLOCK_SIZE;
	unsigned int framesize = DFLT_FRAME_SIZE;
	unsigned int framecount = DFLT_FRAME_COUNT;
	unsigned int qpairs = 1;
	unsigned int qdisc_bypass = 1;

	/* do some parameter checking */
	if (*sockfd < 0)
		return -1;

	/*
	 * Walk arguments for configurable settings
	 */
	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
		pair = &kvlist->pairs[k_idx];
		if (strstr(pair->key, ETH_AF_PACKET_NUM_Q_ARG) != NULL) {
			qpairs = atoi(pair->value);
			if (qpairs < 1 ||
			    qpairs > RTE_PMD_AF_PACKET_MAX_RINGS) {
				RTE_LOG(ERR, PMD,
					"%s: invalid qpairs value\n",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_BLOCKSIZE_ARG) != NULL) {
			blocksize = atoi(pair->value);
			if (!blocksize) {
				RTE_LOG(ERR, PMD,
					"%s: invalid blocksize value\n",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_FRAMESIZE_ARG) != NULL) {
			framesize = atoi(pair->value);
			if (!framesize) {
				RTE_LOG(ERR, PMD,
					"%s: invalid framesize value\n",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_FRAMECOUNT_ARG) != NULL) {
			framecount = atoi(pair->value);
			if (!framecount) {
				RTE_LOG(ERR, PMD,
					"%s: invalid framecount value\n",
					name);
				return -1;
			}
			continue;
		}
		if (strstr(pair->key, ETH_AF_PACKET_QDISC_BYPASS_ARG) != NULL) {
			qdisc_bypass = atoi(pair->value);
			if (qdisc_bypass > 1) {
				RTE_LOG(ERR, PMD,
					"%s: invalid bypass value\n",
					name);
				return -1;
			}
			continue;
		}
	}

	if (framesize > blocksize) {
		RTE_LOG(ERR, PMD,
			"%s: AF_PACKET MMAP frame size exceeds block size!\n",
			name);
		return -1;
	}
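
	/*
	 * Each block holds blocksize / framesize frames, so the requested
	 * frame count must amount to at least one full block.
	 */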
	blockcount = framecount / (blocksize / framesize);
	if (!blockcount) {
		RTE_LOG(ERR, PMD,
			"%s: invalid AF_PACKET MMAP parameters\n", name);
		return -1;
	}

	RTE_LOG(INFO, PMD, "%s: AF_PACKET MMAP parameters:\n", name);
	RTE_LOG(INFO, PMD, "%s:\tblock size %d\n", name, blocksize);
	RTE_LOG(INFO, PMD, "%s:\tblock count %d\n", name, blockcount);
	RTE_LOG(INFO, PMD, "%s:\tframe size %d\n", name, framesize);
	RTE_LOG(INFO, PMD, "%s:\tframe count %d\n", name, framecount);

	if (rte_pmd_init_internals(dev, *sockfd, qpairs,
				   blocksize, blockcount,
				   framesize, framecount,
				   qdisc_bypass,
				   &internals, &eth_dev,
				   kvlist) < 0)
		return -1;

	eth_dev->rx_pkt_burst = eth_af_packet_rx;
	eth_dev->tx_pkt_burst = eth_af_packet_tx;

	return 0;
}

static int
rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
{
	int ret = 0;
	struct rte_kvargs *kvlist;
	int sockfd = -1;

	RTE_LOG(INFO, PMD, "Initializing pmd_af_packet for %s\n",
		rte_vdev_device_name(dev));

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL) {
		ret = -1;
		goto exit;
	}

	/*
	 * If iface argument is passed we open the NICs and use them for
	 * reading / writing
	 */
	if (rte_kvargs_count(kvlist, ETH_AF_PACKET_IFACE_ARG) == 1) {

		ret = rte_kvargs_process(kvlist, ETH_AF_PACKET_IFACE_ARG,
		                         &open_packet_iface, &sockfd);
		if (ret < 0)
			goto exit;
	}

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

	ret = rte_eth_from_packet(dev, &sockfd, kvlist);
	close(sockfd); /* no longer needed */

exit:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev = NULL;
	struct pmd_internals *internals;
	unsigned q;

	RTE_LOG(INFO, PMD, "Closing AF_PACKET ethdev on numa socket %u\n",
		rte_socket_id());

	if (dev == NULL)
		return -1;

	/* find the ethdev entry */
	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
	if (eth_dev == NULL)
		return -1;

	internals = eth_dev->data->dev_private;
	for (q = 0; q < internals->nb_queues; q++) {
		rte_free(internals->rx_queue[q].rd);
		rte_free(internals->tx_queue[q].rd);
	}
	free(internals->if_name);

	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_af_packet_drv = {
	.probe = rte_pmd_af_packet_probe,
	.remove = rte_pmd_af_packet_remove,
};

RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv);
RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet);
RTE_PMD_REGISTER_PARAM_STRING(net_af_packet,
	"iface=<string> "
	"qpairs=<int> "
	"blocksz=<int> "
	"framesz=<int> "
	"framecnt=<int> "
	"qdisc_bypass=<0|1>");
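
/*
 * Illustrative usage (the interface name, core list, and parameter values
 * below are examples only):
 *
 *   testpmd -l 0-1 \
 *       --vdev=net_af_packet0,iface=eth0,blocksz=4096,framesz=2048,\
 *       framecnt=512,qpairs=1,qdisc_bypass=0 -- -i
 */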