1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2014 John W. Linville <linville@tuxdriver.com> 5 * 6 * Originally based upon librte_pmd_pcap code: 7 * 8 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. 9 * Copyright(c) 2014 6WIND S.A. 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 16 * * Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * * Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in 20 * the documentation and/or other materials provided with the 21 * distribution. 22 * * Neither the name of Intel Corporation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 #include <rte_mbuf.h> 40 #include <rte_ethdev.h> 41 #include <rte_malloc.h> 42 #include <rte_kvargs.h> 43 #include <rte_vdev.h> 44 45 #include <linux/if_ether.h> 46 #include <linux/if_packet.h> 47 #include <arpa/inet.h> 48 #include <net/if.h> 49 #include <sys/types.h> 50 #include <sys/socket.h> 51 #include <sys/ioctl.h> 52 #include <sys/mman.h> 53 #include <unistd.h> 54 #include <poll.h> 55 56 #define ETH_AF_PACKET_IFACE_ARG "iface" 57 #define ETH_AF_PACKET_NUM_Q_ARG "qpairs" 58 #define ETH_AF_PACKET_BLOCKSIZE_ARG "blocksz" 59 #define ETH_AF_PACKET_FRAMESIZE_ARG "framesz" 60 #define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt" 61 62 #define DFLT_BLOCK_SIZE (1 << 12) 63 #define DFLT_FRAME_SIZE (1 << 11) 64 #define DFLT_FRAME_COUNT (1 << 9) 65 66 #define RTE_PMD_AF_PACKET_MAX_RINGS 16 67 68 struct pkt_rx_queue { 69 int sockfd; 70 71 struct iovec *rd; 72 uint8_t *map; 73 unsigned int framecount; 74 unsigned int framenum; 75 76 struct rte_mempool *mb_pool; 77 uint8_t in_port; 78 79 volatile unsigned long rx_pkts; 80 volatile unsigned long err_pkts; 81 volatile unsigned long rx_bytes; 82 }; 83 84 struct pkt_tx_queue { 85 int sockfd; 86 unsigned int frame_data_size; 87 88 struct iovec *rd; 89 uint8_t *map; 90 unsigned int framecount; 91 unsigned int framenum; 92 93 volatile unsigned long tx_pkts; 94 volatile unsigned long err_pkts; 95 volatile unsigned long tx_bytes; 96 }; 97 98 struct pmd_internals { 99 unsigned nb_queues; 100 101 int if_index; 102 char *if_name; 103 struct ether_addr eth_addr; 104 105 struct tpacket_req req; 106 107 struct pkt_rx_queue rx_queue[RTE_PMD_AF_PACKET_MAX_RINGS]; 108 struct pkt_tx_queue tx_queue[RTE_PMD_AF_PACKET_MAX_RINGS]; 109 }; 110 111 static const char *valid_arguments[] = { 112 ETH_AF_PACKET_IFACE_ARG, 113 ETH_AF_PACKET_NUM_Q_ARG, 114 ETH_AF_PACKET_BLOCKSIZE_ARG, 115 ETH_AF_PACKET_FRAMESIZE_ARG, 116 ETH_AF_PACKET_FRAMECOUNT_ARG, 117 NULL 118 }; 119 120 static struct rte_eth_link pmd_link = { 121 .link_speed = ETH_SPEED_NUM_10G, 122 .link_duplex = ETH_LINK_FULL_DUPLEX, 123 .link_status = ETH_LINK_DOWN, 124 .link_autoneg = ETH_LINK_SPEED_AUTONEG 125 }; 126 127 static uint16_t 128 eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 129 { 130 unsigned i; 131 struct tpacket2_hdr *ppd; 132 struct rte_mbuf *mbuf; 133 uint8_t *pbuf; 134 struct pkt_rx_queue *pkt_q = queue; 135 uint16_t num_rx = 0; 136 unsigned long num_rx_bytes = 0; 137 unsigned int framecount, framenum; 138 139 if (unlikely(nb_pkts == 0)) 140 return 0; 141 142 /* 143 * Reads the given number of packets from the AF_PACKET socket one by 144 * one and copies the packet data into a newly allocated mbuf. 145 */ 146 framecount = pkt_q->framecount; 147 framenum = pkt_q->framenum; 148 for (i = 0; i < nb_pkts; i++) { 149 /* point at the next incoming frame */ 150 ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; 151 if ((ppd->tp_status & TP_STATUS_USER) == 0) 152 break; 153 154 /* allocate the next mbuf */ 155 mbuf = rte_pktmbuf_alloc(pkt_q->mb_pool); 156 if (unlikely(mbuf == NULL)) 157 break; 158 159 /* packet will fit in the mbuf, go ahead and receive it */ 160 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) = ppd->tp_snaplen; 161 pbuf = (uint8_t *) ppd + ppd->tp_mac; 162 memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf)); 163 164 /* check for vlan info */ 165 if (ppd->tp_status & TP_STATUS_VLAN_VALID) { 166 mbuf->vlan_tci = ppd->tp_vlan_tci; 167 mbuf->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED); 168 } 169 170 /* release incoming frame and advance ring buffer */ 171 ppd->tp_status = TP_STATUS_KERNEL; 172 if (++framenum >= framecount) 173 framenum = 0; 174 mbuf->port = pkt_q->in_port; 175 176 /* account for the receive frame */ 177 bufs[i] = mbuf; 178 num_rx++; 179 num_rx_bytes += mbuf->pkt_len; 180 } 181 pkt_q->framenum = framenum; 182 pkt_q->rx_pkts += num_rx; 183 pkt_q->rx_bytes += num_rx_bytes; 184 return num_rx; 185 } 186 187 /* 188 * Callback to handle sending packets through a real NIC. 189 */ 190 static uint16_t 191 eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 192 { 193 struct tpacket2_hdr *ppd; 194 struct rte_mbuf *mbuf; 195 uint8_t *pbuf; 196 unsigned int framecount, framenum; 197 struct pollfd pfd; 198 struct pkt_tx_queue *pkt_q = queue; 199 uint16_t num_tx = 0; 200 unsigned long num_tx_bytes = 0; 201 int i; 202 203 if (unlikely(nb_pkts == 0)) 204 return 0; 205 206 memset(&pfd, 0, sizeof(pfd)); 207 pfd.fd = pkt_q->sockfd; 208 pfd.events = POLLOUT; 209 pfd.revents = 0; 210 211 framecount = pkt_q->framecount; 212 framenum = pkt_q->framenum; 213 ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; 214 for (i = 0; i < nb_pkts; i++) { 215 mbuf = *bufs++; 216 217 /* drop oversized packets */ 218 if (rte_pktmbuf_data_len(mbuf) > pkt_q->frame_data_size) { 219 rte_pktmbuf_free(mbuf); 220 continue; 221 } 222 223 /* insert vlan info if necessary */ 224 if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { 225 if (rte_vlan_insert(&mbuf)) { 226 rte_pktmbuf_free(mbuf); 227 continue; 228 } 229 } 230 231 /* point at the next incoming frame */ 232 if ((ppd->tp_status != TP_STATUS_AVAILABLE) && 233 (poll(&pfd, 1, -1) < 0)) 234 break; 235 236 /* copy the tx frame data */ 237 pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN - 238 sizeof(struct sockaddr_ll); 239 memcpy(pbuf, rte_pktmbuf_mtod(mbuf, void*), rte_pktmbuf_data_len(mbuf)); 240 ppd->tp_len = ppd->tp_snaplen = rte_pktmbuf_data_len(mbuf); 241 242 /* release incoming frame and advance ring buffer */ 243 ppd->tp_status = TP_STATUS_SEND_REQUEST; 244 if (++framenum >= framecount) 245 framenum = 0; 246 ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; 247 248 num_tx++; 249 num_tx_bytes += mbuf->pkt_len; 250 rte_pktmbuf_free(mbuf); 251 } 252 253 /* kick-off transmits */ 254 if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1) 255 num_tx = 0; /* error sending -- no packets transmitted */ 256 257 pkt_q->framenum = framenum; 258 pkt_q->tx_pkts += num_tx; 259 pkt_q->err_pkts += i - num_tx; 260 pkt_q->tx_bytes += num_tx_bytes; 261 return i; 262 } 263 264 static int 265 eth_dev_start(struct rte_eth_dev *dev) 266 { 267 dev->data->dev_link.link_status = ETH_LINK_UP; 268 return 0; 269 } 270 271 /* 272 * This function gets called when the current port gets stopped. 273 */ 274 static void 275 eth_dev_stop(struct rte_eth_dev *dev) 276 { 277 unsigned i; 278 int sockfd; 279 struct pmd_internals *internals = dev->data->dev_private; 280 281 for (i = 0; i < internals->nb_queues; i++) { 282 sockfd = internals->rx_queue[i].sockfd; 283 if (sockfd != -1) 284 close(sockfd); 285 286 /* Prevent use after free in case tx fd == rx fd */ 287 if (sockfd != internals->tx_queue[i].sockfd) { 288 sockfd = internals->tx_queue[i].sockfd; 289 if (sockfd != -1) 290 close(sockfd); 291 } 292 293 internals->rx_queue[i].sockfd = -1; 294 internals->tx_queue[i].sockfd = -1; 295 } 296 297 dev->data->dev_link.link_status = ETH_LINK_DOWN; 298 } 299 300 static int 301 eth_dev_configure(struct rte_eth_dev *dev __rte_unused) 302 { 303 return 0; 304 } 305 306 static void 307 eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) 308 { 309 struct pmd_internals *internals = dev->data->dev_private; 310 311 dev_info->if_index = internals->if_index; 312 dev_info->max_mac_addrs = 1; 313 dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN; 314 dev_info->max_rx_queues = (uint16_t)internals->nb_queues; 315 dev_info->max_tx_queues = (uint16_t)internals->nb_queues; 316 dev_info->min_rx_bufsize = 0; 317 } 318 319 static void 320 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats) 321 { 322 unsigned i, imax; 323 unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0; 324 unsigned long rx_bytes_total = 0, tx_bytes_total = 0; 325 const struct pmd_internals *internal = dev->data->dev_private; 326 327 imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ? 328 internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS); 329 for (i = 0; i < imax; i++) { 330 igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts; 331 igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes; 332 rx_total += igb_stats->q_ipackets[i]; 333 rx_bytes_total += igb_stats->q_ibytes[i]; 334 } 335 336 imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ? 337 internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS); 338 for (i = 0; i < imax; i++) { 339 igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts; 340 igb_stats->q_errors[i] = internal->tx_queue[i].err_pkts; 341 igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes; 342 tx_total += igb_stats->q_opackets[i]; 343 tx_err_total += igb_stats->q_errors[i]; 344 tx_bytes_total += igb_stats->q_obytes[i]; 345 } 346 347 igb_stats->ipackets = rx_total; 348 igb_stats->ibytes = rx_bytes_total; 349 igb_stats->opackets = tx_total; 350 igb_stats->oerrors = tx_err_total; 351 igb_stats->obytes = tx_bytes_total; 352 } 353 354 static void 355 eth_stats_reset(struct rte_eth_dev *dev) 356 { 357 unsigned i; 358 struct pmd_internals *internal = dev->data->dev_private; 359 360 for (i = 0; i < internal->nb_queues; i++) { 361 internal->rx_queue[i].rx_pkts = 0; 362 internal->rx_queue[i].rx_bytes = 0; 363 } 364 365 for (i = 0; i < internal->nb_queues; i++) { 366 internal->tx_queue[i].tx_pkts = 0; 367 internal->tx_queue[i].err_pkts = 0; 368 internal->tx_queue[i].tx_bytes = 0; 369 } 370 } 371 372 static void 373 eth_dev_close(struct rte_eth_dev *dev __rte_unused) 374 { 375 } 376 377 static void 378 eth_queue_release(void *q __rte_unused) 379 { 380 } 381 382 static int 383 eth_link_update(struct rte_eth_dev *dev __rte_unused, 384 int wait_to_complete __rte_unused) 385 { 386 return 0; 387 } 388 389 static int 390 eth_rx_queue_setup(struct rte_eth_dev *dev, 391 uint16_t rx_queue_id, 392 uint16_t nb_rx_desc __rte_unused, 393 unsigned int socket_id __rte_unused, 394 const struct rte_eth_rxconf *rx_conf __rte_unused, 395 struct rte_mempool *mb_pool) 396 { 397 struct pmd_internals *internals = dev->data->dev_private; 398 struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id]; 399 unsigned int buf_size, data_size; 400 401 pkt_q->mb_pool = mb_pool; 402 403 /* Now get the space available for data in the mbuf */ 404 buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) - 405 RTE_PKTMBUF_HEADROOM; 406 data_size = internals->req.tp_frame_size; 407 data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); 408 409 if (data_size > buf_size) { 410 RTE_LOG(ERR, PMD, 411 "%s: %d bytes will not fit in mbuf (%d bytes)\n", 412 dev->data->name, data_size, buf_size); 413 return -ENOMEM; 414 } 415 416 dev->data->rx_queues[rx_queue_id] = pkt_q; 417 pkt_q->in_port = dev->data->port_id; 418 419 return 0; 420 } 421 422 static int 423 eth_tx_queue_setup(struct rte_eth_dev *dev, 424 uint16_t tx_queue_id, 425 uint16_t nb_tx_desc __rte_unused, 426 unsigned int socket_id __rte_unused, 427 const struct rte_eth_txconf *tx_conf __rte_unused) 428 { 429 430 struct pmd_internals *internals = dev->data->dev_private; 431 432 dev->data->tx_queues[tx_queue_id] = &internals->tx_queue[tx_queue_id]; 433 return 0; 434 } 435 436 static int 437 eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 438 { 439 struct pmd_internals *internals = dev->data->dev_private; 440 struct ifreq ifr = { .ifr_mtu = mtu }; 441 int ret; 442 int s; 443 unsigned int data_size = internals->req.tp_frame_size - 444 TPACKET2_HDRLEN - 445 sizeof(struct sockaddr_ll); 446 447 if (mtu > data_size) 448 return -EINVAL; 449 450 s = socket(PF_INET, SOCK_DGRAM, 0); 451 if (s < 0) 452 return -EINVAL; 453 454 strncpy(ifr.ifr_name, internals->if_name, IFNAMSIZ); 455 ret = ioctl(s, SIOCSIFMTU, &ifr); 456 close(s); 457 458 if (ret < 0) 459 return -EINVAL; 460 461 return 0; 462 } 463 464 static void 465 eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask) 466 { 467 struct ifreq ifr; 468 int s; 469 470 s = socket(PF_INET, SOCK_DGRAM, 0); 471 if (s < 0) 472 return; 473 474 strncpy(ifr.ifr_name, if_name, IFNAMSIZ); 475 if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) 476 goto out; 477 ifr.ifr_flags &= mask; 478 ifr.ifr_flags |= flags; 479 if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) 480 goto out; 481 out: 482 close(s); 483 } 484 485 static void 486 eth_dev_promiscuous_enable(struct rte_eth_dev *dev) 487 { 488 struct pmd_internals *internals = dev->data->dev_private; 489 490 eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0); 491 } 492 493 static void 494 eth_dev_promiscuous_disable(struct rte_eth_dev *dev) 495 { 496 struct pmd_internals *internals = dev->data->dev_private; 497 498 eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC); 499 } 500 501 static const struct eth_dev_ops ops = { 502 .dev_start = eth_dev_start, 503 .dev_stop = eth_dev_stop, 504 .dev_close = eth_dev_close, 505 .dev_configure = eth_dev_configure, 506 .dev_infos_get = eth_dev_info, 507 .mtu_set = eth_dev_mtu_set, 508 .promiscuous_enable = eth_dev_promiscuous_enable, 509 .promiscuous_disable = eth_dev_promiscuous_disable, 510 .rx_queue_setup = eth_rx_queue_setup, 511 .tx_queue_setup = eth_tx_queue_setup, 512 .rx_queue_release = eth_queue_release, 513 .tx_queue_release = eth_queue_release, 514 .link_update = eth_link_update, 515 .stats_get = eth_stats_get, 516 .stats_reset = eth_stats_reset, 517 }; 518 519 /* 520 * Opens an AF_PACKET socket 521 */ 522 static int 523 open_packet_iface(const char *key __rte_unused, 524 const char *value __rte_unused, 525 void *extra_args) 526 { 527 int *sockfd = extra_args; 528 529 /* Open an AF_PACKET socket... */ 530 *sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 531 if (*sockfd == -1) { 532 RTE_LOG(ERR, PMD, "Could not open AF_PACKET socket\n"); 533 return -1; 534 } 535 536 return 0; 537 } 538 539 static struct rte_vdev_driver pmd_af_packet_drv; 540 541 static int 542 rte_pmd_init_internals(const char *name, 543 const int sockfd, 544 const unsigned nb_queues, 545 unsigned int blocksize, 546 unsigned int blockcnt, 547 unsigned int framesize, 548 unsigned int framecnt, 549 const unsigned numa_node, 550 struct pmd_internals **internals, 551 struct rte_eth_dev **eth_dev, 552 struct rte_kvargs *kvlist) 553 { 554 struct rte_eth_dev_data *data = NULL; 555 struct rte_kvargs_pair *pair = NULL; 556 struct ifreq ifr; 557 size_t ifnamelen; 558 unsigned k_idx; 559 struct sockaddr_ll sockaddr; 560 struct tpacket_req *req; 561 struct pkt_rx_queue *rx_queue; 562 struct pkt_tx_queue *tx_queue; 563 int rc, tpver, discard; 564 int qsockfd = -1; 565 unsigned int i, q, rdsize; 566 int fanout_arg __rte_unused, bypass __rte_unused; 567 568 for (k_idx = 0; k_idx < kvlist->count; k_idx++) { 569 pair = &kvlist->pairs[k_idx]; 570 if (strstr(pair->key, ETH_AF_PACKET_IFACE_ARG) != NULL) 571 break; 572 } 573 if (pair == NULL) { 574 RTE_LOG(ERR, PMD, 575 "%s: no interface specified for AF_PACKET ethdev\n", 576 name); 577 goto error_early; 578 } 579 580 RTE_LOG(INFO, PMD, 581 "%s: creating AF_PACKET-backed ethdev on numa socket %u\n", 582 name, numa_node); 583 584 /* 585 * now do all data allocation - for eth_dev structure, dummy pci driver 586 * and internal (private) data 587 */ 588 data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node); 589 if (data == NULL) 590 goto error_early; 591 592 *internals = rte_zmalloc_socket(name, sizeof(**internals), 593 0, numa_node); 594 if (*internals == NULL) 595 goto error_early; 596 597 for (q = 0; q < nb_queues; q++) { 598 (*internals)->rx_queue[q].map = MAP_FAILED; 599 (*internals)->tx_queue[q].map = MAP_FAILED; 600 } 601 602 req = &((*internals)->req); 603 604 req->tp_block_size = blocksize; 605 req->tp_block_nr = blockcnt; 606 req->tp_frame_size = framesize; 607 req->tp_frame_nr = framecnt; 608 609 ifnamelen = strlen(pair->value); 610 if (ifnamelen < sizeof(ifr.ifr_name)) { 611 memcpy(ifr.ifr_name, pair->value, ifnamelen); 612 ifr.ifr_name[ifnamelen] = '\0'; 613 } else { 614 RTE_LOG(ERR, PMD, 615 "%s: I/F name too long (%s)\n", 616 name, pair->value); 617 goto error_early; 618 } 619 if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) { 620 RTE_LOG(ERR, PMD, 621 "%s: ioctl failed (SIOCGIFINDEX)\n", 622 name); 623 goto error_early; 624 } 625 (*internals)->if_name = strdup(pair->value); 626 (*internals)->if_index = ifr.ifr_ifindex; 627 628 if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) { 629 RTE_LOG(ERR, PMD, 630 "%s: ioctl failed (SIOCGIFHWADDR)\n", 631 name); 632 goto error_early; 633 } 634 memcpy(&(*internals)->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN); 635 636 memset(&sockaddr, 0, sizeof(sockaddr)); 637 sockaddr.sll_family = AF_PACKET; 638 sockaddr.sll_protocol = htons(ETH_P_ALL); 639 sockaddr.sll_ifindex = (*internals)->if_index; 640 641 #if defined(PACKET_FANOUT) 642 fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff; 643 fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16; 644 #if defined(PACKET_FANOUT_FLAG_ROLLOVER) 645 fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16; 646 #endif 647 #endif 648 649 for (q = 0; q < nb_queues; q++) { 650 /* Open an AF_PACKET socket for this queue... */ 651 qsockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 652 if (qsockfd == -1) { 653 RTE_LOG(ERR, PMD, 654 "%s: could not open AF_PACKET socket\n", 655 name); 656 return -1; 657 } 658 659 tpver = TPACKET_V2; 660 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_VERSION, 661 &tpver, sizeof(tpver)); 662 if (rc == -1) { 663 RTE_LOG(ERR, PMD, 664 "%s: could not set PACKET_VERSION on AF_PACKET " 665 "socket for %s\n", name, pair->value); 666 goto error; 667 } 668 669 discard = 1; 670 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_LOSS, 671 &discard, sizeof(discard)); 672 if (rc == -1) { 673 RTE_LOG(ERR, PMD, 674 "%s: could not set PACKET_LOSS on " 675 "AF_PACKET socket for %s\n", name, pair->value); 676 goto error; 677 } 678 679 #if defined(PACKET_QDISC_BYPASS) 680 bypass = 1; 681 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_QDISC_BYPASS, 682 &bypass, sizeof(bypass)); 683 if (rc == -1) { 684 RTE_LOG(ERR, PMD, 685 "%s: could not set PACKET_QDISC_BYPASS " 686 "on AF_PACKET socket for %s\n", name, 687 pair->value); 688 goto error; 689 } 690 #endif 691 692 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req)); 693 if (rc == -1) { 694 RTE_LOG(ERR, PMD, 695 "%s: could not set PACKET_RX_RING on AF_PACKET " 696 "socket for %s\n", name, pair->value); 697 goto error; 698 } 699 700 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_TX_RING, req, sizeof(*req)); 701 if (rc == -1) { 702 RTE_LOG(ERR, PMD, 703 "%s: could not set PACKET_TX_RING on AF_PACKET " 704 "socket for %s\n", name, pair->value); 705 goto error; 706 } 707 708 rx_queue = &((*internals)->rx_queue[q]); 709 rx_queue->framecount = req->tp_frame_nr; 710 711 rx_queue->map = mmap(NULL, 2 * req->tp_block_size * req->tp_block_nr, 712 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, 713 qsockfd, 0); 714 if (rx_queue->map == MAP_FAILED) { 715 RTE_LOG(ERR, PMD, 716 "%s: call to mmap failed on AF_PACKET socket for %s\n", 717 name, pair->value); 718 goto error; 719 } 720 721 /* rdsize is same for both Tx and Rx */ 722 rdsize = req->tp_frame_nr * sizeof(*(rx_queue->rd)); 723 724 rx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node); 725 if (rx_queue->rd == NULL) 726 goto error; 727 for (i = 0; i < req->tp_frame_nr; ++i) { 728 rx_queue->rd[i].iov_base = rx_queue->map + (i * framesize); 729 rx_queue->rd[i].iov_len = req->tp_frame_size; 730 } 731 rx_queue->sockfd = qsockfd; 732 733 tx_queue = &((*internals)->tx_queue[q]); 734 tx_queue->framecount = req->tp_frame_nr; 735 tx_queue->frame_data_size = req->tp_frame_size; 736 tx_queue->frame_data_size -= TPACKET2_HDRLEN - 737 sizeof(struct sockaddr_ll); 738 739 tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr; 740 741 tx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node); 742 if (tx_queue->rd == NULL) 743 goto error; 744 for (i = 0; i < req->tp_frame_nr; ++i) { 745 tx_queue->rd[i].iov_base = tx_queue->map + (i * framesize); 746 tx_queue->rd[i].iov_len = req->tp_frame_size; 747 } 748 tx_queue->sockfd = qsockfd; 749 750 rc = bind(qsockfd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)); 751 if (rc == -1) { 752 RTE_LOG(ERR, PMD, 753 "%s: could not bind AF_PACKET socket to %s\n", 754 name, pair->value); 755 goto error; 756 } 757 758 #if defined(PACKET_FANOUT) 759 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT, 760 &fanout_arg, sizeof(fanout_arg)); 761 if (rc == -1) { 762 RTE_LOG(ERR, PMD, 763 "%s: could not set PACKET_FANOUT on AF_PACKET socket " 764 "for %s\n", name, pair->value); 765 goto error; 766 } 767 #endif 768 } 769 770 /* reserve an ethdev entry */ 771 *eth_dev = rte_eth_dev_allocate(name); 772 if (*eth_dev == NULL) 773 goto error; 774 775 /* 776 * now put it all together 777 * - store queue data in internals, 778 * - store numa_node in eth_dev 779 * - point eth_dev_data to internals 780 * - and point eth_dev structure to new eth_dev_data structure 781 */ 782 783 (*internals)->nb_queues = nb_queues; 784 785 data->dev_private = *internals; 786 data->port_id = (*eth_dev)->data->port_id; 787 data->nb_rx_queues = (uint16_t)nb_queues; 788 data->nb_tx_queues = (uint16_t)nb_queues; 789 data->dev_link = pmd_link; 790 data->mac_addrs = &(*internals)->eth_addr; 791 strncpy(data->name, 792 (*eth_dev)->data->name, strlen((*eth_dev)->data->name)); 793 794 (*eth_dev)->data = data; 795 (*eth_dev)->dev_ops = &ops; 796 (*eth_dev)->driver = NULL; 797 (*eth_dev)->data->dev_flags = RTE_ETH_DEV_DETACHABLE; 798 (*eth_dev)->data->drv_name = pmd_af_packet_drv.driver.name; 799 (*eth_dev)->data->kdrv = RTE_KDRV_NONE; 800 (*eth_dev)->data->numa_node = numa_node; 801 802 return 0; 803 804 error: 805 if (qsockfd != -1) 806 close(qsockfd); 807 for (q = 0; q < nb_queues; q++) { 808 munmap((*internals)->rx_queue[q].map, 809 2 * req->tp_block_size * req->tp_block_nr); 810 811 rte_free((*internals)->rx_queue[q].rd); 812 rte_free((*internals)->tx_queue[q].rd); 813 if (((*internals)->rx_queue[q].sockfd != 0) && 814 ((*internals)->rx_queue[q].sockfd != qsockfd)) 815 close((*internals)->rx_queue[q].sockfd); 816 } 817 free((*internals)->if_name); 818 rte_free(*internals); 819 error_early: 820 rte_free(data); 821 return -1; 822 } 823 824 static int 825 rte_eth_from_packet(const char *name, 826 int const *sockfd, 827 const unsigned numa_node, 828 struct rte_kvargs *kvlist) 829 { 830 struct pmd_internals *internals = NULL; 831 struct rte_eth_dev *eth_dev = NULL; 832 struct rte_kvargs_pair *pair = NULL; 833 unsigned k_idx; 834 unsigned int blockcount; 835 unsigned int blocksize = DFLT_BLOCK_SIZE; 836 unsigned int framesize = DFLT_FRAME_SIZE; 837 unsigned int framecount = DFLT_FRAME_COUNT; 838 unsigned int qpairs = 1; 839 840 /* do some parameter checking */ 841 if (*sockfd < 0) 842 return -1; 843 844 /* 845 * Walk arguments for configurable settings 846 */ 847 for (k_idx = 0; k_idx < kvlist->count; k_idx++) { 848 pair = &kvlist->pairs[k_idx]; 849 if (strstr(pair->key, ETH_AF_PACKET_NUM_Q_ARG) != NULL) { 850 qpairs = atoi(pair->value); 851 if (qpairs < 1 || 852 qpairs > RTE_PMD_AF_PACKET_MAX_RINGS) { 853 RTE_LOG(ERR, PMD, 854 "%s: invalid qpairs value\n", 855 name); 856 return -1; 857 } 858 continue; 859 } 860 if (strstr(pair->key, ETH_AF_PACKET_BLOCKSIZE_ARG) != NULL) { 861 blocksize = atoi(pair->value); 862 if (!blocksize) { 863 RTE_LOG(ERR, PMD, 864 "%s: invalid blocksize value\n", 865 name); 866 return -1; 867 } 868 continue; 869 } 870 if (strstr(pair->key, ETH_AF_PACKET_FRAMESIZE_ARG) != NULL) { 871 framesize = atoi(pair->value); 872 if (!framesize) { 873 RTE_LOG(ERR, PMD, 874 "%s: invalid framesize value\n", 875 name); 876 return -1; 877 } 878 continue; 879 } 880 if (strstr(pair->key, ETH_AF_PACKET_FRAMECOUNT_ARG) != NULL) { 881 framecount = atoi(pair->value); 882 if (!framecount) { 883 RTE_LOG(ERR, PMD, 884 "%s: invalid framecount value\n", 885 name); 886 return -1; 887 } 888 continue; 889 } 890 } 891 892 if (framesize > blocksize) { 893 RTE_LOG(ERR, PMD, 894 "%s: AF_PACKET MMAP frame size exceeds block size!\n", 895 name); 896 return -1; 897 } 898 899 blockcount = framecount / (blocksize / framesize); 900 if (!blockcount) { 901 RTE_LOG(ERR, PMD, 902 "%s: invalid AF_PACKET MMAP parameters\n", name); 903 return -1; 904 } 905 906 RTE_LOG(INFO, PMD, "%s: AF_PACKET MMAP parameters:\n", name); 907 RTE_LOG(INFO, PMD, "%s:\tblock size %d\n", name, blocksize); 908 RTE_LOG(INFO, PMD, "%s:\tblock count %d\n", name, blockcount); 909 RTE_LOG(INFO, PMD, "%s:\tframe size %d\n", name, framesize); 910 RTE_LOG(INFO, PMD, "%s:\tframe count %d\n", name, framecount); 911 912 if (rte_pmd_init_internals(name, *sockfd, qpairs, 913 blocksize, blockcount, 914 framesize, framecount, 915 numa_node, &internals, ð_dev, 916 kvlist) < 0) 917 return -1; 918 919 eth_dev->rx_pkt_burst = eth_af_packet_rx; 920 eth_dev->tx_pkt_burst = eth_af_packet_tx; 921 922 return 0; 923 } 924 925 static int 926 rte_pmd_af_packet_probe(const char *name, const char *params) 927 { 928 unsigned numa_node; 929 int ret = 0; 930 struct rte_kvargs *kvlist; 931 int sockfd = -1; 932 933 RTE_LOG(INFO, PMD, "Initializing pmd_af_packet for %s\n", name); 934 935 numa_node = rte_socket_id(); 936 937 kvlist = rte_kvargs_parse(params, valid_arguments); 938 if (kvlist == NULL) { 939 ret = -1; 940 goto exit; 941 } 942 943 /* 944 * If iface argument is passed we open the NICs and use them for 945 * reading / writing 946 */ 947 if (rte_kvargs_count(kvlist, ETH_AF_PACKET_IFACE_ARG) == 1) { 948 949 ret = rte_kvargs_process(kvlist, ETH_AF_PACKET_IFACE_ARG, 950 &open_packet_iface, &sockfd); 951 if (ret < 0) 952 goto exit; 953 } 954 955 ret = rte_eth_from_packet(name, &sockfd, numa_node, kvlist); 956 close(sockfd); /* no longer needed */ 957 958 exit: 959 rte_kvargs_free(kvlist); 960 return ret; 961 } 962 963 static int 964 rte_pmd_af_packet_remove(const char *name) 965 { 966 struct rte_eth_dev *eth_dev = NULL; 967 struct pmd_internals *internals; 968 unsigned q; 969 970 RTE_LOG(INFO, PMD, "Closing AF_PACKET ethdev on numa socket %u\n", 971 rte_socket_id()); 972 973 if (name == NULL) 974 return -1; 975 976 /* find the ethdev entry */ 977 eth_dev = rte_eth_dev_allocated(name); 978 if (eth_dev == NULL) 979 return -1; 980 981 internals = eth_dev->data->dev_private; 982 for (q = 0; q < internals->nb_queues; q++) { 983 rte_free(internals->rx_queue[q].rd); 984 rte_free(internals->tx_queue[q].rd); 985 } 986 free(internals->if_name); 987 988 rte_free(eth_dev->data->dev_private); 989 rte_free(eth_dev->data); 990 991 rte_eth_dev_release_port(eth_dev); 992 993 return 0; 994 } 995 996 static struct rte_vdev_driver pmd_af_packet_drv = { 997 .probe = rte_pmd_af_packet_probe, 998 .remove = rte_pmd_af_packet_remove, 999 }; 1000 1001 RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv); 1002 RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet); 1003 RTE_PMD_REGISTER_PARAM_STRING(net_af_packet, 1004 "iface=<string> " 1005 "qpairs=<int> " 1006 "blocksz=<int> " 1007 "framesz=<int> " 1008 "framecnt=<int>"); 1009