1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2014 John W. Linville <linville@tuxdriver.com> 3 * Originally based upon librte_pmd_pcap code: 4 * Copyright(c) 2010-2015 Intel Corporation. 5 * Copyright(c) 2014 6WIND S.A. 6 * All rights reserved. 7 */ 8 9 #include <rte_string_fns.h> 10 #include <rte_mbuf.h> 11 #include <ethdev_driver.h> 12 #include <ethdev_vdev.h> 13 #include <rte_malloc.h> 14 #include <rte_kvargs.h> 15 #include <rte_bus_vdev.h> 16 17 #include <errno.h> 18 #include <linux/if_ether.h> 19 #include <linux/if_packet.h> 20 #include <arpa/inet.h> 21 #include <net/if.h> 22 #include <net/if_arp.h> 23 #include <sys/types.h> 24 #include <sys/socket.h> 25 #include <sys/ioctl.h> 26 #include <string.h> 27 #include <sys/mman.h> 28 #include <unistd.h> 29 #include <poll.h> 30 31 #define ETH_AF_PACKET_IFACE_ARG "iface" 32 #define ETH_AF_PACKET_NUM_Q_ARG "qpairs" 33 #define ETH_AF_PACKET_BLOCKSIZE_ARG "blocksz" 34 #define ETH_AF_PACKET_FRAMESIZE_ARG "framesz" 35 #define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt" 36 #define ETH_AF_PACKET_QDISC_BYPASS_ARG "qdisc_bypass" 37 38 #define DFLT_FRAME_SIZE (1 << 11) 39 #define DFLT_FRAME_COUNT (1 << 9) 40 41 struct pkt_rx_queue { 42 int sockfd; 43 44 struct iovec *rd; 45 uint8_t *map; 46 unsigned int framecount; 47 unsigned int framenum; 48 49 struct rte_mempool *mb_pool; 50 uint16_t in_port; 51 uint8_t vlan_strip; 52 53 volatile unsigned long rx_pkts; 54 volatile unsigned long rx_bytes; 55 }; 56 57 struct pkt_tx_queue { 58 int sockfd; 59 unsigned int frame_data_size; 60 61 struct iovec *rd; 62 uint8_t *map; 63 unsigned int framecount; 64 unsigned int framenum; 65 66 volatile unsigned long tx_pkts; 67 volatile unsigned long err_pkts; 68 volatile unsigned long tx_bytes; 69 }; 70 71 struct pmd_internals { 72 unsigned nb_queues; 73 74 int if_index; 75 char *if_name; 76 struct rte_ether_addr eth_addr; 77 78 struct tpacket_req req; 79 80 struct pkt_rx_queue *rx_queue; 81 struct pkt_tx_queue *tx_queue; 82 uint8_t vlan_strip; 83 }; 84 85 static const char *valid_arguments[] = { 86 ETH_AF_PACKET_IFACE_ARG, 87 ETH_AF_PACKET_NUM_Q_ARG, 88 ETH_AF_PACKET_BLOCKSIZE_ARG, 89 ETH_AF_PACKET_FRAMESIZE_ARG, 90 ETH_AF_PACKET_FRAMECOUNT_ARG, 91 ETH_AF_PACKET_QDISC_BYPASS_ARG, 92 NULL 93 }; 94 95 static struct rte_eth_link pmd_link = { 96 .link_speed = ETH_SPEED_NUM_10G, 97 .link_duplex = ETH_LINK_FULL_DUPLEX, 98 .link_status = ETH_LINK_DOWN, 99 .link_autoneg = ETH_LINK_FIXED, 100 }; 101 102 RTE_LOG_REGISTER_DEFAULT(af_packet_logtype, NOTICE); 103 104 #define PMD_LOG(level, fmt, args...) \ 105 rte_log(RTE_LOG_ ## level, af_packet_logtype, \ 106 "%s(): " fmt "\n", __func__, ##args) 107 108 #define PMD_LOG_ERRNO(level, fmt, args...) \ 109 rte_log(RTE_LOG_ ## level, af_packet_logtype, \ 110 "%s(): " fmt ":%s\n", __func__, ##args, strerror(errno)) 111 112 static uint16_t 113 eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 114 { 115 unsigned i; 116 struct tpacket2_hdr *ppd; 117 struct rte_mbuf *mbuf; 118 uint8_t *pbuf; 119 struct pkt_rx_queue *pkt_q = queue; 120 uint16_t num_rx = 0; 121 unsigned long num_rx_bytes = 0; 122 unsigned int framecount, framenum; 123 124 if (unlikely(nb_pkts == 0)) 125 return 0; 126 127 /* 128 * Reads the given number of packets from the AF_PACKET socket one by 129 * one and copies the packet data into a newly allocated mbuf. 130 */ 131 framecount = pkt_q->framecount; 132 framenum = pkt_q->framenum; 133 for (i = 0; i < nb_pkts; i++) { 134 /* point at the next incoming frame */ 135 ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; 136 if ((ppd->tp_status & TP_STATUS_USER) == 0) 137 break; 138 139 /* allocate the next mbuf */ 140 mbuf = rte_pktmbuf_alloc(pkt_q->mb_pool); 141 if (unlikely(mbuf == NULL)) 142 break; 143 144 /* packet will fit in the mbuf, go ahead and receive it */ 145 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) = ppd->tp_snaplen; 146 pbuf = (uint8_t *) ppd + ppd->tp_mac; 147 memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf)); 148 149 /* check for vlan info */ 150 if (ppd->tp_status & TP_STATUS_VLAN_VALID) { 151 mbuf->vlan_tci = ppd->tp_vlan_tci; 152 mbuf->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED); 153 154 if (!pkt_q->vlan_strip && rte_vlan_insert(&mbuf)) 155 PMD_LOG(ERR, "Failed to reinsert VLAN tag"); 156 } 157 158 /* release incoming frame and advance ring buffer */ 159 ppd->tp_status = TP_STATUS_KERNEL; 160 if (++framenum >= framecount) 161 framenum = 0; 162 mbuf->port = pkt_q->in_port; 163 164 /* account for the receive frame */ 165 bufs[i] = mbuf; 166 num_rx++; 167 num_rx_bytes += mbuf->pkt_len; 168 } 169 pkt_q->framenum = framenum; 170 pkt_q->rx_pkts += num_rx; 171 pkt_q->rx_bytes += num_rx_bytes; 172 return num_rx; 173 } 174 175 /* 176 * Check if there is an available frame in the ring 177 */ 178 static inline bool 179 tx_ring_status_available(uint32_t tp_status) 180 { 181 /* 182 * We eliminate the timestamp status from the packet status. 183 * This should only matter if timestamping is enabled on the socket, 184 * but there is a bug in the kernel which is fixed in newer releases. 185 * 186 * See the following kernel commit for reference: 187 * commit 171c3b151118a2fe0fc1e2a9d1b5a1570cfe82d2 188 * net: packetmmap: fix only tx timestamp on request 189 */ 190 tp_status &= ~(TP_STATUS_TS_SOFTWARE | TP_STATUS_TS_RAW_HARDWARE); 191 192 return tp_status == TP_STATUS_AVAILABLE; 193 } 194 195 /* 196 * Callback to handle sending packets through a real NIC. 197 */ 198 static uint16_t 199 eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 200 { 201 struct tpacket2_hdr *ppd; 202 struct rte_mbuf *mbuf; 203 uint8_t *pbuf; 204 unsigned int framecount, framenum; 205 struct pollfd pfd; 206 struct pkt_tx_queue *pkt_q = queue; 207 uint16_t num_tx = 0; 208 unsigned long num_tx_bytes = 0; 209 int i; 210 211 if (unlikely(nb_pkts == 0)) 212 return 0; 213 214 memset(&pfd, 0, sizeof(pfd)); 215 pfd.fd = pkt_q->sockfd; 216 pfd.events = POLLOUT; 217 pfd.revents = 0; 218 219 framecount = pkt_q->framecount; 220 framenum = pkt_q->framenum; 221 ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; 222 for (i = 0; i < nb_pkts; i++) { 223 mbuf = *bufs++; 224 225 /* drop oversized packets */ 226 if (mbuf->pkt_len > pkt_q->frame_data_size) { 227 rte_pktmbuf_free(mbuf); 228 continue; 229 } 230 231 /* insert vlan info if necessary */ 232 if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { 233 if (rte_vlan_insert(&mbuf)) { 234 rte_pktmbuf_free(mbuf); 235 continue; 236 } 237 } 238 239 /* point at the next incoming frame */ 240 if (!tx_ring_status_available(ppd->tp_status) && 241 poll(&pfd, 1, -1) < 0) 242 break; 243 244 /* copy the tx frame data */ 245 pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN - 246 sizeof(struct sockaddr_ll); 247 248 struct rte_mbuf *tmp_mbuf = mbuf; 249 while (tmp_mbuf) { 250 uint16_t data_len = rte_pktmbuf_data_len(tmp_mbuf); 251 memcpy(pbuf, rte_pktmbuf_mtod(tmp_mbuf, void*), data_len); 252 pbuf += data_len; 253 tmp_mbuf = tmp_mbuf->next; 254 } 255 256 ppd->tp_len = mbuf->pkt_len; 257 ppd->tp_snaplen = mbuf->pkt_len; 258 259 /* release incoming frame and advance ring buffer */ 260 ppd->tp_status = TP_STATUS_SEND_REQUEST; 261 if (++framenum >= framecount) 262 framenum = 0; 263 ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; 264 265 num_tx++; 266 num_tx_bytes += mbuf->pkt_len; 267 rte_pktmbuf_free(mbuf); 268 } 269 270 /* kick-off transmits */ 271 if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1 && 272 errno != ENOBUFS && errno != EAGAIN) { 273 /* 274 * In case of a ENOBUFS/EAGAIN error all of the enqueued 275 * packets will be considered successful even though only some 276 * are sent. 277 */ 278 279 num_tx = 0; 280 num_tx_bytes = 0; 281 } 282 283 pkt_q->framenum = framenum; 284 pkt_q->tx_pkts += num_tx; 285 pkt_q->err_pkts += i - num_tx; 286 pkt_q->tx_bytes += num_tx_bytes; 287 return i; 288 } 289 290 static int 291 eth_dev_start(struct rte_eth_dev *dev) 292 { 293 dev->data->dev_link.link_status = ETH_LINK_UP; 294 return 0; 295 } 296 297 /* 298 * This function gets called when the current port gets stopped. 299 */ 300 static int 301 eth_dev_stop(struct rte_eth_dev *dev) 302 { 303 unsigned i; 304 int sockfd; 305 struct pmd_internals *internals = dev->data->dev_private; 306 307 for (i = 0; i < internals->nb_queues; i++) { 308 sockfd = internals->rx_queue[i].sockfd; 309 if (sockfd != -1) 310 close(sockfd); 311 312 /* Prevent use after free in case tx fd == rx fd */ 313 if (sockfd != internals->tx_queue[i].sockfd) { 314 sockfd = internals->tx_queue[i].sockfd; 315 if (sockfd != -1) 316 close(sockfd); 317 } 318 319 internals->rx_queue[i].sockfd = -1; 320 internals->tx_queue[i].sockfd = -1; 321 } 322 323 dev->data->dev_link.link_status = ETH_LINK_DOWN; 324 return 0; 325 } 326 327 static int 328 eth_dev_configure(struct rte_eth_dev *dev __rte_unused) 329 { 330 struct rte_eth_conf *dev_conf = &dev->data->dev_conf; 331 const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode; 332 struct pmd_internals *internals = dev->data->dev_private; 333 334 internals->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP); 335 return 0; 336 } 337 338 static int 339 eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) 340 { 341 struct pmd_internals *internals = dev->data->dev_private; 342 343 dev_info->if_index = internals->if_index; 344 dev_info->max_mac_addrs = 1; 345 dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN; 346 dev_info->max_rx_queues = (uint16_t)internals->nb_queues; 347 dev_info->max_tx_queues = (uint16_t)internals->nb_queues; 348 dev_info->min_rx_bufsize = 0; 349 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS | 350 DEV_TX_OFFLOAD_VLAN_INSERT; 351 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP; 352 353 return 0; 354 } 355 356 static int 357 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats) 358 { 359 unsigned i, imax; 360 unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0; 361 unsigned long rx_bytes_total = 0, tx_bytes_total = 0; 362 const struct pmd_internals *internal = dev->data->dev_private; 363 364 imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ? 365 internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS); 366 for (i = 0; i < imax; i++) { 367 igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts; 368 igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes; 369 rx_total += igb_stats->q_ipackets[i]; 370 rx_bytes_total += igb_stats->q_ibytes[i]; 371 } 372 373 imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ? 374 internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS); 375 for (i = 0; i < imax; i++) { 376 igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts; 377 igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes; 378 tx_total += igb_stats->q_opackets[i]; 379 tx_err_total += internal->tx_queue[i].err_pkts; 380 tx_bytes_total += igb_stats->q_obytes[i]; 381 } 382 383 igb_stats->ipackets = rx_total; 384 igb_stats->ibytes = rx_bytes_total; 385 igb_stats->opackets = tx_total; 386 igb_stats->oerrors = tx_err_total; 387 igb_stats->obytes = tx_bytes_total; 388 return 0; 389 } 390 391 static int 392 eth_stats_reset(struct rte_eth_dev *dev) 393 { 394 unsigned i; 395 struct pmd_internals *internal = dev->data->dev_private; 396 397 for (i = 0; i < internal->nb_queues; i++) { 398 internal->rx_queue[i].rx_pkts = 0; 399 internal->rx_queue[i].rx_bytes = 0; 400 } 401 402 for (i = 0; i < internal->nb_queues; i++) { 403 internal->tx_queue[i].tx_pkts = 0; 404 internal->tx_queue[i].err_pkts = 0; 405 internal->tx_queue[i].tx_bytes = 0; 406 } 407 408 return 0; 409 } 410 411 static int 412 eth_dev_close(struct rte_eth_dev *dev) 413 { 414 struct pmd_internals *internals; 415 struct tpacket_req *req; 416 unsigned int q; 417 418 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 419 return 0; 420 421 PMD_LOG(INFO, "Closing AF_PACKET ethdev on NUMA socket %u", 422 rte_socket_id()); 423 424 internals = dev->data->dev_private; 425 req = &internals->req; 426 for (q = 0; q < internals->nb_queues; q++) { 427 munmap(internals->rx_queue[q].map, 428 2 * req->tp_block_size * req->tp_block_nr); 429 rte_free(internals->rx_queue[q].rd); 430 rte_free(internals->tx_queue[q].rd); 431 } 432 free(internals->if_name); 433 rte_free(internals->rx_queue); 434 rte_free(internals->tx_queue); 435 436 /* mac_addrs must not be freed alone because part of dev_private */ 437 dev->data->mac_addrs = NULL; 438 return 0; 439 } 440 441 static int 442 eth_link_update(struct rte_eth_dev *dev __rte_unused, 443 int wait_to_complete __rte_unused) 444 { 445 return 0; 446 } 447 448 static int 449 eth_rx_queue_setup(struct rte_eth_dev *dev, 450 uint16_t rx_queue_id, 451 uint16_t nb_rx_desc __rte_unused, 452 unsigned int socket_id __rte_unused, 453 const struct rte_eth_rxconf *rx_conf __rte_unused, 454 struct rte_mempool *mb_pool) 455 { 456 struct pmd_internals *internals = dev->data->dev_private; 457 struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id]; 458 unsigned int buf_size, data_size; 459 460 pkt_q->mb_pool = mb_pool; 461 462 /* Now get the space available for data in the mbuf */ 463 buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) - 464 RTE_PKTMBUF_HEADROOM; 465 data_size = internals->req.tp_frame_size; 466 data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); 467 468 if (data_size > buf_size) { 469 PMD_LOG(ERR, 470 "%s: %d bytes will not fit in mbuf (%d bytes)", 471 dev->device->name, data_size, buf_size); 472 return -ENOMEM; 473 } 474 475 dev->data->rx_queues[rx_queue_id] = pkt_q; 476 pkt_q->in_port = dev->data->port_id; 477 pkt_q->vlan_strip = internals->vlan_strip; 478 479 return 0; 480 } 481 482 static int 483 eth_tx_queue_setup(struct rte_eth_dev *dev, 484 uint16_t tx_queue_id, 485 uint16_t nb_tx_desc __rte_unused, 486 unsigned int socket_id __rte_unused, 487 const struct rte_eth_txconf *tx_conf __rte_unused) 488 { 489 490 struct pmd_internals *internals = dev->data->dev_private; 491 492 dev->data->tx_queues[tx_queue_id] = &internals->tx_queue[tx_queue_id]; 493 return 0; 494 } 495 496 static int 497 eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 498 { 499 struct pmd_internals *internals = dev->data->dev_private; 500 struct ifreq ifr = { .ifr_mtu = mtu }; 501 int ret; 502 int s; 503 unsigned int data_size = internals->req.tp_frame_size - 504 TPACKET2_HDRLEN; 505 506 if (mtu > data_size) 507 return -EINVAL; 508 509 s = socket(PF_INET, SOCK_DGRAM, 0); 510 if (s < 0) 511 return -EINVAL; 512 513 strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ); 514 ret = ioctl(s, SIOCSIFMTU, &ifr); 515 close(s); 516 517 if (ret < 0) 518 return -EINVAL; 519 520 return 0; 521 } 522 523 static int 524 eth_dev_macaddr_set(struct rte_eth_dev *dev, struct rte_ether_addr *addr) 525 { 526 struct pmd_internals *internals = dev->data->dev_private; 527 struct ifreq ifr = { }; 528 int sockfd = internals->rx_queue[0].sockfd; 529 int ret; 530 531 if (sockfd == -1) { 532 PMD_LOG(ERR, "receive socket not found"); 533 return -EINVAL; 534 } 535 536 strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ); 537 ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; 538 memcpy(ifr.ifr_hwaddr.sa_data, addr, sizeof(*addr)); 539 ret = ioctl(sockfd, SIOCSIFHWADDR, &ifr); 540 541 if (ret < 0) { 542 PMD_LOG_ERRNO(ERR, "ioctl(SIOCSIFHWADDR) failed"); 543 return -EINVAL; 544 } 545 546 return 0; 547 } 548 549 static int 550 eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask) 551 { 552 struct ifreq ifr; 553 int ret = 0; 554 int s; 555 556 s = socket(PF_INET, SOCK_DGRAM, 0); 557 if (s < 0) 558 return -errno; 559 560 strlcpy(ifr.ifr_name, if_name, IFNAMSIZ); 561 if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) { 562 ret = -errno; 563 goto out; 564 } 565 ifr.ifr_flags &= mask; 566 ifr.ifr_flags |= flags; 567 if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) { 568 ret = -errno; 569 goto out; 570 } 571 out: 572 close(s); 573 return ret; 574 } 575 576 static int 577 eth_dev_promiscuous_enable(struct rte_eth_dev *dev) 578 { 579 struct pmd_internals *internals = dev->data->dev_private; 580 581 return eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0); 582 } 583 584 static int 585 eth_dev_promiscuous_disable(struct rte_eth_dev *dev) 586 { 587 struct pmd_internals *internals = dev->data->dev_private; 588 589 return eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC); 590 } 591 592 static const struct eth_dev_ops ops = { 593 .dev_start = eth_dev_start, 594 .dev_stop = eth_dev_stop, 595 .dev_close = eth_dev_close, 596 .dev_configure = eth_dev_configure, 597 .dev_infos_get = eth_dev_info, 598 .mac_addr_set = eth_dev_macaddr_set, 599 .mtu_set = eth_dev_mtu_set, 600 .promiscuous_enable = eth_dev_promiscuous_enable, 601 .promiscuous_disable = eth_dev_promiscuous_disable, 602 .rx_queue_setup = eth_rx_queue_setup, 603 .tx_queue_setup = eth_tx_queue_setup, 604 .link_update = eth_link_update, 605 .stats_get = eth_stats_get, 606 .stats_reset = eth_stats_reset, 607 }; 608 609 /* 610 * Opens an AF_PACKET socket 611 */ 612 static int 613 open_packet_iface(const char *key __rte_unused, 614 const char *value __rte_unused, 615 void *extra_args) 616 { 617 int *sockfd = extra_args; 618 619 /* Open an AF_PACKET socket... */ 620 *sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 621 if (*sockfd == -1) { 622 PMD_LOG(ERR, "Could not open AF_PACKET socket"); 623 return -1; 624 } 625 626 return 0; 627 } 628 629 static int 630 rte_pmd_init_internals(struct rte_vdev_device *dev, 631 const int sockfd, 632 const unsigned nb_queues, 633 unsigned int blocksize, 634 unsigned int blockcnt, 635 unsigned int framesize, 636 unsigned int framecnt, 637 unsigned int qdisc_bypass, 638 struct pmd_internals **internals, 639 struct rte_eth_dev **eth_dev, 640 struct rte_kvargs *kvlist) 641 { 642 const char *name = rte_vdev_device_name(dev); 643 const unsigned int numa_node = dev->device.numa_node; 644 struct rte_eth_dev_data *data = NULL; 645 struct rte_kvargs_pair *pair = NULL; 646 struct ifreq ifr; 647 size_t ifnamelen; 648 unsigned k_idx; 649 struct sockaddr_ll sockaddr; 650 struct tpacket_req *req; 651 struct pkt_rx_queue *rx_queue; 652 struct pkt_tx_queue *tx_queue; 653 int rc, tpver, discard; 654 int qsockfd = -1; 655 unsigned int i, q, rdsize; 656 #if defined(PACKET_FANOUT) 657 int fanout_arg; 658 #endif 659 660 for (k_idx = 0; k_idx < kvlist->count; k_idx++) { 661 pair = &kvlist->pairs[k_idx]; 662 if (strstr(pair->key, ETH_AF_PACKET_IFACE_ARG) != NULL) 663 break; 664 } 665 if (pair == NULL) { 666 PMD_LOG(ERR, 667 "%s: no interface specified for AF_PACKET ethdev", 668 name); 669 return -1; 670 } 671 672 PMD_LOG(INFO, 673 "%s: creating AF_PACKET-backed ethdev on numa socket %u", 674 name, numa_node); 675 676 *internals = rte_zmalloc_socket(name, sizeof(**internals), 677 0, numa_node); 678 if (*internals == NULL) 679 return -1; 680 681 682 (*internals)->rx_queue = rte_calloc_socket("af_packet_rx", 683 nb_queues, 684 sizeof(struct pkt_rx_queue), 685 0, numa_node); 686 (*internals)->tx_queue = rte_calloc_socket("af_packet_tx", 687 nb_queues, 688 sizeof(struct pkt_tx_queue), 689 0, numa_node); 690 if (!(*internals)->rx_queue || !(*internals)->tx_queue) { 691 goto free_internals; 692 } 693 694 for (q = 0; q < nb_queues; q++) { 695 (*internals)->rx_queue[q].map = MAP_FAILED; 696 (*internals)->tx_queue[q].map = MAP_FAILED; 697 (*internals)->rx_queue[q].sockfd = -1; 698 (*internals)->tx_queue[q].sockfd = -1; 699 } 700 701 req = &((*internals)->req); 702 703 req->tp_block_size = blocksize; 704 req->tp_block_nr = blockcnt; 705 req->tp_frame_size = framesize; 706 req->tp_frame_nr = framecnt; 707 708 ifnamelen = strlen(pair->value); 709 if (ifnamelen < sizeof(ifr.ifr_name)) { 710 memcpy(ifr.ifr_name, pair->value, ifnamelen); 711 ifr.ifr_name[ifnamelen] = '\0'; 712 } else { 713 PMD_LOG(ERR, 714 "%s: I/F name too long (%s)", 715 name, pair->value); 716 goto free_internals; 717 } 718 if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) { 719 PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFINDEX)", name); 720 goto free_internals; 721 } 722 (*internals)->if_name = strdup(pair->value); 723 if ((*internals)->if_name == NULL) 724 goto free_internals; 725 (*internals)->if_index = ifr.ifr_ifindex; 726 727 if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) { 728 PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFHWADDR)", name); 729 goto free_internals; 730 } 731 memcpy(&(*internals)->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN); 732 733 memset(&sockaddr, 0, sizeof(sockaddr)); 734 sockaddr.sll_family = AF_PACKET; 735 sockaddr.sll_protocol = htons(ETH_P_ALL); 736 sockaddr.sll_ifindex = (*internals)->if_index; 737 738 #if defined(PACKET_FANOUT) 739 fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff; 740 fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16; 741 #if defined(PACKET_FANOUT_FLAG_ROLLOVER) 742 fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16; 743 #endif 744 #endif 745 746 for (q = 0; q < nb_queues; q++) { 747 /* Open an AF_PACKET socket for this queue... */ 748 qsockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 749 if (qsockfd == -1) { 750 PMD_LOG_ERRNO(ERR, 751 "%s: could not open AF_PACKET socket", 752 name); 753 goto error; 754 } 755 756 tpver = TPACKET_V2; 757 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_VERSION, 758 &tpver, sizeof(tpver)); 759 if (rc == -1) { 760 PMD_LOG_ERRNO(ERR, 761 "%s: could not set PACKET_VERSION on AF_PACKET socket for %s", 762 name, pair->value); 763 goto error; 764 } 765 766 discard = 1; 767 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_LOSS, 768 &discard, sizeof(discard)); 769 if (rc == -1) { 770 PMD_LOG_ERRNO(ERR, 771 "%s: could not set PACKET_LOSS on AF_PACKET socket for %s", 772 name, pair->value); 773 goto error; 774 } 775 776 if (qdisc_bypass) { 777 #if defined(PACKET_QDISC_BYPASS) 778 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_QDISC_BYPASS, 779 &qdisc_bypass, sizeof(qdisc_bypass)); 780 if (rc == -1) { 781 PMD_LOG_ERRNO(ERR, 782 "%s: could not set PACKET_QDISC_BYPASS on AF_PACKET socket for %s", 783 name, pair->value); 784 goto error; 785 } 786 #endif 787 } 788 789 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req)); 790 if (rc == -1) { 791 PMD_LOG_ERRNO(ERR, 792 "%s: could not set PACKET_RX_RING on AF_PACKET socket for %s", 793 name, pair->value); 794 goto error; 795 } 796 797 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_TX_RING, req, sizeof(*req)); 798 if (rc == -1) { 799 PMD_LOG_ERRNO(ERR, 800 "%s: could not set PACKET_TX_RING on AF_PACKET " 801 "socket for %s", name, pair->value); 802 goto error; 803 } 804 805 rx_queue = &((*internals)->rx_queue[q]); 806 rx_queue->framecount = req->tp_frame_nr; 807 808 rx_queue->map = mmap(NULL, 2 * req->tp_block_size * req->tp_block_nr, 809 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, 810 qsockfd, 0); 811 if (rx_queue->map == MAP_FAILED) { 812 PMD_LOG_ERRNO(ERR, 813 "%s: call to mmap failed on AF_PACKET socket for %s", 814 name, pair->value); 815 goto error; 816 } 817 818 /* rdsize is same for both Tx and Rx */ 819 rdsize = req->tp_frame_nr * sizeof(*(rx_queue->rd)); 820 821 rx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node); 822 if (rx_queue->rd == NULL) 823 goto error; 824 for (i = 0; i < req->tp_frame_nr; ++i) { 825 rx_queue->rd[i].iov_base = rx_queue->map + (i * framesize); 826 rx_queue->rd[i].iov_len = req->tp_frame_size; 827 } 828 rx_queue->sockfd = qsockfd; 829 830 tx_queue = &((*internals)->tx_queue[q]); 831 tx_queue->framecount = req->tp_frame_nr; 832 tx_queue->frame_data_size = req->tp_frame_size; 833 tx_queue->frame_data_size -= TPACKET2_HDRLEN - 834 sizeof(struct sockaddr_ll); 835 836 tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr; 837 838 tx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node); 839 if (tx_queue->rd == NULL) 840 goto error; 841 for (i = 0; i < req->tp_frame_nr; ++i) { 842 tx_queue->rd[i].iov_base = tx_queue->map + (i * framesize); 843 tx_queue->rd[i].iov_len = req->tp_frame_size; 844 } 845 tx_queue->sockfd = qsockfd; 846 847 rc = bind(qsockfd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)); 848 if (rc == -1) { 849 PMD_LOG_ERRNO(ERR, 850 "%s: could not bind AF_PACKET socket to %s", 851 name, pair->value); 852 goto error; 853 } 854 855 #if defined(PACKET_FANOUT) 856 rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT, 857 &fanout_arg, sizeof(fanout_arg)); 858 if (rc == -1) { 859 PMD_LOG_ERRNO(ERR, 860 "%s: could not set PACKET_FANOUT on AF_PACKET socket for %s", 861 name, pair->value); 862 goto error; 863 } 864 #endif 865 } 866 867 /* reserve an ethdev entry */ 868 *eth_dev = rte_eth_vdev_allocate(dev, 0); 869 if (*eth_dev == NULL) 870 goto error; 871 872 /* 873 * now put it all together 874 * - store queue data in internals, 875 * - store numa_node in eth_dev 876 * - point eth_dev_data to internals 877 * - and point eth_dev structure to new eth_dev_data structure 878 */ 879 880 (*internals)->nb_queues = nb_queues; 881 882 data = (*eth_dev)->data; 883 data->dev_private = *internals; 884 data->nb_rx_queues = (uint16_t)nb_queues; 885 data->nb_tx_queues = (uint16_t)nb_queues; 886 data->dev_link = pmd_link; 887 data->mac_addrs = &(*internals)->eth_addr; 888 data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 889 890 (*eth_dev)->dev_ops = &ops; 891 892 return 0; 893 894 error: 895 if (qsockfd != -1) 896 close(qsockfd); 897 for (q = 0; q < nb_queues; q++) { 898 if ((*internals)->rx_queue[q].map != MAP_FAILED) 899 munmap((*internals)->rx_queue[q].map, 900 2 * req->tp_block_size * req->tp_block_nr); 901 902 rte_free((*internals)->rx_queue[q].rd); 903 rte_free((*internals)->tx_queue[q].rd); 904 if (((*internals)->rx_queue[q].sockfd >= 0) && 905 ((*internals)->rx_queue[q].sockfd != qsockfd)) 906 close((*internals)->rx_queue[q].sockfd); 907 } 908 free_internals: 909 rte_free((*internals)->rx_queue); 910 rte_free((*internals)->tx_queue); 911 free((*internals)->if_name); 912 rte_free(*internals); 913 return -1; 914 } 915 916 static int 917 rte_eth_from_packet(struct rte_vdev_device *dev, 918 int const *sockfd, 919 struct rte_kvargs *kvlist) 920 { 921 const char *name = rte_vdev_device_name(dev); 922 struct pmd_internals *internals = NULL; 923 struct rte_eth_dev *eth_dev = NULL; 924 struct rte_kvargs_pair *pair = NULL; 925 unsigned k_idx; 926 unsigned int blockcount; 927 unsigned int blocksize; 928 unsigned int framesize = DFLT_FRAME_SIZE; 929 unsigned int framecount = DFLT_FRAME_COUNT; 930 unsigned int qpairs = 1; 931 unsigned int qdisc_bypass = 1; 932 933 /* do some parameter checking */ 934 if (*sockfd < 0) 935 return -1; 936 937 blocksize = getpagesize(); 938 939 /* 940 * Walk arguments for configurable settings 941 */ 942 for (k_idx = 0; k_idx < kvlist->count; k_idx++) { 943 pair = &kvlist->pairs[k_idx]; 944 if (strstr(pair->key, ETH_AF_PACKET_NUM_Q_ARG) != NULL) { 945 qpairs = atoi(pair->value); 946 if (qpairs < 1) { 947 PMD_LOG(ERR, 948 "%s: invalid qpairs value", 949 name); 950 return -1; 951 } 952 continue; 953 } 954 if (strstr(pair->key, ETH_AF_PACKET_BLOCKSIZE_ARG) != NULL) { 955 blocksize = atoi(pair->value); 956 if (!blocksize) { 957 PMD_LOG(ERR, 958 "%s: invalid blocksize value", 959 name); 960 return -1; 961 } 962 continue; 963 } 964 if (strstr(pair->key, ETH_AF_PACKET_FRAMESIZE_ARG) != NULL) { 965 framesize = atoi(pair->value); 966 if (!framesize) { 967 PMD_LOG(ERR, 968 "%s: invalid framesize value", 969 name); 970 return -1; 971 } 972 continue; 973 } 974 if (strstr(pair->key, ETH_AF_PACKET_FRAMECOUNT_ARG) != NULL) { 975 framecount = atoi(pair->value); 976 if (!framecount) { 977 PMD_LOG(ERR, 978 "%s: invalid framecount value", 979 name); 980 return -1; 981 } 982 continue; 983 } 984 if (strstr(pair->key, ETH_AF_PACKET_QDISC_BYPASS_ARG) != NULL) { 985 qdisc_bypass = atoi(pair->value); 986 if (qdisc_bypass > 1) { 987 PMD_LOG(ERR, 988 "%s: invalid bypass value", 989 name); 990 return -1; 991 } 992 continue; 993 } 994 } 995 996 if (framesize > blocksize) { 997 PMD_LOG(ERR, 998 "%s: AF_PACKET MMAP frame size exceeds block size!", 999 name); 1000 return -1; 1001 } 1002 1003 blockcount = framecount / (blocksize / framesize); 1004 if (!blockcount) { 1005 PMD_LOG(ERR, 1006 "%s: invalid AF_PACKET MMAP parameters", name); 1007 return -1; 1008 } 1009 1010 PMD_LOG(INFO, "%s: AF_PACKET MMAP parameters:", name); 1011 PMD_LOG(INFO, "%s:\tblock size %d", name, blocksize); 1012 PMD_LOG(INFO, "%s:\tblock count %d", name, blockcount); 1013 PMD_LOG(INFO, "%s:\tframe size %d", name, framesize); 1014 PMD_LOG(INFO, "%s:\tframe count %d", name, framecount); 1015 1016 if (rte_pmd_init_internals(dev, *sockfd, qpairs, 1017 blocksize, blockcount, 1018 framesize, framecount, 1019 qdisc_bypass, 1020 &internals, ð_dev, 1021 kvlist) < 0) 1022 return -1; 1023 1024 eth_dev->rx_pkt_burst = eth_af_packet_rx; 1025 eth_dev->tx_pkt_burst = eth_af_packet_tx; 1026 1027 rte_eth_dev_probing_finish(eth_dev); 1028 return 0; 1029 } 1030 1031 static int 1032 rte_pmd_af_packet_probe(struct rte_vdev_device *dev) 1033 { 1034 int ret = 0; 1035 struct rte_kvargs *kvlist; 1036 int sockfd = -1; 1037 struct rte_eth_dev *eth_dev; 1038 const char *name = rte_vdev_device_name(dev); 1039 1040 PMD_LOG(INFO, "Initializing pmd_af_packet for %s", name); 1041 1042 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1043 eth_dev = rte_eth_dev_attach_secondary(name); 1044 if (!eth_dev) { 1045 PMD_LOG(ERR, "Failed to probe %s", name); 1046 return -1; 1047 } 1048 /* TODO: request info from primary to set up Rx and Tx */ 1049 eth_dev->dev_ops = &ops; 1050 eth_dev->device = &dev->device; 1051 rte_eth_dev_probing_finish(eth_dev); 1052 return 0; 1053 } 1054 1055 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); 1056 if (kvlist == NULL) { 1057 ret = -1; 1058 goto exit; 1059 } 1060 1061 /* 1062 * If iface argument is passed we open the NICs and use them for 1063 * reading / writing 1064 */ 1065 if (rte_kvargs_count(kvlist, ETH_AF_PACKET_IFACE_ARG) == 1) { 1066 1067 ret = rte_kvargs_process(kvlist, ETH_AF_PACKET_IFACE_ARG, 1068 &open_packet_iface, &sockfd); 1069 if (ret < 0) 1070 goto exit; 1071 } 1072 1073 if (dev->device.numa_node == SOCKET_ID_ANY) 1074 dev->device.numa_node = rte_socket_id(); 1075 1076 ret = rte_eth_from_packet(dev, &sockfd, kvlist); 1077 close(sockfd); /* no longer needed */ 1078 1079 exit: 1080 rte_kvargs_free(kvlist); 1081 return ret; 1082 } 1083 1084 static int 1085 rte_pmd_af_packet_remove(struct rte_vdev_device *dev) 1086 { 1087 struct rte_eth_dev *eth_dev; 1088 1089 if (dev == NULL) 1090 return -1; 1091 1092 /* find the ethdev entry */ 1093 eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev)); 1094 if (eth_dev == NULL) 1095 return 0; /* port already released */ 1096 1097 eth_dev_close(eth_dev); 1098 rte_eth_dev_release_port(eth_dev); 1099 1100 return 0; 1101 } 1102 1103 static struct rte_vdev_driver pmd_af_packet_drv = { 1104 .probe = rte_pmd_af_packet_probe, 1105 .remove = rte_pmd_af_packet_remove, 1106 }; 1107 1108 RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv); 1109 RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet); 1110 RTE_PMD_REGISTER_PARAM_STRING(net_af_packet, 1111 "iface=<string> " 1112 "qpairs=<int> " 1113 "blocksz=<int> " 1114 "framesz=<int> " 1115 "framecnt=<int> " 1116 "qdisc_bypass=<0|1>"); 1117