1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 IGEL Co., Ltd. 3 * Copyright(c) 2016-2018 Intel Corporation 4 */ 5 #include <stdlib.h> 6 #include <unistd.h> 7 #include <pthread.h> 8 #include <stdbool.h> 9 #include <sys/epoll.h> 10 11 #include <rte_mbuf.h> 12 #include <ethdev_driver.h> 13 #include <ethdev_vdev.h> 14 #include <rte_malloc.h> 15 #include <rte_memcpy.h> 16 #include <rte_net.h> 17 #include <bus_vdev_driver.h> 18 #include <rte_kvargs.h> 19 #include <rte_vhost.h> 20 #include <rte_spinlock.h> 21 22 #include "rte_eth_vhost.h" 23 24 RTE_LOG_REGISTER_DEFAULT(vhost_logtype, NOTICE); 25 #define RTE_LOGTYPE_VHOST vhost_logtype 26 27 #define VHOST_LOG_LINE(level, ...) \ 28 RTE_LOG_LINE(level, VHOST, __VA_ARGS__) 29 30 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; 31 32 #define ETH_VHOST_IFACE_ARG "iface" 33 #define ETH_VHOST_QUEUES_ARG "queues" 34 #define ETH_VHOST_CLIENT_ARG "client" 35 #define ETH_VHOST_IOMMU_SUPPORT "iommu-support" 36 #define ETH_VHOST_POSTCOPY_SUPPORT "postcopy-support" 37 #define ETH_VHOST_VIRTIO_NET_F_HOST_TSO "tso" 38 #define ETH_VHOST_LINEAR_BUF "linear-buffer" 39 #define ETH_VHOST_EXT_BUF "ext-buffer" 40 #define ETH_VHOST_LEGACY_OL_FLAGS "legacy-ol-flags" 41 #define VHOST_MAX_PKT_BURST 32 42 43 static const char *valid_arguments[] = { 44 ETH_VHOST_IFACE_ARG, 45 ETH_VHOST_QUEUES_ARG, 46 ETH_VHOST_CLIENT_ARG, 47 ETH_VHOST_IOMMU_SUPPORT, 48 ETH_VHOST_POSTCOPY_SUPPORT, 49 ETH_VHOST_VIRTIO_NET_F_HOST_TSO, 50 ETH_VHOST_LINEAR_BUF, 51 ETH_VHOST_EXT_BUF, 52 ETH_VHOST_LEGACY_OL_FLAGS, 53 NULL 54 }; 55 56 static struct rte_ether_addr base_eth_addr = { 57 .addr_bytes = { 58 0x56 /* V */, 59 0x48 /* H */, 60 0x4F /* O */, 61 0x53 /* S */, 62 0x54 /* T */, 63 0x00 64 } 65 }; 66 67 struct vhost_stats { 68 uint64_t pkts; 69 uint64_t bytes; 70 uint64_t missed_pkts; 71 }; 72 73 struct vhost_queue { 74 int vid; 75 rte_atomic32_t allow_queuing; 76 rte_atomic32_t while_queuing; 77 struct pmd_internal *internal; 78 struct rte_mempool *mb_pool; 79 uint16_t port; 80 uint16_t virtqueue_id; 81 struct vhost_stats stats; 82 rte_spinlock_t intr_lock; 83 struct epoll_event ev; 84 int kickfd; 85 }; 86 87 struct pmd_internal { 88 rte_atomic32_t dev_attached; 89 char *iface_name; 90 uint64_t flags; 91 uint64_t disable_flags; 92 uint64_t features; 93 uint16_t max_queues; 94 int vid; 95 rte_atomic32_t started; 96 bool vlan_strip; 97 bool rx_sw_csum; 98 bool tx_sw_csum; 99 }; 100 101 struct internal_list { 102 TAILQ_ENTRY(internal_list) next; 103 struct rte_eth_dev *eth_dev; 104 }; 105 106 TAILQ_HEAD(internal_list_head, internal_list); 107 static struct internal_list_head internal_list = 108 TAILQ_HEAD_INITIALIZER(internal_list); 109 110 static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER; 111 112 static struct rte_eth_link pmd_link = { 113 .link_speed = 10000, 114 .link_duplex = RTE_ETH_LINK_FULL_DUPLEX, 115 .link_status = RTE_ETH_LINK_DOWN 116 }; 117 118 struct rte_vhost_vring_state { 119 rte_spinlock_t lock; 120 121 bool cur[RTE_MAX_QUEUES_PER_PORT * 2]; 122 bool seen[RTE_MAX_QUEUES_PER_PORT * 2]; 123 unsigned int index; 124 unsigned int max_vring; 125 }; 126 127 static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS]; 128 129 static int 130 vhost_dev_xstats_reset(struct rte_eth_dev *dev) 131 { 132 struct vhost_queue *vq; 133 int ret, i; 134 135 for (i = 0; i < dev->data->nb_rx_queues; i++) { 136 vq = dev->data->rx_queues[i]; 137 ret = rte_vhost_vring_stats_reset(vq->vid, vq->virtqueue_id); 138 if (ret < 0) 139 return ret; 140 } 141 142 for (i = 0; i < dev->data->nb_tx_queues; i++) { 143 vq = dev->data->tx_queues[i]; 144 ret = rte_vhost_vring_stats_reset(vq->vid, vq->virtqueue_id); 145 if (ret < 0) 146 return ret; 147 } 148 149 return 0; 150 } 151 152 static int 153 vhost_dev_xstats_get_names(struct rte_eth_dev *dev, 154 struct rte_eth_xstat_name *xstats_names, 155 unsigned int limit) 156 { 157 struct rte_vhost_stat_name *name; 158 struct vhost_queue *vq; 159 int ret, i, count = 0, nstats = 0; 160 161 for (i = 0; i < dev->data->nb_rx_queues; i++) { 162 vq = dev->data->rx_queues[i]; 163 ret = rte_vhost_vring_stats_get_names(vq->vid, vq->virtqueue_id, NULL, 0); 164 if (ret < 0) 165 return ret; 166 167 nstats += ret; 168 } 169 170 for (i = 0; i < dev->data->nb_tx_queues; i++) { 171 vq = dev->data->tx_queues[i]; 172 ret = rte_vhost_vring_stats_get_names(vq->vid, vq->virtqueue_id, NULL, 0); 173 if (ret < 0) 174 return ret; 175 176 nstats += ret; 177 } 178 179 if (!xstats_names || limit < (unsigned int)nstats) 180 return nstats; 181 182 name = calloc(nstats, sizeof(*name)); 183 if (!name) 184 return -1; 185 186 for (i = 0; i < dev->data->nb_rx_queues; i++) { 187 vq = dev->data->rx_queues[i]; 188 ret = rte_vhost_vring_stats_get_names(vq->vid, vq->virtqueue_id, 189 name + count, nstats - count); 190 if (ret < 0) { 191 free(name); 192 return ret; 193 } 194 195 count += ret; 196 } 197 198 for (i = 0; i < dev->data->nb_tx_queues; i++) { 199 vq = dev->data->tx_queues[i]; 200 ret = rte_vhost_vring_stats_get_names(vq->vid, vq->virtqueue_id, 201 name + count, nstats - count); 202 if (ret < 0) { 203 free(name); 204 return ret; 205 } 206 207 count += ret; 208 } 209 210 for (i = 0; i < count; i++) 211 strncpy(xstats_names[i].name, name[i].name, RTE_ETH_XSTATS_NAME_SIZE); 212 213 free(name); 214 215 return count; 216 } 217 218 static int 219 vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, 220 unsigned int n) 221 { 222 struct rte_vhost_stat *stats; 223 struct vhost_queue *vq; 224 int ret, i, count = 0, nstats = 0; 225 226 for (i = 0; i < dev->data->nb_rx_queues; i++) { 227 vq = dev->data->rx_queues[i]; 228 ret = rte_vhost_vring_stats_get(vq->vid, vq->virtqueue_id, NULL, 0); 229 if (ret < 0) 230 return ret; 231 232 nstats += ret; 233 } 234 235 for (i = 0; i < dev->data->nb_tx_queues; i++) { 236 vq = dev->data->tx_queues[i]; 237 ret = rte_vhost_vring_stats_get(vq->vid, vq->virtqueue_id, NULL, 0); 238 if (ret < 0) 239 return ret; 240 241 nstats += ret; 242 } 243 244 if (!xstats || n < (unsigned int)nstats) 245 return nstats; 246 247 stats = calloc(nstats, sizeof(*stats)); 248 if (!stats) 249 return -1; 250 251 for (i = 0; i < dev->data->nb_rx_queues; i++) { 252 vq = dev->data->rx_queues[i]; 253 ret = rte_vhost_vring_stats_get(vq->vid, vq->virtqueue_id, 254 stats + count, nstats - count); 255 if (ret < 0) { 256 free(stats); 257 return ret; 258 } 259 260 count += ret; 261 } 262 263 for (i = 0; i < dev->data->nb_tx_queues; i++) { 264 vq = dev->data->tx_queues[i]; 265 ret = rte_vhost_vring_stats_get(vq->vid, vq->virtqueue_id, 266 stats + count, nstats - count); 267 if (ret < 0) { 268 free(stats); 269 return ret; 270 } 271 272 count += ret; 273 } 274 275 for (i = 0; i < count; i++) { 276 xstats[i].id = stats[i].id; 277 xstats[i].value = stats[i].value; 278 } 279 280 free(stats); 281 282 return nstats; 283 } 284 285 static void 286 vhost_dev_csum_configure(struct rte_eth_dev *eth_dev) 287 { 288 struct pmd_internal *internal = eth_dev->data->dev_private; 289 const struct rte_eth_rxmode *rxmode = ð_dev->data->dev_conf.rxmode; 290 const struct rte_eth_txmode *txmode = ð_dev->data->dev_conf.txmode; 291 292 internal->rx_sw_csum = false; 293 internal->tx_sw_csum = false; 294 295 /* SW checksum is not compatible with legacy mode */ 296 if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS)) 297 return; 298 299 if (internal->features & (1ULL << VIRTIO_NET_F_CSUM)) { 300 if (!(rxmode->offloads & 301 (RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM))) { 302 VHOST_LOG_LINE(NOTICE, "Rx csum will be done in SW, may impact performance."); 303 internal->rx_sw_csum = true; 304 } 305 } 306 307 if (!(internal->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM))) { 308 if (txmode->offloads & 309 (RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 310 VHOST_LOG_LINE(NOTICE, "Tx csum will be done in SW, may impact performance."); 311 internal->tx_sw_csum = true; 312 } 313 } 314 } 315 316 static void 317 vhost_dev_tx_sw_csum(struct rte_mbuf *mbuf) 318 { 319 uint32_t hdr_len; 320 uint16_t csum = 0, csum_offset; 321 322 switch (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) { 323 case RTE_MBUF_F_TX_L4_NO_CKSUM: 324 return; 325 case RTE_MBUF_F_TX_TCP_CKSUM: 326 csum_offset = offsetof(struct rte_tcp_hdr, cksum); 327 break; 328 case RTE_MBUF_F_TX_UDP_CKSUM: 329 csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum); 330 break; 331 default: 332 /* Unsupported packet type. */ 333 return; 334 } 335 336 hdr_len = mbuf->l2_len + mbuf->l3_len; 337 csum_offset += hdr_len; 338 339 /* Prepare the pseudo-header checksum */ 340 if (rte_net_intel_cksum_prepare(mbuf) < 0) 341 return; 342 343 if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) - hdr_len, &csum) < 0) 344 return; 345 346 csum = ~csum; 347 /* See RFC768 */ 348 if (unlikely((mbuf->packet_type & RTE_PTYPE_L4_UDP) && csum == 0)) 349 csum = 0xffff; 350 351 if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1) 352 *rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum; 353 354 mbuf->ol_flags &= ~RTE_MBUF_F_TX_L4_MASK; 355 mbuf->ol_flags |= RTE_MBUF_F_TX_L4_NO_CKSUM; 356 } 357 358 static void 359 vhost_dev_rx_sw_csum(struct rte_mbuf *mbuf) 360 { 361 struct rte_net_hdr_lens hdr_lens; 362 uint32_t ptype, hdr_len; 363 uint16_t csum = 0, csum_offset; 364 365 /* Return early if the L4 checksum was not offloaded */ 366 if ((mbuf->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) != RTE_MBUF_F_RX_L4_CKSUM_NONE) 367 return; 368 369 ptype = rte_net_get_ptype(mbuf, &hdr_lens, RTE_PTYPE_ALL_MASK); 370 371 hdr_len = hdr_lens.l2_len + hdr_lens.l3_len; 372 373 switch (ptype & RTE_PTYPE_L4_MASK) { 374 case RTE_PTYPE_L4_TCP: 375 csum_offset = offsetof(struct rte_tcp_hdr, cksum) + hdr_len; 376 break; 377 case RTE_PTYPE_L4_UDP: 378 csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum) + hdr_len; 379 break; 380 default: 381 /* Unsupported packet type */ 382 return; 383 } 384 385 /* The pseudo-header checksum is already performed, as per Virtio spec */ 386 if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) - hdr_len, &csum) < 0) 387 return; 388 389 csum = ~csum; 390 /* See RFC768 */ 391 if (unlikely((ptype & RTE_PTYPE_L4_UDP) && csum == 0)) 392 csum = 0xffff; 393 394 if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1) 395 *rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum; 396 397 mbuf->ol_flags &= ~RTE_MBUF_F_RX_L4_CKSUM_MASK; 398 mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 399 } 400 401 static uint16_t 402 eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) 403 { 404 struct vhost_queue *r = q; 405 uint16_t i, nb_rx = 0; 406 uint16_t nb_receive = nb_bufs; 407 408 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 409 return 0; 410 411 rte_atomic32_set(&r->while_queuing, 1); 412 413 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 414 goto out; 415 416 /* Dequeue packets from guest TX queue */ 417 while (nb_receive) { 418 uint16_t nb_pkts; 419 uint16_t num = (uint16_t)RTE_MIN(nb_receive, 420 VHOST_MAX_PKT_BURST); 421 422 nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id, 423 r->mb_pool, &bufs[nb_rx], 424 num); 425 426 nb_rx += nb_pkts; 427 nb_receive -= nb_pkts; 428 if (nb_pkts < num) 429 break; 430 } 431 432 r->stats.pkts += nb_rx; 433 434 for (i = 0; likely(i < nb_rx); i++) { 435 bufs[i]->port = r->port; 436 bufs[i]->vlan_tci = 0; 437 438 if (r->internal->vlan_strip) 439 rte_vlan_strip(bufs[i]); 440 441 if (r->internal->rx_sw_csum) 442 vhost_dev_rx_sw_csum(bufs[i]); 443 444 r->stats.bytes += bufs[i]->pkt_len; 445 } 446 447 out: 448 rte_atomic32_set(&r->while_queuing, 0); 449 450 return nb_rx; 451 } 452 453 static uint16_t 454 eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) 455 { 456 struct vhost_queue *r = q; 457 uint16_t i, nb_tx = 0; 458 uint16_t nb_send = 0; 459 uint64_t nb_bytes = 0; 460 uint64_t nb_missed = 0; 461 462 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 463 return 0; 464 465 rte_atomic32_set(&r->while_queuing, 1); 466 467 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 468 goto out; 469 470 for (i = 0; i < nb_bufs; i++) { 471 struct rte_mbuf *m = bufs[i]; 472 473 /* Do VLAN tag insertion */ 474 if (m->ol_flags & RTE_MBUF_F_TX_VLAN) { 475 int error = rte_vlan_insert(&m); 476 if (unlikely(error)) { 477 rte_pktmbuf_free(m); 478 continue; 479 } 480 } 481 482 if (r->internal->tx_sw_csum) 483 vhost_dev_tx_sw_csum(m); 484 485 486 bufs[nb_send] = m; 487 ++nb_send; 488 } 489 490 /* Enqueue packets to guest RX queue */ 491 while (nb_send) { 492 uint16_t nb_pkts; 493 uint16_t num = (uint16_t)RTE_MIN(nb_send, 494 VHOST_MAX_PKT_BURST); 495 496 nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id, 497 &bufs[nb_tx], num); 498 499 nb_tx += nb_pkts; 500 nb_send -= nb_pkts; 501 if (nb_pkts < num) 502 break; 503 } 504 505 for (i = 0; likely(i < nb_tx); i++) 506 nb_bytes += bufs[i]->pkt_len; 507 508 nb_missed = nb_bufs - nb_tx; 509 510 r->stats.pkts += nb_tx; 511 r->stats.bytes += nb_bytes; 512 r->stats.missed_pkts += nb_missed; 513 514 for (i = 0; likely(i < nb_tx); i++) 515 rte_pktmbuf_free(bufs[i]); 516 out: 517 rte_atomic32_set(&r->while_queuing, 0); 518 519 return nb_tx; 520 } 521 522 static inline struct internal_list * 523 find_internal_resource(char *ifname) 524 { 525 int found = 0; 526 struct internal_list *list; 527 struct pmd_internal *internal; 528 529 if (ifname == NULL) 530 return NULL; 531 532 pthread_mutex_lock(&internal_list_lock); 533 534 TAILQ_FOREACH(list, &internal_list, next) { 535 internal = list->eth_dev->data->dev_private; 536 if (!strcmp(internal->iface_name, ifname)) { 537 found = 1; 538 break; 539 } 540 } 541 542 pthread_mutex_unlock(&internal_list_lock); 543 544 if (!found) 545 return NULL; 546 547 return list; 548 } 549 550 static void 551 eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx) 552 { 553 struct rte_vhost_vring vring; 554 struct vhost_queue *vq; 555 556 vq = eth_dev->data->rx_queues[rxq_idx]; 557 if (vq == NULL || vq->vid < 0) 558 return; 559 560 if (rte_vhost_get_vhost_vring(vq->vid, (rxq_idx << 1) + 1, &vring) < 0) { 561 VHOST_LOG_LINE(DEBUG, "Failed to get rxq-%d's vring, skip!", rxq_idx); 562 return; 563 } 564 565 rte_spinlock_lock(&vq->intr_lock); 566 567 /* Remove previous kickfd from proxy epoll */ 568 if (vq->kickfd >= 0 && vq->kickfd != vring.kickfd) { 569 if (epoll_ctl(vq->ev.data.fd, EPOLL_CTL_DEL, vq->kickfd, &vq->ev) < 0) { 570 VHOST_LOG_LINE(DEBUG, "Failed to unregister %d from rxq-%d epoll: %s", 571 vq->kickfd, rxq_idx, strerror(errno)); 572 } else { 573 VHOST_LOG_LINE(DEBUG, "Unregistered %d from rxq-%d epoll", 574 vq->kickfd, rxq_idx); 575 } 576 vq->kickfd = -1; 577 } 578 579 /* Add new one, if valid */ 580 if (vq->kickfd != vring.kickfd && vring.kickfd >= 0) { 581 if (epoll_ctl(vq->ev.data.fd, EPOLL_CTL_ADD, vring.kickfd, &vq->ev) < 0) { 582 VHOST_LOG_LINE(ERR, "Failed to register %d in rxq-%d epoll: %s", 583 vring.kickfd, rxq_idx, strerror(errno)); 584 } else { 585 vq->kickfd = vring.kickfd; 586 VHOST_LOG_LINE(DEBUG, "Registered %d in rxq-%d epoll", 587 vq->kickfd, rxq_idx); 588 } 589 } 590 591 rte_spinlock_unlock(&vq->intr_lock); 592 } 593 594 static int 595 eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid) 596 { 597 struct vhost_queue *vq = dev->data->rx_queues[qid]; 598 599 if (vq->vid >= 0) 600 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1); 601 602 return 0; 603 } 604 605 static int 606 eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid) 607 { 608 struct vhost_queue *vq = dev->data->rx_queues[qid]; 609 610 if (vq->vid >= 0) 611 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0); 612 613 return 0; 614 } 615 616 static void 617 eth_vhost_uninstall_intr(struct rte_eth_dev *dev) 618 { 619 struct rte_intr_handle *intr_handle = dev->intr_handle; 620 621 if (intr_handle != NULL) { 622 int i; 623 624 for (i = 0; i < dev->data->nb_rx_queues; i++) { 625 int epoll_fd = rte_intr_efds_index_get(dev->intr_handle, i); 626 627 if (epoll_fd >= 0) 628 close(epoll_fd); 629 } 630 rte_intr_vec_list_free(intr_handle); 631 rte_intr_instance_free(intr_handle); 632 } 633 dev->intr_handle = NULL; 634 } 635 636 static int 637 eth_vhost_install_intr(struct rte_eth_dev *dev) 638 { 639 int nb_rxq = dev->data->nb_rx_queues; 640 struct vhost_queue *vq; 641 642 int ret; 643 int i; 644 645 dev->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE); 646 if (dev->intr_handle == NULL) { 647 VHOST_LOG_LINE(ERR, "Fail to allocate intr_handle"); 648 ret = -ENOMEM; 649 goto error; 650 } 651 if (rte_intr_efd_counter_size_set(dev->intr_handle, 0)) { 652 ret = -rte_errno; 653 goto error; 654 } 655 656 if (rte_intr_vec_list_alloc(dev->intr_handle, NULL, nb_rxq)) { 657 VHOST_LOG_LINE(ERR, "Failed to allocate memory for interrupt vector"); 658 ret = -ENOMEM; 659 goto error; 660 } 661 662 VHOST_LOG_LINE(DEBUG, "Prepare intr vec"); 663 for (i = 0; i < nb_rxq; i++) { 664 int epoll_fd = epoll_create1(0); 665 666 if (epoll_fd < 0) { 667 VHOST_LOG_LINE(ERR, "Failed to create proxy epoll fd for rxq-%d", i); 668 ret = -errno; 669 goto error; 670 } 671 672 if (rte_intr_vec_list_index_set(dev->intr_handle, i, 673 RTE_INTR_VEC_RXTX_OFFSET + i) || 674 rte_intr_efds_index_set(dev->intr_handle, i, epoll_fd)) { 675 ret = -rte_errno; 676 close(epoll_fd); 677 goto error; 678 } 679 680 vq = dev->data->rx_queues[i]; 681 memset(&vq->ev, 0, sizeof(vq->ev)); 682 vq->ev.events = EPOLLIN; 683 vq->ev.data.fd = epoll_fd; 684 } 685 686 if (rte_intr_nb_efd_set(dev->intr_handle, nb_rxq)) { 687 ret = -rte_errno; 688 goto error; 689 } 690 if (rte_intr_max_intr_set(dev->intr_handle, nb_rxq + 1)) { 691 ret = -rte_errno; 692 goto error; 693 } 694 if (rte_intr_type_set(dev->intr_handle, RTE_INTR_HANDLE_VDEV)) { 695 ret = -rte_errno; 696 goto error; 697 } 698 699 return 0; 700 701 error: 702 eth_vhost_uninstall_intr(dev); 703 return ret; 704 } 705 706 static void 707 eth_vhost_configure_intr(struct rte_eth_dev *dev) 708 { 709 int i; 710 711 VHOST_LOG_LINE(DEBUG, "Configure intr vec"); 712 for (i = 0; i < dev->data->nb_rx_queues; i++) 713 eth_vhost_update_intr(dev, i); 714 } 715 716 static void 717 eth_vhost_unconfigure_intr(struct rte_eth_dev *eth_dev) 718 { 719 struct vhost_queue *vq; 720 int i; 721 722 VHOST_LOG_LINE(DEBUG, "Unconfigure intr vec"); 723 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { 724 vq = eth_dev->data->rx_queues[i]; 725 if (vq == NULL || vq->vid < 0) 726 continue; 727 728 rte_spinlock_lock(&vq->intr_lock); 729 730 /* Remove previous kickfd from proxy epoll */ 731 if (vq->kickfd >= 0) { 732 if (epoll_ctl(vq->ev.data.fd, EPOLL_CTL_DEL, vq->kickfd, &vq->ev) < 0) { 733 VHOST_LOG_LINE(DEBUG, "Failed to unregister %d from rxq-%d epoll: %s", 734 vq->kickfd, i, strerror(errno)); 735 } else { 736 VHOST_LOG_LINE(DEBUG, "Unregistered %d from rxq-%d epoll", 737 vq->kickfd, i); 738 } 739 vq->kickfd = -1; 740 } 741 742 rte_spinlock_unlock(&vq->intr_lock); 743 } 744 } 745 746 static void 747 update_queuing_status(struct rte_eth_dev *dev, bool wait_queuing) 748 { 749 struct pmd_internal *internal = dev->data->dev_private; 750 struct vhost_queue *vq; 751 struct rte_vhost_vring_state *state; 752 unsigned int i; 753 int allow_queuing = 1; 754 755 if (!dev->data->rx_queues || !dev->data->tx_queues) 756 return; 757 758 if (rte_atomic32_read(&internal->started) == 0 || 759 rte_atomic32_read(&internal->dev_attached) == 0) 760 allow_queuing = 0; 761 762 state = vring_states[dev->data->port_id]; 763 764 /* Wait until rx/tx_pkt_burst stops accessing vhost device */ 765 for (i = 0; i < dev->data->nb_rx_queues; i++) { 766 vq = dev->data->rx_queues[i]; 767 if (vq == NULL) 768 continue; 769 if (allow_queuing && state->cur[vq->virtqueue_id]) 770 rte_atomic32_set(&vq->allow_queuing, 1); 771 else 772 rte_atomic32_set(&vq->allow_queuing, 0); 773 while (wait_queuing && rte_atomic32_read(&vq->while_queuing)) 774 rte_pause(); 775 } 776 777 for (i = 0; i < dev->data->nb_tx_queues; i++) { 778 vq = dev->data->tx_queues[i]; 779 if (vq == NULL) 780 continue; 781 if (allow_queuing && state->cur[vq->virtqueue_id]) 782 rte_atomic32_set(&vq->allow_queuing, 1); 783 else 784 rte_atomic32_set(&vq->allow_queuing, 0); 785 while (wait_queuing && rte_atomic32_read(&vq->while_queuing)) 786 rte_pause(); 787 } 788 } 789 790 static void 791 queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal) 792 { 793 struct vhost_queue *vq; 794 int i; 795 796 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { 797 vq = eth_dev->data->rx_queues[i]; 798 if (!vq) 799 continue; 800 vq->vid = internal->vid; 801 vq->internal = internal; 802 vq->port = eth_dev->data->port_id; 803 } 804 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { 805 vq = eth_dev->data->tx_queues[i]; 806 if (!vq) 807 continue; 808 vq->vid = internal->vid; 809 vq->internal = internal; 810 vq->port = eth_dev->data->port_id; 811 } 812 } 813 814 static int 815 new_device(int vid) 816 { 817 struct rte_eth_dev *eth_dev; 818 struct internal_list *list; 819 struct pmd_internal *internal; 820 struct rte_eth_conf *dev_conf; 821 unsigned i; 822 char ifname[PATH_MAX]; 823 #ifdef RTE_LIBRTE_VHOST_NUMA 824 int newnode; 825 #endif 826 827 rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); 828 list = find_internal_resource(ifname); 829 if (list == NULL) { 830 VHOST_LOG_LINE(INFO, "Invalid device name: %s", ifname); 831 return -1; 832 } 833 834 eth_dev = list->eth_dev; 835 internal = eth_dev->data->dev_private; 836 dev_conf = ð_dev->data->dev_conf; 837 838 #ifdef RTE_LIBRTE_VHOST_NUMA 839 newnode = rte_vhost_get_numa_node(vid); 840 if (newnode >= 0) 841 eth_dev->data->numa_node = newnode; 842 #endif 843 844 if (rte_vhost_get_negotiated_features(vid, &internal->features)) { 845 VHOST_LOG_LINE(ERR, "Failed to get device features"); 846 return -1; 847 } 848 849 internal->vid = vid; 850 if (rte_atomic32_read(&internal->started) == 1) { 851 queue_setup(eth_dev, internal); 852 if (dev_conf->intr_conf.rxq) 853 eth_vhost_configure_intr(eth_dev); 854 } 855 856 for (i = 0; i < rte_vhost_get_vring_num(vid); i++) 857 rte_vhost_enable_guest_notification(vid, i, 0); 858 859 rte_vhost_get_mtu(vid, ð_dev->data->mtu); 860 861 eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP; 862 863 vhost_dev_csum_configure(eth_dev); 864 865 rte_atomic32_set(&internal->dev_attached, 1); 866 update_queuing_status(eth_dev, false); 867 868 VHOST_LOG_LINE(INFO, "Vhost device %d created", vid); 869 870 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); 871 872 return 0; 873 } 874 875 static void 876 destroy_device(int vid) 877 { 878 struct rte_eth_dev *eth_dev; 879 struct pmd_internal *internal; 880 struct vhost_queue *vq; 881 struct internal_list *list; 882 char ifname[PATH_MAX]; 883 unsigned i; 884 struct rte_vhost_vring_state *state; 885 886 rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); 887 list = find_internal_resource(ifname); 888 if (list == NULL) { 889 VHOST_LOG_LINE(ERR, "Invalid interface name: %s", ifname); 890 return; 891 } 892 eth_dev = list->eth_dev; 893 internal = eth_dev->data->dev_private; 894 895 rte_atomic32_set(&internal->dev_attached, 0); 896 update_queuing_status(eth_dev, true); 897 eth_vhost_unconfigure_intr(eth_dev); 898 899 eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN; 900 901 if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) { 902 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { 903 vq = eth_dev->data->rx_queues[i]; 904 if (!vq) 905 continue; 906 vq->vid = -1; 907 } 908 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { 909 vq = eth_dev->data->tx_queues[i]; 910 if (!vq) 911 continue; 912 vq->vid = -1; 913 } 914 } 915 916 state = vring_states[eth_dev->data->port_id]; 917 rte_spinlock_lock(&state->lock); 918 for (i = 0; i <= state->max_vring; i++) { 919 state->cur[i] = false; 920 state->seen[i] = false; 921 } 922 state->max_vring = 0; 923 rte_spinlock_unlock(&state->lock); 924 925 VHOST_LOG_LINE(INFO, "Vhost device %d destroyed", vid); 926 927 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); 928 } 929 930 static int 931 vring_state_changed(int vid, uint16_t vring, int enable) 932 { 933 struct rte_vhost_vring_state *state; 934 struct rte_eth_dev *eth_dev; 935 struct internal_list *list; 936 char ifname[PATH_MAX]; 937 938 rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); 939 list = find_internal_resource(ifname); 940 if (list == NULL) { 941 VHOST_LOG_LINE(ERR, "Invalid interface name: %s", ifname); 942 return -1; 943 } 944 945 eth_dev = list->eth_dev; 946 /* won't be NULL */ 947 state = vring_states[eth_dev->data->port_id]; 948 949 if (eth_dev->data->dev_conf.intr_conf.rxq && vring % 2) 950 eth_vhost_update_intr(eth_dev, (vring - 1) >> 1); 951 952 rte_spinlock_lock(&state->lock); 953 if (state->cur[vring] == enable) { 954 rte_spinlock_unlock(&state->lock); 955 return 0; 956 } 957 state->cur[vring] = enable; 958 state->max_vring = RTE_MAX(vring, state->max_vring); 959 rte_spinlock_unlock(&state->lock); 960 961 update_queuing_status(eth_dev, false); 962 963 VHOST_LOG_LINE(INFO, "vring%u is %s", 964 vring, enable ? "enabled" : "disabled"); 965 966 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL); 967 968 return 0; 969 } 970 971 static struct rte_vhost_device_ops vhost_ops = { 972 .new_device = new_device, 973 .destroy_device = destroy_device, 974 .vring_state_changed = vring_state_changed, 975 }; 976 977 static int 978 vhost_driver_setup(struct rte_eth_dev *eth_dev) 979 { 980 struct pmd_internal *internal = eth_dev->data->dev_private; 981 struct internal_list *list = NULL; 982 struct rte_vhost_vring_state *vring_state = NULL; 983 unsigned int numa_node = eth_dev->device->numa_node; 984 const char *name = eth_dev->device->name; 985 986 /* Don't try to setup again if it has already been done. */ 987 list = find_internal_resource(internal->iface_name); 988 if (list) 989 return 0; 990 991 list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node); 992 if (list == NULL) 993 return -1; 994 995 vring_state = rte_zmalloc_socket(name, sizeof(*vring_state), 996 0, numa_node); 997 if (vring_state == NULL) 998 goto free_list; 999 1000 list->eth_dev = eth_dev; 1001 pthread_mutex_lock(&internal_list_lock); 1002 TAILQ_INSERT_TAIL(&internal_list, list, next); 1003 pthread_mutex_unlock(&internal_list_lock); 1004 1005 rte_spinlock_init(&vring_state->lock); 1006 vring_states[eth_dev->data->port_id] = vring_state; 1007 1008 if (rte_vhost_driver_register(internal->iface_name, internal->flags)) 1009 goto list_remove; 1010 1011 if (internal->disable_flags) { 1012 if (rte_vhost_driver_disable_features(internal->iface_name, 1013 internal->disable_flags)) 1014 goto drv_unreg; 1015 } 1016 1017 if (rte_vhost_driver_set_max_queue_num(internal->iface_name, internal->max_queues)) 1018 goto drv_unreg; 1019 1020 if (rte_vhost_driver_callback_register(internal->iface_name, 1021 &vhost_ops) < 0) { 1022 VHOST_LOG_LINE(ERR, "Can't register callbacks"); 1023 goto drv_unreg; 1024 } 1025 1026 if (rte_vhost_driver_start(internal->iface_name) < 0) { 1027 VHOST_LOG_LINE(ERR, "Failed to start driver for %s", 1028 internal->iface_name); 1029 goto drv_unreg; 1030 } 1031 1032 return 0; 1033 1034 drv_unreg: 1035 rte_vhost_driver_unregister(internal->iface_name); 1036 list_remove: 1037 vring_states[eth_dev->data->port_id] = NULL; 1038 pthread_mutex_lock(&internal_list_lock); 1039 TAILQ_REMOVE(&internal_list, list, next); 1040 pthread_mutex_unlock(&internal_list_lock); 1041 rte_free(vring_state); 1042 free_list: 1043 rte_free(list); 1044 1045 return -1; 1046 } 1047 1048 int 1049 rte_eth_vhost_get_queue_event(uint16_t port_id, 1050 struct rte_eth_vhost_queue_event *event) 1051 { 1052 struct rte_vhost_vring_state *state; 1053 unsigned int i; 1054 int idx; 1055 1056 if (port_id >= RTE_MAX_ETHPORTS) { 1057 VHOST_LOG_LINE(ERR, "Invalid port id"); 1058 return -1; 1059 } 1060 1061 state = vring_states[port_id]; 1062 if (!state) { 1063 VHOST_LOG_LINE(ERR, "Unused port"); 1064 return -1; 1065 } 1066 1067 rte_spinlock_lock(&state->lock); 1068 for (i = 0; i <= state->max_vring; i++) { 1069 idx = state->index++ % (state->max_vring + 1); 1070 1071 if (state->cur[idx] != state->seen[idx]) { 1072 state->seen[idx] = state->cur[idx]; 1073 event->queue_id = idx / 2; 1074 event->rx = idx & 1; 1075 event->enable = state->cur[idx]; 1076 rte_spinlock_unlock(&state->lock); 1077 return 0; 1078 } 1079 } 1080 rte_spinlock_unlock(&state->lock); 1081 1082 return -1; 1083 } 1084 1085 int 1086 rte_eth_vhost_get_vid_from_port_id(uint16_t port_id) 1087 { 1088 struct internal_list *list; 1089 struct rte_eth_dev *eth_dev; 1090 struct vhost_queue *vq; 1091 int vid = -1; 1092 1093 if (!rte_eth_dev_is_valid_port(port_id)) 1094 return -1; 1095 1096 pthread_mutex_lock(&internal_list_lock); 1097 1098 TAILQ_FOREACH(list, &internal_list, next) { 1099 eth_dev = list->eth_dev; 1100 if (eth_dev->data->port_id == port_id) { 1101 vq = eth_dev->data->rx_queues[0]; 1102 if (vq) { 1103 vid = vq->vid; 1104 } 1105 break; 1106 } 1107 } 1108 1109 pthread_mutex_unlock(&internal_list_lock); 1110 1111 return vid; 1112 } 1113 1114 static int 1115 eth_dev_configure(struct rte_eth_dev *dev) 1116 { 1117 struct pmd_internal *internal = dev->data->dev_private; 1118 const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; 1119 1120 /* NOTE: the same process has to operate a vhost interface 1121 * from beginning to end (from eth_dev configure to eth_dev close). 1122 * It is user's responsibility at the moment. 1123 */ 1124 if (vhost_driver_setup(dev) < 0) 1125 return -1; 1126 1127 internal->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP); 1128 1129 vhost_dev_csum_configure(dev); 1130 1131 return 0; 1132 } 1133 1134 static int 1135 eth_dev_start(struct rte_eth_dev *eth_dev) 1136 { 1137 struct pmd_internal *internal = eth_dev->data->dev_private; 1138 struct rte_eth_conf *dev_conf = ð_dev->data->dev_conf; 1139 uint16_t i; 1140 1141 eth_vhost_uninstall_intr(eth_dev); 1142 if (dev_conf->intr_conf.rxq && eth_vhost_install_intr(eth_dev) < 0) { 1143 VHOST_LOG_LINE(ERR, "Failed to install interrupt handler."); 1144 return -1; 1145 } 1146 1147 queue_setup(eth_dev, internal); 1148 if (rte_atomic32_read(&internal->dev_attached) == 1 && 1149 dev_conf->intr_conf.rxq) 1150 eth_vhost_configure_intr(eth_dev); 1151 1152 rte_atomic32_set(&internal->started, 1); 1153 update_queuing_status(eth_dev, false); 1154 1155 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) 1156 eth_dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1157 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) 1158 eth_dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1159 1160 return 0; 1161 } 1162 1163 static int 1164 eth_dev_stop(struct rte_eth_dev *dev) 1165 { 1166 struct pmd_internal *internal = dev->data->dev_private; 1167 uint16_t i; 1168 1169 dev->data->dev_started = 0; 1170 rte_atomic32_set(&internal->started, 0); 1171 update_queuing_status(dev, true); 1172 1173 for (i = 0; i < dev->data->nb_rx_queues; i++) 1174 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1175 for (i = 0; i < dev->data->nb_tx_queues; i++) 1176 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1177 1178 return 0; 1179 } 1180 1181 static int 1182 eth_dev_close(struct rte_eth_dev *dev) 1183 { 1184 struct pmd_internal *internal; 1185 struct internal_list *list; 1186 unsigned int i, ret; 1187 1188 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1189 return 0; 1190 1191 internal = dev->data->dev_private; 1192 if (!internal) 1193 return 0; 1194 1195 ret = eth_dev_stop(dev); 1196 1197 list = find_internal_resource(internal->iface_name); 1198 if (list) { 1199 rte_vhost_driver_unregister(internal->iface_name); 1200 pthread_mutex_lock(&internal_list_lock); 1201 TAILQ_REMOVE(&internal_list, list, next); 1202 pthread_mutex_unlock(&internal_list_lock); 1203 rte_free(list); 1204 } 1205 1206 if (dev->data->rx_queues) 1207 for (i = 0; i < dev->data->nb_rx_queues; i++) 1208 rte_free(dev->data->rx_queues[i]); 1209 1210 if (dev->data->tx_queues) 1211 for (i = 0; i < dev->data->nb_tx_queues; i++) 1212 rte_free(dev->data->tx_queues[i]); 1213 1214 rte_free(internal->iface_name); 1215 rte_free(internal); 1216 1217 eth_vhost_uninstall_intr(dev); 1218 1219 dev->data->dev_private = NULL; 1220 1221 rte_free(vring_states[dev->data->port_id]); 1222 vring_states[dev->data->port_id] = NULL; 1223 1224 return ret; 1225 } 1226 1227 static int 1228 eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, 1229 uint16_t nb_rx_desc __rte_unused, 1230 unsigned int socket_id, 1231 const struct rte_eth_rxconf *rx_conf __rte_unused, 1232 struct rte_mempool *mb_pool) 1233 { 1234 struct vhost_queue *vq; 1235 1236 vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue), 1237 RTE_CACHE_LINE_SIZE, socket_id); 1238 if (vq == NULL) { 1239 VHOST_LOG_LINE(ERR, "Failed to allocate memory for rx queue"); 1240 return -ENOMEM; 1241 } 1242 1243 vq->mb_pool = mb_pool; 1244 vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ; 1245 rte_spinlock_init(&vq->intr_lock); 1246 vq->kickfd = -1; 1247 dev->data->rx_queues[rx_queue_id] = vq; 1248 1249 return 0; 1250 } 1251 1252 static int 1253 eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, 1254 uint16_t nb_tx_desc __rte_unused, 1255 unsigned int socket_id, 1256 const struct rte_eth_txconf *tx_conf __rte_unused) 1257 { 1258 struct vhost_queue *vq; 1259 1260 vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue), 1261 RTE_CACHE_LINE_SIZE, socket_id); 1262 if (vq == NULL) { 1263 VHOST_LOG_LINE(ERR, "Failed to allocate memory for tx queue"); 1264 return -ENOMEM; 1265 } 1266 1267 vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ; 1268 rte_spinlock_init(&vq->intr_lock); 1269 vq->kickfd = -1; 1270 dev->data->tx_queues[tx_queue_id] = vq; 1271 1272 return 0; 1273 } 1274 1275 static int 1276 eth_dev_info(struct rte_eth_dev *dev, 1277 struct rte_eth_dev_info *dev_info) 1278 { 1279 struct pmd_internal *internal; 1280 1281 internal = dev->data->dev_private; 1282 if (internal == NULL) { 1283 VHOST_LOG_LINE(ERR, "Invalid device specified"); 1284 return -ENODEV; 1285 } 1286 1287 dev_info->max_mac_addrs = 1; 1288 dev_info->max_rx_pktlen = (uint32_t)-1; 1289 dev_info->max_rx_queues = internal->max_queues; 1290 dev_info->max_tx_queues = internal->max_queues; 1291 dev_info->min_rx_bufsize = 0; 1292 1293 dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS | 1294 RTE_ETH_TX_OFFLOAD_VLAN_INSERT; 1295 if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) { 1296 dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM | 1297 RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 1298 } 1299 1300 dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP; 1301 if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) { 1302 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM | 1303 RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 1304 } 1305 1306 return 0; 1307 } 1308 1309 static int 1310 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 1311 { 1312 unsigned i; 1313 unsigned long rx_total = 0, tx_total = 0; 1314 unsigned long rx_total_bytes = 0, tx_total_bytes = 0; 1315 unsigned long tx_total_errors = 0; 1316 struct vhost_queue *vq; 1317 1318 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && 1319 i < dev->data->nb_rx_queues; i++) { 1320 if (dev->data->rx_queues[i] == NULL) 1321 continue; 1322 vq = dev->data->rx_queues[i]; 1323 stats->q_ipackets[i] = vq->stats.pkts; 1324 rx_total += stats->q_ipackets[i]; 1325 1326 stats->q_ibytes[i] = vq->stats.bytes; 1327 rx_total_bytes += stats->q_ibytes[i]; 1328 } 1329 1330 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && 1331 i < dev->data->nb_tx_queues; i++) { 1332 if (dev->data->tx_queues[i] == NULL) 1333 continue; 1334 vq = dev->data->tx_queues[i]; 1335 stats->q_opackets[i] = vq->stats.pkts; 1336 tx_total += stats->q_opackets[i]; 1337 1338 stats->q_obytes[i] = vq->stats.bytes; 1339 tx_total_bytes += stats->q_obytes[i]; 1340 1341 tx_total_errors += vq->stats.missed_pkts; 1342 } 1343 1344 stats->ipackets = rx_total; 1345 stats->opackets = tx_total; 1346 stats->ibytes = rx_total_bytes; 1347 stats->obytes = tx_total_bytes; 1348 stats->oerrors = tx_total_errors; 1349 1350 return 0; 1351 } 1352 1353 static int 1354 eth_stats_reset(struct rte_eth_dev *dev) 1355 { 1356 struct vhost_queue *vq; 1357 unsigned i; 1358 1359 for (i = 0; i < dev->data->nb_rx_queues; i++) { 1360 if (dev->data->rx_queues[i] == NULL) 1361 continue; 1362 vq = dev->data->rx_queues[i]; 1363 vq->stats.pkts = 0; 1364 vq->stats.bytes = 0; 1365 } 1366 for (i = 0; i < dev->data->nb_tx_queues; i++) { 1367 if (dev->data->tx_queues[i] == NULL) 1368 continue; 1369 vq = dev->data->tx_queues[i]; 1370 vq->stats.pkts = 0; 1371 vq->stats.bytes = 0; 1372 vq->stats.missed_pkts = 0; 1373 } 1374 1375 return 0; 1376 } 1377 1378 static void 1379 eth_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 1380 { 1381 rte_free(dev->data->rx_queues[qid]); 1382 } 1383 1384 static void 1385 eth_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 1386 { 1387 rte_free(dev->data->tx_queues[qid]); 1388 } 1389 1390 static int 1391 eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused) 1392 { 1393 /* 1394 * vHost does not hang onto mbuf. eth_vhost_tx() copies packet data 1395 * and releases mbuf, so nothing to cleanup. 1396 */ 1397 return 0; 1398 } 1399 1400 static int 1401 eth_link_update(struct rte_eth_dev *dev __rte_unused, 1402 int wait_to_complete __rte_unused) 1403 { 1404 return 0; 1405 } 1406 1407 static uint32_t 1408 eth_rx_queue_count(void *rx_queue) 1409 { 1410 struct vhost_queue *vq; 1411 1412 vq = rx_queue; 1413 if (vq == NULL) 1414 return 0; 1415 1416 return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id); 1417 } 1418 1419 #define CLB_VAL_IDX 0 1420 #define CLB_MSK_IDX 1 1421 #define CLB_MATCH_IDX 2 1422 static int 1423 vhost_monitor_callback(const uint64_t value, 1424 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ]) 1425 { 1426 const uint64_t m = opaque[CLB_MSK_IDX]; 1427 const uint64_t v = opaque[CLB_VAL_IDX]; 1428 const uint64_t c = opaque[CLB_MATCH_IDX]; 1429 1430 if (c) 1431 return (value & m) == v ? -1 : 0; 1432 else 1433 return (value & m) == v ? 0 : -1; 1434 } 1435 1436 static int 1437 vhost_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) 1438 { 1439 struct vhost_queue *vq = rx_queue; 1440 struct rte_vhost_power_monitor_cond vhost_pmc; 1441 int ret; 1442 if (vq == NULL) 1443 return -EINVAL; 1444 ret = rte_vhost_get_monitor_addr(vq->vid, vq->virtqueue_id, 1445 &vhost_pmc); 1446 if (ret < 0) 1447 return -EINVAL; 1448 pmc->addr = vhost_pmc.addr; 1449 pmc->opaque[CLB_VAL_IDX] = vhost_pmc.val; 1450 pmc->opaque[CLB_MSK_IDX] = vhost_pmc.mask; 1451 pmc->opaque[CLB_MATCH_IDX] = vhost_pmc.match; 1452 pmc->size = vhost_pmc.size; 1453 pmc->fn = vhost_monitor_callback; 1454 1455 return 0; 1456 } 1457 1458 static int 1459 vhost_dev_priv_dump(struct rte_eth_dev *dev, FILE *f) 1460 { 1461 struct pmd_internal *internal = dev->data->dev_private; 1462 1463 fprintf(f, "iface_name: %s\n", internal->iface_name); 1464 fprintf(f, "flags: 0x%" PRIx64 "\n", internal->flags); 1465 fprintf(f, "disable_flags: 0x%" PRIx64 "\n", internal->disable_flags); 1466 fprintf(f, "features: 0x%" PRIx64 "\n", internal->features); 1467 fprintf(f, "max_queues: %u\n", internal->max_queues); 1468 fprintf(f, "vid: %d\n", internal->vid); 1469 fprintf(f, "started: %d\n", rte_atomic32_read(&internal->started)); 1470 fprintf(f, "dev_attached: %d\n", rte_atomic32_read(&internal->dev_attached)); 1471 fprintf(f, "vlan_strip: %d\n", internal->vlan_strip); 1472 fprintf(f, "rx_sw_csum: %d\n", internal->rx_sw_csum); 1473 fprintf(f, "tx_sw_csum: %d\n", internal->tx_sw_csum); 1474 1475 return 0; 1476 } 1477 1478 static const struct eth_dev_ops ops = { 1479 .dev_start = eth_dev_start, 1480 .dev_stop = eth_dev_stop, 1481 .dev_close = eth_dev_close, 1482 .dev_configure = eth_dev_configure, 1483 .dev_infos_get = eth_dev_info, 1484 .rx_queue_setup = eth_rx_queue_setup, 1485 .tx_queue_setup = eth_tx_queue_setup, 1486 .rx_queue_release = eth_rx_queue_release, 1487 .tx_queue_release = eth_tx_queue_release, 1488 .tx_done_cleanup = eth_tx_done_cleanup, 1489 .link_update = eth_link_update, 1490 .stats_get = eth_stats_get, 1491 .stats_reset = eth_stats_reset, 1492 .xstats_reset = vhost_dev_xstats_reset, 1493 .xstats_get = vhost_dev_xstats_get, 1494 .xstats_get_names = vhost_dev_xstats_get_names, 1495 .rx_queue_intr_enable = eth_rxq_intr_enable, 1496 .rx_queue_intr_disable = eth_rxq_intr_disable, 1497 .get_monitor_addr = vhost_get_monitor_addr, 1498 .eth_dev_priv_dump = vhost_dev_priv_dump, 1499 }; 1500 1501 static int 1502 eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, 1503 int16_t queues, const unsigned int numa_node, uint64_t flags, 1504 uint64_t disable_flags) 1505 { 1506 const char *name = rte_vdev_device_name(dev); 1507 struct rte_eth_dev_data *data; 1508 struct pmd_internal *internal = NULL; 1509 struct rte_eth_dev *eth_dev = NULL; 1510 struct rte_ether_addr *eth_addr = NULL; 1511 1512 VHOST_LOG_LINE(INFO, "Creating VHOST-USER backend on numa socket %u", 1513 numa_node); 1514 1515 /* reserve an ethdev entry */ 1516 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal)); 1517 if (eth_dev == NULL) 1518 goto error; 1519 data = eth_dev->data; 1520 1521 eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node); 1522 if (eth_addr == NULL) 1523 goto error; 1524 data->mac_addrs = eth_addr; 1525 *eth_addr = base_eth_addr; 1526 eth_addr->addr_bytes[5] = eth_dev->data->port_id; 1527 1528 /* now put it all together 1529 * - store queue data in internal, 1530 * - point eth_dev_data to internals 1531 * - and point eth_dev structure to new eth_dev_data structure 1532 */ 1533 internal = eth_dev->data->dev_private; 1534 internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1, 1535 0, numa_node); 1536 if (internal->iface_name == NULL) 1537 goto error; 1538 strcpy(internal->iface_name, iface_name); 1539 1540 data->nb_rx_queues = queues; 1541 data->nb_tx_queues = queues; 1542 internal->max_queues = queues; 1543 internal->vid = -1; 1544 internal->flags = flags; 1545 internal->disable_flags = disable_flags; 1546 data->dev_link = pmd_link; 1547 data->dev_flags = RTE_ETH_DEV_INTR_LSC | 1548 RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1549 data->promiscuous = 1; 1550 data->all_multicast = 1; 1551 1552 eth_dev->dev_ops = &ops; 1553 eth_dev->rx_queue_count = eth_rx_queue_count; 1554 1555 /* finally assign rx and tx ops */ 1556 eth_dev->rx_pkt_burst = eth_vhost_rx; 1557 eth_dev->tx_pkt_burst = eth_vhost_tx; 1558 1559 rte_eth_dev_probing_finish(eth_dev); 1560 return 0; 1561 1562 error: 1563 if (internal) 1564 rte_free(internal->iface_name); 1565 rte_eth_dev_release_port(eth_dev); 1566 1567 return -1; 1568 } 1569 1570 static inline int 1571 open_iface(const char *key __rte_unused, const char *value, void *extra_args) 1572 { 1573 const char **iface_name = extra_args; 1574 1575 if (value == NULL) 1576 return -1; 1577 1578 *iface_name = value; 1579 1580 return 0; 1581 } 1582 1583 static inline int 1584 open_int(const char *key __rte_unused, const char *value, void *extra_args) 1585 { 1586 uint16_t *n = extra_args; 1587 1588 if (value == NULL || extra_args == NULL) 1589 return -EINVAL; 1590 1591 *n = (uint16_t)strtoul(value, NULL, 0); 1592 if (*n == USHRT_MAX && errno == ERANGE) 1593 return -1; 1594 1595 return 0; 1596 } 1597 1598 static int 1599 rte_pmd_vhost_probe(struct rte_vdev_device *dev) 1600 { 1601 struct rte_kvargs *kvlist = NULL; 1602 int ret = 0; 1603 char *iface_name; 1604 uint16_t queues; 1605 uint64_t flags = RTE_VHOST_USER_NET_STATS_ENABLE; 1606 uint64_t disable_flags = 0; 1607 int client_mode = 0; 1608 int iommu_support = 0; 1609 int postcopy_support = 0; 1610 int tso = 0; 1611 int linear_buf = 0; 1612 int ext_buf = 0; 1613 int legacy_ol_flags = 0; 1614 struct rte_eth_dev *eth_dev; 1615 const char *name = rte_vdev_device_name(dev); 1616 1617 VHOST_LOG_LINE(INFO, "Initializing pmd_vhost for %s", name); 1618 1619 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1620 eth_dev = rte_eth_dev_attach_secondary(name); 1621 if (!eth_dev) { 1622 VHOST_LOG_LINE(ERR, "Failed to probe %s", name); 1623 return -1; 1624 } 1625 eth_dev->rx_pkt_burst = eth_vhost_rx; 1626 eth_dev->tx_pkt_burst = eth_vhost_tx; 1627 eth_dev->dev_ops = &ops; 1628 if (dev->device.numa_node == SOCKET_ID_ANY) 1629 dev->device.numa_node = rte_socket_id(); 1630 eth_dev->device = &dev->device; 1631 rte_eth_dev_probing_finish(eth_dev); 1632 return 0; 1633 } 1634 1635 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); 1636 if (kvlist == NULL) 1637 return -1; 1638 1639 if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) { 1640 ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG, 1641 &open_iface, &iface_name); 1642 if (ret < 0) 1643 goto out_free; 1644 } else { 1645 ret = -1; 1646 goto out_free; 1647 } 1648 1649 if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) { 1650 ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG, 1651 &open_int, &queues); 1652 if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT) 1653 goto out_free; 1654 1655 } else 1656 queues = 1; 1657 1658 if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) { 1659 ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG, 1660 &open_int, &client_mode); 1661 if (ret < 0) 1662 goto out_free; 1663 1664 if (client_mode) 1665 flags |= RTE_VHOST_USER_CLIENT; 1666 } 1667 1668 if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) { 1669 ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT, 1670 &open_int, &iommu_support); 1671 if (ret < 0) 1672 goto out_free; 1673 1674 if (iommu_support) 1675 flags |= RTE_VHOST_USER_IOMMU_SUPPORT; 1676 } 1677 1678 if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) { 1679 ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT, 1680 &open_int, &postcopy_support); 1681 if (ret < 0) 1682 goto out_free; 1683 1684 if (postcopy_support) 1685 flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT; 1686 } 1687 1688 if (rte_kvargs_count(kvlist, ETH_VHOST_VIRTIO_NET_F_HOST_TSO) == 1) { 1689 ret = rte_kvargs_process(kvlist, 1690 ETH_VHOST_VIRTIO_NET_F_HOST_TSO, 1691 &open_int, &tso); 1692 if (ret < 0) 1693 goto out_free; 1694 } 1695 1696 if (tso == 0) { 1697 disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4); 1698 disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6); 1699 } 1700 1701 if (rte_kvargs_count(kvlist, ETH_VHOST_LINEAR_BUF) == 1) { 1702 ret = rte_kvargs_process(kvlist, 1703 ETH_VHOST_LINEAR_BUF, 1704 &open_int, &linear_buf); 1705 if (ret < 0) 1706 goto out_free; 1707 1708 if (linear_buf == 1) 1709 flags |= RTE_VHOST_USER_LINEARBUF_SUPPORT; 1710 } 1711 1712 if (rte_kvargs_count(kvlist, ETH_VHOST_EXT_BUF) == 1) { 1713 ret = rte_kvargs_process(kvlist, 1714 ETH_VHOST_EXT_BUF, 1715 &open_int, &ext_buf); 1716 if (ret < 0) 1717 goto out_free; 1718 1719 if (ext_buf == 1) 1720 flags |= RTE_VHOST_USER_EXTBUF_SUPPORT; 1721 } 1722 1723 if (rte_kvargs_count(kvlist, ETH_VHOST_LEGACY_OL_FLAGS) == 1) { 1724 ret = rte_kvargs_process(kvlist, 1725 ETH_VHOST_LEGACY_OL_FLAGS, 1726 &open_int, &legacy_ol_flags); 1727 if (ret < 0) 1728 goto out_free; 1729 } 1730 1731 if (legacy_ol_flags == 0) 1732 flags |= RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS; 1733 1734 if (dev->device.numa_node == SOCKET_ID_ANY) 1735 dev->device.numa_node = rte_socket_id(); 1736 1737 ret = eth_dev_vhost_create(dev, iface_name, queues, 1738 dev->device.numa_node, flags, disable_flags); 1739 if (ret == -1) 1740 VHOST_LOG_LINE(ERR, "Failed to create %s", name); 1741 1742 out_free: 1743 rte_kvargs_free(kvlist); 1744 return ret; 1745 } 1746 1747 static int 1748 rte_pmd_vhost_remove(struct rte_vdev_device *dev) 1749 { 1750 const char *name; 1751 struct rte_eth_dev *eth_dev = NULL; 1752 1753 name = rte_vdev_device_name(dev); 1754 VHOST_LOG_LINE(INFO, "Un-Initializing pmd_vhost for %s", name); 1755 1756 /* find an ethdev entry */ 1757 eth_dev = rte_eth_dev_allocated(name); 1758 if (eth_dev == NULL) 1759 return 0; 1760 1761 eth_dev_close(eth_dev); 1762 rte_eth_dev_release_port(eth_dev); 1763 1764 return 0; 1765 } 1766 1767 static struct rte_vdev_driver pmd_vhost_drv = { 1768 .probe = rte_pmd_vhost_probe, 1769 .remove = rte_pmd_vhost_remove, 1770 }; 1771 1772 RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv); 1773 RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost); 1774 RTE_PMD_REGISTER_PARAM_STRING(net_vhost, 1775 "iface=<ifc> " 1776 "queues=<int> " 1777 "client=<0|1> " 1778 "iommu-support=<0|1> " 1779 "postcopy-support=<0|1> " 1780 "tso=<0|1> " 1781 "linear-buffer=<0|1> " 1782 "ext-buffer=<0|1>"); 1783