/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 IGEL Co., Ltd.
 * Copyright(c) 2016-2018 Intel Corporation
 */
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>
#include <sys/epoll.h>

#include <rte_mbuf.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_vhost.h>
#include <rte_spinlock.h>

#include "rte_eth_vhost.h"

RTE_LOG_REGISTER(vhost_logtype, pmd.net.vhost, NOTICE);

#define VHOST_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)

enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};

#define ETH_VHOST_IFACE_ARG		"iface"
#define ETH_VHOST_QUEUES_ARG		"queues"
#define ETH_VHOST_CLIENT_ARG		"client"
#define ETH_VHOST_DEQUEUE_ZERO_COPY	"dequeue-zero-copy"
#define ETH_VHOST_IOMMU_SUPPORT		"iommu-support"
#define ETH_VHOST_POSTCOPY_SUPPORT	"postcopy-support"
#define ETH_VHOST_VIRTIO_NET_F_HOST_TSO	"tso"
#define ETH_VHOST_LINEAR_BUF		"linear-buffer"
#define ETH_VHOST_EXT_BUF		"ext-buffer"
#define VHOST_MAX_PKT_BURST 32

static const char *valid_arguments[] = {
	ETH_VHOST_IFACE_ARG,
	ETH_VHOST_QUEUES_ARG,
	ETH_VHOST_CLIENT_ARG,
	ETH_VHOST_DEQUEUE_ZERO_COPY,
	ETH_VHOST_IOMMU_SUPPORT,
	ETH_VHOST_POSTCOPY_SUPPORT,
	ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
	ETH_VHOST_LINEAR_BUF,
	ETH_VHOST_EXT_BUF,
	NULL
};

static struct rte_ether_addr base_eth_addr = {
	.addr_bytes = {
		0x56 /* V */,
		0x48 /* H */,
		0x4F /* O */,
		0x53 /* S */,
		0x54 /* T */,
		0x00
	}
};

enum vhost_xstats_pkts {
	VHOST_UNDERSIZE_PKT = 0,
	VHOST_64_PKT,
	VHOST_65_TO_127_PKT,
	VHOST_128_TO_255_PKT,
	VHOST_256_TO_511_PKT,
	VHOST_512_TO_1023_PKT,
	VHOST_1024_TO_1522_PKT,
	VHOST_1523_TO_MAX_PKT,
	VHOST_BROADCAST_PKT,
	VHOST_MULTICAST_PKT,
	VHOST_UNICAST_PKT,
	VHOST_ERRORS_PKT,
	VHOST_ERRORS_FRAGMENTED,
	VHOST_ERRORS_JABBER,
	VHOST_UNKNOWN_PROTOCOL,
	VHOST_XSTATS_MAX,
};

struct vhost_stats {
	uint64_t pkts;
	uint64_t bytes;
	uint64_t missed_pkts;
	uint64_t xstats[VHOST_XSTATS_MAX];
};

struct vhost_queue {
	int vid;
	rte_atomic32_t allow_queuing;
	rte_atomic32_t while_queuing;
	struct pmd_internal *internal;
	struct rte_mempool *mb_pool;
	uint16_t port;
	uint16_t virtqueue_id;
	struct vhost_stats stats;
	int intr_enable;
	rte_spinlock_t intr_lock;
};

struct pmd_internal {
	rte_atomic32_t dev_attached;
	char *iface_name;
	uint64_t flags;
	uint64_t disable_flags;
	uint16_t max_queues;
	int vid;
	rte_atomic32_t started;
	uint8_t vlan_strip;
};

struct internal_list {
	TAILQ_ENTRY(internal_list) next;
	struct rte_eth_dev *eth_dev;
};

TAILQ_HEAD(internal_list_head, internal_list);
static struct internal_list_head internal_list =
	TAILQ_HEAD_INITIALIZER(internal_list);

static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;

static struct rte_eth_link pmd_link = {
	.link_speed = 10000,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN
};

struct rte_vhost_vring_state {
	rte_spinlock_t lock;

	bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
	bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
	unsigned int index;
	unsigned int max_vring;
};

static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];

#define VHOST_XSTATS_NAME_SIZE 64

struct vhost_xstats_name_off {
	char name[VHOST_XSTATS_NAME_SIZE];
	uint64_t offset;
};

/* [rx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.pkts)},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.bytes)},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.missed_pkts)},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
	{"fragmented_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])},
	{"jabber_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])},
	{"unknown_protos_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])},
};

/* [tx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.pkts)},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.bytes)},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.missed_pkts)},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
};

#define VHOST_NB_XSTATS_RXPORT (sizeof(vhost_rxport_stat_strings) / \
				sizeof(vhost_rxport_stat_strings[0]))

#define VHOST_NB_XSTATS_TXPORT (sizeof(vhost_txport_stat_strings) / \
				sizeof(vhost_txport_stat_strings[0]))

static int
vhost_dev_xstats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq = NULL;
	unsigned int i = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}

	return 0;
}

static int
vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
			   struct rte_eth_xstat_name *xstats_names,
			   unsigned int limit __rte_unused)
{
	unsigned int t = 0;
	int count = 0;
	int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (!xstats_names)
		return nstats;
	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "rx_%s", vhost_rxport_stat_strings[t].name);
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "tx_%s", vhost_txport_stat_strings[t].name);
		count++;
	}
	return count;
}

static int
vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
		     unsigned int n)
{
	unsigned int i;
	unsigned int t;
	unsigned int count = 0;
	struct vhost_queue *vq = NULL;
	unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (n < nxstats)
		return nxstats;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
				+ vq->stats.xstats[VHOST_MULTICAST_PKT]);
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
				+ vq->stats.missed_pkts
				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
				+ vq->stats.xstats[VHOST_MULTICAST_PKT]);
	}
	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			vq = dev->data->rx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_rxport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			vq = dev->data->tx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_txport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	return count;
}

static inline void
vhost_count_multicast_broadcast(struct vhost_queue *vq,
				struct rte_mbuf *mbuf)
{
	struct rte_ether_addr *ea = NULL;
	struct vhost_stats *pstats = &vq->stats;

	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
	if (rte_is_multicast_ether_addr(ea)) {
		if (rte_is_broadcast_ether_addr(ea))
			pstats->xstats[VHOST_BROADCAST_PKT]++;
		else
			pstats->xstats[VHOST_MULTICAST_PKT]++;
	}
}

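/*
 * Size-bucket note for the helper below: for 64 < pkt_len < 1024 the
 * bucket index is derived from the position of the most significant bit
 * of pkt_len, i.e. index = 32 - __builtin_clz(pkt_len) - 5, which relies
 * on the ordering of enum vhost_xstats_pkts. For example, pkt_len = 100
 * has its MSB at bit 6, so __builtin_clz() returns 25 and
 * index = 32 - 25 - 5 = 2, i.e. VHOST_65_TO_127_PKT.
 */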
static void
vhost_update_packet_xstats(struct vhost_queue *vq,
			   struct rte_mbuf **bufs,
			   uint16_t count)
{
	uint32_t pkt_len = 0;
	uint64_t i = 0;
	uint64_t index;
	struct vhost_stats *pstats = &vq->stats;

	for (i = 0; i < count ; i++) {
		pkt_len = bufs[i]->pkt_len;
		if (pkt_len == 64) {
			pstats->xstats[VHOST_64_PKT]++;
		} else if (pkt_len > 64 && pkt_len < 1024) {
			index = (sizeof(pkt_len) * 8)
				- __builtin_clz(pkt_len) - 5;
			pstats->xstats[index]++;
		} else {
			if (pkt_len < 64)
				pstats->xstats[VHOST_UNDERSIZE_PKT]++;
			else if (pkt_len <= 1522)
				pstats->xstats[VHOST_1024_TO_1522_PKT]++;
			else if (pkt_len > 1522)
				pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
		}
		vhost_count_multicast_broadcast(vq, bufs[i]);
	}
}

static uint16_t
eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_rx = 0;
	uint16_t nb_receive = nb_bufs;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	/* Dequeue packets from guest TX queue */
	while (nb_receive) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_receive,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
						  r->mb_pool, &bufs[nb_rx],
						  num);

		nb_rx += nb_pkts;
		nb_receive -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_rx;

	for (i = 0; likely(i < nb_rx); i++) {
		bufs[i]->port = r->port;
		bufs[i]->vlan_tci = 0;

		if (r->internal->vlan_strip)
			rte_vlan_strip(bufs[i]);

		r->stats.bytes += bufs[i]->pkt_len;
	}

	vhost_update_packet_xstats(r, bufs, nb_rx);

out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_rx;
}

static uint16_t
eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_tx = 0;
	uint16_t nb_send = 0;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	for (i = 0; i < nb_bufs; i++) {
		struct rte_mbuf *m = bufs[i];

		/* Do VLAN tag insertion */
		if (m->ol_flags & PKT_TX_VLAN_PKT) {
			int error = rte_vlan_insert(&m);
			if (unlikely(error)) {
				rte_pktmbuf_free(m);
				continue;
			}
		}

		bufs[nb_send] = m;
		++nb_send;
	}

	/* Enqueue packets to guest RX queue */
	while (nb_send) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_send,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
						  &bufs[nb_tx], num);

		nb_tx += nb_pkts;
		nb_send -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_tx;
	r->stats.missed_pkts += nb_bufs - nb_tx;

	for (i = 0; likely(i < nb_tx); i++)
		r->stats.bytes += bufs[i]->pkt_len;

	vhost_update_packet_xstats(r, bufs, nb_tx);

	/* According to RFC2863 page42 section ifHCOutMulticastPkts and
	 * ifHCOutBroadcastPkts, the counters "multicast" and "broadcast"
	 * are increased when packets are not transmitted successfully.
	 */
	for (i = nb_tx; i < nb_bufs; i++)
		vhost_count_multicast_broadcast(r, bufs[i]);

	for (i = 0; likely(i < nb_tx); i++)
		rte_pktmbuf_free(bufs[i]);
out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_tx;
}

static inline struct internal_list *
find_internal_resource(char *ifname)
{
	int found = 0;
	struct internal_list *list;
	struct pmd_internal *internal;

	if (ifname == NULL)
		return NULL;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		internal = list->eth_dev->data->dev_private;
		if (!strcmp(internal->iface_name, ifname)) {
			found = 1;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	if (!found)
		return NULL;

	return list;
}

static int
eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
{
	struct rte_intr_handle *handle = eth_dev->intr_handle;
	struct rte_epoll_event rev;
	int epfd, ret;

	if (!handle)
		return 0;

	if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
		return 0;

	VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
			rxq_idx);

	if (handle->elist[rxq_idx].fd != -1)
		VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n",
				handle->elist[rxq_idx].fd);

	/*
	 * First remove invalid epoll event, and then install
	 * the new one. May be solved with a proper API in the
	 * future.
	 */
	epfd = handle->elist[rxq_idx].epfd;
	rev = handle->elist[rxq_idx];
	ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
			&handle->elist[rxq_idx]);
	if (ret) {
		VHOST_LOG(ERR, "Delete epoll event failed.\n");
		return ret;
	}

	rev.fd = handle->efds[rxq_idx];
	handle->elist[rxq_idx] = rev;
	ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
			&handle->elist[rxq_idx]);
	if (ret) {
		VHOST_LOG(ERR, "Add epoll event failed.\n");
		return ret;
	}

	return 0;
}

static int
eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int old_intr_enable, ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	rte_spinlock_lock(&vq->intr_lock);
	old_intr_enable = vq->intr_enable;
	vq->intr_enable = 1;
	ret = eth_vhost_update_intr(dev, qid);
	rte_spinlock_unlock(&vq->intr_lock);

	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid);
		vq->intr_enable = old_intr_enable;
		return ret;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
	rte_wmb();

	return ret;
}

static int
eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
	rte_wmb();

	vq->intr_enable = 0;

	return 0;
}

static void
eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
{
	struct rte_intr_handle *intr_handle = dev->intr_handle;

	if (intr_handle) {
		if (intr_handle->intr_vec)
			free(intr_handle->intr_vec);
		free(intr_handle);
	}

	dev->intr_handle = NULL;
}

static int
eth_vhost_install_intr(struct rte_eth_dev *dev)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int nb_rxq = dev->data->nb_rx_queues;
	int i;
	int ret;

	/* uninstall firstly if we are reconnecting */
	if (dev->intr_handle)
		eth_vhost_uninstall_intr(dev);

	dev->intr_handle = malloc(sizeof(*dev->intr_handle));
	if (!dev->intr_handle) {
		VHOST_LOG(ERR, "Fail to allocate intr_handle\n");
		return -ENOMEM;
	}
	memset(dev->intr_handle, 0, sizeof(*dev->intr_handle));

	dev->intr_handle->efd_counter_size = sizeof(uint64_t);

	dev->intr_handle->intr_vec =
		malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0]));

	if (!dev->intr_handle->intr_vec) {
		VHOST_LOG(ERR,
			"Failed to allocate memory for interrupt vector\n");
		free(dev->intr_handle);
		return -ENOMEM;
	}

	VHOST_LOG(INFO, "Prepare intr vec\n");
	for (i = 0; i < nb_rxq; i++) {
		dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
		dev->intr_handle->efds[i] = -1;
		vq = dev->data->rx_queues[i];
		if (!vq) {
			VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
			continue;
		}

		ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
		if (ret < 0) {
			VHOST_LOG(INFO,
				"Failed to get rxq-%d's vring, skip!\n", i);
			continue;
		}

		if (vring.kickfd < 0) {
			VHOST_LOG(INFO,
				"rxq-%d's kickfd is invalid, skip!\n", i);
			continue;
		}
		dev->intr_handle->efds[i] = vring.kickfd;
		VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
	}

	dev->intr_handle->nb_efd = nb_rxq;
	dev->intr_handle->max_intr = nb_rxq + 1;
	dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;

	return 0;
}

static void
update_queuing_status(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	struct vhost_queue *vq;
	unsigned int i;
	int allow_queuing = 1;

	if (!dev->data->rx_queues || !dev->data->tx_queues)
		return;

	if (rte_atomic32_read(&internal->started) == 0 ||
	    rte_atomic32_read(&internal->dev_attached) == 0)
		allow_queuing = 0;

	/* Wait until rx/tx_pkt_burst stops accessing vhost device */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}
}

static void
queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
{
	struct vhost_queue *vq;
	int i;

	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
		vq = eth_dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
		vq = eth_dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
}

static int
new_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	struct pmd_internal *internal;
	struct rte_eth_conf *dev_conf;
	unsigned i;
	char ifname[PATH_MAX];
#ifdef RTE_LIBRTE_VHOST_NUMA
	int newnode;
#endif

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(INFO, "Invalid device name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;
	dev_conf = &eth_dev->data->dev_conf;

#ifdef RTE_LIBRTE_VHOST_NUMA
	newnode = rte_vhost_get_numa_node(vid);
	if (newnode >= 0)
		eth_dev->data->numa_node = newnode;
#endif

	internal->vid = vid;
	if (rte_atomic32_read(&internal->started) == 1) {
		queue_setup(eth_dev, internal);

		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.");
				return -1;
			}
		}
	} else {
		VHOST_LOG(INFO, "RX/TX queues not exist yet\n");
	}

	for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
		rte_vhost_enable_guest_notification(vid, i, 0);

	rte_vhost_get_mtu(vid, &eth_dev->data->mtu);

	eth_dev->data->dev_link.link_status = ETH_LINK_UP;

	rte_atomic32_set(&internal->dev_attached, 1);
	update_queuing_status(eth_dev);

	VHOST_LOG(INFO, "Vhost device %d created\n", vid);

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);

	return 0;
}

static void
destroy_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct pmd_internal *internal;
	struct vhost_queue *vq;
	struct internal_list *list;
	char ifname[PATH_MAX];
	unsigned i;
	struct rte_vhost_vring_state *state;

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return;
	}
	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;

	rte_atomic32_set(&internal->dev_attached, 0);
	update_queuing_status(eth_dev);

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;

	if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
			vq = eth_dev->data->rx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
		for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
			vq = eth_dev->data->tx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
	}

	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		state->cur[i] = false;
		state->seen[i] = false;
	}
	state->max_vring = 0;
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
	eth_vhost_uninstall_intr(eth_dev);

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static int
vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
{
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct vhost_queue *vq;
	struct rte_vhost_vring vring;
	int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
	int ret = 0;

	/*
	 * The vring kickfd may be changed after the new device notification.
	 * Update it when the vring state is updated.
	 */
	if (rx_idx >= 0 && rx_idx < eth_dev->data->nb_rx_queues &&
	    rte_atomic32_read(&internal->dev_attached) &&
	    rte_atomic32_read(&internal->started) &&
	    dev_conf->intr_conf.rxq) {
		ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
		if (ret) {
			VHOST_LOG(ERR, "Failed to get vring %d information.\n",
					vring_id);
			return ret;
		}
		eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;

		vq = eth_dev->data->rx_queues[rx_idx];
		if (!vq) {
			VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
			return -1;
		}

		rte_spinlock_lock(&vq->intr_lock);
		if (vq->intr_enable)
			ret = eth_vhost_update_intr(eth_dev, rx_idx);
		rte_spinlock_unlock(&vq->intr_lock);
	}

	return ret;
}

static int
vring_state_changed(int vid, uint16_t vring, int enable)
{
	struct rte_vhost_vring_state *state;
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	char ifname[PATH_MAX];

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	/* won't be NULL */
	state = vring_states[eth_dev->data->port_id];

	if (enable && vring_conf_update(vid, eth_dev, vring))
		VHOST_LOG(INFO, "Failed to update vring-%d configuration.\n",
			  (int)vring);

	rte_spinlock_lock(&state->lock);
	if (state->cur[vring] == enable) {
		rte_spinlock_unlock(&state->lock);
		return 0;
	}
	state->cur[vring] = enable;
	state->max_vring = RTE_MAX(vring, state->max_vring);
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "vring%u is %s\n",
			vring, enable ? "enabled" : "disabled");

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);

	return 0;
}

static struct vhost_device_ops vhost_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.vring_state_changed = vring_state_changed,
};

static int
vhost_driver_setup(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct internal_list *list = NULL;
	struct rte_vhost_vring_state *vring_state = NULL;
	unsigned int numa_node = eth_dev->device->numa_node;
	const char *name = eth_dev->device->name;

	/* Don't try to setup again if it has already been done. */
	list = find_internal_resource(internal->iface_name);
	if (list)
		return 0;

	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
	if (list == NULL)
		return -1;

	vring_state = rte_zmalloc_socket(name, sizeof(*vring_state),
					 0, numa_node);
	if (vring_state == NULL)
		goto free_list;

	list->eth_dev = eth_dev;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_INSERT_TAIL(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);

	rte_spinlock_init(&vring_state->lock);
	vring_states[eth_dev->data->port_id] = vring_state;

	if (rte_vhost_driver_register(internal->iface_name, internal->flags))
		goto list_remove;

	if (internal->disable_flags) {
		if (rte_vhost_driver_disable_features(internal->iface_name,
						      internal->disable_flags))
			goto drv_unreg;
	}

	if (rte_vhost_driver_callback_register(internal->iface_name,
					       &vhost_ops) < 0) {
		VHOST_LOG(ERR, "Can't register callbacks\n");
		goto drv_unreg;
	}

	if (rte_vhost_driver_start(internal->iface_name) < 0) {
		VHOST_LOG(ERR, "Failed to start driver for %s\n",
			  internal->iface_name);
		goto drv_unreg;
	}

	return 0;

drv_unreg:
	rte_vhost_driver_unregister(internal->iface_name);
list_remove:
	vring_states[eth_dev->data->port_id] = NULL;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_REMOVE(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);
	rte_free(vring_state);
free_list:
	rte_free(list);

	return -1;
}

int
rte_eth_vhost_get_queue_event(uint16_t port_id,
		struct rte_eth_vhost_queue_event *event)
{
	struct rte_vhost_vring_state *state;
	unsigned int i;
	int idx;

	if (port_id >= RTE_MAX_ETHPORTS) {
		VHOST_LOG(ERR, "Invalid port id\n");
		return -1;
	}

	state = vring_states[port_id];
	if (!state) {
		VHOST_LOG(ERR, "Unused port\n");
		return -1;
	}

	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		idx = state->index++ % (state->max_vring + 1);

		if (state->cur[idx] != state->seen[idx]) {
			state->seen[idx] = state->cur[idx];
			event->queue_id = idx / 2;
			event->rx = idx & 1;
			event->enable = state->cur[idx];
			rte_spinlock_unlock(&state->lock);
			return 0;
		}
	}
	rte_spinlock_unlock(&state->lock);

	return -1;
}

int
rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
{
	struct internal_list *list;
	struct rte_eth_dev *eth_dev;
	struct vhost_queue *vq;
	int vid = -1;

	if (!rte_eth_dev_is_valid_port(port_id))
		return -1;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		eth_dev = list->eth_dev;
		if (eth_dev->data->port_id == port_id) {
			vq = eth_dev->data->rx_queues[0];
			if (vq) {
				vid = vq->vid;
			}
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	return vid;
}

static int
eth_dev_configure(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;

	/* NOTE: the same process has to operate a vhost interface
	 * from beginning to end (from eth_dev configure to eth_dev close).
	 * It is user's responsibility at the moment.
	 */
	if (vhost_driver_setup(dev) < 0)
		return -1;

	internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);

	return 0;
}

static int
eth_dev_start(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;

	queue_setup(eth_dev, internal);

	if (rte_atomic32_read(&internal->dev_attached) == 1) {
		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.");
				return -1;
			}
		}
	}

	rte_atomic32_set(&internal->started, 1);
	update_queuing_status(eth_dev);

	return 0;
}

static void
eth_dev_stop(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;

	rte_atomic32_set(&internal->started, 0);
	update_queuing_status(dev);
}

static void
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal;
	struct internal_list *list;
	unsigned int i;

	internal = dev->data->dev_private;
	if (!internal)
		return;

	eth_dev_stop(dev);

	list = find_internal_resource(internal->iface_name);
	if (list) {
		rte_vhost_driver_unregister(internal->iface_name);
		pthread_mutex_lock(&internal_list_lock);
		TAILQ_REMOVE(&internal_list, list, next);
		pthread_mutex_unlock(&internal_list_lock);
		rte_free(list);
	}

	if (dev->data->rx_queues)
		for (i = 0; i < dev->data->nb_rx_queues; i++)
			rte_free(dev->data->rx_queues[i]);

	if (dev->data->tx_queues)
		for (i = 0; i < dev->data->nb_tx_queues; i++)
			rte_free(dev->data->tx_queues[i]);

	rte_free(internal->iface_name);
	rte_free(internal);

	dev->data->dev_private = NULL;

	rte_free(vring_states[dev->data->port_id]);
	vring_states[dev->data->port_id] = NULL;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		   uint16_t nb_rx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mb_pool)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n");
		return -ENOMEM;
	}

	vq->mb_pool = mb_pool;
	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
	rte_spinlock_init(&vq->intr_lock);
	dev->data->rx_queues[rx_queue_id] = vq;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n");
		return -ENOMEM;
	}

	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
	rte_spinlock_init(&vq->intr_lock);
	dev->data->tx_queues[tx_queue_id] = vq;

	return 0;
}

static int
eth_dev_info(struct rte_eth_dev *dev,
	     struct rte_eth_dev_info *dev_info)
{
	struct pmd_internal *internal;

	internal = dev->data->dev_private;
	if (internal == NULL) {
		VHOST_LOG(ERR, "Invalid device specified\n");
		return -ENODEV;
	}

	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)-1;
	dev_info->max_rx_queues = internal->max_queues;
	dev_info->max_tx_queues = internal->max_queues;
	dev_info->min_rx_bufsize = 0;

	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
				DEV_TX_OFFLOAD_VLAN_INSERT;
	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;

	return 0;
}

static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned i;
	unsigned long rx_total = 0, tx_total = 0;
	unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
	struct vhost_queue *vq;

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		stats->q_ipackets[i] = vq->stats.pkts;
		rx_total += stats->q_ipackets[i];

		stats->q_ibytes[i] = vq->stats.bytes;
		rx_total_bytes += stats->q_ibytes[i];
	}

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		stats->q_opackets[i] = vq->stats.pkts;
		tx_total += stats->q_opackets[i];

		stats->q_obytes[i] = vq->stats.bytes;
		tx_total_bytes += stats->q_obytes[i];
	}

	stats->ipackets = rx_total;
	stats->opackets = tx_total;
	stats->ibytes = rx_total_bytes;
	stats->obytes = tx_total_bytes;

	return 0;
}

static int
eth_stats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq;
	unsigned i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
		vq->stats.missed_pkts = 0;
	}

	return 0;
}

static void
eth_queue_release(void *q)
{
	rte_free(q);
}

static int
eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused)
{
	/*
	 * vHost does not hang onto mbuf. eth_vhost_tx() copies packet data
	 * and releases mbuf, so nothing to cleanup.
	 */
	return 0;
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}

static uint32_t
eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct vhost_queue *vq;

	vq = dev->data->rx_queues[rx_queue_id];
	if (vq == NULL)
		return 0;

	return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.tx_done_cleanup = eth_tx_done_cleanup,
	.rx_queue_count = eth_rx_queue_count,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
	.xstats_reset = vhost_dev_xstats_reset,
	.xstats_get = vhost_dev_xstats_get,
	.xstats_get_names = vhost_dev_xstats_get_names,
	.rx_queue_intr_enable = eth_rxq_intr_enable,
	.rx_queue_intr_disable = eth_rxq_intr_disable,
};

static int
eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
	int16_t queues, const unsigned int numa_node, uint64_t flags,
	uint64_t disable_flags)
{
	const char *name = rte_vdev_device_name(dev);
	struct rte_eth_dev_data *data;
	struct pmd_internal *internal = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct rte_ether_addr *eth_addr = NULL;

	VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n",
		numa_node);

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
	if (eth_dev == NULL)
		goto error;
	data = eth_dev->data;

	eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
	if (eth_addr == NULL)
		goto error;
	data->mac_addrs = eth_addr;
	*eth_addr = base_eth_addr;
	eth_addr->addr_bytes[5] = eth_dev->data->port_id;

	/* now put it all together
	 * - store queue data in internal,
	 * - point eth_dev_data to internals
	 * - and point eth_dev structure to new eth_dev_data structure
	 */
	internal = eth_dev->data->dev_private;
	internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1,
						 0, numa_node);
	if (internal->iface_name == NULL)
		goto error;
	strcpy(internal->iface_name, iface_name);

	data->nb_rx_queues = queues;
	data->nb_tx_queues = queues;
	internal->max_queues = queues;
	internal->vid = -1;
	internal->flags = flags;
	internal->disable_flags = disable_flags;
	data->dev_link = pmd_link;
	data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_CLOSE_REMOVE;
	data->promiscuous = 1;
	data->all_multicast = 1;

	eth_dev->dev_ops = &ops;

	/* finally assign rx and tx ops */
	eth_dev->rx_pkt_burst = eth_vhost_rx;
	eth_dev->tx_pkt_burst = eth_vhost_tx;

	rte_eth_dev_probing_finish(eth_dev);
	return 0;

error:
	if (internal)
		rte_free(internal->iface_name);
	rte_eth_dev_release_port(eth_dev);

	return -1;
}

static inline int
open_iface(const char *key __rte_unused, const char *value, void *extra_args)
{
	const char **iface_name = extra_args;

	if (value == NULL)
		return -1;

	*iface_name = value;

	return 0;
}

static inline int
open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
	uint16_t *n = extra_args;

	if (value == NULL || extra_args == NULL)
		return -EINVAL;

	*n = (uint16_t)strtoul(value, NULL, 0);
	if (*n == USHRT_MAX && errno == ERANGE)
		return -1;

	return 0;
}

static int
rte_pmd_vhost_probe(struct rte_vdev_device *dev)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;
	char *iface_name;
	uint16_t queues;
	uint64_t flags = 0;
	uint64_t disable_flags = 0;
	int client_mode = 0;
	int dequeue_zero_copy = 0;
	int iommu_support = 0;
	int postcopy_support = 0;
	int tso = 0;
	int linear_buf = 0;
	int ext_buf = 0;
	struct rte_eth_dev *eth_dev;
	const char *name = rte_vdev_device_name(dev);

	VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			VHOST_LOG(ERR, "Failed to probe %s\n", name);
			return -1;
		}
		eth_dev->rx_pkt_burst = eth_vhost_rx;
		eth_dev->tx_pkt_burst = eth_vhost_tx;
		eth_dev->dev_ops = &ops;
		if (dev->device.numa_node == SOCKET_ID_ANY)
			dev->device.numa_node = rte_socket_id();
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL)
		return -1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
					 &open_iface, &iface_name);
		if (ret < 0)
			goto out_free;
	} else {
		ret = -1;
		goto out_free;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
					 &open_int, &queues);
		if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
			goto out_free;

	} else
		queues = 1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
					 &open_int, &client_mode);
		if (ret < 0)
			goto out_free;

		if (client_mode)
			flags |= RTE_VHOST_USER_CLIENT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY,
					 &open_int, &dequeue_zero_copy);
		if (ret < 0)
			goto out_free;

		if (dequeue_zero_copy)
			flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
					 &open_int, &iommu_support);
		if (ret < 0)
			goto out_free;

		if (iommu_support)
			flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
					 &open_int, &postcopy_support);
		if (ret < 0)
			goto out_free;

		if (postcopy_support)
			flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_VIRTIO_NET_F_HOST_TSO) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
				&open_int, &tso);
		if (ret < 0)
			goto out_free;

		if (tso == 0) {
			disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
			disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
		}
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_LINEAR_BUF) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_LINEAR_BUF,
				&open_int, &linear_buf);
		if (ret < 0)
			goto out_free;

		if (linear_buf == 1)
			flags |= RTE_VHOST_USER_LINEARBUF_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_EXT_BUF) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_EXT_BUF,
				&open_int, &ext_buf);
		if (ret < 0)
			goto out_free;

		if (ext_buf == 1)
			flags |= RTE_VHOST_USER_EXTBUF_SUPPORT;
	}

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

	ret = eth_dev_vhost_create(dev, iface_name, queues,
				   dev->device.numa_node, flags, disable_flags);
	if (ret == -1)
		VHOST_LOG(ERR, "Failed to create %s\n", name);

out_free:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
rte_pmd_vhost_remove(struct rte_vdev_device *dev)
{
	const char *name;
	struct rte_eth_dev *eth_dev = NULL;

	name = rte_vdev_device_name(dev);
	VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name);

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return 0;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return rte_eth_dev_release_port(eth_dev);

	eth_dev_close(eth_dev);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_vhost_drv = {
	.probe = rte_pmd_vhost_probe,
	.remove = rte_pmd_vhost_remove,
};

RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
	"iface=<ifc> "
	"queues=<int> "
	"client=<0|1> "
	"dequeue-zero-copy=<0|1> "
	"iommu-support=<0|1> "
	"postcopy-support=<0|1> "
	"tso=<0|1> "
	"linear-buffer=<0|1> "
	"ext-buffer=<0|1>");
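
/*
 * Usage sketch (illustrative only): the devargs registered above are
 * typically supplied on the EAL command line when the vdev is created,
 * for example with testpmd:
 *
 *	testpmd -l 0-3 -n 4 \
 *		--vdev 'net_vhost0,iface=/tmp/sock0,queues=1,client=0' \
 *		-- -i
 *
 * The socket path "/tmp/sock0" and the core/channel counts are placeholder
 * values. "iface" names the vhost-user socket this PMD registers via
 * rte_vhost_driver_register(), and "client=1" selects vhost-user client
 * mode (RTE_VHOST_USER_CLIENT).
 */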