1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 IGEL Co., Ltd. 3 * Copyright(c) 2016-2018 Intel Corporation 4 */ 5 #include <unistd.h> 6 #include <pthread.h> 7 #include <stdbool.h> 8 9 #include <rte_mbuf.h> 10 #include <rte_ethdev_driver.h> 11 #include <rte_ethdev_vdev.h> 12 #include <rte_malloc.h> 13 #include <rte_memcpy.h> 14 #include <rte_bus_vdev.h> 15 #include <rte_kvargs.h> 16 #include <rte_vhost.h> 17 #include <rte_spinlock.h> 18 19 #include "rte_eth_vhost.h" 20 21 static int vhost_logtype; 22 23 #define VHOST_LOG(level, ...) \ 24 rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__) 25 26 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; 27 28 #define ETH_VHOST_IFACE_ARG "iface" 29 #define ETH_VHOST_QUEUES_ARG "queues" 30 #define ETH_VHOST_CLIENT_ARG "client" 31 #define ETH_VHOST_DEQUEUE_ZERO_COPY "dequeue-zero-copy" 32 #define ETH_VHOST_IOMMU_SUPPORT "iommu-support" 33 #define ETH_VHOST_POSTCOPY_SUPPORT "postcopy-support" 34 #define VHOST_MAX_PKT_BURST 32 35 36 static const char *valid_arguments[] = { 37 ETH_VHOST_IFACE_ARG, 38 ETH_VHOST_QUEUES_ARG, 39 ETH_VHOST_CLIENT_ARG, 40 ETH_VHOST_DEQUEUE_ZERO_COPY, 41 ETH_VHOST_IOMMU_SUPPORT, 42 ETH_VHOST_POSTCOPY_SUPPORT, 43 NULL 44 }; 45 46 static struct rte_ether_addr base_eth_addr = { 47 .addr_bytes = { 48 0x56 /* V */, 49 0x48 /* H */, 50 0x4F /* O */, 51 0x53 /* S */, 52 0x54 /* T */, 53 0x00 54 } 55 }; 56 57 enum vhost_xstats_pkts { 58 VHOST_UNDERSIZE_PKT = 0, 59 VHOST_64_PKT, 60 VHOST_65_TO_127_PKT, 61 VHOST_128_TO_255_PKT, 62 VHOST_256_TO_511_PKT, 63 VHOST_512_TO_1023_PKT, 64 VHOST_1024_TO_1522_PKT, 65 VHOST_1523_TO_MAX_PKT, 66 VHOST_BROADCAST_PKT, 67 VHOST_MULTICAST_PKT, 68 VHOST_UNICAST_PKT, 69 VHOST_ERRORS_PKT, 70 VHOST_ERRORS_FRAGMENTED, 71 VHOST_ERRORS_JABBER, 72 VHOST_UNKNOWN_PROTOCOL, 73 VHOST_XSTATS_MAX, 74 }; 75 76 struct vhost_stats { 77 uint64_t pkts; 78 uint64_t bytes; 79 uint64_t missed_pkts; 80 uint64_t xstats[VHOST_XSTATS_MAX]; 81 }; 82 83 struct vhost_queue { 84 int vid; 85 rte_atomic32_t allow_queuing; 86 rte_atomic32_t while_queuing; 87 struct pmd_internal *internal; 88 struct rte_mempool *mb_pool; 89 uint16_t port; 90 uint16_t virtqueue_id; 91 struct vhost_stats stats; 92 }; 93 94 struct pmd_internal { 95 rte_atomic32_t dev_attached; 96 char *dev_name; 97 char *iface_name; 98 uint16_t max_queues; 99 int vid; 100 rte_atomic32_t started; 101 uint8_t vlan_strip; 102 }; 103 104 struct internal_list { 105 TAILQ_ENTRY(internal_list) next; 106 struct rte_eth_dev *eth_dev; 107 }; 108 109 TAILQ_HEAD(internal_list_head, internal_list); 110 static struct internal_list_head internal_list = 111 TAILQ_HEAD_INITIALIZER(internal_list); 112 113 static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER; 114 115 static struct rte_eth_link pmd_link = { 116 .link_speed = 10000, 117 .link_duplex = ETH_LINK_FULL_DUPLEX, 118 .link_status = ETH_LINK_DOWN 119 }; 120 121 struct rte_vhost_vring_state { 122 rte_spinlock_t lock; 123 124 bool cur[RTE_MAX_QUEUES_PER_PORT * 2]; 125 bool seen[RTE_MAX_QUEUES_PER_PORT * 2]; 126 unsigned int index; 127 unsigned int max_vring; 128 }; 129 130 static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS]; 131 132 #define VHOST_XSTATS_NAME_SIZE 64 133 134 struct vhost_xstats_name_off { 135 char name[VHOST_XSTATS_NAME_SIZE]; 136 uint64_t offset; 137 }; 138 139 /* [rx]_is prepended to the name string here */ 140 static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = { 141 {"good_packets", 142 offsetof(struct vhost_queue, stats.pkts)}, 143 {"total_bytes", 144 offsetof(struct vhost_queue, stats.bytes)}, 145 {"missed_pkts", 146 offsetof(struct vhost_queue, stats.missed_pkts)}, 147 {"broadcast_packets", 148 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])}, 149 {"multicast_packets", 150 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])}, 151 {"unicast_packets", 152 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])}, 153 {"undersize_packets", 154 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])}, 155 {"size_64_packets", 156 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])}, 157 {"size_65_to_127_packets", 158 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])}, 159 {"size_128_to_255_packets", 160 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])}, 161 {"size_256_to_511_packets", 162 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])}, 163 {"size_512_to_1023_packets", 164 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])}, 165 {"size_1024_to_1522_packets", 166 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])}, 167 {"size_1523_to_max_packets", 168 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])}, 169 {"errors_with_bad_CRC", 170 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])}, 171 {"fragmented_errors", 172 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])}, 173 {"jabber_errors", 174 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])}, 175 {"unknown_protos_packets", 176 offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])}, 177 }; 178 179 /* [tx]_ is prepended to the name string here */ 180 static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = { 181 {"good_packets", 182 offsetof(struct vhost_queue, stats.pkts)}, 183 {"total_bytes", 184 offsetof(struct vhost_queue, stats.bytes)}, 185 {"missed_pkts", 186 offsetof(struct vhost_queue, stats.missed_pkts)}, 187 {"broadcast_packets", 188 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])}, 189 {"multicast_packets", 190 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])}, 191 {"unicast_packets", 192 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])}, 193 {"undersize_packets", 194 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])}, 195 {"size_64_packets", 196 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])}, 197 {"size_65_to_127_packets", 198 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])}, 199 {"size_128_to_255_packets", 200 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])}, 201 {"size_256_to_511_packets", 202 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])}, 203 {"size_512_to_1023_packets", 204 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])}, 205 {"size_1024_to_1522_packets", 206 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])}, 207 {"size_1523_to_max_packets", 208 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])}, 209 {"errors_with_bad_CRC", 210 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])}, 211 }; 212 213 #define VHOST_NB_XSTATS_RXPORT (sizeof(vhost_rxport_stat_strings) / \ 214 sizeof(vhost_rxport_stat_strings[0])) 215 216 #define VHOST_NB_XSTATS_TXPORT (sizeof(vhost_txport_stat_strings) / \ 217 sizeof(vhost_txport_stat_strings[0])) 218 219 static int 220 vhost_dev_xstats_reset(struct rte_eth_dev *dev) 221 { 222 struct vhost_queue *vq = NULL; 223 unsigned int i = 0; 224 225 for (i = 0; i < dev->data->nb_rx_queues; i++) { 226 vq = dev->data->rx_queues[i]; 227 if (!vq) 228 continue; 229 memset(&vq->stats, 0, sizeof(vq->stats)); 230 } 231 for (i = 0; i < dev->data->nb_tx_queues; i++) { 232 vq = dev->data->tx_queues[i]; 233 if (!vq) 234 continue; 235 memset(&vq->stats, 0, sizeof(vq->stats)); 236 } 237 238 return 0; 239 } 240 241 static int 242 vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused, 243 struct rte_eth_xstat_name *xstats_names, 244 unsigned int limit __rte_unused) 245 { 246 unsigned int t = 0; 247 int count = 0; 248 int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT; 249 250 if (!xstats_names) 251 return nstats; 252 for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) { 253 snprintf(xstats_names[count].name, 254 sizeof(xstats_names[count].name), 255 "rx_%s", vhost_rxport_stat_strings[t].name); 256 count++; 257 } 258 for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) { 259 snprintf(xstats_names[count].name, 260 sizeof(xstats_names[count].name), 261 "tx_%s", vhost_txport_stat_strings[t].name); 262 count++; 263 } 264 return count; 265 } 266 267 static int 268 vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, 269 unsigned int n) 270 { 271 unsigned int i; 272 unsigned int t; 273 unsigned int count = 0; 274 struct vhost_queue *vq = NULL; 275 unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT; 276 277 if (n < nxstats) 278 return nxstats; 279 280 for (i = 0; i < dev->data->nb_rx_queues; i++) { 281 vq = dev->data->rx_queues[i]; 282 if (!vq) 283 continue; 284 vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts 285 - (vq->stats.xstats[VHOST_BROADCAST_PKT] 286 + vq->stats.xstats[VHOST_MULTICAST_PKT]); 287 } 288 for (i = 0; i < dev->data->nb_tx_queues; i++) { 289 vq = dev->data->tx_queues[i]; 290 if (!vq) 291 continue; 292 vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts 293 + vq->stats.missed_pkts 294 - (vq->stats.xstats[VHOST_BROADCAST_PKT] 295 + vq->stats.xstats[VHOST_MULTICAST_PKT]); 296 } 297 for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) { 298 xstats[count].value = 0; 299 for (i = 0; i < dev->data->nb_rx_queues; i++) { 300 vq = dev->data->rx_queues[i]; 301 if (!vq) 302 continue; 303 xstats[count].value += 304 *(uint64_t *)(((char *)vq) 305 + vhost_rxport_stat_strings[t].offset); 306 } 307 xstats[count].id = count; 308 count++; 309 } 310 for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) { 311 xstats[count].value = 0; 312 for (i = 0; i < dev->data->nb_tx_queues; i++) { 313 vq = dev->data->tx_queues[i]; 314 if (!vq) 315 continue; 316 xstats[count].value += 317 *(uint64_t *)(((char *)vq) 318 + vhost_txport_stat_strings[t].offset); 319 } 320 xstats[count].id = count; 321 count++; 322 } 323 return count; 324 } 325 326 static inline void 327 vhost_count_multicast_broadcast(struct vhost_queue *vq, 328 struct rte_mbuf *mbuf) 329 { 330 struct rte_ether_addr *ea = NULL; 331 struct vhost_stats *pstats = &vq->stats; 332 333 ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *); 334 if (rte_is_multicast_ether_addr(ea)) { 335 if (rte_is_broadcast_ether_addr(ea)) 336 pstats->xstats[VHOST_BROADCAST_PKT]++; 337 else 338 pstats->xstats[VHOST_MULTICAST_PKT]++; 339 } 340 } 341 342 static void 343 vhost_update_packet_xstats(struct vhost_queue *vq, 344 struct rte_mbuf **bufs, 345 uint16_t count) 346 { 347 uint32_t pkt_len = 0; 348 uint64_t i = 0; 349 uint64_t index; 350 struct vhost_stats *pstats = &vq->stats; 351 352 for (i = 0; i < count ; i++) { 353 pkt_len = bufs[i]->pkt_len; 354 if (pkt_len == 64) { 355 pstats->xstats[VHOST_64_PKT]++; 356 } else if (pkt_len > 64 && pkt_len < 1024) { 357 index = (sizeof(pkt_len) * 8) 358 - __builtin_clz(pkt_len) - 5; 359 pstats->xstats[index]++; 360 } else { 361 if (pkt_len < 64) 362 pstats->xstats[VHOST_UNDERSIZE_PKT]++; 363 else if (pkt_len <= 1522) 364 pstats->xstats[VHOST_1024_TO_1522_PKT]++; 365 else if (pkt_len > 1522) 366 pstats->xstats[VHOST_1523_TO_MAX_PKT]++; 367 } 368 vhost_count_multicast_broadcast(vq, bufs[i]); 369 } 370 } 371 372 static uint16_t 373 eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) 374 { 375 struct vhost_queue *r = q; 376 uint16_t i, nb_rx = 0; 377 uint16_t nb_receive = nb_bufs; 378 379 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 380 return 0; 381 382 rte_atomic32_set(&r->while_queuing, 1); 383 384 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 385 goto out; 386 387 /* Dequeue packets from guest TX queue */ 388 while (nb_receive) { 389 uint16_t nb_pkts; 390 uint16_t num = (uint16_t)RTE_MIN(nb_receive, 391 VHOST_MAX_PKT_BURST); 392 393 nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id, 394 r->mb_pool, &bufs[nb_rx], 395 num); 396 397 nb_rx += nb_pkts; 398 nb_receive -= nb_pkts; 399 if (nb_pkts < num) 400 break; 401 } 402 403 r->stats.pkts += nb_rx; 404 405 for (i = 0; likely(i < nb_rx); i++) { 406 bufs[i]->port = r->port; 407 bufs[i]->vlan_tci = 0; 408 409 if (r->internal->vlan_strip) 410 rte_vlan_strip(bufs[i]); 411 412 r->stats.bytes += bufs[i]->pkt_len; 413 } 414 415 vhost_update_packet_xstats(r, bufs, nb_rx); 416 417 out: 418 rte_atomic32_set(&r->while_queuing, 0); 419 420 return nb_rx; 421 } 422 423 static uint16_t 424 eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) 425 { 426 struct vhost_queue *r = q; 427 uint16_t i, nb_tx = 0; 428 uint16_t nb_send = 0; 429 430 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 431 return 0; 432 433 rte_atomic32_set(&r->while_queuing, 1); 434 435 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 436 goto out; 437 438 for (i = 0; i < nb_bufs; i++) { 439 struct rte_mbuf *m = bufs[i]; 440 441 /* Do VLAN tag insertion */ 442 if (m->ol_flags & PKT_TX_VLAN_PKT) { 443 int error = rte_vlan_insert(&m); 444 if (unlikely(error)) { 445 rte_pktmbuf_free(m); 446 continue; 447 } 448 } 449 450 bufs[nb_send] = m; 451 ++nb_send; 452 } 453 454 /* Enqueue packets to guest RX queue */ 455 while (nb_send) { 456 uint16_t nb_pkts; 457 uint16_t num = (uint16_t)RTE_MIN(nb_send, 458 VHOST_MAX_PKT_BURST); 459 460 nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id, 461 &bufs[nb_tx], num); 462 463 nb_tx += nb_pkts; 464 nb_send -= nb_pkts; 465 if (nb_pkts < num) 466 break; 467 } 468 469 r->stats.pkts += nb_tx; 470 r->stats.missed_pkts += nb_bufs - nb_tx; 471 472 for (i = 0; likely(i < nb_tx); i++) 473 r->stats.bytes += bufs[i]->pkt_len; 474 475 vhost_update_packet_xstats(r, bufs, nb_tx); 476 477 /* According to RFC2863 page42 section ifHCOutMulticastPkts and 478 * ifHCOutBroadcastPkts, the counters "multicast" and "broadcast" 479 * are increased when packets are not transmitted successfully. 480 */ 481 for (i = nb_tx; i < nb_bufs; i++) 482 vhost_count_multicast_broadcast(r, bufs[i]); 483 484 for (i = 0; likely(i < nb_tx); i++) 485 rte_pktmbuf_free(bufs[i]); 486 out: 487 rte_atomic32_set(&r->while_queuing, 0); 488 489 return nb_tx; 490 } 491 492 static int 493 eth_dev_configure(struct rte_eth_dev *dev __rte_unused) 494 { 495 struct pmd_internal *internal = dev->data->dev_private; 496 const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; 497 498 internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP); 499 500 return 0; 501 } 502 503 static inline struct internal_list * 504 find_internal_resource(char *ifname) 505 { 506 int found = 0; 507 struct internal_list *list; 508 struct pmd_internal *internal; 509 510 if (ifname == NULL) 511 return NULL; 512 513 pthread_mutex_lock(&internal_list_lock); 514 515 TAILQ_FOREACH(list, &internal_list, next) { 516 internal = list->eth_dev->data->dev_private; 517 if (!strcmp(internal->iface_name, ifname)) { 518 found = 1; 519 break; 520 } 521 } 522 523 pthread_mutex_unlock(&internal_list_lock); 524 525 if (!found) 526 return NULL; 527 528 return list; 529 } 530 531 static int 532 eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid) 533 { 534 struct rte_vhost_vring vring; 535 struct vhost_queue *vq; 536 int ret = 0; 537 538 vq = dev->data->rx_queues[qid]; 539 if (!vq) { 540 VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid); 541 return -1; 542 } 543 544 ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring); 545 if (ret < 0) { 546 VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid); 547 return ret; 548 } 549 VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid); 550 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1); 551 rte_wmb(); 552 553 return ret; 554 } 555 556 static int 557 eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid) 558 { 559 struct rte_vhost_vring vring; 560 struct vhost_queue *vq; 561 int ret = 0; 562 563 vq = dev->data->rx_queues[qid]; 564 if (!vq) { 565 VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid); 566 return -1; 567 } 568 569 ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring); 570 if (ret < 0) { 571 VHOST_LOG(ERR, "Failed to get rxq%d's vring", qid); 572 return ret; 573 } 574 VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid); 575 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0); 576 rte_wmb(); 577 578 return 0; 579 } 580 581 static void 582 eth_vhost_uninstall_intr(struct rte_eth_dev *dev) 583 { 584 struct rte_intr_handle *intr_handle = dev->intr_handle; 585 586 if (intr_handle) { 587 if (intr_handle->intr_vec) 588 free(intr_handle->intr_vec); 589 free(intr_handle); 590 } 591 592 dev->intr_handle = NULL; 593 } 594 595 static int 596 eth_vhost_install_intr(struct rte_eth_dev *dev) 597 { 598 struct rte_vhost_vring vring; 599 struct vhost_queue *vq; 600 int count = 0; 601 int nb_rxq = dev->data->nb_rx_queues; 602 int i; 603 int ret; 604 605 /* uninstall firstly if we are reconnecting */ 606 if (dev->intr_handle) 607 eth_vhost_uninstall_intr(dev); 608 609 dev->intr_handle = malloc(sizeof(*dev->intr_handle)); 610 if (!dev->intr_handle) { 611 VHOST_LOG(ERR, "Fail to allocate intr_handle\n"); 612 return -ENOMEM; 613 } 614 memset(dev->intr_handle, 0, sizeof(*dev->intr_handle)); 615 616 dev->intr_handle->efd_counter_size = sizeof(uint64_t); 617 618 dev->intr_handle->intr_vec = 619 malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0])); 620 621 if (!dev->intr_handle->intr_vec) { 622 VHOST_LOG(ERR, 623 "Failed to allocate memory for interrupt vector\n"); 624 free(dev->intr_handle); 625 return -ENOMEM; 626 } 627 628 VHOST_LOG(INFO, "Prepare intr vec\n"); 629 for (i = 0; i < nb_rxq; i++) { 630 vq = dev->data->rx_queues[i]; 631 if (!vq) { 632 VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i); 633 continue; 634 } 635 636 ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring); 637 if (ret < 0) { 638 VHOST_LOG(INFO, 639 "Failed to get rxq-%d's vring, skip!\n", i); 640 continue; 641 } 642 643 if (vring.kickfd < 0) { 644 VHOST_LOG(INFO, 645 "rxq-%d's kickfd is invalid, skip!\n", i); 646 continue; 647 } 648 dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i; 649 dev->intr_handle->efds[i] = vring.kickfd; 650 count++; 651 VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i); 652 } 653 654 dev->intr_handle->nb_efd = count; 655 dev->intr_handle->max_intr = count + 1; 656 dev->intr_handle->type = RTE_INTR_HANDLE_VDEV; 657 658 return 0; 659 } 660 661 static void 662 update_queuing_status(struct rte_eth_dev *dev) 663 { 664 struct pmd_internal *internal = dev->data->dev_private; 665 struct vhost_queue *vq; 666 unsigned int i; 667 int allow_queuing = 1; 668 669 if (!dev->data->rx_queues || !dev->data->tx_queues) 670 return; 671 672 if (rte_atomic32_read(&internal->started) == 0 || 673 rte_atomic32_read(&internal->dev_attached) == 0) 674 allow_queuing = 0; 675 676 /* Wait until rx/tx_pkt_burst stops accessing vhost device */ 677 for (i = 0; i < dev->data->nb_rx_queues; i++) { 678 vq = dev->data->rx_queues[i]; 679 if (vq == NULL) 680 continue; 681 rte_atomic32_set(&vq->allow_queuing, allow_queuing); 682 while (rte_atomic32_read(&vq->while_queuing)) 683 rte_pause(); 684 } 685 686 for (i = 0; i < dev->data->nb_tx_queues; i++) { 687 vq = dev->data->tx_queues[i]; 688 if (vq == NULL) 689 continue; 690 rte_atomic32_set(&vq->allow_queuing, allow_queuing); 691 while (rte_atomic32_read(&vq->while_queuing)) 692 rte_pause(); 693 } 694 } 695 696 static void 697 queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal) 698 { 699 struct vhost_queue *vq; 700 int i; 701 702 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { 703 vq = eth_dev->data->rx_queues[i]; 704 if (!vq) 705 continue; 706 vq->vid = internal->vid; 707 vq->internal = internal; 708 vq->port = eth_dev->data->port_id; 709 } 710 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { 711 vq = eth_dev->data->tx_queues[i]; 712 if (!vq) 713 continue; 714 vq->vid = internal->vid; 715 vq->internal = internal; 716 vq->port = eth_dev->data->port_id; 717 } 718 } 719 720 static int 721 new_device(int vid) 722 { 723 struct rte_eth_dev *eth_dev; 724 struct internal_list *list; 725 struct pmd_internal *internal; 726 struct rte_eth_conf *dev_conf; 727 unsigned i; 728 char ifname[PATH_MAX]; 729 #ifdef RTE_LIBRTE_VHOST_NUMA 730 int newnode; 731 #endif 732 733 rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); 734 list = find_internal_resource(ifname); 735 if (list == NULL) { 736 VHOST_LOG(INFO, "Invalid device name: %s\n", ifname); 737 return -1; 738 } 739 740 eth_dev = list->eth_dev; 741 internal = eth_dev->data->dev_private; 742 dev_conf = ð_dev->data->dev_conf; 743 744 #ifdef RTE_LIBRTE_VHOST_NUMA 745 newnode = rte_vhost_get_numa_node(vid); 746 if (newnode >= 0) 747 eth_dev->data->numa_node = newnode; 748 #endif 749 750 internal->vid = vid; 751 if (rte_atomic32_read(&internal->started) == 1) { 752 queue_setup(eth_dev, internal); 753 754 if (dev_conf->intr_conf.rxq) { 755 if (eth_vhost_install_intr(eth_dev) < 0) { 756 VHOST_LOG(INFO, 757 "Failed to install interrupt handler."); 758 return -1; 759 } 760 } 761 } else { 762 VHOST_LOG(INFO, "RX/TX queues not exist yet\n"); 763 } 764 765 for (i = 0; i < rte_vhost_get_vring_num(vid); i++) 766 rte_vhost_enable_guest_notification(vid, i, 0); 767 768 rte_vhost_get_mtu(vid, ð_dev->data->mtu); 769 770 eth_dev->data->dev_link.link_status = ETH_LINK_UP; 771 772 rte_atomic32_set(&internal->dev_attached, 1); 773 update_queuing_status(eth_dev); 774 775 VHOST_LOG(INFO, "Vhost device %d created\n", vid); 776 777 _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); 778 779 return 0; 780 } 781 782 static void 783 destroy_device(int vid) 784 { 785 struct rte_eth_dev *eth_dev; 786 struct pmd_internal *internal; 787 struct vhost_queue *vq; 788 struct internal_list *list; 789 char ifname[PATH_MAX]; 790 unsigned i; 791 struct rte_vhost_vring_state *state; 792 793 rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); 794 list = find_internal_resource(ifname); 795 if (list == NULL) { 796 VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname); 797 return; 798 } 799 eth_dev = list->eth_dev; 800 internal = eth_dev->data->dev_private; 801 802 rte_atomic32_set(&internal->dev_attached, 0); 803 update_queuing_status(eth_dev); 804 805 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; 806 807 if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) { 808 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { 809 vq = eth_dev->data->rx_queues[i]; 810 if (!vq) 811 continue; 812 vq->vid = -1; 813 } 814 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { 815 vq = eth_dev->data->tx_queues[i]; 816 if (!vq) 817 continue; 818 vq->vid = -1; 819 } 820 } 821 822 state = vring_states[eth_dev->data->port_id]; 823 rte_spinlock_lock(&state->lock); 824 for (i = 0; i <= state->max_vring; i++) { 825 state->cur[i] = false; 826 state->seen[i] = false; 827 } 828 state->max_vring = 0; 829 rte_spinlock_unlock(&state->lock); 830 831 VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid); 832 eth_vhost_uninstall_intr(eth_dev); 833 834 _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); 835 } 836 837 static int 838 vring_state_changed(int vid, uint16_t vring, int enable) 839 { 840 struct rte_vhost_vring_state *state; 841 struct rte_eth_dev *eth_dev; 842 struct internal_list *list; 843 char ifname[PATH_MAX]; 844 845 rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); 846 list = find_internal_resource(ifname); 847 if (list == NULL) { 848 VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname); 849 return -1; 850 } 851 852 eth_dev = list->eth_dev; 853 /* won't be NULL */ 854 state = vring_states[eth_dev->data->port_id]; 855 rte_spinlock_lock(&state->lock); 856 state->cur[vring] = enable; 857 state->max_vring = RTE_MAX(vring, state->max_vring); 858 rte_spinlock_unlock(&state->lock); 859 860 VHOST_LOG(INFO, "vring%u is %s\n", 861 vring, enable ? "enabled" : "disabled"); 862 863 _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL); 864 865 return 0; 866 } 867 868 static struct vhost_device_ops vhost_ops = { 869 .new_device = new_device, 870 .destroy_device = destroy_device, 871 .vring_state_changed = vring_state_changed, 872 }; 873 874 int 875 rte_eth_vhost_get_queue_event(uint16_t port_id, 876 struct rte_eth_vhost_queue_event *event) 877 { 878 struct rte_vhost_vring_state *state; 879 unsigned int i; 880 int idx; 881 882 if (port_id >= RTE_MAX_ETHPORTS) { 883 VHOST_LOG(ERR, "Invalid port id\n"); 884 return -1; 885 } 886 887 state = vring_states[port_id]; 888 if (!state) { 889 VHOST_LOG(ERR, "Unused port\n"); 890 return -1; 891 } 892 893 rte_spinlock_lock(&state->lock); 894 for (i = 0; i <= state->max_vring; i++) { 895 idx = state->index++ % (state->max_vring + 1); 896 897 if (state->cur[idx] != state->seen[idx]) { 898 state->seen[idx] = state->cur[idx]; 899 event->queue_id = idx / 2; 900 event->rx = idx & 1; 901 event->enable = state->cur[idx]; 902 rte_spinlock_unlock(&state->lock); 903 return 0; 904 } 905 } 906 rte_spinlock_unlock(&state->lock); 907 908 return -1; 909 } 910 911 int 912 rte_eth_vhost_get_vid_from_port_id(uint16_t port_id) 913 { 914 struct internal_list *list; 915 struct rte_eth_dev *eth_dev; 916 struct vhost_queue *vq; 917 int vid = -1; 918 919 if (!rte_eth_dev_is_valid_port(port_id)) 920 return -1; 921 922 pthread_mutex_lock(&internal_list_lock); 923 924 TAILQ_FOREACH(list, &internal_list, next) { 925 eth_dev = list->eth_dev; 926 if (eth_dev->data->port_id == port_id) { 927 vq = eth_dev->data->rx_queues[0]; 928 if (vq) { 929 vid = vq->vid; 930 } 931 break; 932 } 933 } 934 935 pthread_mutex_unlock(&internal_list_lock); 936 937 return vid; 938 } 939 940 static int 941 eth_dev_start(struct rte_eth_dev *eth_dev) 942 { 943 struct pmd_internal *internal = eth_dev->data->dev_private; 944 struct rte_eth_conf *dev_conf = ð_dev->data->dev_conf; 945 946 queue_setup(eth_dev, internal); 947 948 if (rte_atomic32_read(&internal->dev_attached) == 1) { 949 if (dev_conf->intr_conf.rxq) { 950 if (eth_vhost_install_intr(eth_dev) < 0) { 951 VHOST_LOG(INFO, 952 "Failed to install interrupt handler."); 953 return -1; 954 } 955 } 956 } 957 958 rte_atomic32_set(&internal->started, 1); 959 update_queuing_status(eth_dev); 960 961 return 0; 962 } 963 964 static void 965 eth_dev_stop(struct rte_eth_dev *dev) 966 { 967 struct pmd_internal *internal = dev->data->dev_private; 968 969 rte_atomic32_set(&internal->started, 0); 970 update_queuing_status(dev); 971 } 972 973 static void 974 eth_dev_close(struct rte_eth_dev *dev) 975 { 976 struct pmd_internal *internal; 977 struct internal_list *list; 978 unsigned int i; 979 980 internal = dev->data->dev_private; 981 if (!internal) 982 return; 983 984 eth_dev_stop(dev); 985 986 rte_vhost_driver_unregister(internal->iface_name); 987 988 list = find_internal_resource(internal->iface_name); 989 if (!list) 990 return; 991 992 pthread_mutex_lock(&internal_list_lock); 993 TAILQ_REMOVE(&internal_list, list, next); 994 pthread_mutex_unlock(&internal_list_lock); 995 rte_free(list); 996 997 if (dev->data->rx_queues) 998 for (i = 0; i < dev->data->nb_rx_queues; i++) 999 rte_free(dev->data->rx_queues[i]); 1000 1001 if (dev->data->tx_queues) 1002 for (i = 0; i < dev->data->nb_tx_queues; i++) 1003 rte_free(dev->data->tx_queues[i]); 1004 1005 free(internal->dev_name); 1006 free(internal->iface_name); 1007 rte_free(internal); 1008 1009 dev->data->dev_private = NULL; 1010 1011 rte_free(vring_states[dev->data->port_id]); 1012 vring_states[dev->data->port_id] = NULL; 1013 } 1014 1015 static int 1016 eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, 1017 uint16_t nb_rx_desc __rte_unused, 1018 unsigned int socket_id, 1019 const struct rte_eth_rxconf *rx_conf __rte_unused, 1020 struct rte_mempool *mb_pool) 1021 { 1022 struct vhost_queue *vq; 1023 1024 vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue), 1025 RTE_CACHE_LINE_SIZE, socket_id); 1026 if (vq == NULL) { 1027 VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n"); 1028 return -ENOMEM; 1029 } 1030 1031 vq->mb_pool = mb_pool; 1032 vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ; 1033 dev->data->rx_queues[rx_queue_id] = vq; 1034 1035 return 0; 1036 } 1037 1038 static int 1039 eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, 1040 uint16_t nb_tx_desc __rte_unused, 1041 unsigned int socket_id, 1042 const struct rte_eth_txconf *tx_conf __rte_unused) 1043 { 1044 struct vhost_queue *vq; 1045 1046 vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue), 1047 RTE_CACHE_LINE_SIZE, socket_id); 1048 if (vq == NULL) { 1049 VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n"); 1050 return -ENOMEM; 1051 } 1052 1053 vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ; 1054 dev->data->tx_queues[tx_queue_id] = vq; 1055 1056 return 0; 1057 } 1058 1059 static int 1060 eth_dev_info(struct rte_eth_dev *dev, 1061 struct rte_eth_dev_info *dev_info) 1062 { 1063 struct pmd_internal *internal; 1064 1065 internal = dev->data->dev_private; 1066 if (internal == NULL) { 1067 VHOST_LOG(ERR, "Invalid device specified\n"); 1068 return -ENODEV; 1069 } 1070 1071 dev_info->max_mac_addrs = 1; 1072 dev_info->max_rx_pktlen = (uint32_t)-1; 1073 dev_info->max_rx_queues = internal->max_queues; 1074 dev_info->max_tx_queues = internal->max_queues; 1075 dev_info->min_rx_bufsize = 0; 1076 1077 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS | 1078 DEV_TX_OFFLOAD_VLAN_INSERT; 1079 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP; 1080 1081 return 0; 1082 } 1083 1084 static int 1085 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 1086 { 1087 unsigned i; 1088 unsigned long rx_total = 0, tx_total = 0; 1089 unsigned long rx_total_bytes = 0, tx_total_bytes = 0; 1090 struct vhost_queue *vq; 1091 1092 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && 1093 i < dev->data->nb_rx_queues; i++) { 1094 if (dev->data->rx_queues[i] == NULL) 1095 continue; 1096 vq = dev->data->rx_queues[i]; 1097 stats->q_ipackets[i] = vq->stats.pkts; 1098 rx_total += stats->q_ipackets[i]; 1099 1100 stats->q_ibytes[i] = vq->stats.bytes; 1101 rx_total_bytes += stats->q_ibytes[i]; 1102 } 1103 1104 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && 1105 i < dev->data->nb_tx_queues; i++) { 1106 if (dev->data->tx_queues[i] == NULL) 1107 continue; 1108 vq = dev->data->tx_queues[i]; 1109 stats->q_opackets[i] = vq->stats.pkts; 1110 tx_total += stats->q_opackets[i]; 1111 1112 stats->q_obytes[i] = vq->stats.bytes; 1113 tx_total_bytes += stats->q_obytes[i]; 1114 } 1115 1116 stats->ipackets = rx_total; 1117 stats->opackets = tx_total; 1118 stats->ibytes = rx_total_bytes; 1119 stats->obytes = tx_total_bytes; 1120 1121 return 0; 1122 } 1123 1124 static int 1125 eth_stats_reset(struct rte_eth_dev *dev) 1126 { 1127 struct vhost_queue *vq; 1128 unsigned i; 1129 1130 for (i = 0; i < dev->data->nb_rx_queues; i++) { 1131 if (dev->data->rx_queues[i] == NULL) 1132 continue; 1133 vq = dev->data->rx_queues[i]; 1134 vq->stats.pkts = 0; 1135 vq->stats.bytes = 0; 1136 } 1137 for (i = 0; i < dev->data->nb_tx_queues; i++) { 1138 if (dev->data->tx_queues[i] == NULL) 1139 continue; 1140 vq = dev->data->tx_queues[i]; 1141 vq->stats.pkts = 0; 1142 vq->stats.bytes = 0; 1143 vq->stats.missed_pkts = 0; 1144 } 1145 1146 return 0; 1147 } 1148 1149 static void 1150 eth_queue_release(void *q) 1151 { 1152 rte_free(q); 1153 } 1154 1155 static int 1156 eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused) 1157 { 1158 /* 1159 * vHost does not hang onto mbuf. eth_vhost_tx() copies packet data 1160 * and releases mbuf, so nothing to cleanup. 1161 */ 1162 return 0; 1163 } 1164 1165 static int 1166 eth_link_update(struct rte_eth_dev *dev __rte_unused, 1167 int wait_to_complete __rte_unused) 1168 { 1169 return 0; 1170 } 1171 1172 static uint32_t 1173 eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1174 { 1175 struct vhost_queue *vq; 1176 1177 vq = dev->data->rx_queues[rx_queue_id]; 1178 if (vq == NULL) 1179 return 0; 1180 1181 return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id); 1182 } 1183 1184 static const struct eth_dev_ops ops = { 1185 .dev_start = eth_dev_start, 1186 .dev_stop = eth_dev_stop, 1187 .dev_close = eth_dev_close, 1188 .dev_configure = eth_dev_configure, 1189 .dev_infos_get = eth_dev_info, 1190 .rx_queue_setup = eth_rx_queue_setup, 1191 .tx_queue_setup = eth_tx_queue_setup, 1192 .rx_queue_release = eth_queue_release, 1193 .tx_queue_release = eth_queue_release, 1194 .tx_done_cleanup = eth_tx_done_cleanup, 1195 .rx_queue_count = eth_rx_queue_count, 1196 .link_update = eth_link_update, 1197 .stats_get = eth_stats_get, 1198 .stats_reset = eth_stats_reset, 1199 .xstats_reset = vhost_dev_xstats_reset, 1200 .xstats_get = vhost_dev_xstats_get, 1201 .xstats_get_names = vhost_dev_xstats_get_names, 1202 .rx_queue_intr_enable = eth_rxq_intr_enable, 1203 .rx_queue_intr_disable = eth_rxq_intr_disable, 1204 }; 1205 1206 static int 1207 eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, 1208 int16_t queues, const unsigned int numa_node, uint64_t flags) 1209 { 1210 const char *name = rte_vdev_device_name(dev); 1211 struct rte_eth_dev_data *data; 1212 struct pmd_internal *internal = NULL; 1213 struct rte_eth_dev *eth_dev = NULL; 1214 struct rte_ether_addr *eth_addr = NULL; 1215 struct rte_vhost_vring_state *vring_state = NULL; 1216 struct internal_list *list = NULL; 1217 1218 VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n", 1219 numa_node); 1220 1221 list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node); 1222 if (list == NULL) 1223 goto error; 1224 1225 /* reserve an ethdev entry */ 1226 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal)); 1227 if (eth_dev == NULL) 1228 goto error; 1229 data = eth_dev->data; 1230 1231 eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node); 1232 if (eth_addr == NULL) 1233 goto error; 1234 data->mac_addrs = eth_addr; 1235 *eth_addr = base_eth_addr; 1236 eth_addr->addr_bytes[5] = eth_dev->data->port_id; 1237 1238 vring_state = rte_zmalloc_socket(name, 1239 sizeof(*vring_state), 0, numa_node); 1240 if (vring_state == NULL) 1241 goto error; 1242 1243 /* now put it all together 1244 * - store queue data in internal, 1245 * - point eth_dev_data to internals 1246 * - and point eth_dev structure to new eth_dev_data structure 1247 */ 1248 internal = eth_dev->data->dev_private; 1249 internal->dev_name = strdup(name); 1250 if (internal->dev_name == NULL) 1251 goto error; 1252 internal->iface_name = strdup(iface_name); 1253 if (internal->iface_name == NULL) 1254 goto error; 1255 1256 list->eth_dev = eth_dev; 1257 pthread_mutex_lock(&internal_list_lock); 1258 TAILQ_INSERT_TAIL(&internal_list, list, next); 1259 pthread_mutex_unlock(&internal_list_lock); 1260 1261 rte_spinlock_init(&vring_state->lock); 1262 vring_states[eth_dev->data->port_id] = vring_state; 1263 1264 data->nb_rx_queues = queues; 1265 data->nb_tx_queues = queues; 1266 internal->max_queues = queues; 1267 internal->vid = -1; 1268 data->dev_link = pmd_link; 1269 data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_CLOSE_REMOVE; 1270 1271 eth_dev->dev_ops = &ops; 1272 1273 /* finally assign rx and tx ops */ 1274 eth_dev->rx_pkt_burst = eth_vhost_rx; 1275 eth_dev->tx_pkt_burst = eth_vhost_tx; 1276 1277 if (rte_vhost_driver_register(iface_name, flags)) 1278 goto error; 1279 1280 if (rte_vhost_driver_callback_register(iface_name, &vhost_ops) < 0) { 1281 VHOST_LOG(ERR, "Can't register callbacks\n"); 1282 goto error; 1283 } 1284 1285 if (rte_vhost_driver_start(iface_name) < 0) { 1286 VHOST_LOG(ERR, "Failed to start driver for %s\n", 1287 iface_name); 1288 goto error; 1289 } 1290 1291 rte_eth_dev_probing_finish(eth_dev); 1292 return data->port_id; 1293 1294 error: 1295 if (internal) { 1296 free(internal->iface_name); 1297 free(internal->dev_name); 1298 } 1299 rte_free(vring_state); 1300 rte_eth_dev_release_port(eth_dev); 1301 rte_free(list); 1302 1303 return -1; 1304 } 1305 1306 static inline int 1307 open_iface(const char *key __rte_unused, const char *value, void *extra_args) 1308 { 1309 const char **iface_name = extra_args; 1310 1311 if (value == NULL) 1312 return -1; 1313 1314 *iface_name = value; 1315 1316 return 0; 1317 } 1318 1319 static inline int 1320 open_int(const char *key __rte_unused, const char *value, void *extra_args) 1321 { 1322 uint16_t *n = extra_args; 1323 1324 if (value == NULL || extra_args == NULL) 1325 return -EINVAL; 1326 1327 *n = (uint16_t)strtoul(value, NULL, 0); 1328 if (*n == USHRT_MAX && errno == ERANGE) 1329 return -1; 1330 1331 return 0; 1332 } 1333 1334 static int 1335 rte_pmd_vhost_probe(struct rte_vdev_device *dev) 1336 { 1337 struct rte_kvargs *kvlist = NULL; 1338 int ret = 0; 1339 char *iface_name; 1340 uint16_t queues; 1341 uint64_t flags = 0; 1342 int client_mode = 0; 1343 int dequeue_zero_copy = 0; 1344 int iommu_support = 0; 1345 int postcopy_support = 0; 1346 struct rte_eth_dev *eth_dev; 1347 const char *name = rte_vdev_device_name(dev); 1348 1349 VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name); 1350 1351 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1352 eth_dev = rte_eth_dev_attach_secondary(name); 1353 if (!eth_dev) { 1354 VHOST_LOG(ERR, "Failed to probe %s\n", name); 1355 return -1; 1356 } 1357 /* TODO: request info from primary to set up Rx and Tx */ 1358 eth_dev->dev_ops = &ops; 1359 eth_dev->device = &dev->device; 1360 rte_eth_dev_probing_finish(eth_dev); 1361 return 0; 1362 } 1363 1364 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); 1365 if (kvlist == NULL) 1366 return -1; 1367 1368 if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) { 1369 ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG, 1370 &open_iface, &iface_name); 1371 if (ret < 0) 1372 goto out_free; 1373 } else { 1374 ret = -1; 1375 goto out_free; 1376 } 1377 1378 if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) { 1379 ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG, 1380 &open_int, &queues); 1381 if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT) 1382 goto out_free; 1383 1384 } else 1385 queues = 1; 1386 1387 if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) { 1388 ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG, 1389 &open_int, &client_mode); 1390 if (ret < 0) 1391 goto out_free; 1392 1393 if (client_mode) 1394 flags |= RTE_VHOST_USER_CLIENT; 1395 } 1396 1397 if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) { 1398 ret = rte_kvargs_process(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY, 1399 &open_int, &dequeue_zero_copy); 1400 if (ret < 0) 1401 goto out_free; 1402 1403 if (dequeue_zero_copy) 1404 flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY; 1405 } 1406 1407 if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) { 1408 ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT, 1409 &open_int, &iommu_support); 1410 if (ret < 0) 1411 goto out_free; 1412 1413 if (iommu_support) 1414 flags |= RTE_VHOST_USER_IOMMU_SUPPORT; 1415 } 1416 1417 if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) { 1418 ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT, 1419 &open_int, &postcopy_support); 1420 if (ret < 0) 1421 goto out_free; 1422 1423 if (postcopy_support) 1424 flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT; 1425 } 1426 1427 if (dev->device.numa_node == SOCKET_ID_ANY) 1428 dev->device.numa_node = rte_socket_id(); 1429 1430 eth_dev_vhost_create(dev, iface_name, queues, dev->device.numa_node, 1431 flags); 1432 1433 out_free: 1434 rte_kvargs_free(kvlist); 1435 return ret; 1436 } 1437 1438 static int 1439 rte_pmd_vhost_remove(struct rte_vdev_device *dev) 1440 { 1441 const char *name; 1442 struct rte_eth_dev *eth_dev = NULL; 1443 1444 name = rte_vdev_device_name(dev); 1445 VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name); 1446 1447 /* find an ethdev entry */ 1448 eth_dev = rte_eth_dev_allocated(name); 1449 if (eth_dev == NULL) 1450 return 0; 1451 1452 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1453 return rte_eth_dev_release_port(eth_dev); 1454 1455 eth_dev_close(eth_dev); 1456 1457 rte_eth_dev_release_port(eth_dev); 1458 1459 return 0; 1460 } 1461 1462 static struct rte_vdev_driver pmd_vhost_drv = { 1463 .probe = rte_pmd_vhost_probe, 1464 .remove = rte_pmd_vhost_remove, 1465 }; 1466 1467 RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv); 1468 RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost); 1469 RTE_PMD_REGISTER_PARAM_STRING(net_vhost, 1470 "iface=<ifc> " 1471 "queues=<int> " 1472 "client=<0|1> " 1473 "dequeue-zero-copy=<0|1> " 1474 "iommu-support=<0|1> " 1475 "postcopy-support=<0|1>"); 1476 1477 RTE_INIT(vhost_init_log) 1478 { 1479 vhost_logtype = rte_log_register("pmd.net.vhost"); 1480 if (vhost_logtype >= 0) 1481 rte_log_set_level(vhost_logtype, RTE_LOG_NOTICE); 1482 } 1483