/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 IGEL Co., Ltd.
 * Copyright(c) 2016-2018 Intel Corporation
 */
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>

#include <rte_mbuf.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_vhost.h>
#include <rte_spinlock.h>

#include "rte_eth_vhost.h"

static int vhost_logtype;

#define VHOST_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)

enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};

#define ETH_VHOST_IFACE_ARG		"iface"
#define ETH_VHOST_QUEUES_ARG		"queues"
#define ETH_VHOST_CLIENT_ARG		"client"
#define ETH_VHOST_DEQUEUE_ZERO_COPY	"dequeue-zero-copy"
#define ETH_VHOST_IOMMU_SUPPORT		"iommu-support"
#define ETH_VHOST_POSTCOPY_SUPPORT	"postcopy-support"
#define ETH_VHOST_VIRTIO_NET_F_HOST_TSO	"tso"
#define VHOST_MAX_PKT_BURST 32

static const char *valid_arguments[] = {
	ETH_VHOST_IFACE_ARG,
	ETH_VHOST_QUEUES_ARG,
	ETH_VHOST_CLIENT_ARG,
	ETH_VHOST_DEQUEUE_ZERO_COPY,
	ETH_VHOST_IOMMU_SUPPORT,
	ETH_VHOST_POSTCOPY_SUPPORT,
	ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
	NULL
};

static struct rte_ether_addr base_eth_addr = {
	.addr_bytes = {
		0x56 /* V */,
		0x48 /* H */,
		0x4F /* O */,
		0x53 /* S */,
		0x54 /* T */,
		0x00
	}
};

enum vhost_xstats_pkts {
	VHOST_UNDERSIZE_PKT = 0,
	VHOST_64_PKT,
	VHOST_65_TO_127_PKT,
	VHOST_128_TO_255_PKT,
	VHOST_256_TO_511_PKT,
	VHOST_512_TO_1023_PKT,
	VHOST_1024_TO_1522_PKT,
	VHOST_1523_TO_MAX_PKT,
	VHOST_BROADCAST_PKT,
	VHOST_MULTICAST_PKT,
	VHOST_UNICAST_PKT,
	VHOST_ERRORS_PKT,
	VHOST_ERRORS_FRAGMENTED,
	VHOST_ERRORS_JABBER,
	VHOST_UNKNOWN_PROTOCOL,
	VHOST_XSTATS_MAX,
};

struct vhost_stats {
	uint64_t pkts;
	uint64_t bytes;
	uint64_t missed_pkts;
	uint64_t xstats[VHOST_XSTATS_MAX];
};

struct vhost_queue {
	int vid;
	rte_atomic32_t allow_queuing;
	rte_atomic32_t while_queuing;
	struct pmd_internal *internal;
	struct rte_mempool *mb_pool;
	uint16_t port;
	uint16_t virtqueue_id;
	struct vhost_stats stats;
};

struct pmd_internal {
	rte_atomic32_t dev_attached;
	char *iface_name;
	uint64_t flags;
	uint64_t disable_flags;
	uint16_t max_queues;
	int vid;
	rte_atomic32_t started;
	uint8_t vlan_strip;
};

struct internal_list {
	TAILQ_ENTRY(internal_list) next;
	struct rte_eth_dev *eth_dev;
};

TAILQ_HEAD(internal_list_head, internal_list);
static struct internal_list_head internal_list =
	TAILQ_HEAD_INITIALIZER(internal_list);

static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;

static struct rte_eth_link pmd_link = {
	.link_speed = 10000,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN
};

struct rte_vhost_vring_state {
	rte_spinlock_t lock;

	bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
	bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
	unsigned int index;
	unsigned int max_vring;
};

static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];

#define VHOST_XSTATS_NAME_SIZE 64

struct vhost_xstats_name_off {
	char name[VHOST_XSTATS_NAME_SIZE];
	uint64_t offset;
};

/* [rx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.pkts)},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.bytes)},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.missed_pkts)},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
	{"fragmented_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])},
	{"jabber_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])},
	{"unknown_protos_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])},
};

/* [tx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.pkts)},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.bytes)},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.missed_pkts)},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
};

#define VHOST_NB_XSTATS_RXPORT (sizeof(vhost_rxport_stat_strings) / \
				sizeof(vhost_rxport_stat_strings[0]))

#define VHOST_NB_XSTATS_TXPORT (sizeof(vhost_txport_stat_strings) / \
				sizeof(vhost_txport_stat_strings[0]))

static int
vhost_dev_xstats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq = NULL;
	unsigned int i = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}

	return 0;
}

static int
vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
			   struct rte_eth_xstat_name *xstats_names,
			   unsigned int limit __rte_unused)
{
	unsigned int t = 0;
	int count = 0;
	int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (!xstats_names)
		return nstats;
	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "rx_%s", vhost_rxport_stat_strings[t].name);
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "tx_%s", vhost_txport_stat_strings[t].name);
		count++;
	}
	return count;
}

static int
vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
		     unsigned int n)
{
	unsigned int i;
	unsigned int t;
	unsigned int count = 0;
	struct vhost_queue *vq = NULL;
	unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (n < nxstats)
		return nxstats;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
				+ vq->stats.xstats[VHOST_MULTICAST_PKT]);
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
				+ vq->stats.missed_pkts
				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
				+ vq->stats.xstats[VHOST_MULTICAST_PKT]);
	}
	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			vq = dev->data->rx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_rxport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			vq = dev->data->tx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_txport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	return count;
}

static inline void
vhost_count_multicast_broadcast(struct vhost_queue *vq,
				struct rte_mbuf *mbuf)
{
	struct rte_ether_addr *ea = NULL;
	struct vhost_stats *pstats = &vq->stats;

	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
	if (rte_is_multicast_ether_addr(ea)) {
		if (rte_is_broadcast_ether_addr(ea))
			pstats->xstats[VHOST_BROADCAST_PKT]++;
		else
			pstats->xstats[VHOST_MULTICAST_PKT]++;
	}
}

static void
vhost_update_packet_xstats(struct vhost_queue *vq,
			   struct rte_mbuf **bufs,
			   uint16_t count)
{
	uint32_t pkt_len = 0;
	uint64_t i = 0;
	uint64_t index;
	struct vhost_stats *pstats = &vq->stats;
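
	/* Added note: for 64 < pkt_len < 1024 the size-bucket index is
	 * derived from the position of the most significant bit of pkt_len:
	 * (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5 maps
	 * 65..127 to VHOST_65_TO_127_PKT, 128..255 to VHOST_128_TO_255_PKT,
	 * and so on up to 512..1023 -> VHOST_512_TO_1023_PKT.
	 */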
	for (i = 0; i < count; i++) {
		pkt_len = bufs[i]->pkt_len;
		if (pkt_len == 64) {
			pstats->xstats[VHOST_64_PKT]++;
		} else if (pkt_len > 64 && pkt_len < 1024) {
			index = (sizeof(pkt_len) * 8)
				- __builtin_clz(pkt_len) - 5;
			pstats->xstats[index]++;
		} else {
			if (pkt_len < 64)
				pstats->xstats[VHOST_UNDERSIZE_PKT]++;
			else if (pkt_len <= 1522)
				pstats->xstats[VHOST_1024_TO_1522_PKT]++;
			else if (pkt_len > 1522)
				pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
		}
		vhost_count_multicast_broadcast(vq, bufs[i]);
	}
}

static uint16_t
eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_rx = 0;
	uint16_t nb_receive = nb_bufs;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	/* Dequeue packets from guest TX queue */
	while (nb_receive) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_receive,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
						  r->mb_pool, &bufs[nb_rx],
						  num);

		nb_rx += nb_pkts;
		nb_receive -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_rx;

	for (i = 0; likely(i < nb_rx); i++) {
		bufs[i]->port = r->port;
		bufs[i]->vlan_tci = 0;

		if (r->internal->vlan_strip)
			rte_vlan_strip(bufs[i]);

		r->stats.bytes += bufs[i]->pkt_len;
	}

	vhost_update_packet_xstats(r, bufs, nb_rx);

out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_rx;
}

static uint16_t
eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_tx = 0;
	uint16_t nb_send = 0;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	for (i = 0; i < nb_bufs; i++) {
		struct rte_mbuf *m = bufs[i];

		/* Do VLAN tag insertion */
		if (m->ol_flags & PKT_TX_VLAN_PKT) {
			int error = rte_vlan_insert(&m);
			if (unlikely(error)) {
				rte_pktmbuf_free(m);
				continue;
			}
		}

		bufs[nb_send] = m;
		++nb_send;
	}

	/* Enqueue packets to guest RX queue */
	while (nb_send) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_send,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
						  &bufs[nb_tx], num);

		nb_tx += nb_pkts;
		nb_send -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_tx;
	r->stats.missed_pkts += nb_bufs - nb_tx;

	for (i = 0; likely(i < nb_tx); i++)
		r->stats.bytes += bufs[i]->pkt_len;

	vhost_update_packet_xstats(r, bufs, nb_tx);

	/* According to RFC 2863, page 42, sections ifHCOutMulticastPkts and
	 * ifHCOutBroadcastPkts, the counters "multicast" and "broadcast"
	 * are increased even when packets are not transmitted successfully.
	 */
	for (i = nb_tx; i < nb_bufs; i++)
		vhost_count_multicast_broadcast(r, bufs[i]);

	for (i = 0; likely(i < nb_tx); i++)
		rte_pktmbuf_free(bufs[i]);
out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_tx;
}

static inline struct internal_list *
find_internal_resource(char *ifname)
{
	int found = 0;
	struct internal_list *list;
	struct pmd_internal *internal;

	if (ifname == NULL)
		return NULL;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		internal = list->eth_dev->data->dev_private;
		if (!strcmp(internal->iface_name, ifname)) {
			found = 1;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	if (!found)
		return NULL;

	return list;
}

static int
eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
	rte_wmb();

	return ret;
}

static int
eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
	rte_wmb();

	return 0;
}

static void
eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
{
	struct rte_intr_handle *intr_handle = dev->intr_handle;

	if (intr_handle) {
		if (intr_handle->intr_vec)
			free(intr_handle->intr_vec);
		free(intr_handle);
	}

	dev->intr_handle = NULL;
}

static int
eth_vhost_install_intr(struct rte_eth_dev *dev)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int count = 0;
	int nb_rxq = dev->data->nb_rx_queues;
	int i;
	int ret;

	/* uninstall firstly if we are reconnecting */
	if (dev->intr_handle)
		eth_vhost_uninstall_intr(dev);

	dev->intr_handle = malloc(sizeof(*dev->intr_handle));
	if (!dev->intr_handle) {
		VHOST_LOG(ERR, "Fail to allocate intr_handle\n");
		return -ENOMEM;
	}
	memset(dev->intr_handle, 0, sizeof(*dev->intr_handle));

	dev->intr_handle->efd_counter_size = sizeof(uint64_t);

	dev->intr_handle->intr_vec =
		malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0]));

	if (!dev->intr_handle->intr_vec) {
		VHOST_LOG(ERR,
			"Failed to allocate memory for interrupt vector\n");
		free(dev->intr_handle);
		return -ENOMEM;
	}

	VHOST_LOG(INFO, "Prepare intr vec\n");
	for (i = 0; i < nb_rxq; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq) {
			VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
			continue;
		}

		ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
		if (ret < 0) {
			VHOST_LOG(INFO,
				"Failed to get rxq-%d's vring, skip!\n", i);
			continue;
		}

		if (vring.kickfd < 0) {
			VHOST_LOG(INFO,
				"rxq-%d's kickfd is invalid, skip!\n", i);
			continue;
		}
		dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
		dev->intr_handle->efds[i] = vring.kickfd;
		count++;
		VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
	}

	dev->intr_handle->nb_efd = count;
	dev->intr_handle->max_intr = count + 1;
	dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;

	return 0;
}

static void
update_queuing_status(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	struct vhost_queue *vq;
	unsigned int i;
	int allow_queuing = 1;

	if (!dev->data->rx_queues || !dev->data->tx_queues)
		return;

	if (rte_atomic32_read(&internal->started) == 0 ||
	    rte_atomic32_read(&internal->dev_attached) == 0)
		allow_queuing = 0;

	/* Wait until rx/tx_pkt_burst stops accessing vhost device */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}
}

static void
queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
{
	struct vhost_queue *vq;
	int i;

	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
		vq = eth_dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
		vq = eth_dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
}

static int
new_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	struct pmd_internal *internal;
	struct rte_eth_conf *dev_conf;
	unsigned i;
	char ifname[PATH_MAX];
#ifdef RTE_LIBRTE_VHOST_NUMA
	int newnode;
#endif

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(INFO, "Invalid device name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;
	dev_conf = &eth_dev->data->dev_conf;

#ifdef RTE_LIBRTE_VHOST_NUMA
	newnode = rte_vhost_get_numa_node(vid);
	if (newnode >= 0)
		eth_dev->data->numa_node = newnode;
#endif

	internal->vid = vid;
	if (rte_atomic32_read(&internal->started) == 1) {
		queue_setup(eth_dev, internal);

		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.\n");
				return -1;
			}
		}
	} else {
		VHOST_LOG(INFO, "RX/TX queues do not exist yet\n");
	}

	for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
		rte_vhost_enable_guest_notification(vid, i, 0);

	rte_vhost_get_mtu(vid, &eth_dev->data->mtu);
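
	/* Added note: the vhost-user connection is now fully set up, so
	 * report the link as up and let the rx/tx burst functions start
	 * using this vid.
	 */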
	eth_dev->data->dev_link.link_status = ETH_LINK_UP;

	rte_atomic32_set(&internal->dev_attached, 1);
	update_queuing_status(eth_dev);

	VHOST_LOG(INFO, "Vhost device %d created\n", vid);

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);

	return 0;
}

static void
destroy_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct pmd_internal *internal;
	struct vhost_queue *vq;
	struct internal_list *list;
	char ifname[PATH_MAX];
	unsigned i;
	struct rte_vhost_vring_state *state;

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return;
	}
	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;

	rte_atomic32_set(&internal->dev_attached, 0);
	update_queuing_status(eth_dev);

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;

	if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
			vq = eth_dev->data->rx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
		for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
			vq = eth_dev->data->tx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
	}

	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		state->cur[i] = false;
		state->seen[i] = false;
	}
	state->max_vring = 0;
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
	eth_vhost_uninstall_intr(eth_dev);

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static int
vring_state_changed(int vid, uint16_t vring, int enable)
{
	struct rte_vhost_vring_state *state;
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	char ifname[PATH_MAX];

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	/* won't be NULL */
	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	if (state->cur[vring] == enable) {
		rte_spinlock_unlock(&state->lock);
		return 0;
	}
	state->cur[vring] = enable;
	state->max_vring = RTE_MAX(vring, state->max_vring);
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "vring%u is %s\n",
		  vring, enable ? "enabled" : "disabled");

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);

	return 0;
}

static struct vhost_device_ops vhost_ops = {
	.new_device          = new_device,
	.destroy_device      = destroy_device,
	.vring_state_changed = vring_state_changed,
};

static int
vhost_driver_setup(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct internal_list *list = NULL;
	struct rte_vhost_vring_state *vring_state = NULL;
	unsigned int numa_node = eth_dev->device->numa_node;
	const char *name = eth_dev->device->name;

	/* Don't try to set up again if it has already been done. */
	list = find_internal_resource(internal->iface_name);
	if (list)
		return 0;

	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
	if (list == NULL)
		return -1;

	vring_state = rte_zmalloc_socket(name, sizeof(*vring_state),
					 0, numa_node);
	if (vring_state == NULL)
		goto free_list;

	list->eth_dev = eth_dev;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_INSERT_TAIL(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);

	rte_spinlock_init(&vring_state->lock);
	vring_states[eth_dev->data->port_id] = vring_state;

	if (rte_vhost_driver_register(internal->iface_name, internal->flags))
		goto list_remove;

	if (internal->disable_flags) {
		if (rte_vhost_driver_disable_features(internal->iface_name,
						      internal->disable_flags))
			goto drv_unreg;
	}

	if (rte_vhost_driver_callback_register(internal->iface_name,
					       &vhost_ops) < 0) {
		VHOST_LOG(ERR, "Can't register callbacks\n");
		goto drv_unreg;
	}

	if (rte_vhost_driver_start(internal->iface_name) < 0) {
		VHOST_LOG(ERR, "Failed to start driver for %s\n",
			  internal->iface_name);
		goto drv_unreg;
	}

	return 0;

drv_unreg:
	rte_vhost_driver_unregister(internal->iface_name);
list_remove:
	vring_states[eth_dev->data->port_id] = NULL;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_REMOVE(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);
	rte_free(vring_state);
free_list:
	rte_free(list);

	return -1;
}

int
rte_eth_vhost_get_queue_event(uint16_t port_id,
			      struct rte_eth_vhost_queue_event *event)
{
	struct rte_vhost_vring_state *state;
	unsigned int i;
	int idx;

	if (port_id >= RTE_MAX_ETHPORTS) {
		VHOST_LOG(ERR, "Invalid port id\n");
		return -1;
	}

	state = vring_states[port_id];
	if (!state) {
		VHOST_LOG(ERR, "Unused port\n");
		return -1;
	}

	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		idx = state->index++ % (state->max_vring + 1);

		if (state->cur[idx] != state->seen[idx]) {
			state->seen[idx] = state->cur[idx];
			event->queue_id = idx / 2;
			event->rx = idx & 1;
			event->enable = state->cur[idx];
			rte_spinlock_unlock(&state->lock);
			return 0;
		}
	}
	rte_spinlock_unlock(&state->lock);

	return -1;
}

int
rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
{
	struct internal_list *list;
	struct rte_eth_dev *eth_dev;
	struct vhost_queue *vq;
	int vid = -1;

	if (!rte_eth_dev_is_valid_port(port_id))
		return -1;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		eth_dev = list->eth_dev;
		if (eth_dev->data->port_id == port_id) {
			vq = eth_dev->data->rx_queues[0];
			if (vq)
				vid = vq->vid;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	return vid;
}

static int
eth_dev_configure(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;

	/* NOTE: the same process has to operate a vhost interface
	 * from beginning to end (from eth_dev configure to eth_dev close).
	 * It is currently the user's responsibility.
	 */
	if (vhost_driver_setup(dev) < 0)
		return -1;

	internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);

	return 0;
}

static int
eth_dev_start(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;

	queue_setup(eth_dev, internal);

	if (rte_atomic32_read(&internal->dev_attached) == 1) {
		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.\n");
				return -1;
			}
		}
	}

	rte_atomic32_set(&internal->started, 1);
	update_queuing_status(eth_dev);

	return 0;
}

static void
eth_dev_stop(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;

	rte_atomic32_set(&internal->started, 0);
	update_queuing_status(dev);
}

static void
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal;
	struct internal_list *list;
	unsigned int i;

	internal = dev->data->dev_private;
	if (!internal)
		return;

	eth_dev_stop(dev);

	rte_vhost_driver_unregister(internal->iface_name);

	list = find_internal_resource(internal->iface_name);
	if (!list)
		return;

	pthread_mutex_lock(&internal_list_lock);
	TAILQ_REMOVE(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);
	rte_free(list);

	if (dev->data->rx_queues)
		for (i = 0; i < dev->data->nb_rx_queues; i++)
			rte_free(dev->data->rx_queues[i]);

	if (dev->data->tx_queues)
		for (i = 0; i < dev->data->nb_tx_queues; i++)
			rte_free(dev->data->tx_queues[i]);

	rte_free(internal->iface_name);
	rte_free(internal);

	dev->data->dev_private = NULL;

	rte_free(vring_states[dev->data->port_id]);
	vring_states[dev->data->port_id] = NULL;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		   uint16_t nb_rx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mb_pool)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
				RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n");
		return -ENOMEM;
	}

	vq->mb_pool = mb_pool;
	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
	dev->data->rx_queues[rx_queue_id] = vq;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
				RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n");
		return -ENOMEM;
	}

	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
	dev->data->tx_queues[tx_queue_id] = vq;

	return 0;
}

static int
eth_dev_info(struct rte_eth_dev *dev,
	     struct rte_eth_dev_info *dev_info)
{
	struct pmd_internal *internal;

	internal = dev->data->dev_private;
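	/* Added note: dev_private is cleared by eth_dev_close(), so a NULL
	 * here means the port has already been closed.
	 */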
	if (internal == NULL) {
		VHOST_LOG(ERR, "Invalid device specified\n");
		return -ENODEV;
	}

	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)-1;
	dev_info->max_rx_queues = internal->max_queues;
	dev_info->max_tx_queues = internal->max_queues;
	dev_info->min_rx_bufsize = 0;

	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
				DEV_TX_OFFLOAD_VLAN_INSERT;
	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;

	return 0;
}

static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned i;
	unsigned long rx_total = 0, tx_total = 0;
	unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
	struct vhost_queue *vq;

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		stats->q_ipackets[i] = vq->stats.pkts;
		rx_total += stats->q_ipackets[i];

		stats->q_ibytes[i] = vq->stats.bytes;
		rx_total_bytes += stats->q_ibytes[i];
	}

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		stats->q_opackets[i] = vq->stats.pkts;
		tx_total += stats->q_opackets[i];

		stats->q_obytes[i] = vq->stats.bytes;
		tx_total_bytes += stats->q_obytes[i];
	}

	stats->ipackets = rx_total;
	stats->opackets = tx_total;
	stats->ibytes = rx_total_bytes;
	stats->obytes = tx_total_bytes;

	return 0;
}

static int
eth_stats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq;
	unsigned i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
		vq->stats.missed_pkts = 0;
	}

	return 0;
}

static void
eth_queue_release(void *q)
{
	rte_free(q);
}

static int
eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused)
{
	/*
	 * vHost does not hang onto mbufs: eth_vhost_tx() copies packet data
	 * and releases the mbuf, so there is nothing to clean up.
	 */
	return 0;
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}

static uint32_t
eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct vhost_queue *vq;

	vq = dev->data->rx_queues[rx_queue_id];
	if (vq == NULL)
		return 0;

	return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.tx_done_cleanup = eth_tx_done_cleanup,
	.rx_queue_count = eth_rx_queue_count,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
	.xstats_reset = vhost_dev_xstats_reset,
	.xstats_get = vhost_dev_xstats_get,
	.xstats_get_names = vhost_dev_xstats_get_names,
	.rx_queue_intr_enable = eth_rxq_intr_enable,
	.rx_queue_intr_disable = eth_rxq_intr_disable,
};

static int
eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
	int16_t queues, const unsigned int numa_node, uint64_t flags,
	uint64_t disable_flags)
{
	const char *name = rte_vdev_device_name(dev);
	struct rte_eth_dev_data *data;
	struct pmd_internal *internal = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct rte_ether_addr *eth_addr = NULL;

	VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n",
		numa_node);

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
	if (eth_dev == NULL)
		goto error;
	data = eth_dev->data;

	eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
	if (eth_addr == NULL)
		goto error;
	data->mac_addrs = eth_addr;
	*eth_addr = base_eth_addr;
	eth_addr->addr_bytes[5] = eth_dev->data->port_id;

	/* now put it all together
	 * - store queue data in internal,
	 * - point eth_dev_data to internals
	 * - and point eth_dev structure to new eth_dev_data structure
	 */
	internal = eth_dev->data->dev_private;
	internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1,
						 0, numa_node);
	if (internal->iface_name == NULL)
		goto error;
	strcpy(internal->iface_name, iface_name);

	data->nb_rx_queues = queues;
	data->nb_tx_queues = queues;
	internal->max_queues = queues;
	internal->vid = -1;
	internal->flags = flags;
	internal->disable_flags = disable_flags;
	data->dev_link = pmd_link;
	data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_CLOSE_REMOVE;

	eth_dev->dev_ops = &ops;

	/* finally assign rx and tx ops */
	eth_dev->rx_pkt_burst = eth_vhost_rx;
	eth_dev->tx_pkt_burst = eth_vhost_tx;

	rte_eth_dev_probing_finish(eth_dev);
	return 0;

error:
	if (internal)
		rte_free(internal->iface_name);
	rte_eth_dev_release_port(eth_dev);

	return -1;
}

static inline int
open_iface(const char *key __rte_unused, const char *value, void *extra_args)
{
	const char **iface_name = extra_args;

	if (value == NULL)
		return -1;

	*iface_name = value;

	return 0;
}

static inline int
open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
	uint16_t *n = extra_args;

	if (value == NULL || extra_args == NULL)
		return -EINVAL;

	*n = (uint16_t)strtoul(value, NULL, 0);
	if (*n == USHRT_MAX && errno == ERANGE)
		return -1;

	return 0;
}

static int
rte_pmd_vhost_probe(struct rte_vdev_device *dev)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;
	char *iface_name;
	uint16_t queues;
	uint64_t flags = 0;
	uint64_t disable_flags = 0;
	int client_mode = 0;
	int dequeue_zero_copy = 0;
	int iommu_support = 0;
	int postcopy_support = 0;
	int tso = 0;
	struct rte_eth_dev *eth_dev;
	const char *name = rte_vdev_device_name(dev);

	VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			VHOST_LOG(ERR, "Failed to probe %s\n", name);
			return -1;
		}
		eth_dev->rx_pkt_burst = eth_vhost_rx;
		eth_dev->tx_pkt_burst = eth_vhost_tx;
		eth_dev->dev_ops = &ops;
		if (dev->device.numa_node == SOCKET_ID_ANY)
			dev->device.numa_node = rte_socket_id();
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL)
		return -1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
					 &open_iface, &iface_name);
		if (ret < 0)
			goto out_free;
	} else {
		ret = -1;
		goto out_free;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
					 &open_int, &queues);
		if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
			goto out_free;

	} else
		queues = 1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
					 &open_int, &client_mode);
		if (ret < 0)
			goto out_free;

		if (client_mode)
			flags |= RTE_VHOST_USER_CLIENT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY,
					 &open_int, &dequeue_zero_copy);
		if (ret < 0)
			goto out_free;

		if (dequeue_zero_copy)
			flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
					 &open_int, &iommu_support);
		if (ret < 0)
			goto out_free;

		if (iommu_support)
			flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
					 &open_int, &postcopy_support);
		if (ret < 0)
			goto out_free;

		if (postcopy_support)
			flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_VIRTIO_NET_F_HOST_TSO) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
				&open_int, &tso);
		if (ret < 0)
			goto out_free;

		if (tso == 0) {
			disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
			disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
		}
	}

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

	ret = eth_dev_vhost_create(dev, iface_name, queues,
				   dev->device.numa_node, flags,
				   disable_flags);
	if (ret == -1)
		VHOST_LOG(ERR, "Failed to create %s\n", name);

out_free:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
rte_pmd_vhost_remove(struct rte_vdev_device *dev)
{
	const char *name;
	struct rte_eth_dev *eth_dev = NULL;

	name = rte_vdev_device_name(dev);
	VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name);

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return 0;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return rte_eth_dev_release_port(eth_dev);

	eth_dev_close(eth_dev);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_vhost_drv = {
	.probe = rte_pmd_vhost_probe,
	.remove = rte_pmd_vhost_remove,
};

RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
	"iface=<ifc> "
	"queues=<int> "
	"client=<0|1> "
	"dequeue-zero-copy=<0|1> "
	"iommu-support=<0|1> "
	"postcopy-support=<0|1> "
	"tso=<0|1>");

RTE_INIT(vhost_init_log)
{
	vhost_logtype = rte_log_register("pmd.net.vhost");
	if (vhost_logtype >= 0)
		rte_log_set_level(vhost_logtype, RTE_LOG_NOTICE);
}
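
/*
 * Added usage note (illustrative sketch, not part of the upstream driver;
 * the socket path below is an example): a vhost port is created with EAL
 * vdev arguments matching the parameter string registered above, e.g.:
 *
 *   --vdev 'net_vhost0,iface=/tmp/vhost-user0,queues=1,client=0'
 *
 * Applications that care about per-queue state changes can drain
 * rte_eth_vhost_get_queue_event() when the RTE_ETH_EVENT_QUEUE_STATE
 * callback fires, along these lines:
 *
 *   struct rte_eth_vhost_queue_event ev;
 *
 *   while (rte_eth_vhost_get_queue_event(port_id, &ev) == 0)
 *       printf("%s queue %u %s\n", ev.rx ? "rx" : "tx",
 *              ev.queue_id, ev.enable ? "enabled" : "disabled");
 */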