/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 IGEL Co., Ltd.
 * Copyright(c) 2016-2018 Intel Corporation
 */
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>
#include <sys/epoll.h>

#include <rte_mbuf.h>
#include <ethdev_driver.h>
#include <ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_vhost.h>
#include <rte_spinlock.h>

#include "rte_eth_vhost.h"

RTE_LOG_REGISTER(vhost_logtype, pmd.net.vhost, NOTICE);

#define VHOST_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)

enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};

#define ETH_VHOST_IFACE_ARG		"iface"
#define ETH_VHOST_QUEUES_ARG		"queues"
#define ETH_VHOST_CLIENT_ARG		"client"
#define ETH_VHOST_IOMMU_SUPPORT		"iommu-support"
#define ETH_VHOST_POSTCOPY_SUPPORT	"postcopy-support"
#define ETH_VHOST_VIRTIO_NET_F_HOST_TSO	"tso"
#define ETH_VHOST_LINEAR_BUF		"linear-buffer"
#define ETH_VHOST_EXT_BUF		"ext-buffer"
#define VHOST_MAX_PKT_BURST 32

static const char *valid_arguments[] = {
	ETH_VHOST_IFACE_ARG,
	ETH_VHOST_QUEUES_ARG,
	ETH_VHOST_CLIENT_ARG,
	ETH_VHOST_IOMMU_SUPPORT,
	ETH_VHOST_POSTCOPY_SUPPORT,
	ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
	ETH_VHOST_LINEAR_BUF,
	ETH_VHOST_EXT_BUF,
	NULL
};

static struct rte_ether_addr base_eth_addr = {
	.addr_bytes = {
		0x56 /* V */,
		0x48 /* H */,
		0x4F /* O */,
		0x53 /* S */,
		0x54 /* T */,
		0x00
	}
};

enum vhost_xstats_pkts {
	VHOST_UNDERSIZE_PKT = 0,
	VHOST_64_PKT,
	VHOST_65_TO_127_PKT,
	VHOST_128_TO_255_PKT,
	VHOST_256_TO_511_PKT,
	VHOST_512_TO_1023_PKT,
	VHOST_1024_TO_1522_PKT,
	VHOST_1523_TO_MAX_PKT,
	VHOST_BROADCAST_PKT,
	VHOST_MULTICAST_PKT,
	VHOST_UNICAST_PKT,
	VHOST_PKT,
	VHOST_BYTE,
	VHOST_MISSED_PKT,
	VHOST_ERRORS_PKT,
	VHOST_ERRORS_FRAGMENTED,
	VHOST_ERRORS_JABBER,
	VHOST_UNKNOWN_PROTOCOL,
	VHOST_XSTATS_MAX,
};

struct vhost_stats {
	uint64_t pkts;
	uint64_t bytes;
	uint64_t missed_pkts;
	uint64_t xstats[VHOST_XSTATS_MAX];
};

struct vhost_queue {
	int vid;
	rte_atomic32_t allow_queuing;
	rte_atomic32_t while_queuing;
	struct pmd_internal *internal;
	struct rte_mempool *mb_pool;
	uint16_t port;
	uint16_t virtqueue_id;
	struct vhost_stats stats;
	int intr_enable;
	rte_spinlock_t intr_lock;
};

struct pmd_internal {
	rte_atomic32_t dev_attached;
	char *iface_name;
	uint64_t flags;
	uint64_t disable_flags;
	uint16_t max_queues;
	int vid;
	rte_atomic32_t started;
	uint8_t vlan_strip;
};

struct internal_list {
	TAILQ_ENTRY(internal_list) next;
	struct rte_eth_dev *eth_dev;
};

TAILQ_HEAD(internal_list_head, internal_list);
static struct internal_list_head internal_list =
	TAILQ_HEAD_INITIALIZER(internal_list);

static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;

static struct rte_eth_link pmd_link = {
	.link_speed = 10000,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN
};

struct rte_vhost_vring_state {
	rte_spinlock_t lock;

	bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
	bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
	unsigned int index;
	unsigned int max_vring;
};

static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];

#define VHOST_XSTATS_NAME_SIZE 64

struct vhost_xstats_name_off {
	char name[VHOST_XSTATS_NAME_SIZE];
	uint64_t offset;
};

/* [rx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
	{"fragmented_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])},
	{"jabber_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])},
	{"unknown_protos_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])},
};

/* [tx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
};
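
/*
 * Note: both tables above record a byte offset into struct vhost_queue
 * rather than a pointer to the counter itself.  vhost_dev_xstats_get()
 * below resolves each entry per queue with plain pointer arithmetic,
 * roughly equivalent to (illustration only, not a separate helper in
 * this driver):
 *
 *	uint64_t val = *(uint64_t *)((char *)vq +
 *			vhost_rxport_stat_strings[t].offset);
 *
 * so adding a new xstat only needs a new enum entry plus one row in the
 * relevant table.
 */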

#define VHOST_NB_XSTATS_RXPORT (sizeof(vhost_rxport_stat_strings) / \
				sizeof(vhost_rxport_stat_strings[0]))

#define VHOST_NB_XSTATS_TXPORT (sizeof(vhost_txport_stat_strings) / \
				sizeof(vhost_txport_stat_strings[0]))

static int
vhost_dev_xstats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq = NULL;
	unsigned int i = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}

	return 0;
}

static int
vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
			   struct rte_eth_xstat_name *xstats_names,
			   unsigned int limit __rte_unused)
{
	unsigned int t = 0;
	int count = 0;
	int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (!xstats_names)
		return nstats;
	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "rx_%s", vhost_rxport_stat_strings[t].name);
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "tx_%s", vhost_txport_stat_strings[t].name);
		count++;
	}
	return count;
}

static int
vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
		     unsigned int n)
{
	unsigned int i;
	unsigned int t;
	unsigned int count = 0;
	struct vhost_queue *vq = NULL;
	unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (n < nxstats)
		return nxstats;

	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			vq = dev->data->rx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_rxport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			vq = dev->data->tx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_txport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	return count;
}

static inline void
vhost_count_xcast_packets(struct vhost_queue *vq,
			  struct rte_mbuf *mbuf)
{
	struct rte_ether_addr *ea = NULL;
	struct vhost_stats *pstats = &vq->stats;

	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
	if (rte_is_multicast_ether_addr(ea)) {
		if (rte_is_broadcast_ether_addr(ea))
			pstats->xstats[VHOST_BROADCAST_PKT]++;
		else
			pstats->xstats[VHOST_MULTICAST_PKT]++;
	} else {
		pstats->xstats[VHOST_UNICAST_PKT]++;
	}
}

static void
vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
			   uint16_t count, uint64_t nb_bytes,
			   uint64_t nb_missed)
{
	uint32_t pkt_len = 0;
	uint64_t i = 0;
	uint64_t index;
	struct vhost_stats *pstats = &vq->stats;

	pstats->xstats[VHOST_BYTE] += nb_bytes;
	pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
	pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;

	for (i = 0; i < count; i++) {
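		/*
		 * Per-packet classification: bump the total packet count,
		 * then the size-bucket counter.  For 64 < pkt_len < 1024 the
		 * bucket index comes from the position of the most
		 * significant bit: 32 - clz(pkt_len) - 5 maps 65..127 to
		 * VHOST_65_TO_127_PKT, 128..255 to VHOST_128_TO_255_PKT and
		 * so on, relying on those enum entries being consecutive.
		 * Finally the destination MAC decides unicast, multicast or
		 * broadcast.
		 */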
		pstats->xstats[VHOST_PKT]++;
		pkt_len = bufs[i]->pkt_len;
		if (pkt_len == 64) {
			pstats->xstats[VHOST_64_PKT]++;
		} else if (pkt_len > 64 && pkt_len < 1024) {
			index = (sizeof(pkt_len) * 8)
				- __builtin_clz(pkt_len) - 5;
			pstats->xstats[index]++;
		} else {
			if (pkt_len < 64)
				pstats->xstats[VHOST_UNDERSIZE_PKT]++;
			else if (pkt_len <= 1522)
				pstats->xstats[VHOST_1024_TO_1522_PKT]++;
			else if (pkt_len > 1522)
				pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
		}
		vhost_count_xcast_packets(vq, bufs[i]);
	}
}

static uint16_t
eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_rx = 0;
	uint16_t nb_receive = nb_bufs;
	uint64_t nb_bytes = 0;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	/* Dequeue packets from guest TX queue */
	while (nb_receive) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_receive,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
						  r->mb_pool, &bufs[nb_rx],
						  num);

		nb_rx += nb_pkts;
		nb_receive -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_rx;

	for (i = 0; likely(i < nb_rx); i++) {
		bufs[i]->port = r->port;
		bufs[i]->vlan_tci = 0;

		if (r->internal->vlan_strip)
			rte_vlan_strip(bufs[i]);

		nb_bytes += bufs[i]->pkt_len;
	}

	r->stats.bytes += nb_bytes;
	vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);

out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_rx;
}

static uint16_t
eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_tx = 0;
	uint16_t nb_send = 0;
	uint64_t nb_bytes = 0;
	uint64_t nb_missed = 0;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	for (i = 0; i < nb_bufs; i++) {
		struct rte_mbuf *m = bufs[i];

		/* Do VLAN tag insertion */
		if (m->ol_flags & PKT_TX_VLAN_PKT) {
			int error = rte_vlan_insert(&m);
			if (unlikely(error)) {
				rte_pktmbuf_free(m);
				continue;
			}
		}

		bufs[nb_send] = m;
		++nb_send;
	}

	/* Enqueue packets to guest RX queue */
	while (nb_send) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_send,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
						  &bufs[nb_tx], num);

		nb_tx += nb_pkts;
		nb_send -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	for (i = 0; likely(i < nb_tx); i++)
		nb_bytes += bufs[i]->pkt_len;

	nb_missed = nb_bufs - nb_tx;

	r->stats.pkts += nb_tx;
	r->stats.bytes += nb_bytes;
	r->stats.missed_pkts += nb_bufs - nb_tx;

	vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);

	/* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
	 * ifHCOutBroadcastPkts counters are increased when packets are not
	 * transmitted successfully.
	 */
	for (i = nb_tx; i < nb_bufs; i++)
		vhost_count_xcast_packets(r, bufs[i]);

	for (i = 0; likely(i < nb_tx); i++)
		rte_pktmbuf_free(bufs[i]);
out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_tx;
}

static inline struct internal_list *
find_internal_resource(char *ifname)
{
	int found = 0;
	struct internal_list *list;
	struct pmd_internal *internal;

	if (ifname == NULL)
		return NULL;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		internal = list->eth_dev->data->dev_private;
		if (!strcmp(internal->iface_name, ifname)) {
			found = 1;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	if (!found)
		return NULL;

	return list;
}

static int
eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
{
	struct rte_intr_handle *handle = eth_dev->intr_handle;
	struct rte_epoll_event rev;
	int epfd, ret;

	if (!handle)
		return 0;

	if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
		return 0;

	VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
			rxq_idx);

	if (handle->elist[rxq_idx].fd != -1)
		VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n",
				handle->elist[rxq_idx].fd);

	/*
	 * First remove invalid epoll event, and then install
	 * the new one. May be solved with a proper API in the
	 * future.
	 */
	epfd = handle->elist[rxq_idx].epfd;
	rev = handle->elist[rxq_idx];
	ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
			    &handle->elist[rxq_idx]);
	if (ret) {
		VHOST_LOG(ERR, "Delete epoll event failed.\n");
		return ret;
	}

	rev.fd = handle->efds[rxq_idx];
	handle->elist[rxq_idx] = rev;
	ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
			    &handle->elist[rxq_idx]);
	if (ret) {
		VHOST_LOG(ERR, "Add epoll event failed.\n");
		return ret;
	}

	return 0;
}

static int
eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int old_intr_enable, ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	rte_spinlock_lock(&vq->intr_lock);
	old_intr_enable = vq->intr_enable;
	vq->intr_enable = 1;
	ret = eth_vhost_update_intr(dev, qid);
	rte_spinlock_unlock(&vq->intr_lock);

	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid);
		vq->intr_enable = old_intr_enable;
		return ret;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
	rte_wmb();

	return ret;
}

static int
eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
	rte_wmb();

	vq->intr_enable = 0;

	return 0;
}

static void
eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
{
	struct rte_intr_handle *intr_handle = dev->intr_handle;

	if (intr_handle) {
		if (intr_handle->intr_vec)
			free(intr_handle->intr_vec);
		free(intr_handle);
	}

	dev->intr_handle = NULL;
}

static int
eth_vhost_install_intr(struct rte_eth_dev *dev)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int nb_rxq = dev->data->nb_rx_queues;
	int i;
	int ret;

	/* uninstall firstly if we are reconnecting */
	if (dev->intr_handle)
		eth_vhost_uninstall_intr(dev);

	dev->intr_handle = malloc(sizeof(*dev->intr_handle));
	if (!dev->intr_handle) {
		VHOST_LOG(ERR, "Fail to allocate intr_handle\n");
		return -ENOMEM;
	}
	memset(dev->intr_handle, 0, sizeof(*dev->intr_handle));

	dev->intr_handle->efd_counter_size = sizeof(uint64_t);

	dev->intr_handle->intr_vec =
		malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0]));

	if (!dev->intr_handle->intr_vec) {
		VHOST_LOG(ERR,
			"Failed to allocate memory for interrupt vector\n");
		free(dev->intr_handle);
		return -ENOMEM;
	}

	VHOST_LOG(INFO, "Prepare intr vec\n");
	for (i = 0; i < nb_rxq; i++) {
		dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
		dev->intr_handle->efds[i] = -1;
		vq = dev->data->rx_queues[i];
		if (!vq) {
			VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
			continue;
		}

		ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
		if (ret < 0) {
			VHOST_LOG(INFO,
				"Failed to get rxq-%d's vring, skip!\n", i);
			continue;
		}

		if (vring.kickfd < 0) {
			VHOST_LOG(INFO,
				"rxq-%d's kickfd is invalid, skip!\n", i);
			continue;
		}
		dev->intr_handle->efds[i] = vring.kickfd;
		VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
	}

	dev->intr_handle->nb_efd = nb_rxq;
	dev->intr_handle->max_intr = nb_rxq + 1;
	dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;

	return 0;
}

static void
update_queuing_status(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	struct vhost_queue *vq;
	unsigned int i;
	int allow_queuing = 1;

	if (!dev->data->rx_queues || !dev->data->tx_queues)
		return;

	if (rte_atomic32_read(&internal->started) == 0 ||
	    rte_atomic32_read(&internal->dev_attached) == 0)
		allow_queuing = 0;

	/* Wait until rx/tx_pkt_burst stops accessing vhost device */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}
}

static void
queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
{
	struct vhost_queue *vq;
	int i;

	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
		vq = eth_dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
		vq = eth_dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
}

static int
new_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	struct pmd_internal *internal;
	struct rte_eth_conf *dev_conf;
	unsigned i;
	char ifname[PATH_MAX];
#ifdef RTE_LIBRTE_VHOST_NUMA
	int newnode;
#endif

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(INFO, "Invalid device name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;
	dev_conf = &eth_dev->data->dev_conf;

#ifdef RTE_LIBRTE_VHOST_NUMA
	newnode = rte_vhost_get_numa_node(vid);
	if (newnode >= 0)
		eth_dev->data->numa_node = newnode;
#endif

	internal->vid = vid;
	if (rte_atomic32_read(&internal->started) == 1) {
		queue_setup(eth_dev, internal);

		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.");
				return -1;
			}
		}
	} else {
		VHOST_LOG(INFO, "RX/TX queues not exist yet\n");
	}

	for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
		rte_vhost_enable_guest_notification(vid, i, 0);

	rte_vhost_get_mtu(vid, &eth_dev->data->mtu);

	eth_dev->data->dev_link.link_status = ETH_LINK_UP;

	rte_atomic32_set(&internal->dev_attached, 1);
	update_queuing_status(eth_dev);

	VHOST_LOG(INFO, "Vhost device %d created\n", vid);

	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);

	return 0;
}

static void
destroy_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct pmd_internal *internal;
	struct vhost_queue *vq;
	struct internal_list *list;
	char ifname[PATH_MAX];
	unsigned i;
	struct rte_vhost_vring_state *state;

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return;
	}
	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;

	rte_atomic32_set(&internal->dev_attached, 0);
	update_queuing_status(eth_dev);

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;

	if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
			vq = eth_dev->data->rx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
		for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
			vq = eth_dev->data->tx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
	}

	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		state->cur[i] = false;
		state->seen[i] = false;
	}
	state->max_vring = 0;
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
	eth_vhost_uninstall_intr(eth_dev);

	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static int
vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
{
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct vhost_queue *vq;
	struct rte_vhost_vring vring;
	int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
	int ret = 0;

	/*
	 * The vring kickfd may be changed after the new device notification.
	 * Update it when the vring state is updated.
	 */
	if (rx_idx >= 0 && rx_idx < eth_dev->data->nb_rx_queues &&
	    rte_atomic32_read(&internal->dev_attached) &&
	    rte_atomic32_read(&internal->started) &&
	    dev_conf->intr_conf.rxq) {
		ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
		if (ret) {
			VHOST_LOG(ERR, "Failed to get vring %d information.\n",
					vring_id);
			return ret;
		}
		eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;

		vq = eth_dev->data->rx_queues[rx_idx];
		if (!vq) {
			VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
			return -1;
		}

		rte_spinlock_lock(&vq->intr_lock);
		if (vq->intr_enable)
			ret = eth_vhost_update_intr(eth_dev, rx_idx);
		rte_spinlock_unlock(&vq->intr_lock);
	}

	return ret;
}

static int
vring_state_changed(int vid, uint16_t vring, int enable)
{
	struct rte_vhost_vring_state *state;
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	char ifname[PATH_MAX];

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	/* won't be NULL */
	state = vring_states[eth_dev->data->port_id];

	if (enable && vring_conf_update(vid, eth_dev, vring))
		VHOST_LOG(INFO, "Failed to update vring-%d configuration.\n",
			  (int)vring);

	rte_spinlock_lock(&state->lock);
	if (state->cur[vring] == enable) {
		rte_spinlock_unlock(&state->lock);
		return 0;
	}
	state->cur[vring] = enable;
	state->max_vring = RTE_MAX(vring, state->max_vring);
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "vring%u is %s\n",
			vring, enable ? "enabled" : "disabled");

	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);

	return 0;
}

static struct vhost_device_ops vhost_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.vring_state_changed = vring_state_changed,
};

static int
vhost_driver_setup(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct internal_list *list = NULL;
	struct rte_vhost_vring_state *vring_state = NULL;
	unsigned int numa_node = eth_dev->device->numa_node;
	const char *name = eth_dev->device->name;

	/* Don't try to setup again if it has already been done. */
	list = find_internal_resource(internal->iface_name);
	if (list)
		return 0;

	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
	if (list == NULL)
		return -1;

	vring_state = rte_zmalloc_socket(name, sizeof(*vring_state),
					 0, numa_node);
	if (vring_state == NULL)
		goto free_list;

	list->eth_dev = eth_dev;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_INSERT_TAIL(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);

	rte_spinlock_init(&vring_state->lock);
	vring_states[eth_dev->data->port_id] = vring_state;

	if (rte_vhost_driver_register(internal->iface_name, internal->flags))
		goto list_remove;

	if (internal->disable_flags) {
		if (rte_vhost_driver_disable_features(internal->iface_name,
						      internal->disable_flags))
			goto drv_unreg;
	}

	if (rte_vhost_driver_callback_register(internal->iface_name,
					       &vhost_ops) < 0) {
		VHOST_LOG(ERR, "Can't register callbacks\n");
		goto drv_unreg;
	}

	if (rte_vhost_driver_start(internal->iface_name) < 0) {
		VHOST_LOG(ERR, "Failed to start driver for %s\n",
			  internal->iface_name);
		goto drv_unreg;
	}

	return 0;

drv_unreg:
	rte_vhost_driver_unregister(internal->iface_name);
list_remove:
	vring_states[eth_dev->data->port_id] = NULL;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_REMOVE(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);
	rte_free(vring_state);
free_list:
	rte_free(list);

	return -1;
}

int
rte_eth_vhost_get_queue_event(uint16_t port_id,
		struct rte_eth_vhost_queue_event *event)
{
	struct rte_vhost_vring_state *state;
	unsigned int i;
	int idx;

	if (port_id >= RTE_MAX_ETHPORTS) {
		VHOST_LOG(ERR, "Invalid port id\n");
		return -1;
	}

	state = vring_states[port_id];
	if (!state) {
		VHOST_LOG(ERR, "Unused port\n");
		return -1;
	}

	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		idx = state->index++ % (state->max_vring + 1);

		if (state->cur[idx] != state->seen[idx]) {
			state->seen[idx] = state->cur[idx];
			event->queue_id = idx / 2;
			event->rx = idx & 1;
			event->enable = state->cur[idx];
			rte_spinlock_unlock(&state->lock);
			return 0;
		}
	}
	rte_spinlock_unlock(&state->lock);

	return -1;
}

int
rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
{
	struct internal_list *list;
	struct rte_eth_dev *eth_dev;
	struct vhost_queue *vq;
	int vid = -1;

	if (!rte_eth_dev_is_valid_port(port_id))
		return -1;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		eth_dev = list->eth_dev;
		if (eth_dev->data->port_id == port_id) {
			vq = eth_dev->data->rx_queues[0];
			if (vq) {
				vid = vq->vid;
			}
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	return vid;
}

static int
eth_dev_configure(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;

	/* NOTE: the same process has to operate a vhost interface
	 * from beginning to end (from eth_dev configure to eth_dev close).
	 * It is user's responsibility at the moment.
	 */
	if (vhost_driver_setup(dev) < 0)
		return -1;

	internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);

	return 0;
}

static int
eth_dev_start(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;

	queue_setup(eth_dev, internal);

	if (rte_atomic32_read(&internal->dev_attached) == 1) {
		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.");
				return -1;
			}
		}
	}

	rte_atomic32_set(&internal->started, 1);
	update_queuing_status(eth_dev);

	return 0;
}

static int
eth_dev_stop(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;

	dev->data->dev_started = 0;
	rte_atomic32_set(&internal->started, 0);
	update_queuing_status(dev);

	return 0;
}

static int
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal;
	struct internal_list *list;
	unsigned int i, ret;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	internal = dev->data->dev_private;
	if (!internal)
		return 0;

	ret = eth_dev_stop(dev);

	list = find_internal_resource(internal->iface_name);
	if (list) {
		rte_vhost_driver_unregister(internal->iface_name);
		pthread_mutex_lock(&internal_list_lock);
		TAILQ_REMOVE(&internal_list, list, next);
		pthread_mutex_unlock(&internal_list_lock);
		rte_free(list);
	}

	if (dev->data->rx_queues)
		for (i = 0; i < dev->data->nb_rx_queues; i++)
			rte_free(dev->data->rx_queues[i]);

	if (dev->data->tx_queues)
		for (i = 0; i < dev->data->nb_tx_queues; i++)
			rte_free(dev->data->tx_queues[i]);

	rte_free(internal->iface_name);
	rte_free(internal);

	dev->data->dev_private = NULL;

	rte_free(vring_states[dev->data->port_id]);
	vring_states[dev->data->port_id] = NULL;

	return ret;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		   uint16_t nb_rx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mb_pool)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n");
		return -ENOMEM;
	}

	vq->mb_pool = mb_pool;
	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
	rte_spinlock_init(&vq->intr_lock);
	dev->data->rx_queues[rx_queue_id] = vq;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n");
		return -ENOMEM;
	}
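
	/*
	 * Virtqueue numbering follows the virtio convention: each ethdev
	 * queue pair maps to two guest virtqueues, RX at the even index
	 * (VIRTIO_RXQ) and TX at the odd one (VIRTIO_TXQ).  An ethdev TX
	 * queue therefore writes into the guest's RX virtqueue
	 * (tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ), while eth_rx_queue_setup()
	 * above reads from the guest's TX virtqueue.
	 */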
	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
	rte_spinlock_init(&vq->intr_lock);
	dev->data->tx_queues[tx_queue_id] = vq;

	return 0;
}

static int
eth_dev_info(struct rte_eth_dev *dev,
	     struct rte_eth_dev_info *dev_info)
{
	struct pmd_internal *internal;

	internal = dev->data->dev_private;
	if (internal == NULL) {
		VHOST_LOG(ERR, "Invalid device specified\n");
		return -ENODEV;
	}

	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)-1;
	dev_info->max_rx_queues = internal->max_queues;
	dev_info->max_tx_queues = internal->max_queues;
	dev_info->min_rx_bufsize = 0;

	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
				DEV_TX_OFFLOAD_VLAN_INSERT;
	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;

	return 0;
}

static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned i;
	unsigned long rx_total = 0, tx_total = 0;
	unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
	struct vhost_queue *vq;

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		stats->q_ipackets[i] = vq->stats.pkts;
		rx_total += stats->q_ipackets[i];

		stats->q_ibytes[i] = vq->stats.bytes;
		rx_total_bytes += stats->q_ibytes[i];
	}

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		stats->q_opackets[i] = vq->stats.pkts;
		tx_total += stats->q_opackets[i];

		stats->q_obytes[i] = vq->stats.bytes;
		tx_total_bytes += stats->q_obytes[i];
	}

	stats->ipackets = rx_total;
	stats->opackets = tx_total;
	stats->ibytes = rx_total_bytes;
	stats->obytes = tx_total_bytes;

	return 0;
}

static int
eth_stats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq;
	unsigned i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
		vq->stats.missed_pkts = 0;
	}

	return 0;
}

static void
eth_queue_release(void *q)
{
	rte_free(q);
}

static int
eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused)
{
	/*
	 * vHost does not hang onto mbuf. eth_vhost_tx() copies packet data
	 * and releases mbuf, so nothing to cleanup.
	 */
	return 0;
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}

static uint32_t
eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct vhost_queue *vq;

	vq = dev->data->rx_queues[rx_queue_id];
	if (vq == NULL)
		return 0;

	return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.tx_done_cleanup = eth_tx_done_cleanup,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
	.xstats_reset = vhost_dev_xstats_reset,
	.xstats_get = vhost_dev_xstats_get,
	.xstats_get_names = vhost_dev_xstats_get_names,
	.rx_queue_intr_enable = eth_rxq_intr_enable,
	.rx_queue_intr_disable = eth_rxq_intr_disable,
};

static int
eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
	int16_t queues, const unsigned int numa_node, uint64_t flags,
	uint64_t disable_flags)
{
	const char *name = rte_vdev_device_name(dev);
	struct rte_eth_dev_data *data;
	struct pmd_internal *internal = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct rte_ether_addr *eth_addr = NULL;

	VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n",
		numa_node);

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
	if (eth_dev == NULL)
		goto error;
	data = eth_dev->data;

	eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
	if (eth_addr == NULL)
		goto error;
	data->mac_addrs = eth_addr;
	*eth_addr = base_eth_addr;
	eth_addr->addr_bytes[5] = eth_dev->data->port_id;

	/* now put it all together
	 * - store queue data in internal,
	 * - point eth_dev_data to internals
	 * - and point eth_dev structure to new eth_dev_data structure
	 */
	internal = eth_dev->data->dev_private;
	internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1,
						 0, numa_node);
	if (internal->iface_name == NULL)
		goto error;
	strcpy(internal->iface_name, iface_name);

	data->nb_rx_queues = queues;
	data->nb_tx_queues = queues;
	internal->max_queues = queues;
	internal->vid = -1;
	internal->flags = flags;
	internal->disable_flags = disable_flags;
	data->dev_link = pmd_link;
	data->dev_flags = RTE_ETH_DEV_INTR_LSC |
				RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
	data->promiscuous = 1;
	data->all_multicast = 1;

	eth_dev->dev_ops = &ops;
	eth_dev->rx_queue_count = eth_rx_queue_count;

	/* finally assign rx and tx ops */
	eth_dev->rx_pkt_burst = eth_vhost_rx;
	eth_dev->tx_pkt_burst = eth_vhost_tx;

	rte_eth_dev_probing_finish(eth_dev);
	return 0;

error:
	if (internal)
		rte_free(internal->iface_name);
	rte_eth_dev_release_port(eth_dev);

	return -1;
}

static inline int
open_iface(const char *key __rte_unused, const char *value, void *extra_args)
{
	const char **iface_name = extra_args;

	if (value == NULL)
		return -1;

	*iface_name = value;

	return 0;
}

static inline int
open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
	uint16_t *n = extra_args;

	if (value == NULL || extra_args == NULL)
		return -EINVAL;

	*n = (uint16_t)strtoul(value, NULL, 0);
	if (*n == USHRT_MAX && errno == ERANGE)
		return -1;

	return 0;
}

static int
rte_pmd_vhost_probe(struct rte_vdev_device *dev)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;
	char *iface_name;
	uint16_t queues;
	uint64_t flags = 0;
	uint64_t disable_flags = 0;
	int client_mode = 0;
	int iommu_support = 0;
	int postcopy_support = 0;
	int tso = 0;
	int linear_buf = 0;
	int ext_buf = 0;
	struct rte_eth_dev *eth_dev;
	const char *name = rte_vdev_device_name(dev);

	VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			VHOST_LOG(ERR, "Failed to probe %s\n", name);
			return -1;
		}
		eth_dev->rx_pkt_burst = eth_vhost_rx;
		eth_dev->tx_pkt_burst = eth_vhost_tx;
		eth_dev->dev_ops = &ops;
		if (dev->device.numa_node == SOCKET_ID_ANY)
			dev->device.numa_node = rte_socket_id();
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL)
		return -1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
					 &open_iface, &iface_name);
		if (ret < 0)
			goto out_free;
	} else {
		ret = -1;
		goto out_free;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
					 &open_int, &queues);
		if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
			goto out_free;

	} else
		queues = 1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
					 &open_int, &client_mode);
		if (ret < 0)
			goto out_free;

		if (client_mode)
			flags |= RTE_VHOST_USER_CLIENT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
					 &open_int, &iommu_support);
		if (ret < 0)
			goto out_free;

		if (iommu_support)
			flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
					 &open_int, &postcopy_support);
		if (ret < 0)
			goto out_free;

		if (postcopy_support)
			flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_VIRTIO_NET_F_HOST_TSO) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
				&open_int, &tso);
		if (ret < 0)
			goto out_free;

		if (tso == 0) {
			disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
			disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
		}
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_LINEAR_BUF) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_LINEAR_BUF,
				&open_int, &linear_buf);
		if (ret < 0)
			goto out_free;

		if (linear_buf == 1)
			flags |= RTE_VHOST_USER_LINEARBUF_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_EXT_BUF) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_EXT_BUF,
				&open_int, &ext_buf);
		if (ret < 0)
			goto out_free;

		if (ext_buf == 1)
			flags |= RTE_VHOST_USER_EXTBUF_SUPPORT;
	}

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

	ret = eth_dev_vhost_create(dev, iface_name, queues,
				   dev->device.numa_node, flags,
				   disable_flags);
	if (ret == -1)
		VHOST_LOG(ERR, "Failed to create %s\n", name);

out_free:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
rte_pmd_vhost_remove(struct rte_vdev_device *dev)
{
	const char *name;
	struct rte_eth_dev *eth_dev = NULL;

	name = rte_vdev_device_name(dev);
	VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name);

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return 0;

	eth_dev_close(eth_dev);
	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_vhost_drv = {
	.probe = rte_pmd_vhost_probe,
	.remove = rte_pmd_vhost_remove,
};

RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
	"iface=<ifc> "
	"queues=<int> "
	"client=<0|1> "
	"iommu-support=<0|1> "
	"postcopy-support=<0|1> "
	"tso=<0|1> "
	"linear-buffer=<0|1> "
	"ext-buffer=<0|1>");
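
/*
 * Usage sketch (the socket path, core list and application invocation are
 * illustrative only; the kvargs match the parameter string registered
 * above):
 *
 *	dpdk-testpmd -l 0-1 \
 *		--vdev 'net_vhost0,iface=/tmp/sock0,queues=1,client=1' -- -i
 *
 * creates one vhost-user port backed by the /tmp/sock0 socket in client
 * mode.  The remaining kvargs (iommu-support, postcopy-support, tso,
 * linear-buffer, ext-buffer) toggle the corresponding optional vhost
 * features in the same 0/1 fashion.
 */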