/*-
 *   BSD LICENSE
 *
 *   Copyright (c) 2016 IGEL Co., Ltd.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of IGEL Co.,Ltd. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>

#include <rte_mbuf.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_vdev.h>
#include <rte_kvargs.h>
#include <rte_vhost.h>
#include <rte_spinlock.h>

#include "rte_eth_vhost.h"

enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};

#define ETH_VHOST_IFACE_ARG		"iface"
#define ETH_VHOST_QUEUES_ARG		"queues"
#define ETH_VHOST_CLIENT_ARG		"client"
#define ETH_VHOST_DEQUEUE_ZERO_COPY	"dequeue-zero-copy"
#define VHOST_MAX_PKT_BURST 32

static const char *valid_arguments[] = {
	ETH_VHOST_IFACE_ARG,
	ETH_VHOST_QUEUES_ARG,
	ETH_VHOST_CLIENT_ARG,
	ETH_VHOST_DEQUEUE_ZERO_COPY,
	NULL
};

static struct ether_addr base_eth_addr = {
	.addr_bytes = {
		0x56 /* V */,
		0x48 /* H */,
		0x4F /* O */,
		0x53 /* S */,
		0x54 /* T */,
		0x00
	}
};

enum vhost_xstats_pkts {
	VHOST_UNDERSIZE_PKT = 0,
	VHOST_64_PKT,
	VHOST_65_TO_127_PKT,
	VHOST_128_TO_255_PKT,
	VHOST_256_TO_511_PKT,
	VHOST_512_TO_1023_PKT,
	VHOST_1024_TO_1522_PKT,
	VHOST_1523_TO_MAX_PKT,
	VHOST_BROADCAST_PKT,
	VHOST_MULTICAST_PKT,
	VHOST_UNICAST_PKT,
	VHOST_ERRORS_PKT,
	VHOST_ERRORS_FRAGMENTED,
	VHOST_ERRORS_JABBER,
	VHOST_UNKNOWN_PROTOCOL,
	VHOST_XSTATS_MAX,
};

struct vhost_stats {
	uint64_t pkts;
	uint64_t bytes;
	uint64_t missed_pkts;
	uint64_t xstats[VHOST_XSTATS_MAX];
};

struct vhost_queue {
	int vid;
	rte_atomic32_t allow_queuing;
	rte_atomic32_t while_queuing;
	struct pmd_internal *internal;
	struct rte_mempool *mb_pool;
	uint8_t port;
	uint16_t virtqueue_id;
	struct vhost_stats stats;
};

struct pmd_internal {
	rte_atomic32_t dev_attached;
	char *dev_name;
	char *iface_name;
	uint16_t max_queues;
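	/*
	 * 'started' tracks eth_dev_start()/eth_dev_stop(); together with
	 * 'dev_attached' it is read by update_queuing_status() to decide
	 * whether the rx/tx burst functions may access the vhost device.
	 */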
	rte_atomic32_t started;
};

struct internal_list {
	TAILQ_ENTRY(internal_list) next;
	struct rte_eth_dev *eth_dev;
};

TAILQ_HEAD(internal_list_head, internal_list);
static struct internal_list_head internal_list =
	TAILQ_HEAD_INITIALIZER(internal_list);

static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;

static struct rte_eth_link pmd_link = {
	.link_speed = 10000,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN
};

struct rte_vhost_vring_state {
	rte_spinlock_t lock;

	bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
	bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
	unsigned int index;
	unsigned int max_vring;
};

static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];

#define VHOST_XSTATS_NAME_SIZE 64

struct vhost_xstats_name_off {
	char name[VHOST_XSTATS_NAME_SIZE];
	uint64_t offset;
};

/* [rx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.pkts)},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.bytes)},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.missed_pkts)},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
	{"fragmented_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])},
	{"jabber_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])},
	{"unknown_protos_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])},
};

/* [tx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.pkts)},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.bytes)},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.missed_pkts)},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
};

#define VHOST_NB_XSTATS_RXPORT (sizeof(vhost_rxport_stat_strings) / \
				sizeof(vhost_rxport_stat_strings[0]))

#define VHOST_NB_XSTATS_TXPORT (sizeof(vhost_txport_stat_strings) / \
				sizeof(vhost_txport_stat_strings[0]))

static void
vhost_dev_xstats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq = NULL;
	unsigned int i = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}
}

static int
vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
			   struct rte_eth_xstat_name *xstats_names,
			   unsigned int limit __rte_unused)
{
	unsigned int t = 0;
	int count = 0;
	int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (!xstats_names)
		return nstats;
	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "rx_%s", vhost_rxport_stat_strings[t].name);
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "tx_%s", vhost_txport_stat_strings[t].name);
		count++;
	}
	return count;
}

static int
vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
		     unsigned int n)
{
	unsigned int i;
	unsigned int t;
	unsigned int count = 0;
	struct vhost_queue *vq = NULL;
	unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (n < nxstats)
		return nxstats;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
				+ vq->stats.xstats[VHOST_MULTICAST_PKT]);
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
				+ vq->stats.missed_pkts
				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
				+ vq->stats.xstats[VHOST_MULTICAST_PKT]);
	}
	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			vq = dev->data->rx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_rxport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			vq = dev->data->tx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_txport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	return count;
}

static inline void
vhost_count_multicast_broadcast(struct vhost_queue *vq,
				struct rte_mbuf *mbuf)
{
	struct ether_addr *ea = NULL;
	struct vhost_stats *pstats = &vq->stats;

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			pstats->xstats[VHOST_BROADCAST_PKT]++;
		else
			pstats->xstats[VHOST_MULTICAST_PKT]++;
	}
}

static void
vhost_update_packet_xstats(struct vhost_queue *vq,
			   struct rte_mbuf **bufs,
			   uint16_t count)
{
	uint32_t pkt_len = 0;
	uint64_t i = 0;
	uint64_t index;
	struct vhost_stats *pstats = &vq->stats;

	for (i = 0; i < count; i++) {
		pkt_len = bufs[i]->pkt_len;
		if (pkt_len == 64) {
			pstats->xstats[VHOST_64_PKT]++;
		} else if (pkt_len > 64 && pkt_len < 1024) {
			index = (sizeof(pkt_len) * 8)
				- __builtin_clz(pkt_len) - 5;
			pstats->xstats[index]++;
		} else {
			if (pkt_len < 64)
				pstats->xstats[VHOST_UNDERSIZE_PKT]++;
			else if (pkt_len <= 1522)
				pstats->xstats[VHOST_1024_TO_1522_PKT]++;
			else if (pkt_len > 1522)
				pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
		}
		vhost_count_multicast_broadcast(vq, bufs[i]);
	}
}

static uint16_t
eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_rx = 0;
	uint16_t nb_receive = nb_bufs;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	/* Dequeue packets from guest TX queue */
	while (nb_receive) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_receive,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
						  r->mb_pool, &bufs[nb_rx],
						  num);

		nb_rx += nb_pkts;
		nb_receive -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_rx;

	for (i = 0; likely(i < nb_rx); i++) {
		bufs[i]->port = r->port;
		r->stats.bytes += bufs[i]->pkt_len;
	}

	vhost_update_packet_xstats(r, bufs, nb_rx);

out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_rx;
}

static uint16_t
eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_tx = 0;
	uint16_t nb_send = nb_bufs;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	/* Enqueue packets to guest RX queue */
	while (nb_send) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_send,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
						  &bufs[nb_tx], num);

		nb_tx += nb_pkts;
		nb_send -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_tx;
	r->stats.missed_pkts += nb_bufs - nb_tx;

	for (i = 0; likely(i < nb_tx); i++)
		r->stats.bytes += bufs[i]->pkt_len;

	vhost_update_packet_xstats(r, bufs, nb_tx);

	/* According to RFC2863 page42 section ifHCOutMulticastPkts and
	 * ifHCOutBroadcastPkts, the counters "multicast" and "broadcast"
	 * are increased when packets are not transmitted successfully.
	 */
	for (i = nb_tx; i < nb_bufs; i++)
		vhost_count_multicast_broadcast(r, bufs[i]);

	for (i = 0; likely(i < nb_tx); i++)
		rte_pktmbuf_free(bufs[i]);
out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_tx;
}

static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
	return 0;
}

static inline struct internal_list *
find_internal_resource(char *ifname)
{
	int found = 0;
	struct internal_list *list;
	struct pmd_internal *internal;

	if (ifname == NULL)
		return NULL;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		internal = list->eth_dev->data->dev_private;
		if (!strcmp(internal->iface_name, ifname)) {
			found = 1;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	if (!found)
		return NULL;

	return list;
}

static void
update_queuing_status(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	struct vhost_queue *vq;
	unsigned int i;
	int allow_queuing = 1;

	if (rte_atomic32_read(&internal->started) == 0 ||
	    rte_atomic32_read(&internal->dev_attached) == 0)
		allow_queuing = 0;

	/* Wait until rx/tx_pkt_burst stops accessing vhost device */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}
}

static int
new_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	struct pmd_internal *internal;
	struct vhost_queue *vq;
	unsigned i;
	char ifname[PATH_MAX];
#ifdef RTE_LIBRTE_VHOST_NUMA
	int newnode;
#endif

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		RTE_LOG(INFO, PMD, "Invalid device name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;

#ifdef RTE_LIBRTE_VHOST_NUMA
	newnode = rte_vhost_get_numa_node(vid);
	if (newnode >= 0)
		eth_dev->data->numa_node = newnode;
#endif

	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
		vq = eth_dev->data->rx_queues[i];
		if (vq == NULL)
			continue;
		vq->vid = vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
		vq = eth_dev->data->tx_queues[i];
		if (vq == NULL)
			continue;
		vq->vid = vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}

	for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
		rte_vhost_enable_guest_notification(vid, i, 0);

	rte_vhost_get_mtu(vid, &eth_dev->data->mtu);

	eth_dev->data->dev_link.link_status = ETH_LINK_UP;

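	/*
	 * Mark the device as attached before re-evaluating the per-queue
	 * allow_queuing flags, so the rx/tx burst paths are opened for
	 * this vhost connection.
	 */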
	rte_atomic32_set(&internal->dev_attached, 1);
	update_queuing_status(eth_dev);

	RTE_LOG(INFO, PMD, "New connection established\n");

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC,
				      NULL, NULL);

	return 0;
}

static void
destroy_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct pmd_internal *internal;
	struct vhost_queue *vq;
	struct internal_list *list;
	char ifname[PATH_MAX];
	unsigned i;
	struct rte_vhost_vring_state *state;

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
		return;
	}
	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;

	rte_atomic32_set(&internal->dev_attached, 0);
	update_queuing_status(eth_dev);

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;

	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
		vq = eth_dev->data->rx_queues[i];
		if (vq == NULL)
			continue;
		vq->vid = -1;
	}
	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
		vq = eth_dev->data->tx_queues[i];
		if (vq == NULL)
			continue;
		vq->vid = -1;
	}

	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		state->cur[i] = false;
		state->seen[i] = false;
	}
	state->max_vring = 0;
	rte_spinlock_unlock(&state->lock);

	RTE_LOG(INFO, PMD, "Connection closed\n");

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC,
				      NULL, NULL);
}

static int
vring_state_changed(int vid, uint16_t vring, int enable)
{
	struct rte_vhost_vring_state *state;
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	char ifname[PATH_MAX];

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	/* won't be NULL */
	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	state->cur[vring] = enable;
	state->max_vring = RTE_MAX(vring, state->max_vring);
	rte_spinlock_unlock(&state->lock);

	RTE_LOG(INFO, PMD, "vring%u is %s\n",
		vring, enable ? "enabled" : "disabled");

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE,
				      NULL, NULL);

	return 0;
}

static struct vhost_device_ops vhost_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.vring_state_changed = vring_state_changed,
};

int
rte_eth_vhost_get_queue_event(uint8_t port_id,
			      struct rte_eth_vhost_queue_event *event)
{
	struct rte_vhost_vring_state *state;
	unsigned int i;
	int idx;

	if (port_id >= RTE_MAX_ETHPORTS) {
		RTE_LOG(ERR, PMD, "Invalid port id\n");
		return -1;
	}

	state = vring_states[port_id];
	if (!state) {
		RTE_LOG(ERR, PMD, "Unused port\n");
		return -1;
	}

	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		idx = state->index++ % (state->max_vring + 1);

		if (state->cur[idx] != state->seen[idx]) {
			state->seen[idx] = state->cur[idx];
			event->queue_id = idx / 2;
			event->rx = idx & 1;
			event->enable = state->cur[idx];
			rte_spinlock_unlock(&state->lock);
			return 0;
		}
	}
	rte_spinlock_unlock(&state->lock);

	return -1;
}

int
rte_eth_vhost_get_vid_from_port_id(uint8_t port_id)
{
	struct internal_list *list;
	struct rte_eth_dev *eth_dev;
	struct vhost_queue *vq;
	int vid = -1;

	if (!rte_eth_dev_is_valid_port(port_id))
		return -1;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		eth_dev = list->eth_dev;
		if (eth_dev->data->port_id == port_id) {
			vq = eth_dev->data->rx_queues[0];
			if (vq)
				vid = vq->vid;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	return vid;
}

static int
eth_dev_start(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;

	rte_atomic32_set(&internal->started, 1);
	update_queuing_status(dev);

	return 0;
}

static void
eth_dev_stop(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;

	rte_atomic32_set(&internal->started, 0);
	update_queuing_status(dev);
}

static void
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal;
	struct internal_list *list;
	unsigned int i;

	internal = dev->data->dev_private;
	if (!internal)
		return;

	eth_dev_stop(dev);

	rte_vhost_driver_unregister(internal->iface_name);

	list = find_internal_resource(internal->iface_name);
	if (!list)
		return;

	pthread_mutex_lock(&internal_list_lock);
	TAILQ_REMOVE(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);
	rte_free(list);

	for (i = 0; i < dev->data->nb_rx_queues; i++)
		rte_free(dev->data->rx_queues[i]);
	for (i = 0; i < dev->data->nb_tx_queues; i++)
		rte_free(dev->data->tx_queues[i]);

	rte_free(dev->data->mac_addrs);
	free(internal->dev_name);
	free(internal->iface_name);
	rte_free(internal);

	dev->data->dev_private = NULL;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		   uint16_t nb_rx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mb_pool)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		RTE_LOG(ERR, PMD, "Failed to allocate memory for rx queue\n");
		return -ENOMEM;
	}

	vq->mb_pool = mb_pool;
	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
	dev->data->rx_queues[rx_queue_id] = vq;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		RTE_LOG(ERR, PMD, "Failed to allocate memory for tx queue\n");
		return -ENOMEM;
	}

	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
	dev->data->tx_queues[tx_queue_id] = vq;

	return 0;
}

static void
eth_dev_info(struct rte_eth_dev *dev,
	     struct rte_eth_dev_info *dev_info)
{
	struct pmd_internal *internal;

	internal = dev->data->dev_private;
	if (internal == NULL) {
		RTE_LOG(ERR, PMD, "Invalid device specified\n");
		return;
	}

	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)-1;
	dev_info->max_rx_queues = internal->max_queues;
	dev_info->max_tx_queues = internal->max_queues;
	dev_info->min_rx_bufsize = 0;
}

static void
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned i;
	unsigned long rx_total = 0, tx_total = 0, tx_missed_total = 0;
	unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
	struct vhost_queue *vq;

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		stats->q_ipackets[i] = vq->stats.pkts;
		rx_total += stats->q_ipackets[i];

		stats->q_ibytes[i] = vq->stats.bytes;
		rx_total_bytes += stats->q_ibytes[i];
	}

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		stats->q_opackets[i] = vq->stats.pkts;
		tx_missed_total += vq->stats.missed_pkts;
		tx_total += stats->q_opackets[i];

		stats->q_obytes[i] = vq->stats.bytes;
		tx_total_bytes += stats->q_obytes[i];
	}

	stats->ipackets = rx_total;
	stats->opackets = tx_total;
	stats->oerrors = tx_missed_total;
	stats->ibytes = rx_total_bytes;
	stats->obytes = tx_total_bytes;
}

static void
eth_stats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq;
	unsigned i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
		vq->stats.missed_pkts = 0;
	}
}

static void
eth_queue_release(void *q)
{
	rte_free(q);
}

static int
eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused)
{
	/*
	 * vHost does not hang onto mbuf. eth_vhost_tx() copies packet data
	 * and releases mbuf, so nothing to cleanup.
	 */
	return 0;
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}

static uint32_t
eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct vhost_queue *vq;

	vq = dev->data->rx_queues[rx_queue_id];
	if (vq == NULL)
		return 0;

	return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.tx_done_cleanup = eth_tx_done_cleanup,
	.rx_queue_count = eth_rx_queue_count,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
	.xstats_reset = vhost_dev_xstats_reset,
	.xstats_get = vhost_dev_xstats_get,
	.xstats_get_names = vhost_dev_xstats_get_names,
};

static struct rte_vdev_driver pmd_vhost_drv;

static int
eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
	int16_t queues, const unsigned int numa_node, uint64_t flags)
{
	const char *name = rte_vdev_device_name(dev);
	struct rte_eth_dev_data *data = NULL;
	struct pmd_internal *internal = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct ether_addr *eth_addr = NULL;
	struct rte_vhost_vring_state *vring_state = NULL;
	struct internal_list *list = NULL;

	RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n",
		numa_node);

	/* now do all data allocation - for eth_dev structure and internal
	 * (private) data
	 */
	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
	if (data == NULL)
		goto error;

	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
	if (list == NULL)
		goto error;

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
	if (eth_dev == NULL)
		goto error;

	eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
	if (eth_addr == NULL)
		goto error;
	*eth_addr = base_eth_addr;
	eth_addr->addr_bytes[5] = eth_dev->data->port_id;

	vring_state = rte_zmalloc_socket(name,
			sizeof(*vring_state), 0, numa_node);
	if (vring_state == NULL)
		goto error;

	/* now put it all together
	 * - store queue data in internal,
	 * - point eth_dev_data to internals
	 * - and point eth_dev structure to new eth_dev_data structure
	 */
	internal = eth_dev->data->dev_private;
	internal->dev_name = strdup(name);
	if (internal->dev_name == NULL)
		goto error;
	internal->iface_name = strdup(iface_name);
	if (internal->iface_name == NULL)
		goto error;

	list->eth_dev = eth_dev;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_INSERT_TAIL(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);

	rte_spinlock_init(&vring_state->lock);
	vring_states[eth_dev->data->port_id] = vring_state;

	/* We'll replace the 'data' originally allocated by eth_dev. So the
	 * vhost PMD resources won't be shared between multiple processes.
	 */
	rte_memcpy(data, eth_dev->data, sizeof(*data));
	eth_dev->data = data;

	data->nb_rx_queues = queues;
	data->nb_tx_queues = queues;
	internal->max_queues = queues;
	data->dev_link = pmd_link;
	data->mac_addrs = eth_addr;
	data->dev_flags =
		RTE_ETH_DEV_DETACHABLE | RTE_ETH_DEV_INTR_LSC;

	eth_dev->dev_ops = &ops;

	/* finally assign rx and tx ops */
	eth_dev->rx_pkt_burst = eth_vhost_rx;
	eth_dev->tx_pkt_burst = eth_vhost_tx;

	if (rte_vhost_driver_register(iface_name, flags))
		goto error;

	if (rte_vhost_driver_callback_register(iface_name, &vhost_ops) < 0) {
		RTE_LOG(ERR, PMD, "Can't register callbacks\n");
		goto error;
	}

	if (rte_vhost_driver_start(iface_name) < 0) {
		RTE_LOG(ERR, PMD, "Failed to start driver for %s\n",
			iface_name);
		goto error;
	}

	return data->port_id;

error:
	if (internal) {
		free(internal->iface_name);
		free(internal->dev_name);
	}
	rte_free(vring_state);
	rte_free(eth_addr);
	if (eth_dev)
		rte_eth_dev_release_port(eth_dev);
	rte_free(internal);
	rte_free(list);
	rte_free(data);

	return -1;
}

static inline int
open_iface(const char *key __rte_unused, const char *value, void *extra_args)
{
	const char **iface_name = extra_args;

	if (value == NULL)
		return -1;

	*iface_name = value;

	return 0;
}

static inline int
open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
	uint16_t *n = extra_args;

	if (value == NULL || extra_args == NULL)
		return -EINVAL;

	*n = (uint16_t)strtoul(value, NULL, 0);
	if (*n == USHRT_MAX && errno == ERANGE)
		return -1;

	return 0;
}

static int
rte_pmd_vhost_probe(struct rte_vdev_device *dev)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;
	char *iface_name;
	uint16_t queues;
	uint64_t flags = 0;
	int client_mode = 0;
	int dequeue_zero_copy = 0;

	RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n",
		rte_vdev_device_name(dev));

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL)
		return -1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
					 &open_iface, &iface_name);
		if (ret < 0)
			goto out_free;
	} else {
		ret = -1;
		goto out_free;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
					 &open_int, &queues);
		if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
			goto out_free;

	} else
		queues = 1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
					 &open_int, &client_mode);
		if (ret < 0)
			goto out_free;

		if (client_mode)
			flags |= RTE_VHOST_USER_CLIENT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY,
					 &open_int, &dequeue_zero_copy);
		if (ret < 0)
			goto out_free;

		if (dequeue_zero_copy)
			flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
	}

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

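	/*
	 * Create the ethdev and register the vhost-user socket; 'flags'
	 * carries the client and dequeue-zero-copy options parsed above.
	 */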
	eth_dev_vhost_create(dev, iface_name, queues, dev->device.numa_node,
		flags);

out_free:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
rte_pmd_vhost_remove(struct rte_vdev_device *dev)
{
	const char *name;
	struct rte_eth_dev *eth_dev = NULL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, PMD, "Un-Initializing pmd_vhost for %s\n", name);

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	eth_dev_close(eth_dev);

	rte_free(vring_states[eth_dev->data->port_id]);
	vring_states[eth_dev->data->port_id] = NULL;

	rte_free(eth_dev->data);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_vhost_drv = {
	.probe = rte_pmd_vhost_probe,
	.remove = rte_pmd_vhost_remove,
};

RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
	"iface=<ifc> "
	"queues=<int>");
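
/*
 * Illustrative usage (example values, not taken from this file): the PMD is
 * typically instantiated through an EAL --vdev argument such as
 *
 *   --vdev 'net_vhost0,iface=/tmp/sock0,queues=1,client=1'
 *
 * where "iface" names the vhost-user socket path, "queues" the number of
 * queue pairs, and "client" selects vhost-user client mode.
 */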