/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/types.h>
#include <sys/stat.h>

#include <rte_eal_memconfig.h>

#include "vhost.h"
#include "virtio_user_dev.h"
#include "../virtio_ethdev.h"

#define VIRTIO_USER_MEM_EVENT_CLB_NAME "virtio_user_mem_event_clb"

static int
virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
{
	/* Of all per-virtqueue messages, make sure VHOST_SET_VRING_CALL comes
	 * first, because vhost depends on this message to allocate the
	 * virtqueue pair.
	 */
	struct vhost_vring_file file;

	file.index = queue_sel;
	file.fd = dev->callfds[queue_sel];
	dev->ops->send_request(dev, VHOST_USER_SET_VRING_CALL, &file);

	return 0;
}

static int
virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
{
	struct vhost_vring_file file;
	struct vhost_vring_state state;
	struct vring *vring = &dev->vrings[queue_sel];
	struct vhost_vring_addr addr = {
		.index = queue_sel,
		.desc_user_addr = (uint64_t)(uintptr_t)vring->desc,
		.avail_user_addr = (uint64_t)(uintptr_t)vring->avail,
		.used_user_addr = (uint64_t)(uintptr_t)vring->used,
		.log_guest_addr = 0,
		.flags = 0, /* disable log */
	};

	state.index = queue_sel;
	state.num = vring->num;
	dev->ops->send_request(dev, VHOST_USER_SET_VRING_NUM, &state);

	state.index = queue_sel;
	state.num = 0; /* no reservation */
	dev->ops->send_request(dev, VHOST_USER_SET_VRING_BASE, &state);

	dev->ops->send_request(dev, VHOST_USER_SET_VRING_ADDR, &addr);

	/* Of all per-virtqueue messages, make sure VHOST_USER_SET_VRING_KICK
	 * comes last, because vhost depends on this message to judge if
	 * virtio is ready.
	 */
	file.index = queue_sel;
	file.fd = dev->kickfds[queue_sel];
	dev->ops->send_request(dev, VHOST_USER_SET_VRING_KICK, &file);

	return 0;
}
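
/*
 * Queue pair i maps to virtqueue indexes 2*i + VTNET_SQ_RQ_QUEUE_IDX (Rx)
 * and 2*i + VTNET_SQ_TQ_QUEUE_IDX (Tx). The helper below walks all Rx
 * queues first, then all Tx queues, applying the given callback to each.
 */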
static int
virtio_user_queue_setup(struct virtio_user_dev *dev,
			int (*fn)(struct virtio_user_dev *, uint32_t))
{
	uint32_t i, queue_sel;

	for (i = 0; i < dev->max_queue_pairs; ++i) {
		queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX;
		if (fn(dev, queue_sel) < 0) {
			PMD_DRV_LOG(INFO, "setup rx vq fails: %u", i);
			return -1;
		}
	}
	for (i = 0; i < dev->max_queue_pairs; ++i) {
		queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX;
		if (fn(dev, queue_sel) < 0) {
			PMD_DRV_LOG(INFO, "setup tx vq fails: %u", i);
			return -1;
		}
	}

	return 0;
}

int
is_vhost_user_by_type(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) == -1)
		return 0;

	return S_ISSOCK(sb.st_mode);
}

int
virtio_user_start_device(struct virtio_user_dev *dev)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	uint64_t features;
	int ret;

	/*
	 * XXX workaround!
	 *
	 * We need to make sure that the locks will be
	 * taken in the correct order to avoid deadlocks.
	 *
	 * Before releasing this lock, this thread should
	 * not trigger any memory hotplug events.
	 *
	 * This is a temporary workaround, and should be
	 * replaced when we get proper support from the
	 * memory subsystem in the future.
	 */
	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
	pthread_mutex_lock(&dev->mutex);

	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
		goto error;

	/* Step 0: tell vhost to create queues */
	if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0)
		goto error;

	/* Step 1: set features */
	features = dev->features;
	/* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */
	features &= ~(1ull << VIRTIO_NET_F_MAC);
	/* Strip VIRTIO_NET_F_CTRL_VQ, as devices do not really need to know */
	features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
	features &= ~(1ull << VIRTIO_NET_F_STATUS);
	ret = dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features);
	if (ret < 0)
		goto error;
	PMD_DRV_LOG(INFO, "set features: %" PRIx64, features);

	/* Step 2: share memory regions */
	ret = dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL);
	if (ret < 0)
		goto error;

	/* Step 3: kick queues */
	if (virtio_user_queue_setup(dev, virtio_user_kick_queue) < 0)
		goto error;

	/* Step 4: enable queues
	 * we enable the 1st queue pair by default.
	 */
	dev->ops->enable_qp(dev, 0, 1);

	dev->started = true;
	pthread_mutex_unlock(&dev->mutex);
	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);

	return 0;
error:
	pthread_mutex_unlock(&dev->mutex);
	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
	/* TODO: free resource here or caller to check */
	return -1;
}

int virtio_user_stop_device(struct virtio_user_dev *dev)
{
	struct vhost_vring_state state;
	uint32_t i;
	int error = 0;

	pthread_mutex_lock(&dev->mutex);
	if (!dev->started)
		goto out;

	for (i = 0; i < dev->max_queue_pairs; ++i)
		dev->ops->enable_qp(dev, i, 0);

	/* Stop the backend. */
	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
		state.index = i;
		if (dev->ops->send_request(dev, VHOST_USER_GET_VRING_BASE,
					   &state) < 0) {
			PMD_DRV_LOG(ERR, "get_vring_base failed, index=%u",
				    i);
			error = -1;
			goto out;
		}
	}

	dev->started = false;
out:
	pthread_mutex_unlock(&dev->mutex);

	return error;
}

static inline void
parse_mac(struct virtio_user_dev *dev, const char *mac)
{
	int i, r;
	uint32_t tmp[ETHER_ADDR_LEN];

	if (!mac)
		return;

	r = sscanf(mac, "%x:%x:%x:%x:%x:%x", &tmp[0],
		   &tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5]);
	if (r == ETHER_ADDR_LEN) {
		for (i = 0; i < ETHER_ADDR_LEN; ++i)
			dev->mac_addr[i] = (uint8_t)tmp[i];
		dev->mac_specified = 1;
	} else {
		/* ignore the wrong mac, use random mac */
		PMD_DRV_LOG(ERR, "wrong format of mac: %s", mac);
	}
}

static int
virtio_user_dev_init_notify(struct virtio_user_dev *dev)
{
	uint32_t i, j;
	int callfd;
	int kickfd;

	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; ++i) {
		if (i >= dev->max_queue_pairs * 2) {
			dev->kickfds[i] = -1;
			dev->callfds[i] = -1;
			continue;
		}

		/* We could use an invalid flag here, but some backends use
		 * kickfd and callfd as criteria to judge if the device is
		 * alive, so we use real eventfds in the end.
		 */
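		/* callfd is used by the backend to interrupt the frontend
		 * (vring call); kickfd is used by the frontend to notify the
		 * backend of new descriptors (vring kick).
		 */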
		callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
		if (callfd < 0) {
			PMD_DRV_LOG(ERR, "callfd error, %s", strerror(errno));
			break;
		}
		kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
		if (kickfd < 0) {
			PMD_DRV_LOG(ERR, "kickfd error, %s", strerror(errno));
			break;
		}
		dev->callfds[i] = callfd;
		dev->kickfds[i] = kickfd;
	}

	if (i < VIRTIO_MAX_VIRTQUEUES) {
		/* Only close the fds of fully initialized entries. */
		for (j = 0; j < i; ++j) {
			close(dev->callfds[j]);
			close(dev->kickfds[j]);
		}

		return -1;
	}

	return 0;
}

static int
virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
{
	uint32_t i;
	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];

	if (!eth_dev->intr_handle) {
		eth_dev->intr_handle = malloc(sizeof(*eth_dev->intr_handle));
		if (!eth_dev->intr_handle) {
			PMD_DRV_LOG(ERR, "fail to allocate intr_handle");
			return -1;
		}
		memset(eth_dev->intr_handle, 0, sizeof(*eth_dev->intr_handle));
	}

	for (i = 0; i < dev->max_queue_pairs; ++i)
		eth_dev->intr_handle->efds[i] = dev->callfds[i];
	eth_dev->intr_handle->nb_efd = dev->max_queue_pairs;
	eth_dev->intr_handle->max_intr = dev->max_queue_pairs + 1;
	eth_dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
	/* For virtio vdev, no need to read counter for clean */
	eth_dev->intr_handle->efd_counter_size = 0;
	eth_dev->intr_handle->fd = -1;
	if (dev->vhostfd >= 0)
		eth_dev->intr_handle->fd = dev->vhostfd;
	else if (dev->is_server)
		eth_dev->intr_handle->fd = dev->listenfd;

	return 0;
}

static void
virtio_user_mem_event_cb(enum rte_mem_event type __rte_unused,
			 const void *addr,
			 size_t len __rte_unused,
			 void *arg)
{
	struct virtio_user_dev *dev = arg;
	struct rte_memseg_list *msl;
	uint16_t i;

	/* ignore externally allocated memory */
	msl = rte_mem_virt2memseg_list(addr);
	if (msl->external)
		return;

	pthread_mutex_lock(&dev->mutex);

	if (dev->started == false)
		goto exit;

	/* Step 1: pause the active queues */
	for (i = 0; i < dev->queue_pairs; i++)
		dev->ops->enable_qp(dev, i, 0);

	/* Step 2: update memory regions */
	dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL);

	/* Step 3: resume the active queues */
	for (i = 0; i < dev->queue_pairs; i++)
		dev->ops->enable_qp(dev, i, 1);

exit:
	pthread_mutex_unlock(&dev->mutex);
}
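
/*
 * Select and set up the backend: vhost-user is used when the given path is
 * a unix socket (or when running in server mode), otherwise vhost-kernel is
 * used with one vhost fd and one tap fd per queue pair.
 */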
static int
virtio_user_dev_setup(struct virtio_user_dev *dev)
{
	uint32_t q;

	dev->vhostfd = -1;
	dev->vhostfds = NULL;
	dev->tapfds = NULL;

	if (dev->is_server) {
		if (access(dev->path, F_OK) == 0 &&
		    !is_vhost_user_by_type(dev->path)) {
			PMD_DRV_LOG(ERR, "Server mode doesn't support vhost-kernel!");
			return -1;
		}
		dev->ops = &virtio_ops_user;
	} else {
		if (is_vhost_user_by_type(dev->path)) {
			dev->ops = &virtio_ops_user;
		} else {
			dev->ops = &virtio_ops_kernel;

			dev->vhostfds = malloc(dev->max_queue_pairs *
					       sizeof(int));
			dev->tapfds = malloc(dev->max_queue_pairs *
					     sizeof(int));
			if (!dev->vhostfds || !dev->tapfds) {
				PMD_INIT_LOG(ERR, "Failed to malloc");
				return -1;
			}

			for (q = 0; q < dev->max_queue_pairs; ++q) {
				dev->vhostfds[q] = -1;
				dev->tapfds[q] = -1;
			}
		}
	}

	if (dev->ops->setup(dev) < 0)
		return -1;

	if (virtio_user_dev_init_notify(dev) < 0)
		return -1;

	if (virtio_user_fill_intr_handle(dev) < 0)
		return -1;

	return 0;
}

/* Use the macro below to filter features from the vhost backend */
#define VIRTIO_USER_SUPPORTED_FEATURES			\
	(1ULL << VIRTIO_NET_F_MAC |			\
	 1ULL << VIRTIO_NET_F_STATUS |			\
	 1ULL << VIRTIO_NET_F_MQ |			\
	 1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR |		\
	 1ULL << VIRTIO_NET_F_CTRL_VQ |			\
	 1ULL << VIRTIO_NET_F_CTRL_RX |			\
	 1ULL << VIRTIO_NET_F_CTRL_VLAN |		\
	 1ULL << VIRTIO_NET_F_CSUM |			\
	 1ULL << VIRTIO_NET_F_HOST_TSO4 |		\
	 1ULL << VIRTIO_NET_F_HOST_TSO6 |		\
	 1ULL << VIRTIO_NET_F_MRG_RXBUF |		\
	 1ULL << VIRTIO_RING_F_INDIRECT_DESC |		\
	 1ULL << VIRTIO_NET_F_GUEST_CSUM |		\
	 1ULL << VIRTIO_NET_F_GUEST_TSO4 |		\
	 1ULL << VIRTIO_NET_F_GUEST_TSO6 |		\
	 1ULL << VIRTIO_F_IN_ORDER |			\
	 1ULL << VIRTIO_F_VERSION_1)

int
virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
		     int cq, int queue_size, const char *mac, char **ifname,
		     int mrg_rxbuf, int in_order)
{
	pthread_mutex_init(&dev->mutex, NULL);
	snprintf(dev->path, PATH_MAX, "%s", path);
	dev->started = 0;
	dev->max_queue_pairs = queues;
	dev->queue_pairs = 1; /* mq disabled by default */
	dev->queue_size = queue_size;
	dev->mac_specified = 0;
	dev->frontend_features = 0;
	dev->unsupported_features = ~VIRTIO_USER_SUPPORTED_FEATURES;
	parse_mac(dev, mac);

	if (*ifname) {
		dev->ifname = *ifname;
		*ifname = NULL;
	}

	if (virtio_user_dev_setup(dev) < 0) {
		PMD_INIT_LOG(ERR, "backend set up fails");
		return -1;
	}

	if (!dev->is_server) {
		if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER,
					   NULL) < 0) {
			PMD_INIT_LOG(ERR, "set_owner fails: %s",
				     strerror(errno));
			return -1;
		}

		if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
					   &dev->device_features) < 0) {
			PMD_INIT_LOG(ERR, "get_features failed: %s",
				     strerror(errno));
			return -1;
		}
	} else {
		/* We just pretend that vhost-user can support all of these
		 * features. Note that this could be problematic if some
		 * feature is negotiated here but turns out not to be
		 * supported by the vhost-user backend that connects later.
		 */
		dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;
	}
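
	/*
	 * Knobs passed in by the caller (e.g. the mrg_rxbuf and in_order
	 * vdev arguments), together with the MAC and control-queue settings,
	 * are folded into frontend_features/unsupported_features and applied
	 * to device_features below.
	 */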
	if (!mrg_rxbuf)
		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MRG_RXBUF);

	if (!in_order)
		dev->unsupported_features |= (1ull << VIRTIO_F_IN_ORDER);

	if (dev->mac_specified)
		dev->frontend_features |= (1ull << VIRTIO_NET_F_MAC);
	else
		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MAC);

	if (cq) {
		/* device does not really need to know anything about CQ,
		 * so if necessary, we just claim to support CQ
		 */
		dev->frontend_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
	} else {
		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
		/* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */
		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_RX);
		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VLAN);
		dev->unsupported_features |=
			(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE);
		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
		dev->unsupported_features |=
			(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
	}

	/* The backend will not report this feature, we add it explicitly */
	if (is_vhost_user_by_type(dev->path))
		dev->frontend_features |= (1ull << VIRTIO_NET_F_STATUS);

	/*
	 * Device features =
	 *     (frontend_features | backend_features) & ~unsupported_features;
	 */
	dev->device_features |= dev->frontend_features;
	dev->device_features &= ~dev->unsupported_features;

	if (rte_mem_event_callback_register(VIRTIO_USER_MEM_EVENT_CLB_NAME,
					    virtio_user_mem_event_cb, dev)) {
		if (rte_errno != ENOTSUP) {
			PMD_INIT_LOG(ERR,
				     "Failed to register mem event callback");
			return -1;
		}
	}

	return 0;
}

void
virtio_user_dev_uninit(struct virtio_user_dev *dev)
{
	uint32_t i;

	virtio_user_stop_device(dev);

	rte_mem_event_callback_unregister(VIRTIO_USER_MEM_EVENT_CLB_NAME, dev);

	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
		close(dev->callfds[i]);
		close(dev->kickfds[i]);
	}

	close(dev->vhostfd);

	if (dev->is_server && dev->listenfd >= 0) {
		close(dev->listenfd);
		dev->listenfd = -1;
	}

	if (dev->vhostfds) {
		for (i = 0; i < dev->max_queue_pairs; ++i)
			close(dev->vhostfds[i]);
		free(dev->vhostfds);
		free(dev->tapfds);
	}

	free(dev->ifname);

	if (dev->is_server)
		unlink(dev->path);
}

uint8_t
virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs)
{
	uint16_t i;
	uint8_t ret = 0;

	if (q_pairs > dev->max_queue_pairs) {
		PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported",
			     q_pairs, dev->max_queue_pairs);
		return -1;
	}

	/* Server mode can't enable queue pairs if vhostfd is invalid,
	 * always return 0 in this case.
	 */
	if (!dev->is_server || dev->vhostfd >= 0) {
		for (i = 0; i < q_pairs; ++i)
			ret |= dev->ops->enable_qp(dev, i, 1);
		for (i = q_pairs; i < dev->max_queue_pairs; ++i)
			ret |= dev->ops->enable_qp(dev, i, 0);
	}
	dev->queue_pairs = q_pairs;

	return ret;
}
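
/*
 * A control-queue command is carried in a descriptor chain: one descriptor
 * holding the virtio_net_ctrl_hdr, the command-specific data descriptor(s),
 * and a final descriptor for the one-byte ack status written by the device.
 */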
static uint32_t
virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring,
			    uint16_t idx_hdr)
{
	struct virtio_net_ctrl_hdr *hdr;
	virtio_net_ctrl_ack status = ~0;
	uint16_t i, idx_data, idx_status;
	uint32_t n_descs = 0;

	/* locate desc for header, data, and status */
	idx_data = vring->desc[idx_hdr].next;
	n_descs++;

	i = idx_data;
	while (vring->desc[i].flags == VRING_DESC_F_NEXT) {
		i = vring->desc[i].next;
		n_descs++;
	}

	/* locate desc for status */
	idx_status = i;
	n_descs++;

	hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
	if (hdr->class == VIRTIO_NET_CTRL_MQ &&
	    hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
		uint16_t queues;

		queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr;
		status = virtio_user_handle_mq(dev, queues);
	}

	/* Update status */
	*(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr =
		status;

	return n_descs;
}

void
virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx)
{
	uint16_t avail_idx, desc_idx;
	struct vring_used_elem *uep;
	uint32_t n_descs;
	struct vring *vring = &dev->vrings[queue_idx];

	/* Consume avail ring, using used ring idx as first one */
	while (vring->used->idx != vring->avail->idx) {
		avail_idx = (vring->used->idx) & (vring->num - 1);
		desc_idx = vring->avail->ring[avail_idx];

		n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx);

		/* Update used ring */
		uep = &vring->used->ring[avail_idx];
		uep->id = avail_idx;
		uep->len = n_descs;

		vring->used->idx++;
	}
}