/*	$NetBSD: nvmm.c,v 1.39 2020/09/05 07:22:25 maxv Exp $	*/

/*
 * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.39 2020/09/05 07:22:25 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl __read_mostly = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (__predict_false(!mach->present)) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (__predict_false(mach->owner != owner && owner != &root_owner)) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (__predict_false(!vcpu->present)) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
			atomic_dec_uint(&mach->ncpus);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		atomic_dec_uint(&mach->ncpus);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);
	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);
	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = mach->ncpus;

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

static const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (atomic_load_relaxed(&nmachines) > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
		    &nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}