/*	$NetBSD: nvmm.c,v 1.41 2020/09/08 16:58:38 maxv Exp $	*/

/*
 * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.41 2020/09/08 16:58:38 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm_aobj.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl __read_mostly = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (__predict_false(!mach->present)) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (__predict_false(mach->owner != owner && owner != &root_owner)) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (__predict_false(!vcpu->present)) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
			atomic_dec_uint(&mach->ncpus);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		atomic_dec_uint(&mach->ncpus);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);
	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);
	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = mach->ncpus;

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

static const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (atomic_load_relaxed(&nmachines) > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
		    &nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}