/*	$NetBSD: nvmm.c,v 1.35 2020/08/18 17:04:37 maxv Exp $	*/

/*
 * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.35 2020/08/18 17:04:37 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

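/*
 * Look up machine 'machid' on behalf of 'owner' and return it with its
 * lock held, as reader or writer. Fails with ENOENT if the slot is not
 * in use, and with EPERM if the machine belongs to another owner; the
 * special root_owner bypasses the ownership check.
 */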
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

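/*
 * Destroy every machine that belongs to 'owner'. Called from nvmm_close(),
 * so that no machine outlives the descriptor that created it.
 */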
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

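/*
 * Copy in a machine-dependent VCPU configuration blob and hand it to the
 * backend. The blob size is keyed on the MD operation number through
 * nvmm_impl->vcpu_conf_sizes[].
 */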
static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

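/*
 * The VCPU run loop. Re-enter the guest until the backend reports an exit
 * that userland must handle, or a signal/reschedule is pending on the
 * host. Nested page faults on guest RAM are resolved here directly, with
 * uvm_fault() on the machine's vmspace, without bouncing to userland.
 */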
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

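/*
 * Register a host virtual address range. The backing store is an
 * anonymous UVM object; the machine keeps one reference and the calling
 * process gets another, with the object mapped at the requested HVA.
 * nvmm_gpa_map() can later map the same object into the guest, so host
 * and guest share the pages.
 */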
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

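/*
 * Map a guest physical range onto a previously registered HVA range. The
 * HVA must fall entirely inside one hmapping; its backing uobj is then
 * entered into the machine's vmspace at the requested GPA, with an extra
 * reference taken for the machine.
 */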
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	/* Same upper bound as nvmm_gpa_map(): a range ending exactly at
	 * gpa_end is valid, and must remain unmappable. */
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

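/*
 * Device interface. The cdevsw below only implements open: nvmm_open()
 * clones a file descriptor with private fileops, so each descriptor
 * carries its own nvmm_owner, and everything else goes through
 * nvmm_ioctl(), nvmm_mmap() and nvmm_close().
 */
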
static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

static const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

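/*
 * Dispatch an ioctl to the matching handler. The caller's nvmm_owner
 * rides in fp->f_data; ownership is enforced inside each handler by
 * nvmm_machine_get().
 */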
static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (atomic_load_relaxed(&nmachines) > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

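/*
 * Module glue. On load: check for a usable backend with nvmm_ident(),
 * register the cfdriver/cfattach/cfdata triplet, attach the pseudo
 * device, and, when built as a module, register the character device.
 * Each step unwinds the previous ones on failure.
 */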
static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
		    &nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}