/*	$NetBSD: nvmm.c,v 1.32 2020/07/03 16:09:54 maxv Exp $	*/

/*
 * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.32 2020/07/03 16:09:54 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
};

static const struct nvmm_impl *nvmm_impl = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

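/*
 * Look up a machine by ID and return it with its lock held, as reader
 * or writer depending on the caller's needs.  Only the machine's owner
 * (or root_owner) may take a reference; release with nvmm_machine_put().
 */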
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

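/*
 * Destroy every machine that belongs to the given owner.  Called from
 * nvmm_close(), so that no guest outlives the process that created it.
 */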
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

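/*
 * Tear a machine down in the reverse order of creation: the VCPUs
 * first, then the backend state, the machine vmspace, and finally the
 * kernel-side references on the host-mapping UOBJs.
 */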
static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

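/*
 * Each VCPU gets a comm page: one page of the machine's commuobj,
 * mapped read/write in the kernel and wired.  Userland maps the same
 * page through nvmm_mmap(), so the kernel and the emulator can share
 * per-VCPU data without a copyin/copyout on each access.
 */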
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

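/*
 * Inner run loop.  Re-enters the VCPU until an exit that userland must
 * handle.  Nested page faults on valid guest memory are resolved here
 * with uvm_fault() on the machine vmspace; a pending signal or
 * reschedule breaks the loop early with NVMM_VCPU_EXIT_NONE.
 */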
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

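/*
 * Back a page-aligned range of the caller's address space with a
 * kernel-owned anonymous UVM object.  The range must not overlap an
 * existing host mapping; nvmm_gpa_map() later looks the object up to
 * wire guest physical memory to it.
 */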
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

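/*
 * Guest physical mappings.  A GPA range is entered into the machine
 * vmspace by mapping the UVM object of a previously established host
 * mapping, at the proper offset, so guest RAM and the emulator's view
 * of it share the same pages.
 */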
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

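/*
 * /dev/nvmm is open-only: the cdevsw implements d_open and nothing
 * else.  nvmm_open() clones the descriptor into a per-open file with
 * its own fileops, so every open carries its own nvmm_owner.
 */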
static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

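/*
 * Map a VCPU comm page into userland.  The mmap offset encodes the
 * target machine and VCPU (NVMM_COMM_MACHID/NVMM_COMM_CPUID); the
 * mapping may not be executable.
 */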
static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (nmachines > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

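/*
 * Module glue.  nvmm is a pseudo-device, so when built as a module the
 * cfdriver, cfattach and cfdata are registered by hand, and the
 * character device is attached at its reserved major (345).
 */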
MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
		    &nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}