/*
 * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/kernel.h>
#include <sys/mman.h>

#include "nvmm.h"
#include "nvmm_internal.h"
#include "nvmm_ioctl.h"

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

const struct nvmm_impl *nvmm_impl __read_mostly = NULL;

struct nvmm_owner nvmm_root_owner;

/* -------------------------------------------------------------------------- */

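/*
 * Allocate a machine slot: take the first non-present entry of the global
 * machines[] array, mark it present and return it with its lock held for
 * writing.  Fails with ENOBUFS if all NVMM_MAX_MACHINES slots are in use.
 */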
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		os_rwl_wlock(&mach->lock);
		if (mach->present) {
			os_rwl_unlock(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		os_atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	OS_ASSERT(os_rwl_wheld(&mach->lock));
	OS_ASSERT(mach->present);
	mach->present = false;
	os_atomic_dec_uint(&nmachines);
}

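/*
 * Look up a machine by ID and return it locked, for reading or writing
 * depending on 'writer'.  Only the owner of the machine, or the root
 * owner, is allowed to take a reference.
 */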
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;

	if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
		return EINVAL;
	}
	mach = &machines[machid];

	if (__predict_false(writer)) {
		os_rwl_wlock(&mach->lock);
	} else {
		os_rwl_rlock(&mach->lock);
	}
	if (__predict_false(!mach->present)) {
		os_rwl_unlock(&mach->lock);
		return ENOENT;
	}
	if (__predict_false(mach->owner != owner &&
	    owner != &nvmm_root_owner)) {
		os_rwl_unlock(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	os_rwl_unlock(&mach->lock);
}

/* -------------------------------------------------------------------------- */

static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	os_mtx_lock(&vcpu->lock);
	if (vcpu->present) {
		os_mtx_unlock(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	OS_ASSERT(os_mtx_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		os_vmobj_unmap(os_kernel_map, (vaddr_t)vcpu->comm,
		    (vaddr_t)vcpu->comm + NVMM_COMM_PAGE_SIZE, true);
		/*
		 * Require userland to unmap the comm page from its address
		 * space, because os_curproc_map at this point (fd close)
		 * is not guaranteed to be the correct address space.
		 */
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	os_mtx_lock(&vcpu->lock);
	if (__predict_false(!vcpu->present)) {
		os_mtx_unlock(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	os_mtx_unlock(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

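/*
 * Destroy all machines belonging to the given owner: tear down their VCPUs,
 * the backend state, the machine vmspace and the host-mapping vmobjs.
 * Called when the owner goes away (typically on close of its NVMM
 * descriptor).
 */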
void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		os_rwl_wlock(&mach->lock);
		if (!mach->present || mach->owner != owner) {
			os_rwl_unlock(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
			os_atomic_dec_uint(&mach->ncpus);
		}
		(*nvmm_impl->machine_destroy)(mach);
		os_vmspace_destroy(mach->vm);

		/* Drop the kernel vmobj refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			os_vmobj_rel(mach->hmap[j].vmobj);
		}

		nvmm_machine_free(mach);

		os_rwl_unlock(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

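/*
 * Report the driver capabilities: kernel version, state and comm page sizes,
 * the machine/VCPU/RAM limits, plus whatever machine-dependent information
 * the backend fills in.
 */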
static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.comm_size = NVMM_COMM_PAGE_SIZE;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = os_vmspace_create(mach->gpa_begin, mach->gpa_end);

	/* Create the comm vmobj. */
	mach->commvmobj = os_vmobj_create(
	    NVMM_MAX_VCPUS * NVMM_COMM_PAGE_SIZE);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		os_atomic_dec_uint(&mach->ncpus);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	os_vmspace_destroy(mach->vm);

	/* Drop the kernel vmobj refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		os_vmobj_rel(mach->hmap[i].vmobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = os_mem_alloc(allocsz);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		os_mem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	os_mem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Map the comm page on the kernel side, as wired. */
	error = os_vmobj_map(os_kernel_map, (vaddr_t *)&vcpu->comm,
	    NVMM_COMM_PAGE_SIZE, mach->commvmobj,
	    args->cpuid * NVMM_COMM_PAGE_SIZE, true /* wired */,
	    false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE,
	    PROT_READ | PROT_WRITE);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	memset(vcpu->comm, 0, NVMM_COMM_PAGE_SIZE);

	/* Map the comm page on the user side, as pageable. */
	error = os_vmobj_map(os_curproc_map, (vaddr_t *)&args->comm,
	    NVMM_COMM_PAGE_SIZE, mach->commvmobj,
	    args->cpuid * NVMM_COMM_PAGE_SIZE, false /* !wired */,
	    false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE,
	    PROT_READ | PROT_WRITE);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);
	os_atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);
	os_atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = os_mem_alloc(allocsz);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		os_mem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		os_mem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	os_mem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

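/*
 * Run the VCPU in a loop.  Nested page faults on GPAs that fall within the
 * guest address space are handled transparently by faulting the pages into
 * the machine vmspace; any other exit, or a pending signal or reschedule
 * request on the host, returns to the caller.
 */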
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(os_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (os_vmspace_fault(vm, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

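/*
 * Return the vmobj backing the host mapping that fully contains the given
 * HVA range, along with the offset of the range within that mapping, or
 * NULL if no such mapping exists.
 */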
static os_vmobj_t *
nvmm_hmapping_getvmobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->vmobj;
		}
	}

	return NULL;
}

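/*
 * Validate a candidate host mapping: the range must be page-aligned and
 * non-null, must not wrap around the address space, and must not partially
 * overlap an existing mapping (a range fully contained in an existing
 * mapping is accepted).
 */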
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;
	uintptr_t hva_end;
	uintptr_t hmap_end;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	/*
	 * Overflow tests MUST be done very carefully to avoid compiler
	 * optimizations from effectively deleting the test.
	 */
	hva_end = hva + size;
	if (hva_end <= hva)
		return EINVAL;

	/*
	 * Overlap tests
	 */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];

		if (!hmapping->present) {
			continue;
		}
		hmap_end = hmapping->hva + hmapping->size;

		if (hva >= hmapping->hva && hva_end <= hmap_end)
			break;
		if (hva >= hmapping->hva && hva < hmap_end)
			return EEXIST;
		if (hva_end > hmapping->hva && hva_end <= hmap_end)
			return EEXIST;
		if (hva <= hmapping->hva && hva_end >= hmap_end)
			return EEXIST;
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		os_vmobj_unmap(os_curproc_map, hmapping->hva,
		    hmapping->hva + hmapping->size, false);
		os_vmobj_rel(hmapping->vmobj);

		hmapping->vmobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

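/*
 * Create a host mapping: allocate an anonymous vmobj of the requested size
 * and map it at the given HVA in the calling process, as pageable.  The
 * vmobj is later used to back GPA mappings of the same range.
 */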
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->vmobj = os_vmobj_create(hmapping->size);
	uva = hmapping->hva;

	/* Map the vmobj into the user address space, as pageable. */
	error = os_vmobj_map(os_curproc_map, &uva, hmapping->size,
	    hmapping->vmobj, 0, false /* !wired */, true /* fixed */,
	    true /* shared */, PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

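/*
 * Map a guest-physical range of the machine onto a previously established
 * host mapping.  The HVA range must be fully covered by one host mapping;
 * its backing vmobj is mapped into the machine vmspace with the requested
 * protection.
 */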
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	os_vmobj_t *vmobj;
	gpaddr_t gpa;
	gpaddr_t gpa_end;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Overflow tests MUST be done very carefully to avoid compiler
	 * optimizations from effectively deleting the test.
	 */
	gpa = args->gpa;
	gpa_end = gpa + args->size;
	if (gpa_end <= gpa) {
		error = EINVAL;
		goto out;
	}

	if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}

	if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (gpa_end > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}

	vmobj = nvmm_hmapping_getvmobj(mach, args->hva, args->size, &off);
	if (vmobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Map the vmobj into the machine address space, as pageable. */
	error = os_vmobj_map(&mach->vm->vm_map, &gpa, args->size, vmobj, off,
	    false /* !wired */, true /* fixed */, false /* !shared */,
	    args->prot, PROT_READ | PROT_WRITE | PROT_EXEC);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	gpaddr_t gpa_end;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	/*
	 * Overflow tests MUST be done very carefully to avoid compiler
	 * optimizations from effectively deleting the test.
	 */
	gpa = args->gpa;
	gpa_end = gpa + args->size;
	if (gpa_end <= gpa) {
		error = EINVAL;
		goto out;
	}

	if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (gpa_end >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}

	/* Unmap the memory from the machine. */
	os_vmobj_unmap(&mach->vm->vm_map, gpa, gpa + args->size, false);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = mach->ncpus;

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

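/*
 * Identify the virtualization backend to use, by probing each implementation
 * in nvmm_impl_list and returning the first one that reports support.
 */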
const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

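/*
 * Initialize NVMM: select a backend, initialize every machine slot and its
 * VCPU locks, and call the backend's init hook.  Returns ENOTSUP if no
 * backend supports the host CPU.
 */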
int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		os_rwl_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			os_mtx_init(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		os_rwl_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			os_mtx_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

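/*
 * Main ioctl dispatcher: route each NVMM command to its handler.  'data'
 * points to the kernel copy of the ioctl argument structure.
 */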
int
nvmm_ioctl(struct nvmm_owner *owner, unsigned long cmd, void *data)
{
	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}