/*	$NetBSD: nvmm.c,v 1.35 2020/08/18 17:04:37 maxv Exp $	*/

/*
 * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.35 2020/08/18 17:04:37 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

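/*
 * Global state: a fixed-size table of machine slots, a count of the
 * slots currently in use (read lock-free in nvmm_detach()), and the
 * backend implementation selected by nvmm_init().
 */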
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

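/*
 * Machine slot management.  nvmm_machine_alloc() claims the first free
 * slot and returns it with its rwlock write-held.  nvmm_machine_get()
 * looks a slot up by id, checks that the caller owns it (the root
 * owner bypasses this check), and returns it locked as reader or
 * writer.  nvmm_machine_put() drops the lock.
 */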
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

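/*
 * VCPU slot management, following the same pattern as the machines but
 * with a per-VCPU mutex: alloc and get return the VCPU locked, put
 * unlocks it.
 */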
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

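/*
 * Destroy every machine belonging to the given owner.  Called from
 * nvmm_close(), so that no machine outlives the descriptor of its
 * creator.
 */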
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

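/*
 * Ioctl handlers.  Each handler resolves the machine (and the VCPU,
 * where relevant) from the ids passed in by userland, performs the
 * operation through the backend, and releases the locks on the way
 * out.
 */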
static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

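/*
 * Each VCPU is given one page of the machine's comm uobj, mapped
 * twice: wired into kernel_map here (vcpu->comm), and mmap'able by the
 * owner through nvmm_mmap() below.  This provides a shared page for
 * kernel<->userland communication.
 */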
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

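/*
 * Inner run loop.  We keep re-entering the guest until the backend
 * reports an exit we cannot handle here: nested page faults whose GPA
 * falls inside the machine's address space are serviced transparently
 * with uvm_fault(), so userland only sees the memory exits it actually
 * has to emulate.  A pending signal or reschedule bounces us back to
 * userland with NVMM_VCPU_EXIT_NONE.
 */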
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

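/*
 * Host mappings (hmappings).  An hmapping is a page-aligned range of
 * the owner's address space, backed by an anonymous uobj that we
 * create here.  Userland populates the range through this regular
 * mapping, and nvmm_gpa_map() later maps (windows of) the same uobj
 * into the guest physical address space.
 */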
static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

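/*
 * Guest physical mappings.  The GPA range must be page-aligned, must
 * fall inside [gpa_begin, gpa_end), and must be fully covered by an
 * existing hmapping, whose uobj provides the backing pages.
 */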
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

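/*
 * NVMM_CTL operations.  MACH_INFO copies out a summary of one machine:
 * the number of VCPUs, the total RAM exposed through hmappings, and
 * the owner's pid and creation time.
 */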
static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

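/*
 * Backend selection: the first implementation whose ident() callback
 * accepts the host CPU wins.  On x86_64 this is either AMD SVM or
 * Intel VMX.
 */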
static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

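/*
 * Device plumbing.  /dev/nvmm implements only open: nvmm_open() clones
 * the descriptor into an anonymous file backed by nvmm_fileops, so
 * each open gets its own nvmm_owner, and all further operations
 * (ioctl, mmap, close) go through the file layer.
 */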
static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

static const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

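/*
 * Illustrative sketch (not part of this file) of the ioctl sequence a
 * userland client could issue directly; real consumers are expected to
 * go through libnvmm.  The struct layouts are the ones declared in the
 * nvmm headers; error handling is omitted.
 *
 *	int fd = open("/dev/nvmm", O_RDWR | O_CLOEXEC);
 *
 *	struct nvmm_ioc_machine_create mc;
 *	ioctl(fd, NVMM_IOC_MACHINE_CREATE, &mc);  // sets mc.machid
 *
 *	struct nvmm_ioc_vcpu_create vc = { .machid = mc.machid, .cpuid = 0 };
 *	ioctl(fd, NVMM_IOC_VCPU_CREATE, &vc);
 *
 *	struct nvmm_ioc_vcpu_run vr = { .machid = mc.machid, .cpuid = 0 };
 *	ioctl(fd, NVMM_IOC_VCPU_RUN, &vr);  // vr.exit describes the exit
 */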
/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (atomic_load_relaxed(&nmachines) > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
			&nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}
1316