xref: /netbsd-src/sys/dev/nvmm/nvmm.c (revision 9d35bdcab13b91c96c9e135ba98d51037d2d6c7d)
1 /*	$NetBSD: nvmm.c,v 1.47 2022/09/13 20:10:04 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
5  * All rights reserved.
6  *
7  * This code is part of the NVMM hypervisor.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.47 2022/09/13 20:10:04 riastradh Exp $");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 
38 #include <sys/atomic.h>
39 #include <sys/cpu.h>
40 #include <sys/conf.h>
41 #include <sys/kmem.h>
42 #include <sys/module.h>
43 #include <sys/proc.h>
44 #include <sys/mman.h>
45 #include <sys/file.h>
46 #include <sys/filedesc.h>
47 #include <sys/device.h>
48 
49 #include <uvm/uvm_aobj.h>
50 #include <uvm/uvm_extern.h>
51 #include <uvm/uvm_page.h>
52 
53 #include "ioconf.h"
54 
55 #include <dev/nvmm/nvmm.h>
56 #include <dev/nvmm/nvmm_internal.h>
57 #include <dev/nvmm/nvmm_ioctl.h>
58 
59 static struct nvmm_machine machines[NVMM_MAX_MACHINES];
60 static volatile unsigned int nmachines __cacheline_aligned;
61 
62 static struct {
63 	kmutex_t	lock;
64 	kcondvar_t	suspendcv;
65 	kcondvar_t	resumecv;
66 	unsigned	users;
67 } suspension;
68 
69 volatile bool nvmm_suspending;
70 
71 static const struct nvmm_impl *nvmm_impl_list[] = {
72 #if defined(__x86_64__)
73 	&nvmm_x86_svm,	/* x86 AMD SVM */
74 	&nvmm_x86_vmx	/* x86 Intel VMX */
75 #endif
76 };
77 
78 static const struct nvmm_impl *nvmm_impl __read_mostly = NULL;
79 
80 static struct nvmm_owner root_owner;
81 
82 /* -------------------------------------------------------------------------- */
83 
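/*
 * Suspension interlock.  Every ioctl path calls nvmm_enter()/nvmm_enter_sig()
 * before touching machines and nvmm_exit() afterwards: the enter side blocks
 * while nvmm_suspending is set and counts the caller in suspension.users; the
 * exit side wakes the suspender once the last user drains.
 */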
84 static int
85 nvmm_enter_sig(void)
86 {
87 	int error;
88 
89 	mutex_enter(&suspension.lock);
90 	while (nvmm_suspending) {
91 		error = cv_wait_sig(&suspension.resumecv, &suspension.lock);
92 		if (error)
93 			goto out;
94 	}
95 	KASSERT(suspension.users < UINT_MAX);
96 	suspension.users++;
97 	error = 0;
98 out:	mutex_exit(&suspension.lock);
99 
100 	return error;
101 }
102 
103 static void
104 nvmm_enter(void)
105 {
106 
107 	mutex_enter(&suspension.lock);
108 	while (nvmm_suspending)
109 		cv_wait(&suspension.resumecv, &suspension.lock);
110 	KASSERT(suspension.users < UINT_MAX);
111 	suspension.users++;
112 	mutex_exit(&suspension.lock);
113 }
114 
115 static void
116 nvmm_exit(void)
117 {
118 
119 	mutex_enter(&suspension.lock);
120 	KASSERT(suspension.users > 0);
121 	if (--suspension.users == 0)
122 		cv_signal(&suspension.suspendcv);
123 	mutex_exit(&suspension.lock);
124 }
125 
126 /* -------------------------------------------------------------------------- */
127 
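/*
 * Machines live in the static machines[] table.  A slot is claimed by setting
 * mach->present under the per-machine rwlock; nmachines tracks how many slots
 * are live so that nvmm_detach() can refuse to unload while VMs exist.
 */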
128 static int
129 nvmm_machine_alloc(struct nvmm_machine **ret)
130 {
131 	struct nvmm_machine *mach;
132 	size_t i;
133 
134 	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
135 		mach = &machines[i];
136 
137 		rw_enter(&mach->lock, RW_WRITER);
138 		if (mach->present) {
139 			rw_exit(&mach->lock);
140 			continue;
141 		}
142 
143 		mach->present = true;
144 		mach->time = time_second;
145 		*ret = mach;
146 		atomic_inc_uint(&nmachines);
147 		return 0;
148 	}
149 
150 	return ENOBUFS;
151 }
152 
153 static void
154 nvmm_machine_free(struct nvmm_machine *mach)
155 {
156 	KASSERT(rw_write_held(&mach->lock));
157 	KASSERT(mach->present);
158 	mach->present = false;
159 	atomic_dec_uint(&nmachines);
160 }
161 
162 static int
163 nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
164     struct nvmm_machine **ret, bool writer)
165 {
166 	struct nvmm_machine *mach;
167 	krw_t op = writer ? RW_WRITER : RW_READER;
168 
169 	if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
170 		return EINVAL;
171 	}
172 	mach = &machines[machid];
173 
174 	rw_enter(&mach->lock, op);
175 	if (__predict_false(!mach->present)) {
176 		rw_exit(&mach->lock);
177 		return ENOENT;
178 	}
179 	if (__predict_false(mach->owner != owner && owner != &root_owner)) {
180 		rw_exit(&mach->lock);
181 		return EPERM;
182 	}
183 	*ret = mach;
184 
185 	return 0;
186 }
187 
188 static void
189 nvmm_machine_put(struct nvmm_machine *mach)
190 {
191 	rw_exit(&mach->lock);
192 }
193 
194 /* -------------------------------------------------------------------------- */
195 
196 static int
197 nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
198     struct nvmm_cpu **ret)
199 {
200 	struct nvmm_cpu *vcpu;
201 
202 	if (cpuid >= NVMM_MAX_VCPUS) {
203 		return EINVAL;
204 	}
205 	vcpu = &mach->cpus[cpuid];
206 
207 	mutex_enter(&vcpu->lock);
208 	if (vcpu->present) {
209 		mutex_exit(&vcpu->lock);
210 		return EBUSY;
211 	}
212 
213 	vcpu->present = true;
214 	vcpu->comm = NULL;
215 	vcpu->hcpu_last = -1;
216 	*ret = vcpu;
217 	return 0;
218 }
219 
220 static void
221 nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
222 {
223 	KASSERT(mutex_owned(&vcpu->lock));
224 	vcpu->present = false;
225 	if (vcpu->comm != NULL) {
226 		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
227 	}
228 }
229 
230 static int
231 nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
232     struct nvmm_cpu **ret)
233 {
234 	struct nvmm_cpu *vcpu;
235 
236 	if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
237 		return EINVAL;
238 	}
239 	vcpu = &mach->cpus[cpuid];
240 
241 	mutex_enter(&vcpu->lock);
242 	if (__predict_false(!vcpu->present)) {
243 		mutex_exit(&vcpu->lock);
244 		return ENOENT;
245 	}
246 	*ret = vcpu;
247 
248 	return 0;
249 }
250 
251 static void
252 nvmm_vcpu_put(struct nvmm_cpu *vcpu)
253 {
254 	mutex_exit(&vcpu->lock);
255 }
256 
257 /* -------------------------------------------------------------------------- */
258 
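/*
 * Tear down every machine belonging to the given owner: destroy its VCPUs,
 * free the guest vmspace and drop the host-mapping uobj references.  Called
 * from nvmm_close() when the /dev/nvmm file is closed.
 */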
259 static void
260 nvmm_kill_machines(struct nvmm_owner *owner)
261 {
262 	struct nvmm_machine *mach;
263 	struct nvmm_cpu *vcpu;
264 	size_t i, j;
265 	int error;
266 
267 	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
268 		mach = &machines[i];
269 
270 		rw_enter(&mach->lock, RW_WRITER);
271 		if (!mach->present || mach->owner != owner) {
272 			rw_exit(&mach->lock);
273 			continue;
274 		}
275 
276 		/* Kill it. */
277 		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
278 			error = nvmm_vcpu_get(mach, j, &vcpu);
279 			if (error)
280 				continue;
281 			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
282 			nvmm_vcpu_free(mach, vcpu);
283 			nvmm_vcpu_put(vcpu);
284 			atomic_dec_uint(&mach->ncpus);
285 		}
286 		(*nvmm_impl->machine_destroy)(mach);
287 		uvmspace_free(mach->vm);
288 
289 		/* Drop the kernel UOBJ refs. */
290 		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
291 			if (!mach->hmap[j].present)
292 				continue;
293 			uao_detach(mach->hmap[j].uobj);
294 		}
295 
296 		nvmm_machine_free(mach);
297 
298 		rw_exit(&mach->lock);
299 	}
300 }
301 
302 /* -------------------------------------------------------------------------- */
303 
304 static int
305 nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
306 {
307 	args->cap.version = NVMM_KERN_VERSION;
308 	args->cap.state_size = nvmm_impl->state_size;
309 	args->cap.max_machines = NVMM_MAX_MACHINES;
310 	args->cap.max_vcpus = NVMM_MAX_VCPUS;
311 	args->cap.max_ram = NVMM_MAX_RAM;
312 
313 	(*nvmm_impl->capability)(&args->cap);
314 
315 	return 0;
316 }
317 
318 static int
319 nvmm_machine_create(struct nvmm_owner *owner,
320     struct nvmm_ioc_machine_create *args)
321 {
322 	struct nvmm_machine *mach;
323 	int error;
324 
325 	error = nvmm_machine_alloc(&mach);
326 	if (error)
327 		return error;
328 
329 	/* Curproc owns the machine. */
330 	mach->owner = owner;
331 
332 	/* Zero out the host mappings. */
333 	memset(&mach->hmap, 0, sizeof(mach->hmap));
334 
335 	/* Create the machine vmspace. */
336 	mach->gpa_begin = 0;
337 	mach->gpa_end = NVMM_MAX_RAM;
338 	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);
339 
340 	/* Create the comm uobj. */
341 	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);
342 
343 	(*nvmm_impl->machine_create)(mach);
344 
345 	args->machid = mach->machid;
346 	nvmm_machine_put(mach);
347 
348 	return 0;
349 }
350 
351 static int
352 nvmm_machine_destroy(struct nvmm_owner *owner,
353     struct nvmm_ioc_machine_destroy *args)
354 {
355 	struct nvmm_machine *mach;
356 	struct nvmm_cpu *vcpu;
357 	int error;
358 	size_t i;
359 
360 	error = nvmm_machine_get(owner, args->machid, &mach, true);
361 	if (error)
362 		return error;
363 
364 	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
365 		error = nvmm_vcpu_get(mach, i, &vcpu);
366 		if (error)
367 			continue;
368 
369 		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
370 		nvmm_vcpu_free(mach, vcpu);
371 		nvmm_vcpu_put(vcpu);
372 		atomic_dec_uint(&mach->ncpus);
373 	}
374 
375 	(*nvmm_impl->machine_destroy)(mach);
376 
377 	/* Free the machine vmspace. */
378 	uvmspace_free(mach->vm);
379 
380 	/* Drop the kernel UOBJ refs. */
381 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
382 		if (!mach->hmap[i].present)
383 			continue;
384 		uao_detach(mach->hmap[i].uobj);
385 	}
386 
387 	nvmm_machine_free(mach);
388 	nvmm_machine_put(mach);
389 
390 	return 0;
391 }
392 
393 static int
394 nvmm_machine_configure(struct nvmm_owner *owner,
395     struct nvmm_ioc_machine_configure *args)
396 {
397 	struct nvmm_machine *mach;
398 	size_t allocsz;
399 	uint64_t op;
400 	void *data;
401 	int error;
402 
403 	op = NVMM_MACH_CONF_MD(args->op);
404 	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
405 		return EINVAL;
406 	}
407 
408 	allocsz = nvmm_impl->mach_conf_sizes[op];
409 	data = kmem_alloc(allocsz, KM_SLEEP);
410 
411 	error = nvmm_machine_get(owner, args->machid, &mach, true);
412 	if (error) {
413 		kmem_free(data, allocsz);
414 		return error;
415 	}
416 
417 	error = copyin(args->conf, data, allocsz);
418 	if (error) {
419 		goto out;
420 	}
421 
422 	error = (*nvmm_impl->machine_configure)(mach, op, data);
423 
424 out:
425 	nvmm_machine_put(mach);
426 	kmem_free(data, allocsz);
427 	return error;
428 }
429 
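/*
 * Each VCPU has a "comm" page taken from mach->commuobj.  nvmm_vcpu_create()
 * wires it into the kernel map, and userland maps the very same page through
 * nvmm_mmap(), so kernel and emulator share it (nvmm_vcpu_run() clears its
 * stop flag on the way out, for instance).
 */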
430 static int
431 nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
432 {
433 	struct nvmm_machine *mach;
434 	struct nvmm_cpu *vcpu;
435 	int error;
436 
437 	error = nvmm_machine_get(owner, args->machid, &mach, false);
438 	if (error)
439 		return error;
440 
441 	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
442 	if (error)
443 		goto out;
444 
445 	/* Allocate the comm page. */
446 	uao_reference(mach->commuobj);
447 	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
448 	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
449 	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
450 	if (error) {
451 		uao_detach(mach->commuobj);
452 		nvmm_vcpu_free(mach, vcpu);
453 		nvmm_vcpu_put(vcpu);
454 		goto out;
455 	}
456 	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
457 	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
458 	if (error) {
459 		nvmm_vcpu_free(mach, vcpu);
460 		nvmm_vcpu_put(vcpu);
461 		goto out;
462 	}
463 	memset(vcpu->comm, 0, PAGE_SIZE);
464 
465 	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
466 	if (error) {
467 		nvmm_vcpu_free(mach, vcpu);
468 		nvmm_vcpu_put(vcpu);
469 		goto out;
470 	}
471 
472 	nvmm_vcpu_put(vcpu);
473 	atomic_inc_uint(&mach->ncpus);
474 
475 out:
476 	nvmm_machine_put(mach);
477 	return error;
478 }
479 
480 static int
481 nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
482 {
483 	struct nvmm_machine *mach;
484 	struct nvmm_cpu *vcpu;
485 	int error;
486 
487 	error = nvmm_machine_get(owner, args->machid, &mach, false);
488 	if (error)
489 		return error;
490 
491 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
492 	if (error)
493 		goto out;
494 
495 	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
496 	nvmm_vcpu_free(mach, vcpu);
497 	nvmm_vcpu_put(vcpu);
498 	atomic_dec_uint(&mach->ncpus);
499 
500 out:
501 	nvmm_machine_put(mach);
502 	return error;
503 }
504 
505 static int
506 nvmm_vcpu_configure(struct nvmm_owner *owner,
507     struct nvmm_ioc_vcpu_configure *args)
508 {
509 	struct nvmm_machine *mach;
510 	struct nvmm_cpu *vcpu;
511 	size_t allocsz;
512 	uint64_t op;
513 	void *data;
514 	int error;
515 
516 	op = NVMM_VCPU_CONF_MD(args->op);
517 	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
518 		return EINVAL;
519 
520 	allocsz = nvmm_impl->vcpu_conf_sizes[op];
521 	data = kmem_alloc(allocsz, KM_SLEEP);
522 
523 	error = nvmm_machine_get(owner, args->machid, &mach, false);
524 	if (error) {
525 		kmem_free(data, allocsz);
526 		return error;
527 	}
528 
529 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
530 	if (error) {
531 		nvmm_machine_put(mach);
532 		kmem_free(data, allocsz);
533 		return error;
534 	}
535 
536 	error = copyin(args->conf, data, allocsz);
537 	if (error) {
538 		goto out;
539 	}
540 
541 	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);
542 
543 out:
544 	nvmm_vcpu_put(vcpu);
545 	nvmm_machine_put(mach);
546 	kmem_free(data, allocsz);
547 	return error;
548 }
549 
550 static int
551 nvmm_vcpu_setstate(struct nvmm_owner *owner,
552     struct nvmm_ioc_vcpu_setstate *args)
553 {
554 	struct nvmm_machine *mach;
555 	struct nvmm_cpu *vcpu;
556 	int error;
557 
558 	error = nvmm_machine_get(owner, args->machid, &mach, false);
559 	if (error)
560 		return error;
561 
562 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
563 	if (error)
564 		goto out;
565 
566 	(*nvmm_impl->vcpu_setstate)(vcpu);
567 	nvmm_vcpu_put(vcpu);
568 
569 out:
570 	nvmm_machine_put(mach);
571 	return error;
572 }
573 
574 static int
575 nvmm_vcpu_getstate(struct nvmm_owner *owner,
576     struct nvmm_ioc_vcpu_getstate *args)
577 {
578 	struct nvmm_machine *mach;
579 	struct nvmm_cpu *vcpu;
580 	int error;
581 
582 	error = nvmm_machine_get(owner, args->machid, &mach, false);
583 	if (error)
584 		return error;
585 
586 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
587 	if (error)
588 		goto out;
589 
590 	(*nvmm_impl->vcpu_getstate)(vcpu);
591 	nvmm_vcpu_put(vcpu);
592 
593 out:
594 	nvmm_machine_put(mach);
595 	return error;
596 }
597 
598 static int
599 nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
600 {
601 	struct nvmm_machine *mach;
602 	struct nvmm_cpu *vcpu;
603 	int error;
604 
605 	error = nvmm_machine_get(owner, args->machid, &mach, false);
606 	if (error)
607 		return error;
608 
609 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
610 	if (error)
611 		goto out;
612 
613 	error = (*nvmm_impl->vcpu_inject)(vcpu);
614 	nvmm_vcpu_put(vcpu);
615 
616 out:
617 	nvmm_machine_put(mach);
618 	return error;
619 }
620 
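/*
 * Inner run loop: bail out to userland when a signal or reschedule is
 * pending, otherwise keep re-entering the guest.  Exits of type
 * NVMM_VCPU_EXIT_MEMORY that fall inside the guest address range are
 * resolved here by faulting the page in with uvm_fault(), so userland
 * never sees them.
 */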
621 static int
622 nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
623     struct nvmm_vcpu_exit *exit)
624 {
625 	struct vmspace *vm = mach->vm;
626 	int ret;
627 
628 	while (1) {
629 		/* Got a signal? Or pending resched? Leave. */
630 		if (__predict_false(nvmm_return_needed(vcpu, exit))) {
631 			return 0;
632 		}
633 
634 		/* Run the VCPU. */
635 		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
636 		if (__predict_false(ret != 0)) {
637 			return ret;
638 		}
639 
640 		/* Process nested page faults. */
641 		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
642 			break;
643 		}
644 		if (exit->u.mem.gpa >= mach->gpa_end) {
645 			break;
646 		}
647 		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
648 			break;
649 		}
650 	}
651 
652 	return 0;
653 }
654 
655 static int
656 nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
657 {
658 	struct nvmm_machine *mach;
659 	struct nvmm_cpu *vcpu = NULL;
660 	int error;
661 
662 	error = nvmm_machine_get(owner, args->machid, &mach, false);
663 	if (error)
664 		return error;
665 
666 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
667 	if (error)
668 		goto out;
669 
670 	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
671 	nvmm_vcpu_put(vcpu);
672 
673 out:
674 	nvmm_machine_put(mach);
675 	if (vcpu)
676 		vcpu->comm->stop = 0;
677 	return error;
678 }
679 
680 /* -------------------------------------------------------------------------- */
681 
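/*
 * Host mappings.  nvmm_hva_map() backs a page-aligned range of the caller's
 * address space with an anonymous uobj; nvmm_gpa_map() later maps windows of
 * those uobjs into the guest vmspace.  The helpers below look up, validate,
 * allocate and free the hmap[] slots that record these ranges.
 */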
682 static struct uvm_object *
683 nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
684    size_t *off)
685 {
686 	struct nvmm_hmapping *hmapping;
687 	size_t i;
688 
689 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
690 		hmapping = &mach->hmap[i];
691 		if (!hmapping->present) {
692 			continue;
693 		}
694 		if (hva >= hmapping->hva &&
695 		    hva + size <= hmapping->hva + hmapping->size) {
696 			*off = hva - hmapping->hva;
697 			return hmapping->uobj;
698 		}
699 	}
700 
701 	return NULL;
702 }
703 
704 static int
705 nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
706 {
707 	struct nvmm_hmapping *hmapping;
708 	size_t i;
709 
710 	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
711 		return EINVAL;
712 	}
713 	if (hva == 0) {
714 		return EINVAL;
715 	}
716 
717 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
718 		hmapping = &mach->hmap[i];
719 		if (!hmapping->present) {
720 			continue;
721 		}
722 
723 		if (hva >= hmapping->hva &&
724 		    hva + size <= hmapping->hva + hmapping->size) {
725 			break;
726 		}
727 
728 		if (hva >= hmapping->hva &&
729 		    hva < hmapping->hva + hmapping->size) {
730 			return EEXIST;
731 		}
732 		if (hva + size > hmapping->hva &&
733 		    hva + size <= hmapping->hva + hmapping->size) {
734 			return EEXIST;
735 		}
736 		if (hva <= hmapping->hva &&
737 		    hva + size >= hmapping->hva + hmapping->size) {
738 			return EEXIST;
739 		}
740 	}
741 
742 	return 0;
743 }
744 
745 static struct nvmm_hmapping *
746 nvmm_hmapping_alloc(struct nvmm_machine *mach)
747 {
748 	struct nvmm_hmapping *hmapping;
749 	size_t i;
750 
751 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
752 		hmapping = &mach->hmap[i];
753 		if (!hmapping->present) {
754 			hmapping->present = true;
755 			return hmapping;
756 		}
757 	}
758 
759 	return NULL;
760 }
761 
762 static int
763 nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
764 {
765 	struct vmspace *vmspace = curproc->p_vmspace;
766 	struct nvmm_hmapping *hmapping;
767 	size_t i;
768 
769 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
770 		hmapping = &mach->hmap[i];
771 		if (!hmapping->present || hmapping->hva != hva ||
772 		    hmapping->size != size) {
773 			continue;
774 		}
775 
776 		uvm_unmap(&vmspace->vm_map, hmapping->hva,
777 		    hmapping->hva + hmapping->size);
778 		uao_detach(hmapping->uobj);
779 
780 		hmapping->uobj = NULL;
781 		hmapping->present = false;
782 
783 		return 0;
784 	}
785 
786 	return ENOENT;
787 }
788 
789 static int
790 nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
791 {
792 	struct vmspace *vmspace = curproc->p_vmspace;
793 	struct nvmm_machine *mach;
794 	struct nvmm_hmapping *hmapping;
795 	vaddr_t uva;
796 	int error;
797 
798 	error = nvmm_machine_get(owner, args->machid, &mach, true);
799 	if (error)
800 		return error;
801 
802 	error = nvmm_hmapping_validate(mach, args->hva, args->size);
803 	if (error)
804 		goto out;
805 
806 	hmapping = nvmm_hmapping_alloc(mach);
807 	if (hmapping == NULL) {
808 		error = ENOBUFS;
809 		goto out;
810 	}
811 
812 	hmapping->hva = args->hva;
813 	hmapping->size = args->size;
814 	hmapping->uobj = uao_create(hmapping->size, 0);
815 	uva = hmapping->hva;
816 
817 	/* Take a reference for the user. */
818 	uao_reference(hmapping->uobj);
819 
820 	/* Map the uobj into the user address space, as pageable. */
821 	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
822 	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
823 	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
824 	if (error) {
825 		uao_detach(hmapping->uobj);
826 	}
827 
828 out:
829 	nvmm_machine_put(mach);
830 	return error;
831 }
832 
833 static int
834 nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
835 {
836 	struct nvmm_machine *mach;
837 	int error;
838 
839 	error = nvmm_machine_get(owner, args->machid, &mach, true);
840 	if (error)
841 		return error;
842 
843 	error = nvmm_hmapping_free(mach, args->hva, args->size);
844 
845 	nvmm_machine_put(mach);
846 	return error;
847 }
848 
849 /* -------------------------------------------------------------------------- */
850 
851 static int
852 nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
853 {
854 	struct nvmm_machine *mach;
855 	struct uvm_object *uobj;
856 	gpaddr_t gpa;
857 	size_t off;
858 	int error;
859 
860 	error = nvmm_machine_get(owner, args->machid, &mach, false);
861 	if (error)
862 		return error;
863 
864 	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
865 		error = EINVAL;
866 		goto out;
867 	}
868 
869 	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
870 	    (args->hva % PAGE_SIZE) != 0) {
871 		error = EINVAL;
872 		goto out;
873 	}
874 	if (args->hva == 0) {
875 		error = EINVAL;
876 		goto out;
877 	}
878 	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
879 		error = EINVAL;
880 		goto out;
881 	}
882 	if (args->gpa + args->size <= args->gpa) {
883 		error = EINVAL;
884 		goto out;
885 	}
886 	if (args->gpa + args->size > mach->gpa_end) {
887 		error = EINVAL;
888 		goto out;
889 	}
890 	gpa = args->gpa;
891 
892 	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
893 	if (uobj == NULL) {
894 		error = EINVAL;
895 		goto out;
896 	}
897 
898 	/* Take a reference for the machine. */
899 	uao_reference(uobj);
900 
901 	/* Map the uobj into the machine address space, as pageable. */
902 	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
903 	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
904 	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
905 	if (error) {
906 		uao_detach(uobj);
907 		goto out;
908 	}
909 	if (gpa != args->gpa) {
910 		uao_detach(uobj);
911 		printf("[!] uvm_map problem\n");
912 		error = EINVAL;
913 		goto out;
914 	}
915 
916 out:
917 	nvmm_machine_put(mach);
918 	return error;
919 }
920 
921 static int
922 nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
923 {
924 	struct nvmm_machine *mach;
925 	gpaddr_t gpa;
926 	int error;
927 
928 	error = nvmm_machine_get(owner, args->machid, &mach, false);
929 	if (error)
930 		return error;
931 
932 	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
933 		error = EINVAL;
934 		goto out;
935 	}
936 	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
937 		error = EINVAL;
938 		goto out;
939 	}
940 	if (args->gpa + args->size <= args->gpa) {
941 		error = EINVAL;
942 		goto out;
943 	}
944 	if (args->gpa + args->size > mach->gpa_end) {
945 		error = EINVAL;
946 		goto out;
947 	}
948 	gpa = args->gpa;
949 
950 	/* Unmap the memory from the machine. */
951 	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);
952 
953 out:
954 	nvmm_machine_put(mach);
955 	return error;
956 }
957 
958 /* -------------------------------------------------------------------------- */
959 
960 static int
961 nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
962 {
963 	struct nvmm_ctl_mach_info ctl;
964 	struct nvmm_machine *mach;
965 	int error;
966 	size_t i;
967 
968 	if (args->size != sizeof(ctl))
969 		return EINVAL;
970 	error = copyin(args->data, &ctl, sizeof(ctl));
971 	if (error)
972 		return error;
973 
974 	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
975 	if (error)
976 		return error;
977 
978 	ctl.nvcpus = mach->ncpus;
979 
980 	ctl.nram = 0;
981 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
982 		if (!mach->hmap[i].present)
983 			continue;
984 		ctl.nram += mach->hmap[i].size;
985 	}
986 
987 	ctl.pid = mach->owner->pid;
988 	ctl.time = mach->time;
989 
990 	nvmm_machine_put(mach);
991 
992 	error = copyout(&ctl, args->data, sizeof(ctl));
993 	if (error)
994 		return error;
995 
996 	return 0;
997 }
998 
999 static int
1000 nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
1001 {
1002 	switch (args->op) {
1003 	case NVMM_CTL_MACH_INFO:
1004 		return nvmm_ctl_mach_info(owner, args);
1005 	default:
1006 		return EINVAL;
1007 	}
1008 }
1009 
1010 /* -------------------------------------------------------------------------- */
1011 
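/*
 * Backend selection: the first entry of nvmm_impl_list whose ident() callback
 * reports supported hardware becomes nvmm_impl (SVM or VMX on x86_64).
 */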
1012 static const struct nvmm_impl *
1013 nvmm_ident(void)
1014 {
1015 	size_t i;
1016 
1017 	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
1018 		if ((*nvmm_impl_list[i]->ident)())
1019 			return nvmm_impl_list[i];
1020 	}
1021 
1022 	return NULL;
1023 }
1024 
1025 static int
1026 nvmm_init(void)
1027 {
1028 	size_t i, n;
1029 
1030 	nvmm_impl = nvmm_ident();
1031 	if (nvmm_impl == NULL)
1032 		return ENOTSUP;
1033 
1034 	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
1035 		machines[i].machid = i;
1036 		rw_init(&machines[i].lock);
1037 		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
1038 			machines[i].cpus[n].present = false;
1039 			machines[i].cpus[n].cpuid = n;
1040 			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
1041 			    IPL_NONE);
1042 		}
1043 	}
1044 
1045 	mutex_init(&suspension.lock, MUTEX_DEFAULT, IPL_NONE);
1046 	cv_init(&suspension.suspendcv, "nvmmsus");
1047 	cv_init(&suspension.resumecv, "nvmmres");
1048 	suspension.users = 0;
1049 
1050 	(*nvmm_impl->init)();
1051 
1052 	return 0;
1053 }
1054 
1055 static void
1056 nvmm_fini(void)
1057 {
1058 	size_t i, n;
1059 
1060 	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
1061 		rw_destroy(&machines[i].lock);
1062 		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
1063 			mutex_destroy(&machines[i].cpus[n].lock);
1064 		}
1065 	}
1066 
1067 	(*nvmm_impl->fini)();
1068 	nvmm_impl = NULL;
1069 }
1070 
1071 /* -------------------------------------------------------------------------- */
1072 
1073 static dev_type_open(nvmm_open);
1074 
1075 const struct cdevsw nvmm_cdevsw = {
1076 	.d_open = nvmm_open,
1077 	.d_close = noclose,
1078 	.d_read = noread,
1079 	.d_write = nowrite,
1080 	.d_ioctl = noioctl,
1081 	.d_stop = nostop,
1082 	.d_tty = notty,
1083 	.d_poll = nopoll,
1084 	.d_mmap = nommap,
1085 	.d_kqfilter = nokqfilter,
1086 	.d_discard = nodiscard,
1087 	.d_flag = D_OTHER | D_MPSAFE
1088 };
1089 
1090 static int nvmm_ioctl(file_t *, u_long, void *);
1091 static int nvmm_close(file_t *);
1092 static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
1093     struct uvm_object **, int *);
1094 
1095 static const struct fileops nvmm_fileops = {
1096 	.fo_read = fbadop_read,
1097 	.fo_write = fbadop_write,
1098 	.fo_ioctl = nvmm_ioctl,
1099 	.fo_fcntl = fnullop_fcntl,
1100 	.fo_poll = fnullop_poll,
1101 	.fo_stat = fbadop_stat,
1102 	.fo_close = nvmm_close,
1103 	.fo_kqfilter = fnullop_kqfilter,
1104 	.fo_restart = fnullop_restart,
1105 	.fo_mmap = nvmm_mmap,
1106 };
1107 
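/*
 * Opening /dev/nvmm requires O_CLOEXEC.  A write-only open is given the
 * special root_owner, which nvmm_machine_get() allows to access any machine;
 * every other open gets its own per-process owner.
 */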
1108 static int
1109 nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
1110 {
1111 	struct nvmm_owner *owner;
1112 	struct file *fp;
1113 	int error, fd;
1114 
1115 	if (__predict_false(nvmm_impl == NULL))
1116 		return ENXIO;
1117 	if (minor(dev) != 0)
1118 		return EXDEV;
1119 	if (!(flags & O_CLOEXEC))
1120 		return EINVAL;
1121 	error = fd_allocfile(&fp, &fd);
1122 	if (error)
1123 		return error;
1124 
1125 	if (OFLAGS(flags) & O_WRONLY) {
1126 		owner = &root_owner;
1127 	} else {
1128 		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
1129 		owner->pid = l->l_proc->p_pid;
1130 	}
1131 
1132 	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
1133 }
1134 
1135 static int
1136 nvmm_close(file_t *fp)
1137 {
1138 	struct nvmm_owner *owner = fp->f_data;
1139 
1140 	KASSERT(owner != NULL);
1141 
1142 	nvmm_enter();
1143 	nvmm_kill_machines(owner);
1144 	nvmm_exit();
1145 
1146 	if (owner != &root_owner) {
1147 		kmem_free(owner, sizeof(*owner));
1148 	}
1149 	fp->f_data = NULL;
1150 
1151 	return 0;
1152 }
1153 
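/*
 * mmap() on the nvmm file maps a VCPU comm page into userland.  The target
 * machine and VCPU are encoded in the file offset (NVMM_COMM_MACHID /
 * NVMM_COMM_CPUID); the offset handed back to UVM is the page index of that
 * VCPU inside the machine's comm uobj.
 */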
1154 static int
1155 nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
1156     int *advicep, struct uvm_object **uobjp, int *maxprotp)
1157 {
1158 	struct nvmm_owner *owner = fp->f_data;
1159 	struct nvmm_machine *mach;
1160 	nvmm_machid_t machid;
1161 	nvmm_cpuid_t cpuid;
1162 	int error;
1163 
1164 	KASSERT(size > 0);
1165 
1166 	if (prot & PROT_EXEC)
1167 		return EACCES;
1168 	if (size != PAGE_SIZE)
1169 		return EINVAL;
1170 
1171 	cpuid = NVMM_COMM_CPUID(*offp);
1172 	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
1173 		return EINVAL;
1174 
1175 	machid = NVMM_COMM_MACHID(*offp);
1176 	error = nvmm_machine_get(owner, machid, &mach, false);
1177 	if (error)
1178 		return error;
1179 
1180 	uao_reference(mach->commuobj);
1181 	*uobjp = mach->commuobj;
1182 	*offp = cpuid * PAGE_SIZE;
1183 	*maxprotp = prot;
1184 	*advicep = UVM_ADV_RANDOM;
1185 
1186 	nvmm_machine_put(mach);
1187 	return 0;
1188 }
1189 
1190 static int
1191 nvmm_ioctl_internal(file_t *fp, u_long cmd, void *data)
1192 {
1193 	struct nvmm_owner *owner = fp->f_data;
1194 
1195 	KASSERT(owner != NULL);
1196 
1197 	switch (cmd) {
1198 	case NVMM_IOC_CAPABILITY:
1199 		return nvmm_capability(owner, data);
1200 	case NVMM_IOC_MACHINE_CREATE:
1201 		return nvmm_machine_create(owner, data);
1202 	case NVMM_IOC_MACHINE_DESTROY:
1203 		return nvmm_machine_destroy(owner, data);
1204 	case NVMM_IOC_MACHINE_CONFIGURE:
1205 		return nvmm_machine_configure(owner, data);
1206 	case NVMM_IOC_VCPU_CREATE:
1207 		return nvmm_vcpu_create(owner, data);
1208 	case NVMM_IOC_VCPU_DESTROY:
1209 		return nvmm_vcpu_destroy(owner, data);
1210 	case NVMM_IOC_VCPU_CONFIGURE:
1211 		return nvmm_vcpu_configure(owner, data);
1212 	case NVMM_IOC_VCPU_SETSTATE:
1213 		return nvmm_vcpu_setstate(owner, data);
1214 	case NVMM_IOC_VCPU_GETSTATE:
1215 		return nvmm_vcpu_getstate(owner, data);
1216 	case NVMM_IOC_VCPU_INJECT:
1217 		return nvmm_vcpu_inject(owner, data);
1218 	case NVMM_IOC_VCPU_RUN:
1219 		return nvmm_vcpu_run(owner, data);
1220 	case NVMM_IOC_GPA_MAP:
1221 		return nvmm_gpa_map(owner, data);
1222 	case NVMM_IOC_GPA_UNMAP:
1223 		return nvmm_gpa_unmap(owner, data);
1224 	case NVMM_IOC_HVA_MAP:
1225 		return nvmm_hva_map(owner, data);
1226 	case NVMM_IOC_HVA_UNMAP:
1227 		return nvmm_hva_unmap(owner, data);
1228 	case NVMM_IOC_CTL:
1229 		return nvmm_ctl(owner, data);
1230 	default:
1231 		return EINVAL;
1232 	}
1233 }
1234 
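/*
 * For orientation, a rough sketch of how a userland client drives the ioctls
 * dispatched above (names are illustrative; real consumers normally go
 * through libnvmm rather than raw ioctls):
 *
 *	fd = open("/dev/nvmm", O_RDONLY | O_CLOEXEC);
 *	ioctl(fd, NVMM_IOC_CAPABILITY, &cap);
 *	ioctl(fd, NVMM_IOC_MACHINE_CREATE, &mc);	mc.machid is filled in
 *	ioctl(fd, NVMM_IOC_HVA_MAP, &hm);		back guest RAM with a uobj
 *	ioctl(fd, NVMM_IOC_GPA_MAP, &gm);		map it into the guest
 *	ioctl(fd, NVMM_IOC_VCPU_CREATE, &vc);
 *	ioctl(fd, NVMM_IOC_VCPU_RUN, &vr);		in a loop, handling vr.exit
 */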
1235 static int
1236 nvmm_ioctl(struct file *fp, u_long cmd, void *data)
1237 {
1238 	int error;
1239 
1240 	error = nvmm_enter_sig();
1241 	if (error)
1242 		return error;
1243 	error = nvmm_ioctl_internal(fp, cmd, data);
1244 	nvmm_exit();
1245 
1246 	return error;
1247 }
1248 
1249 /* -------------------------------------------------------------------------- */
1250 
1251 static int nvmm_match(device_t, cfdata_t, void *);
1252 static void nvmm_attach(device_t, device_t, void *);
1253 static int nvmm_detach(device_t, int);
1254 static bool nvmm_suspend(device_t, const pmf_qual_t *);
1255 static bool nvmm_resume(device_t, const pmf_qual_t *);
1256 
1257 extern struct cfdriver nvmm_cd;
1258 
1259 CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);
1260 
1261 static struct cfdata nvmm_cfdata[] = {
1262 	{
1263 		.cf_name = "nvmm",
1264 		.cf_atname = "nvmm",
1265 		.cf_unit = 0,
1266 		.cf_fstate = FSTATE_STAR,
1267 		.cf_loc = NULL,
1268 		.cf_flags = 0,
1269 		.cf_pspec = NULL,
1270 	},
1271 	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
1272 };
1273 
1274 static int
1275 nvmm_match(device_t self, cfdata_t cfdata, void *arg)
1276 {
1277 	return 1;
1278 }
1279 
1280 static void
1281 nvmm_attach(device_t parent, device_t self, void *aux)
1282 {
1283 	int error;
1284 
1285 	error = nvmm_init();
1286 	if (error)
1287 		panic("%s: impossible", __func__);
1288 	aprint_normal_dev(self, "attached, using backend %s\n",
1289 	    nvmm_impl->name);
1290 	if (nvmm_impl->suspend != NULL && nvmm_impl->resume != NULL)
1291 		pmf_device_register(self, nvmm_suspend, nvmm_resume);
1292 }
1293 
1294 static int
1295 nvmm_detach(device_t self, int flags)
1296 {
1297 	if (atomic_load_relaxed(&nmachines) > 0)
1298 		return EBUSY;
1299 	pmf_device_deregister(self);
1300 	nvmm_fini();
1301 	return 0;
1302 }
1303 
1304 static void
1305 nvmm_suspend_vcpu(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
1306 {
1307 
1308 	mutex_enter(&vcpu->lock);
1309 	if (vcpu->present && nvmm_impl->vcpu_suspend)
1310 		(*nvmm_impl->vcpu_suspend)(mach, vcpu);
1311 	mutex_exit(&vcpu->lock);
1312 }
1313 
1314 static void
1315 nvmm_resume_vcpu(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
1316 {
1317 
1318 	mutex_enter(&vcpu->lock);
1319 	if (vcpu->present && nvmm_impl->vcpu_resume)
1320 		(*nvmm_impl->vcpu_resume)(mach, vcpu);
1321 	mutex_exit(&vcpu->lock);
1322 }
1323 
1324 static void
1325 nvmm_suspend_machine(struct nvmm_machine *mach)
1326 {
1327 
1328 	rw_enter(&mach->lock, RW_WRITER);
1329 	if (mach->present) {
1330 		if (nvmm_impl->vcpu_suspend) {
1331 			size_t cpuid;
1332 
1333 			for (cpuid = 0; cpuid < NVMM_MAX_VCPUS; cpuid++)
1334 				nvmm_suspend_vcpu(mach, &mach->cpus[cpuid]);
1335 		}
1336 		if (nvmm_impl->machine_suspend)
1337 			(*nvmm_impl->machine_suspend)(mach);
1338 	}
1339 	rw_exit(&mach->lock);
1340 }
1341 
1342 static void
1343 nvmm_resume_machine(struct nvmm_machine *mach)
1344 {
1345 
1346 	rw_enter(&mach->lock, RW_WRITER);
1347 	if (mach->present) {
1348 		if (nvmm_impl->vcpu_resume) {
1349 			size_t cpuid;
1350 
1351 			for (cpuid = 0; cpuid < NVMM_MAX_VCPUS; cpuid++)
1352 				nvmm_resume_vcpu(mach, &mach->cpus[cpuid]);
1353 		}
1354 		if (nvmm_impl->machine_resume)
1355 			(*nvmm_impl->machine_resume)(mach);
1356 	}
1357 	rw_exit(&mach->lock);
1358 }
1359 
1360 static bool
1361 nvmm_suspend(device_t self, const pmf_qual_t *qual)
1362 {
1363 	size_t i;
1364 
1365 	/*
1366 	 * Prevent new users (via ioctl) from starting.
1367 	 */
1368 	mutex_enter(&suspension.lock);
1369 	KASSERT(!nvmm_suspending);
1370 	atomic_store_relaxed(&nvmm_suspending, true);
1371 	mutex_exit(&suspension.lock);
1372 
1373 	/*
1374 	 * Interrupt any running VMs so they will break out of run
1375 	 * loops or anything else and not start up again until we've
1376 	 * resumed.
1377 	 */
1378 	if (nvmm_impl->suspend_interrupt)
1379 		(*nvmm_impl->suspend_interrupt)();
1380 
1381 	/*
1382 	 * Wait for any running VMs or other ioctls to finish running
1383 	 * or handling any other ioctls.
1384 	 */
1385 	mutex_enter(&suspension.lock);
1386 	while (suspension.users)
1387 		cv_wait(&suspension.suspendcv, &suspension.lock);
1388 	mutex_exit(&suspension.lock);
1389 
1390 	/*
1391 	 * Suspend all the machines.
1392 	 */
1393 	if (nvmm_impl->machine_suspend || nvmm_impl->vcpu_suspend) {
1394 		for (i = 0; i < NVMM_MAX_MACHINES; i++)
1395 			nvmm_suspend_machine(&machines[i]);
1396 	}
1397 
1398 	/*
1399 	 * Take any systemwide suspend action.
1400 	 */
1401 	if (nvmm_impl->suspend)
1402 		(*nvmm_impl->suspend)();
1403 
1404 	return true;
1405 }
1406 
1407 static bool
1408 nvmm_resume(device_t self, const pmf_qual_t *qual)
1409 {
1410 	size_t i;
1411 
1412 	KASSERT(atomic_load_relaxed(&nvmm_suspending));
1413 	KASSERT(suspension.users == 0);
1414 
1415 	/*
1416 	 * Take any systemwide resume action.
1417 	 */
1418 	if (nvmm_impl->resume)
1419 		(*nvmm_impl->resume)();
1420 
1421 	/*
1422 	 * Resume all the machines.
1423 	 */
1424 	if (nvmm_impl->machine_resume || nvmm_impl->vcpu_resume) {
1425 		for (i = 0; i < NVMM_MAX_MACHINES; i++)
1426 			nvmm_resume_machine(&machines[i]);
1427 	}
1428 
1429 	/*
1430 	 * Allow new users (via ioctl) to start again.
1431 	 */
1432 	mutex_enter(&suspension.lock);
1433 	atomic_store_relaxed(&nvmm_suspending, false);
1434 	cv_broadcast(&suspension.resumecv);
1435 	mutex_exit(&suspension.lock);
1436 
1437 	return true;
1438 }
1439 
1440 void
1441 nvmmattach(int nunits)
1442 {
1443 	/* nothing */
1444 }
1445 
1446 MODULE(MODULE_CLASS_MISC, nvmm, NULL);
1447 
1448 #if defined(_MODULE)
1449 CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
1450 #endif
1451 
1452 static int
1453 nvmm_modcmd(modcmd_t cmd, void *arg)
1454 {
1455 #if defined(_MODULE)
1456 	devmajor_t bmajor = NODEVMAJOR;
1457 	devmajor_t cmajor = 345;
1458 #endif
1459 	int error;
1460 
1461 	switch (cmd) {
1462 	case MODULE_CMD_INIT:
1463 		if (nvmm_ident() == NULL) {
1464 			aprint_error("%s: cpu not supported\n",
1465 			    nvmm_cd.cd_name);
1466 			return ENOTSUP;
1467 		}
1468 #if defined(_MODULE)
1469 		error = config_cfdriver_attach(&nvmm_cd);
1470 		if (error)
1471 			return error;
1472 #endif
1473 		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
1474 		if (error) {
1475 #if defined(_MODULE)
1476 			config_cfdriver_detach(&nvmm_cd);
1477 #endif
1478 			aprint_error("%s: config_cfattach_attach failed\n",
1479 			    nvmm_cd.cd_name);
1480 			return error;
1481 		}
1482 
1483 		error = config_cfdata_attach(nvmm_cfdata, 1);
1484 		if (error) {
1485 			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
1486 #if defined(_MODULE)
1487 			config_cfdriver_detach(&nvmm_cd);
1488 #endif
1489 			aprint_error("%s: unable to register cfdata\n",
1490 			    nvmm_cd.cd_name);
1491 			return error;
1492 		}
1493 
1494 		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
1495 			aprint_error("%s: config_attach_pseudo failed\n",
1496 			    nvmm_cd.cd_name);
1497 			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
1498 #if defined(_MODULE)
1499 			config_cfdriver_detach(&nvmm_cd);
1500 #endif
1501 			return ENXIO;
1502 		}
1503 
1504 #if defined(_MODULE)
1505 		/* mknod /dev/nvmm c 345 0 */
1506 		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
1507 			&nvmm_cdevsw, &cmajor);
1508 		if (error) {
1509 			aprint_error("%s: unable to register devsw, err %d\n",
1510 			    nvmm_cd.cd_name, error);
1511 			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
1512 			config_cfdriver_detach(&nvmm_cd);
1513 			return error;
1514 		}
1515 #endif
1516 		return 0;
1517 	case MODULE_CMD_FINI:
1518 		error = config_cfdata_detach(nvmm_cfdata);
1519 		if (error)
1520 			return error;
1521 		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
1522 		if (error)
1523 			return error;
1524 #if defined(_MODULE)
1525 		config_cfdriver_detach(&nvmm_cd);
1526 		devsw_detach(NULL, &nvmm_cdevsw);
1527 #endif
1528 		return 0;
1529 	case MODULE_CMD_AUTOUNLOAD:
1530 		return EBUSY;
1531 	default:
1532 		return ENOTTY;
1533 	}
1534 }
1535