xref: /dflybsd-src/sys/vm/vm_vmspace.c (revision 0087561d6d4d84b8ac1a312cc720339cbf66781d)
1 /*
2  * (MPSAFE)
3  *
4  * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
5  *
6  * This code is derived from software contributed to The DragonFly Project
7  * by Matthew Dillon <dillon@backplane.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  * 3. Neither the name of The DragonFly Project nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific, prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/kernel.h>
39 #include <sys/systm.h>
40 #include <sys/sysproto.h>
41 #include <sys/kern_syscall.h>
42 #include <sys/mman.h>
43 #include <sys/thread.h>
44 #include <sys/proc.h>
45 #include <sys/malloc.h>
46 #include <sys/sysctl.h>
47 #include <sys/vkernel.h>
48 #include <sys/vmspace.h>
49 
50 #include <vm/vm_extern.h>
51 #include <vm/pmap.h>
52 
53 #include <machine/vmparam.h>
54 #include <machine/vmm.h>
55 
56 #include <sys/sysref2.h>
57 
58 static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
59 						  void *id, int havetoken);
60 static void vmspace_entry_delete(struct vmspace_entry *ve,
61 				 struct vkernel_proc *vkp);
62 static void vmspace_entry_drop(struct vmspace_entry *ve);
63 
64 static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");
65 
66 /*
67  * vmspace_create (void *id, int type, void *data)
68  *
69  * Create a VMSPACE under the control of the caller with the specified id.
70  * An id of NULL cannot be used.  The type and data fields must currently
71  * be 0.
72  *
73  * The vmspace starts out completely empty.  Memory may be mapped into the
74  * VMSPACE with vmspace_mmap(), and MAP_VPAGETABLE sections may be
75  * controlled with vmspace_mcontrol().
76  *
77  * No requirements.
78  */
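/*
 * Hypothetical userland sketch (names like vp and alloc_vproc() are
 * placeholders; the prototype is assumed to come from <sys/vmspace.h>):
 * the virtual kernel would typically use the address of one of its own
 * per-process structures as the id:
 *
 *	struct vproc *vp = alloc_vproc();		// placeholder helper
 *
 *	if (vmspace_create((void *)vp, 0, NULL) < 0)	// type/data must be 0
 *		err(1, "vmspace_create");
 *	...
 *	vmspace_destroy((void *)vp);			// tear it down when done
 */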
79 int
80 sys_vmspace_create(struct vmspace_create_args *uap)
81 {
82 	struct vmspace_entry *ve;
83 	struct vkernel_proc *vkp;
84 	struct proc *p = curproc;
85 	int error;
86 
87 	if (vkernel_enable == 0)
88 		return (EOPNOTSUPP);
89 
90 	/*
91 	 * Create a virtual kernel side-structure for the process if one
92 	 * does not exist.  Resolve the SMP race simply: allocate first,
93 	 * then recheck p->p_vkernel under p_token and free our copy if
94 	 * another thread installed a structure in the meantime.
95 	 */
96 	if ((vkp = p->p_vkernel) == NULL) {
97 		vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
98 		lwkt_gettoken(&p->p_token);
99 		if (p->p_vkernel == NULL) {
100 			vkp->refs = 1;
101 			lwkt_token_init(&vkp->token, "vkernel");
102 			RB_INIT(&vkp->root);
103 			p->p_vkernel = vkp;
104 		} else {
105 			kfree(vkp, M_VKERNEL);
106 			vkp = p->p_vkernel;
107 		}
108 		lwkt_reltoken(&p->p_token);
109 	}
110 
111 	if (curthread->td_vmm)
112 		return 0;
113 
114 	/*
115 	 * Create a new VMSPACE, disallow conflicting ids
116 	 */
117 	ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
118 	ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
119 	ve->id = uap->id;
120 	ve->refs = 1;
121 	lwkt_token_init(&ve->token, "vkernve");
122 	pmap_pinit2(vmspace_pmap(ve->vmspace));
123 
124 	lwkt_gettoken(&vkp->token);
125 	if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
126 		vmspace_rel(ve->vmspace);
127 		ve->vmspace = NULL; /* safety */
128 		kfree(ve, M_VKERNEL);
129 		error = EEXIST;
130 	} else {
131 		error = 0;
132 	}
133 	lwkt_reltoken(&vkp->token);
134 
135 	return (error);
136 }
137 
138 /*
139  * Destroy a VMSPACE given its identifier.
140  *
141  * No requirements.
142  */
143 int
144 sys_vmspace_destroy(struct vmspace_destroy_args *uap)
145 {
146 	struct vkernel_proc *vkp;
147 	struct vmspace_entry *ve;
148 	int error;
149 
150 	if ((vkp = curproc->p_vkernel) == NULL) {
151 		error = EINVAL;
152 		goto done3;
153 	}
154 	lwkt_gettoken(&vkp->token);
155 	if ((ve = vkernel_find_vmspace(vkp, uap->id, 1)) == NULL) {
156 		error = ENOENT;
157 		goto done2;
158 	}
159 	if (ve->refs != 2 + ve->cache_refs) {	/* our ref + index ref */
160 		error = EBUSY;
161 		goto done2;
162 	}
163 	vmspace_entry_delete(ve, vkp);
164 	vmspace_entry_drop(ve);
165 	error = 0;
166 done2:
167 	lwkt_reltoken(&vkp->token);
168 done3:
169 	return(error);
170 }
171 
172 /*
173  * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
174  *		struct vextframe *vframe);
175  *
176  * Transfer control to a VMSPACE.  Control is returned after the specified
177  * number of microseconds or if a page fault, signal, trap, or system call
178  * occurs.  The context is updated as appropriate.
179  *
180  * No requirements.
181  */
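/*
 * Hypothetical usage sketch: the virtual kernel resumes an emulated
 * process by handing that process's register context to VMSPACE_CTL_RUN
 * and, when the call returns, inspects the updated context to see why
 * control came back (fault, signal, trap or syscall).  The helper names
 * are placeholders:
 *
 *	struct trapframe tf;	// emulated register state
 *	struct vextframe vf;	// TLS / extended state
 *
 *	setup_guest_context(&tf, &vf);			// placeholder
 *	vmspace_ctl((void *)vp, VMSPACE_CTL_RUN, &tf, &vf);
 *	handle_guest_exit(&tf);				// placeholder
 */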
182 int
183 sys_vmspace_ctl(struct vmspace_ctl_args *uap)
184 {
185 	struct vkernel_proc *vkp;
186 	struct vkernel_lwp *vklp;
187 	struct vmspace_entry *ve = NULL;
188 	struct lwp *lp;
189 	struct proc *p;
190 	int framesz;
191 	int error;
192 
193 	lp = curthread->td_lwp;
194 	p = lp->lwp_proc;
195 
196 	if ((vkp = p->p_vkernel) == NULL)
197 		return (EINVAL);
198 
199 	/*
200 	 * ve only matters when VMM is not used.
201 	 */
202 	if (curthread->td_vmm == NULL) {
203 		if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
204 			error = ENOENT;
205 			goto done;
206 		}
207 	}
208 
209 	switch(uap->cmd) {
210 	case VMSPACE_CTL_RUN:
211 		/*
212 		 * Save the caller's register context, swap VM spaces, and
213 		 * install the passed register context.  Return with
214 		 * EJUSTRETURN so the syscall code doesn't adjust the context.
215 		 */
216 		framesz = sizeof(struct trapframe);
217 		if ((vklp = lp->lwp_vkernel) == NULL) {
218 			vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
219 				       M_WAITOK|M_ZERO);
220 			lp->lwp_vkernel = vklp;
221 		}
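		/*
		 * Cache the entry on the lwp (a cache_refs count plus a
		 * real ref) so that later lookups in vkernel_find_vmspace()
		 * can bypass the RB tree.
		 */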
222 		if (ve && vklp->ve_cache != ve) {
223 			if (vklp->ve_cache) {
224 				atomic_add_int(&vklp->ve_cache->cache_refs, -1);
225 				vmspace_entry_drop(vklp->ve_cache);
226 			}
227 			vklp->ve_cache = ve;
228 			atomic_add_int(&ve->cache_refs, 1);
229 			atomic_add_int(&ve->refs, 1);
230 		}
231 		vklp->user_trapframe = uap->tframe;
232 		vklp->user_vextframe = uap->vframe;
233 		bcopy(uap->sysmsg_frame, &vklp->save_trapframe, framesz);
234 		bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
235 		      sizeof(vklp->save_vextframe.vx_tls));
236 		error = copyin(uap->tframe, uap->sysmsg_frame, framesz);
237 		if (error == 0) {
238 			error = copyin(&uap->vframe->vx_tls,
239 				       &curthread->td_tls,
240 				       sizeof(struct savetls));
241 		}
242 		if (error == 0)
243 			error = cpu_sanitize_frame(uap->sysmsg_frame);
244 		if (error == 0)
245 			error = cpu_sanitize_tls(&curthread->td_tls);
246 		if (error) {
247 			bcopy(&vklp->save_trapframe, uap->sysmsg_frame,
248 			      framesz);
249 			bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
250 			      sizeof(vklp->save_vextframe.vx_tls));
251 			set_user_TLS();
252 		} else {
253 			/*
254 			 * If it's a VMM thread we only need to set the CR3.
255 			 * In both cases vklp->ve is set to a non-NULL key so
256 			 * that we can tell whether a vkernel user process is
257 			 * currently running (it is NULL when one is not).
258 			 */
259 			if (curthread->td_vmm == NULL) {
260 				vklp->ve = ve;
261 				atomic_add_int(&ve->refs, 1);
262 				pmap_setlwpvm(lp, ve->vmspace);
263 			} else {
264 				vklp->ve = uap->id;
265 				vmm_vm_set_guest_cr3((register_t)uap->id);
266 			}
267 			set_user_TLS();
268 			set_vkernel_fp(uap->sysmsg_frame);
269 			error = EJUSTRETURN;
270 		}
271 		break;
272 	default:
273 		error = EOPNOTSUPP;
274 		break;
275 	}
276 done:
277 	if (ve)
278 		vmspace_entry_drop(ve);
279 
280 	return(error);
281 }
282 
283 /*
284  * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
285  *
286  * Map memory within a VMSPACE.  This function is just like a normal mmap()
287  * but operates on the vmspace's memory map.  Most callers use this to create
288  * a MAP_VPAGETABLE mapping.
289  *
290  * No requirements.
291  */
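/*
 * Hypothetical usage sketch: the virtual kernel would typically create
 * a MAP_VPAGETABLE mapping backed by its memory image and unmap it when
 * done.  fd, size and the void * return are assumptions based on the
 * normal mmap(2)-style stub:
 *
 *	void *base = vmspace_mmap((void *)vp, NULL, size,
 *				  PROT_READ | PROT_WRITE,
 *				  MAP_VPAGETABLE | MAP_SHARED, fd, 0);
 *	...
 *	vmspace_munmap((void *)vp, base, size);
 */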
292 int
293 sys_vmspace_mmap(struct vmspace_mmap_args *uap)
294 {
295 	struct vkernel_proc *vkp;
296 	struct vmspace_entry *ve;
297 	int error;
298 
299 	if ((vkp = curproc->p_vkernel) == NULL) {
300 		error = EINVAL;
301 		goto done2;
302 	}
303 
304 	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
305 		error = ENOENT;
306 		goto done2;
307 	}
308 
309 	error = kern_mmap(ve->vmspace, uap->addr, uap->len,
310 			  uap->prot, uap->flags,
311 			  uap->fd, uap->offset, &uap->sysmsg_resultp);
312 	vmspace_entry_drop(ve);
313 done2:
314 	return (error);
315 }
316 
317 /*
318  * vmspace_munmap(id, addr, len)
319  *
320  * Unmap memory within a VMSPACE.
321  *
322  * No requirements.
323  */
324 int
325 sys_vmspace_munmap(struct vmspace_munmap_args *uap)
326 {
327 	struct vkernel_proc *vkp;
328 	struct vmspace_entry *ve;
329 	vm_offset_t addr;
330 	vm_offset_t tmpaddr;
331 	vm_size_t size, pageoff;
332 	vm_map_t map;
333 	int error;
334 
335 	if ((vkp = curproc->p_vkernel) == NULL) {
336 		error = EINVAL;
337 		goto done2;
338 	}
339 
340 	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
341 		error = ENOENT;
342 		goto done2;
343 	}
344 
345 	/*
346 	 * NOTE: The unmap operation below can block, so we must hold
347 	 *	 our ref on the ve across it.
348 	 */
349 
350 	/*
351 	 * Copied from sys_munmap()
352 	 */
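	/*
	 * e.g. on 4K pages an addr of 0x1234 with len 0x100 becomes the
	 * page-aligned range 0x1000-0x2000 before removal.
	 */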
353 	addr = (vm_offset_t)uap->addr;
354 	size = uap->len;
355 
356 	pageoff = (addr & PAGE_MASK);
357 	addr -= pageoff;
358 	size += pageoff;
359 	size = (vm_size_t)round_page(size);
360 	if (size < uap->len) {		/* wrap */
361 		error = EINVAL;
362 		goto done1;
363 	}
364 	tmpaddr = addr + size;		/* workaround gcc4 opt */
365 	if (tmpaddr < addr) {		/* wrap */
366 		error = EINVAL;
367 		goto done1;
368 	}
369 	if (size == 0) {
370 		error = 0;
371 		goto done1;
372 	}
373 
374 	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
375 		error = EINVAL;
376 		goto done1;
377 	}
378 	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
379 		error = EINVAL;
380 		goto done1;
381 	}
382 	map = &ve->vmspace->vm_map;
383 	if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
384 		error = EINVAL;
385 		goto done1;
386 	}
387 	vm_map_remove(map, addr, addr + size);
388 	error = 0;
389 done1:
390 	vmspace_entry_drop(ve);
391 done2:
392 	return (error);
393 }
394 
395 /*
396  * vmspace_pread(id, buf, nbyte, flags, offset)
397  *
398  * Read data from a vmspace.  The number of bytes read is returned or
399  * -1 if an unrecoverable error occurred.  If the number of bytes read is
400  * less than the request size, a page fault occurred in the VMSPACE which
401  * the caller must resolve in order to proceed.
402  *
403  * (not implemented yet)
404  * No requirements.
405  */
406 int
407 sys_vmspace_pread(struct vmspace_pread_args *uap)
408 {
409 	struct vkernel_proc *vkp;
410 	struct vmspace_entry *ve;
411 	int error;
412 
413 	if ((vkp = curproc->p_vkernel) == NULL) {
414 		error = EINVAL;
415 		goto done3;
416 	}
417 
418 	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
419 		error = ENOENT;
420 		goto done3;
421 	}
422 	vmspace_entry_drop(ve);
423 	error = EINVAL;
424 done3:
425 	return (error);
426 }
427 
428 /*
429  * vmspace_pwrite(id, buf, nbyte, flags, offset)
430  *
431  * Write data to a vmspace.  The number of bytes written is returned or
432  * -1 if an unrecoverable error occurred.  If the number of bytes written is
433  * less than the request size, a page fault occurred in the VMSPACE which
434  * the caller must resolve in order to proceed.
435  *
436  * (not implemented yet)
437  * No requirements.
438  */
439 int
440 sys_vmspace_pwrite(struct vmspace_pwrite_args *uap)
441 {
442 	struct vkernel_proc *vkp;
443 	struct vmspace_entry *ve;
444 	int error;
445 
446 	if ((vkp = curproc->p_vkernel) == NULL) {
447 		error = EINVAL;
448 		goto done3;
449 	}
450 	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
451 		error = ENOENT;
452 		goto done3;
453 	}
454 	vmspace_entry_drop(ve);
455 	error = EINVAL;
456 done3:
457 	return (error);
458 }
459 
460 /*
461  * vmspace_mcontrol(id, addr, len, behav, value)
462  *
463  * madvise/mcontrol support for a vmspace.
464  *
465  * No requirements.
466  */
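/*
 * Hypothetical usage sketch: with a MAP_VPAGETABLE mapping the virtual
 * kernel would typically point the emulated page table at its root
 * directory and invalidate ranges after modifying it.  MADV_SETMAP and
 * MADV_INVAL are the behaviors this is assumed to be used with:
 *
 *	vmspace_mcontrol((void *)vp, base, size, MADV_SETMAP, pte_root);
 *	...
 *	vmspace_mcontrol((void *)vp, va, PAGE_SIZE, MADV_INVAL, 0);
 */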
467 int
468 sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap)
469 {
470 	struct vkernel_proc *vkp;
471 	struct vmspace_entry *ve;
472 	struct lwp *lp;
473 	vm_offset_t start, end;
474 	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
475 	int error;
476 
477 	lp = curthread->td_lwp;
478 	if ((vkp = curproc->p_vkernel) == NULL) {
479 		error = EINVAL;
480 		goto done3;
481 	}
482 
483 	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
484 		error = ENOENT;
485 		goto done3;
486 	}
487 
488 	/*
489 	 * This code is basically copied from sys_mcontrol()
490 	 */
491 	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
492 		error = EINVAL;
493 		goto done1;
494 	}
495 
496 	if (tmpaddr < (vm_offset_t)uap->addr) {
497 		error = EINVAL;
498 		goto done1;
499 	}
500 	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
501 		error = EINVAL;
502 		goto done1;
503 	}
504 	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
505 		error = EINVAL;
506 		goto done1;
507 	}
508 
509 	start = trunc_page((vm_offset_t) uap->addr);
510 	end = round_page(tmpaddr);
511 
512 	error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
513 				uap->behav, uap->value);
514 done1:
515 	vmspace_entry_drop(ve);
516 done3:
517 	return (error);
518 }
519 
520 /*
521  * Red black tree functions
522  */
523 static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
524 RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);
525 
526 /*
527  * a->id is the only field that has to be initialized for the
528  * comparison.
529  *
530  * The caller must hold vkp->token.
531  */
532 static int
533 rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
534 {
535 	if ((char *)a->id < (char *)b->id)
536 		return(-1);
537 	else if ((char *)a->id > (char *)b->id)
538 		return(1);
539 	return(0);
540 }
541 
542 /*
543  * The caller must hold vkp->token.
544  */
545 static
546 int
547 rb_vmspace_delete(struct vmspace_entry *ve, void *data)
548 {
549 	struct vkernel_proc *vkp = data;
550 
551 	KKASSERT(ve->refs == ve->cache_refs + 1);
552 	vmspace_entry_delete(ve, vkp);
553 
554 	return(0);
555 }
556 
557 /*
558  * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
559  * up the pmap, the vm_map, then destroy the vmspace.
560  *
561  * This function must remove the ve from the RB tree immediately, before
562  * performing any operation that might block.
563  *
564  * The caller must hold vkp->token.
565  */
566 static
567 void
568 vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp)
569 {
570 	RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);
571 
572 	pmap_remove_pages(vmspace_pmap(ve->vmspace),
573 			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
574 	vm_map_remove(&ve->vmspace->vm_map,
575 		      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
576 	vmspace_rel(ve->vmspace);
577 	ve->vmspace = NULL; /* safety */
578 	vmspace_entry_drop(ve);
579 }
580 
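/*
 * Drop a reference on a vmspace_entry, freeing the entry when the last
 * reference goes away.
 */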
581 static
582 void
583 vmspace_entry_drop(struct vmspace_entry *ve)
584 {
585 	if (atomic_fetchadd_int(&ve->refs, -1) == 1)
586 		kfree(ve, M_VKERNEL);
587 }
588 
589 /*
590  * Locate the ve for (id), return the ve or NULL.  If found this function
591  * will bump ve->refs which prevents the ve from being immediately destroyed
592  * (but it can still be removed).
593  *
594  * The cache can potentially contain a stale ve, check by testing ve->vmspace.
595  *
596  * The caller must hold vkp->token if excl is non-zero.
597  */
598 static
599 struct vmspace_entry *
600 vkernel_find_vmspace(struct vkernel_proc *vkp, void *id, int excl)
601 {
602 	struct vmspace_entry *ve;
603 	struct vmspace_entry key;
604 	struct vkernel_lwp *vklp;
605 	struct lwp *lp = curthread->td_lwp;
606 
607 	ve = NULL;
608 	if ((vklp = lp->lwp_vkernel) != NULL) {
609 		ve = vklp->ve_cache;
610 		if (ve && (ve->id != id || ve->vmspace == NULL))
611 			ve = NULL;
612 	}
613 	if (ve == NULL) {
614 		if (excl == 0)
615 			lwkt_gettoken_shared(&vkp->token);
616 		key.id = id;
617 		ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
618 		if (ve) {
619 			if (ve->vmspace)
620 				atomic_add_int(&ve->refs, 1);
621 			else
622 				ve = NULL;
623 		}
624 		if (excl == 0)
625 			lwkt_reltoken(&vkp->token);
626 	} else {
627 		atomic_add_int(&ve->refs, 1);
628 	}
629 	return (ve);
630 }
631 
632 /*
633  * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
634  * a vkernel process.
635  *
636  * No requirements.
637  */
638 void
639 vkernel_inherit(struct proc *p1, struct proc *p2)
640 {
641 	struct vkernel_proc *vkp;
642 
643 	vkp = p1->p_vkernel;
644 	KKASSERT(vkp->refs > 0);
645 	atomic_add_int(&vkp->refs, 1);
646 	p2->p_vkernel = vkp;
647 }
648 
649 /*
650  * No requirements.
651  */
652 void
653 vkernel_exit(struct proc *p)
654 {
655 	struct vkernel_proc *vkp;
656 	struct lwp *lp;
657 
658 	vkp = p->p_vkernel;
659 
660 	/*
661 	 * Restore the original VM context if we are killed while running
662 	 * a different one.
663 	 *
664 	 * This isn't supposed to happen.  What is supposed to happen is
665 	 * that the process should enter vkernel_trap() before handling
666 	 * the signal.
667 	 */
668 	RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
669 		vkernel_lwp_exit(lp);
670 	}
671 
672 	/*
673 	 * Dereference the common area
674 	 */
675 	p->p_vkernel = NULL;
676 	KKASSERT(vkp->refs > 0);
677 
678 	if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
679 		lwkt_gettoken(&vkp->token);
680 		RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
681 			rb_vmspace_delete, vkp);
682 		lwkt_reltoken(&vkp->token);
683 		kfree(vkp, M_VKERNEL);
684 	}
685 }
686 
687 /*
688  * No requirements.
689  */
690 void
691 vkernel_lwp_exit(struct lwp *lp)
692 {
693 	struct vkernel_lwp *vklp;
694 	struct vmspace_entry *ve;
695 
696 	if ((vklp = lp->lwp_vkernel) != NULL) {
697 		if (lp->lwp_thread->td_vmm == NULL) {
698 			/*
699 			 * vkernel thread
700 			 */
701 			if ((ve = vklp->ve) != NULL) {
702 				kprintf("Warning, pid %d killed with "
703 					"active VC!\n", lp->lwp_proc->p_pid);
704 				pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
705 				vklp->ve = NULL;
706 				KKASSERT(ve->refs > 0);
707 				vmspace_entry_drop(ve);
708 			}
709 		} else {
710 			/*
711 			 * guest thread
712 			 */
713 			vklp->ve = NULL;
714 		}
715 		if ((ve = vklp->ve_cache) != NULL) {
716 			vklp->ve_cache = NULL;
717 			atomic_add_int(&ve->cache_refs, -1);
718 			vmspace_entry_drop(ve);
719 		}
720 
721 		lp->lwp_vkernel = NULL;
722 		kfree(vklp, M_VKERNEL);
723 	}
724 }
725 
726 /*
727  * A VM space under virtual kernel control trapped out or made a system call
728  * or otherwise needs to return control to the virtual kernel context.
729  *
730  * No requirements.
731  */
732 void
733 vkernel_trap(struct lwp *lp, struct trapframe *frame)
734 {
735 	struct proc *p = lp->lwp_proc;
736 	struct vmspace_entry *ve;
737 	struct vkernel_lwp *vklp;
738 	int error;
739 
740 	/*
741 	 * Which vmspace entry was running?
742 	 */
743 	vklp = lp->lwp_vkernel;
744 	KKASSERT(vklp);
745 
746 	/* If it's a VMM thread just set the vkernel CR3 back */
747 	if (curthread->td_vmm == NULL) {
748 		ve = vklp->ve;
749 		KKASSERT(ve != NULL);
750 
751 		/*
752 		 * Switch the LWP vmspace back to the virtual kernel's VM space.
753 		 */
754 		vklp->ve = NULL;
755 		pmap_setlwpvm(lp, p->p_vmspace);
756 		KKASSERT(ve->refs > 0);
757 		vmspace_entry_drop(ve);
758 		/* ve is invalid once we kill our ref */
759 	} else {
760 		vklp->ve = NULL;
761 		vmm_vm_set_guest_cr3(p->p_vkernel->vkernel_cr3);
762 	}
763 
764 	/*
765 	 * Copy the emulated process frame to the virtual kernel process.
766 	 * The emulated process cannot change TLS descriptors so don't
767 	 * bother saving them, we already have a copy.
768 	 *
769 	 * Restore the virtual kernel's saved context so the virtual kernel
770 	 * process can resume.
771 	 */
772 	error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
773 	bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
774 	bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
775 	      sizeof(vklp->save_vextframe.vx_tls));
776 	set_user_TLS();
777 	cpu_vkernel_trap(frame, error);
778 }
779