/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.14 2007/08/15 03:15:07 dillon Exp $
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>

#include <sys/spinlock2.h>
#include <sys/sysref2.h>

static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
                                                  void *id);
static void vmspace_entry_delete(struct vmspace_entry *ve,
                                 struct vkernel_proc *vkp);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");

/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
 * with vmspace_mcontrol().
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_create(struct vmspace_create_args *uap)
{
        struct vmspace_entry *ve;
        struct vkernel_proc *vkp;
        int error;

        if (vkernel_enable == 0)
                return (EOPNOTSUPP);

        /*
         * Create a virtual kernel side-structure for the process if one
         * does not exist.
         */
        get_mplock();
        if ((vkp = curproc->p_vkernel) == NULL) {
                vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
                vkp->refs = 1;
                spin_init(&vkp->spin);
                RB_INIT(&vkp->root);
                curproc->p_vkernel = vkp;
        }

        /*
         * Create a new VMSPACE
         *
         * XXX race if kmalloc blocks
         */
        if (vkernel_find_vmspace(vkp, uap->id)) {
                error = EEXIST;
                goto done;
        }
        ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
        ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        ve->id = uap->id;
        pmap_pinit2(vmspace_pmap(ve->vmspace));
        RB_INSERT(vmspace_rb_tree, &vkp->root, ve);
        error = 0;
done:
        rel_mplock();
        return (error);
}
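
/*
 * Usage sketch (not part of the kernel build): from userland, a virtual
 * kernel creates a guest VMSPACE through the generated syscall stub of the
 * same name.  The id is simply an opaque, unique, non-NULL pointer chosen
 * by the caller; guest_ram and the error handling below are hypothetical.
 *
 *	void *id = guest_ram;
 *
 *	if (vmspace_create(id, 0, NULL) < 0)
 *		err(1, "vmspace_create");
 *	...
 *	vmspace_destroy(id);
 */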

/*
 * vmspace_destroy (void *id)
 *
 * Destroy a VMSPACE.
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_destroy(struct vmspace_destroy_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        get_mplock();
        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
                error = ENOENT;
                goto done;
        }
        if (ve->refs) {
                error = EBUSY;
                goto done;
        }
        vmspace_entry_delete(ve, vkp);
        error = 0;
done:
        rel_mplock();
        return(error);
}

/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *              struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_ctl(struct vmspace_ctl_args *uap)
{
        struct vkernel_proc *vkp;
        struct vkernel_lwp *vklp;
        struct vmspace_entry *ve;
        struct lwp *lp;
        struct proc *p;
        int framesz;
        int error;

        lp = curthread->td_lwp;
        p = lp->lwp_proc;

        get_mplock();
        if ((vkp = p->p_vkernel) == NULL) {
                error = EINVAL;
                goto done;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
                error = ENOENT;
                goto done;
        }

        /*
         * Signal mailbox interlock
         */
        if (p->p_flag & P_MAILBOX) {
                p->p_flag &= ~P_MAILBOX;
                error = EINTR;
                goto done;
        }

        switch(uap->cmd) {
        case VMSPACE_CTL_RUN:
                /*
                 * Save the caller's register context, swap VM spaces, and
                 * install the passed register context.  Return with
                 * EJUSTRETURN so the syscall code doesn't adjust the context.
                 */
                atomic_add_int(&ve->refs, 1);
                framesz = sizeof(struct trapframe);
                if ((vklp = lp->lwp_vkernel) == NULL) {
                        vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
                                       M_WAITOK|M_ZERO);
                        lp->lwp_vkernel = vklp;
                }
                vklp->user_trapframe = uap->tframe;
                vklp->user_vextframe = uap->vframe;
                bcopy(uap->sysmsg_frame, &vklp->save_trapframe, framesz);
                bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
                      sizeof(vklp->save_vextframe.vx_tls));
                error = copyin(uap->tframe, uap->sysmsg_frame, framesz);
                if (error == 0)
                        error = copyin(&uap->vframe->vx_tls,
                                       &curthread->td_tls,
                                       sizeof(struct savetls));
                if (error == 0)
                        error = cpu_sanitize_frame(uap->sysmsg_frame);
                if (error == 0)
                        error = cpu_sanitize_tls(&curthread->td_tls);
                if (error) {
                        bcopy(&vklp->save_trapframe, uap->sysmsg_frame, framesz);
                        bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
                              sizeof(vklp->save_vextframe.vx_tls));
                        set_user_TLS();
                        atomic_subtract_int(&ve->refs, 1);
                } else {
                        vklp->ve = ve;
                        pmap_setlwpvm(lp, ve->vmspace);
                        set_user_TLS();
                        set_vkernel_fp(uap->sysmsg_frame);
                        error = EJUSTRETURN;
                }
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }
done:
        rel_mplock();
        return(error);
}
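
/*
 * Usage sketch (not part of the kernel build): the virtual kernel runs a
 * guest context by handing vmspace_ctl() a trapframe and vextframe it has
 * prepared; tf, vf, setup_guest_context() and handle_guest_exit() below are
 * hypothetical.  When the call returns, the guest has taken a page fault,
 * trap, signal or system call, and tf has been rewritten by vkernel_trap()
 * below to describe why, so the vkernel can dispatch the event and loop.
 *
 *	struct trapframe tf;
 *	struct vextframe vf;
 *
 *	setup_guest_context(&tf, &vf);
 *	for (;;) {
 *		vmspace_ctl(id, VMSPACE_CTL_RUN, &tf, &vf);
 *		handle_guest_exit(&tf);
 *	}
 */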

/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * Map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.  Most callers use this to create
 * a MAP_VPAGETABLE mapping.
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_mmap(struct vmspace_mmap_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        get_mplock();
        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
                error = ENOENT;
                goto done;
        }
        error = kern_mmap(ve->vmspace, uap->addr, uap->len,
                          uap->prot, uap->flags,
                          uap->fd, uap->offset, &uap->sysmsg_resultp);
done:
        rel_mplock();
        return (error);
}
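
/*
 * Usage sketch (not part of the kernel build): a virtual kernel typically
 * backs the guest VMSPACE with one large MAP_VPAGETABLE mapping of its RAM
 * file.  The exact flag combination is illustrative only; guest_base,
 * guest_size and ram_fd are hypothetical.
 *
 *	vmspace_mmap(id, (void *)guest_base, guest_size,
 *		     PROT_READ|PROT_WRITE|PROT_EXEC,
 *		     MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
 *		     ram_fd, 0);
 */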

/*
 * vmspace_munmap(id, addr, len)
 *
 * Unmap memory within a VMSPACE.
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_munmap(struct vmspace_munmap_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        vm_offset_t addr;
        vm_offset_t tmpaddr;
        vm_size_t size, pageoff;
        vm_map_t map;
        int error;

        get_mplock();
        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
                error = ENOENT;
                goto done;
        }

        /*
         * Copied from sys_munmap()
         */
        addr = (vm_offset_t)uap->addr;
        size = uap->len;

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t)round_page(size);
        if (size < uap->len) {                  /* wrap */
                error = EINVAL;
                goto done;
        }
        tmpaddr = addr + size;                  /* workaround gcc4 opt */
        if (tmpaddr < addr) {                   /* wrap */
                error = EINVAL;
                goto done;
        }
        if (size == 0) {
                error = 0;
                goto done;
        }

        if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
                error = EINVAL;
                goto done;
        }
        if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
                error = EINVAL;
                goto done;
        }
        map = &ve->vmspace->vm_map;
        if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE)) {
                error = EINVAL;
                goto done;
        }
        vm_map_remove(map, addr, addr + size);
        error = 0;
done:
        rel_mplock();
        return (error);
}

/*
 * vmspace_pread(id, buf, nbyte, flags, offset)
 *
 * Read data from a vmspace.  The number of bytes read is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes read is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_pread(struct vmspace_pread_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        get_mplock();
        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
                error = ENOENT;
                goto done;
        }
        error = EINVAL;
done:
        rel_mplock();
        return (error);
}

/*
 * vmspace_pwrite(id, buf, nbyte, flags, offset)
 *
 * Write data to a vmspace.  The number of bytes written is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes written is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_pwrite(struct vmspace_pwrite_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        get_mplock();
        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
                error = ENOENT;
                goto done;
        }
        error = EINVAL;
done:
        rel_mplock();
        return (error);
}

/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        vm_offset_t start, end;
        vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
        int error;

        get_mplock();
        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
                error = ENOENT;
                goto done;
        }

        /*
         * This code is basically copied from sys_mcontrol()
         */
        if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
                error = EINVAL;
                goto done;
        }

        if (tmpaddr < (vm_offset_t)uap->addr) {
                error = EINVAL;
                goto done;
        }
        if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
                error = EINVAL;
                goto done;
        }
        if (VM_MIN_USER_ADDRESS > 0 && (vm_offset_t)uap->addr < VM_MIN_USER_ADDRESS) {
                error = EINVAL;
                goto done;
        }

        start = trunc_page((vm_offset_t)uap->addr);
        end = round_page(tmpaddr);

        error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
                               uap->behav, uap->value);
done:
        rel_mplock();
        return (error);
}
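
/*
 * Usage sketch (not part of the kernel build): once a MAP_VPAGETABLE
 * mapping exists in the guest VMSPACE, the virtual kernel can point it at
 * the guest's top-level page-table page with MADV_SETMAP (see mcontrol(2)).
 * guest_base, guest_size and pt_root_offset are hypothetical; the value
 * argument identifies the root page-table page within the mapping's
 * backing store.
 *
 *	vmspace_mcontrol(id, (void *)guest_base, guest_size,
 *			 MADV_SETMAP, pt_root_offset);
 */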

/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/* a->id is used as the key; it is the only field that has to be initialized */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
        if ((char *)a->id < (char *)b->id)
                return(-1);
        else if ((char *)a->id > (char *)b->id)
                return(1);
        return(0);
}

static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
        struct vkernel_proc *vkp = data;

        KKASSERT(ve->refs == 0);
        vmspace_entry_delete(ve, vkp);
        return(0);
}

/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.
 */
static
void
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp)
{
        RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

        pmap_remove_pages(vmspace_pmap(ve->vmspace),
                          VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        vm_map_remove(&ve->vmspace->vm_map,
                      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        sysref_put(&ve->vmspace->vm_sysref);
        kfree(ve, M_VKERNEL);
}

static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id)
{
        struct vmspace_entry *ve;
        struct vmspace_entry key;

        key.id = id;
        ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
        return (ve);
}

/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 */
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
        struct vkernel_proc *vkp;

        vkp = p1->p_vkernel;
        KKASSERT(vkp->refs > 0);
        atomic_add_int(&vkp->refs, 1);
        p2->p_vkernel = vkp;
}

void
vkernel_exit(struct proc *p)
{
        struct vkernel_proc *vkp;
        struct lwp *lp;
        int freeme = 0;

        vkp = p->p_vkernel;
        /*
         * Restore the original VM context if we are killed while running
         * a different one.
         *
         * This isn't supposed to happen.  What is supposed to happen is
         * that the process should enter vkernel_trap() before handling
         * the signal.
         */
        RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
                vkernel_lwp_exit(lp);
        }

        /*
         * Dereference the common area
         */
        p->p_vkernel = NULL;
        KKASSERT(vkp->refs > 0);
        spin_lock_wr(&vkp->spin);
        if (--vkp->refs == 0)
                freeme = 1;
        spin_unlock_wr(&vkp->spin);

        if (freeme) {
                RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
                        rb_vmspace_delete, vkp);
                kfree(vkp, M_VKERNEL);
        }
}

void
vkernel_lwp_exit(struct lwp *lp)
{
        struct vkernel_lwp *vklp;
        struct vmspace_entry *ve;

        if ((vklp = lp->lwp_vkernel) != NULL) {
                if ((ve = vklp->ve) != NULL) {
                        kprintf("Warning, pid %d killed with "
                                "active VC!\n", lp->lwp_proc->p_pid);
                        print_backtrace();
                        pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
                        vklp->ve = NULL;
                        KKASSERT(ve->refs > 0);
                        atomic_subtract_int(&ve->refs, 1);
                }
                lp->lwp_vkernel = NULL;
                kfree(vklp, M_VKERNEL);
        }
}

/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 */
int
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
        struct proc *p = lp->lwp_proc;
        struct vmspace_entry *ve;
        struct vkernel_lwp *vklp;
        int error;

        /*
         * Which vmspace entry was running?
         */
        vklp = lp->lwp_vkernel;
        KKASSERT(vklp);
        ve = vklp->ve;
        KKASSERT(ve != NULL);

        /*
         * Switch the LWP vmspace back to the virtual kernel's VM space.
         */
        vklp->ve = NULL;
        pmap_setlwpvm(lp, p->p_vmspace);
        KKASSERT(ve->refs > 0);
        atomic_subtract_int(&ve->refs, 1);

        /*
         * Copy the emulated process frame to the virtual kernel process.
         * The emulated process cannot change TLS descriptors so don't
         * bother saving them, we already have a copy.
         *
         * Restore the virtual kernel's saved context so the virtual kernel
         * process can resume.
         */
        error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
        bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
        bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
              sizeof(vklp->save_vextframe.vx_tls));
        set_user_TLS();
        return(error);
}