1 /* 2 * Copyright (c) 2006 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.5 2006/11/07 17:51:24 dillon Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/kernel.h> 39 #include <sys/systm.h> 40 #include <sys/sysproto.h> 41 #include <sys/kern_syscall.h> 42 #include <sys/mman.h> 43 #include <sys/proc.h> 44 #include <sys/malloc.h> 45 #include <sys/sysctl.h> 46 #include <sys/vkernel.h> 47 #include <sys/vmspace.h> 48 #include <sys/spinlock2.h> 49 50 #include <vm/vm_extern.h> 51 #include <vm/pmap.h> 52 53 static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_common *vc, 54 void *id); 55 static void vmspace_entry_delete(struct vmspace_entry *ve, 56 struct vkernel_common *vc); 57 58 static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures"); 59 60 /* 61 * vmspace_create (void *id, int type, void *data) 62 * 63 * Create a VMSPACE under the control of the caller with the specified id. 64 * An id of NULL cannot be used. The type and data fields must currently 65 * be 0. 66 * 67 * The vmspace starts out completely empty. Memory may be mapped into the 68 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled 69 * with vmspace_mcontrol(). 70 */ 71 int 72 sys_vmspace_create(struct vmspace_create_args *uap) 73 { 74 struct vkernel_common *vc; 75 struct vmspace_entry *ve; 76 struct vkernel *vk; 77 78 if (vkernel_enable == 0) 79 return (EOPNOTSUPP); 80 81 /* 82 * Create a virtual kernel side-structure for the process if one 83 * does not exist. 84 */ 85 if ((vk = curproc->p_vkernel) == NULL) { 86 vk = kmalloc(sizeof(*vk), M_VKERNEL, M_WAITOK|M_ZERO); 87 vc = kmalloc(sizeof(*vc), M_VKERNEL, M_WAITOK|M_ZERO); 88 vc->vc_refs = 1; 89 spin_init(&vc->vc_spin); 90 RB_INIT(&vc->vc_root); 91 vk->vk_common = vc; 92 curproc->p_vkernel = vk; 93 } 94 vc = vk->vk_common; 95 96 /* 97 * Create a new VMSPACE 98 */ 99 if (vkernel_find_vmspace(vc, uap->id)) 100 return (EEXIST); 101 ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO); 102 ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS); 103 ve->id = uap->id; 104 pmap_pinit2(vmspace_pmap(ve->vmspace)); 105 RB_INSERT(vmspace_rb_tree, &vc->vc_root, ve); 106 return (0); 107 } 108 109 /* 110 * vmspace_destroy (void *id) 111 * 112 * Destroy a VMSPACE. 113 */ 114 int 115 sys_vmspace_destroy(struct vmspace_destroy_args *uap) 116 { 117 struct vkernel_common *vc; 118 struct vmspace_entry *ve; 119 struct vkernel *vk; 120 121 if ((vk = curproc->p_vkernel) == NULL) 122 return (EINVAL); 123 vc = vk->vk_common; 124 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 125 return (ENOENT); 126 if (ve->refs) 127 return (EBUSY); 128 vmspace_entry_delete(ve, vc); 129 return(0); 130 } 131 132 /* 133 * vmspace_ctl (void *id, int cmd, void *ctx, int ctx_bytes, int timeout_us) 134 * 135 * Transfer control to a VMSPACE. Control is returned after the specified 136 * number of microseconds or if a page fault, signal, trap, or system call 137 * occurs. The context is updated as appropriate. 138 */ 139 int 140 sys_vmspace_ctl(struct vmspace_ctl_args *uap) 141 { 142 struct vkernel_common *vc; 143 struct vmspace_entry *ve; 144 struct vkernel *vk; 145 struct proc *p; 146 int framesz; 147 int error; 148 149 if ((vk = curproc->p_vkernel) == NULL) 150 return (EINVAL); 151 vc = vk->vk_common; 152 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 153 return (ENOENT); 154 155 switch(uap->cmd) { 156 case VMSPACE_CTL_RUN: 157 /* 158 * Save the caller's register context, swap VM spaces, and 159 * install the passed register context. Return with 160 * EJUSTRETURN so the syscall code doesn't adjust the context. 161 */ 162 p = curproc; 163 ++ve->refs; 164 framesz = sizeof(struct trapframe); 165 vk->vk_current = ve; 166 vk->vk_save_vmspace = p->p_vmspace; 167 vk->vk_user_frame = uap->ctx; 168 bcopy(uap->sysmsg_frame, &vk->vk_save_frame, framesz); 169 error = copyin(uap->ctx, uap->sysmsg_frame, framesz); 170 if (error == 0) 171 error = cpu_sanitize_frame(uap->sysmsg_frame); 172 if (error) { 173 bcopy(&vk->vk_save_frame, uap->sysmsg_frame, framesz); 174 vk->vk_current = NULL; 175 vk->vk_save_vmspace = NULL; 176 --ve->refs; 177 } else { 178 pmap_deactivate(p); 179 p->p_vmspace = ve->vmspace; 180 pmap_activate(p); 181 error = EJUSTRETURN; 182 } 183 break; 184 default: 185 error = EOPNOTSUPP; 186 break; 187 } 188 return(error); 189 } 190 191 /* 192 * vmspace_mmap(id, addr, len, prot, flags, fd, offset) 193 * 194 * map memory within a VMSPACE. This function is just like a normal mmap() 195 * but operates on the vmspace's memory map. Most callers use this to create 196 * a MAP_VPAGETABLE mapping. 197 */ 198 int 199 sys_vmspace_mmap(struct vmspace_mmap_args *uap) 200 { 201 struct vkernel_common *vc; 202 struct vmspace_entry *ve; 203 struct vkernel *vk; 204 int error; 205 206 if ((vk = curproc->p_vkernel) == NULL) 207 return (EINVAL); 208 vc = vk->vk_common; 209 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 210 return (ENOENT); 211 error = kern_mmap(ve->vmspace, uap->addr, uap->len, 212 uap->prot, uap->flags, 213 uap->fd, uap->offset, &uap->sysmsg_resultp); 214 return (error); 215 } 216 217 /* 218 * vmspace_munmap(id, addr, len) 219 * 220 * unmap memory within a VMSPACE. 221 */ 222 int 223 sys_vmspace_munmap(struct vmspace_munmap_args *uap) 224 { 225 struct vkernel_common *vc; 226 struct vmspace_entry *ve; 227 struct vkernel *vk; 228 vm_offset_t addr; 229 vm_size_t size, pageoff; 230 vm_map_t map; 231 232 if ((vk = curproc->p_vkernel) == NULL) 233 return (EINVAL); 234 vc = vk->vk_common; 235 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 236 return (ENOENT); 237 238 /* 239 * Copied from sys_munmap() 240 */ 241 addr = (vm_offset_t)uap->addr; 242 size = uap->len; 243 244 pageoff = (addr & PAGE_MASK); 245 addr -= pageoff; 246 size += pageoff; 247 size = (vm_size_t)round_page(size); 248 if (addr + size < addr) 249 return (EINVAL); 250 if (size == 0) 251 return (0); 252 253 if (VM_MAX_USER_ADDRESS > 0 && addr + size > VM_MAX_USER_ADDRESS) 254 return (EINVAL); 255 if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) 256 return (EINVAL); 257 map = &ve->vmspace->vm_map; 258 if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) 259 return (EINVAL); 260 vm_map_remove(map, addr, addr + size); 261 return (0); 262 } 263 264 /* 265 * vmspace_pread(id, buf, nbyte, flags, offset) 266 * 267 * Read data from a vmspace. The number of bytes read is returned or 268 * -1 if an unrecoverable error occured. If the number of bytes read is 269 * less then the request size, a page fault occured in the VMSPACE which 270 * the caller must resolve in order to proceed. 271 */ 272 int 273 sys_vmspace_pread(struct vmspace_pread_args *uap) 274 { 275 struct vkernel_common *vc; 276 struct vmspace_entry *ve; 277 struct vkernel *vk; 278 279 if ((vk = curproc->p_vkernel) == NULL) 280 return (EINVAL); 281 vc = vk->vk_common; 282 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 283 return (ENOENT); 284 return (EINVAL); 285 } 286 287 /* 288 * vmspace_pwrite(id, buf, nbyte, flags, offset) 289 * 290 * Write data to a vmspace. The number of bytes written is returned or 291 * -1 if an unrecoverable error occured. If the number of bytes written is 292 * less then the request size, a page fault occured in the VMSPACE which 293 * the caller must resolve in order to proceed. 294 */ 295 int 296 sys_vmspace_pwrite(struct vmspace_pwrite_args *uap) 297 { 298 struct vkernel_common *vc; 299 struct vmspace_entry *ve; 300 struct vkernel *vk; 301 302 if ((vk = curproc->p_vkernel) == NULL) 303 return (EINVAL); 304 vc = vk->vk_common; 305 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 306 return (ENOENT); 307 return (EINVAL); 308 } 309 310 /* 311 * vmspace_mcontrol(id, addr, len, behav, value) 312 * 313 * madvise/mcontrol support for a vmspace. 314 */ 315 int 316 sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap) 317 { 318 struct vkernel_common *vc; 319 struct vmspace_entry *ve; 320 struct vkernel *vk; 321 vm_offset_t start, end; 322 323 if ((vk = curproc->p_vkernel) == NULL) 324 return (EINVAL); 325 vc = vk->vk_common; 326 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 327 return (ENOENT); 328 329 /* 330 * This code is basically copied from sys_mcontrol() 331 */ 332 if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) 333 return (EINVAL); 334 335 if (VM_MAX_USER_ADDRESS > 0 && 336 ((vm_offset_t) uap->addr + uap->len) > VM_MAX_USER_ADDRESS) 337 return (EINVAL); 338 if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) 339 return (EINVAL); 340 if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) 341 return (EINVAL); 342 343 start = trunc_page((vm_offset_t) uap->addr); 344 end = round_page((vm_offset_t) uap->addr + uap->len); 345 346 return (vm_map_madvise(&ve->vmspace->vm_map, start, end, 347 uap->behav, uap->value)); 348 } 349 350 /* 351 * Red black tree functions 352 */ 353 static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *); 354 RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare); 355 356 /* a->start is address, and the only field has to be initialized */ 357 static int 358 rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b) 359 { 360 if ((char *)a->id < (char *)b->id) 361 return(-1); 362 else if ((char *)a->id > (char *)b->id) 363 return(1); 364 return(0); 365 } 366 367 static 368 int 369 rb_vmspace_delete(struct vmspace_entry *ve, void *data) 370 { 371 struct vkernel_common *vc = data; 372 373 KKASSERT(ve->refs == 0); 374 vmspace_entry_delete(ve, vc); 375 return(0); 376 } 377 378 /* 379 * Remove a vmspace_entry from the RB tree and destroy it. We have to clean 380 * up the pmap, the vm_map, then destroy the vmspace. 381 */ 382 static 383 void 384 vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_common *vc) 385 { 386 RB_REMOVE(vmspace_rb_tree, &vc->vc_root, ve); 387 388 pmap_remove_pages(vmspace_pmap(ve->vmspace), 389 VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS); 390 vm_map_remove(&ve->vmspace->vm_map, 391 VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS); 392 vmspace_free(ve->vmspace); 393 kfree(ve, M_VKERNEL); 394 } 395 396 397 static 398 struct vmspace_entry * 399 vkernel_find_vmspace(struct vkernel_common *vc, void *id) 400 { 401 struct vmspace_entry *ve; 402 struct vmspace_entry key; 403 404 key.id = id; 405 ve = RB_FIND(vmspace_rb_tree, &vc->vc_root, &key); 406 return (ve); 407 } 408 409 /* 410 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing 411 * a vkernel process. 412 */ 413 void 414 vkernel_inherit(struct proc *p1, struct proc *p2) 415 { 416 struct vkernel_common *vc; 417 struct vkernel *vk; 418 419 vk = p1->p_vkernel; 420 vc = vk->vk_common; 421 KKASSERT(vc->vc_refs > 0); 422 atomic_add_int(&vc->vc_refs, 1); 423 vk = kmalloc(sizeof(*vk), M_VKERNEL, M_WAITOK|M_ZERO); 424 p2->p_vkernel = vk; 425 vk->vk_common = vc; 426 } 427 428 void 429 vkernel_exit(struct proc *p) 430 { 431 struct vkernel_common *vc; 432 struct vmspace_entry *ve; 433 struct vkernel *vk; 434 int freeme = 0; 435 436 vk = p->p_vkernel; 437 p->p_vkernel = NULL; 438 vc = vk->vk_common; 439 vk->vk_common = NULL; 440 441 /* 442 * Restore the original VM context if we are killed while running 443 * a different one. 444 */ 445 if ((ve = vk->vk_current) != NULL) { 446 printf("killed with active VC\n"); 447 vk->vk_current = NULL; 448 pmap_deactivate(p); 449 p->p_vmspace = vk->vk_save_vmspace; 450 pmap_activate(p); 451 vk->vk_save_vmspace = NULL; 452 KKASSERT(ve->refs > 0); 453 --ve->refs; 454 } 455 456 /* 457 * Dereference the common area 458 */ 459 KKASSERT(vc->vc_refs > 0); 460 spin_lock_wr(&vc->vc_spin); 461 if (--vc->vc_refs == 0) 462 freeme = 1; 463 spin_unlock_wr(&vc->vc_spin); 464 465 if (freeme) { 466 RB_SCAN(vmspace_rb_tree, &vc->vc_root, NULL, 467 rb_vmspace_delete, vc); 468 kfree(vc, M_VKERNEL); 469 } 470 kfree(vk, M_VKERNEL); 471 } 472 473 /* 474 * A VM space under virtual kernel control trapped out or made a system call 475 * or otherwise needs to return control to the virtual kernel context. 476 */ 477 int 478 vkernel_trap(struct proc *p, struct trapframe *frame) 479 { 480 struct vmspace_entry *ve; 481 struct vkernel *vk; 482 int error; 483 484 printf("trap for vkernel type %d wm=%d\n", 485 frame->tf_trapno & 0x7FFFFFFF, 486 ((frame->tf_trapno & 0x80000000) ? 1 : 0)); 487 488 /* 489 * Which vmspace entry was running? 490 */ 491 vk = p->p_vkernel; 492 ve = vk->vk_current; 493 vk->vk_current = NULL; 494 KKASSERT(ve != NULL); 495 496 /* 497 * Switch the process context back to the virtual kernel's VM space. 498 */ 499 pmap_deactivate(p); 500 p->p_vmspace = vk->vk_save_vmspace; 501 pmap_activate(p); 502 vk->vk_save_vmspace = NULL; 503 KKASSERT(ve->refs > 0); 504 --ve->refs; 505 506 /* 507 * Copy the trapframe to the virtual kernel's userspace, then 508 * restore virtual kernel's original syscall trap frame so we 509 * can 'return' from the system call that ran the custom VM space. 510 */ 511 error = copyout(frame, vk->vk_user_frame, sizeof(*frame)); 512 bcopy(&vk->vk_save_frame, frame, sizeof(*frame)); 513 return(error); 514 } 515 516