/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
33 * 34 * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.4 2006/10/20 17:02:09 dillon Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/kernel.h> 39 #include <sys/systm.h> 40 #include <sys/sysproto.h> 41 #include <sys/kern_syscall.h> 42 #include <sys/mman.h> 43 #include <sys/proc.h> 44 #include <sys/malloc.h> 45 #include <sys/sysctl.h> 46 #include <sys/vkernel.h> 47 #include <sys/vmspace.h> 48 #include <sys/spinlock2.h> 49 50 #include <vm/vm_extern.h> 51 #include <vm/pmap.h> 52 53 static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_common *vc, 54 void *id); 55 static void vmspace_entry_delete(struct vmspace_entry *ve, 56 struct vkernel_common *vc); 57 58 static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures"); 59 60 /* 61 * vmspace_create (void *id, int type, void *data) 62 * 63 * Create a VMSPACE under the control of the caller with the specified id. 64 * An id of NULL cannot be used. The type and data fields must currently 65 * be 0. 66 * 67 * The vmspace starts out completely empty. Memory may be mapped into the 68 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled 69 * with vmspace_mcontrol(). 70 */ 71 int 72 sys_vmspace_create(struct vmspace_create_args *uap) 73 { 74 struct vkernel_common *vc; 75 struct vmspace_entry *ve; 76 struct vkernel *vk; 77 78 if (vkernel_enable == 0) 79 return (EOPNOTSUPP); 80 81 /* 82 * Create a virtual kernel side-structure for the process if one 83 * does not exist. 
84 */ 85 if ((vk = curproc->p_vkernel) == NULL) { 86 vk = kmalloc(sizeof(*vk), M_VKERNEL, M_WAITOK|M_ZERO); 87 vc = kmalloc(sizeof(*vc), M_VKERNEL, M_WAITOK|M_ZERO); 88 vc->vc_refs = 1; 89 spin_init(&vc->vc_spin); 90 RB_INIT(&vc->vc_root); 91 vk->vk_common = vc; 92 curproc->p_vkernel = vk; 93 } 94 vc = vk->vk_common; 95 96 /* 97 * Create a new VMSPACE 98 */ 99 if (vkernel_find_vmspace(vc, uap->id)) 100 return (EEXIST); 101 ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO); 102 ve->vmspace = vmspace_alloc(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); 103 ve->id = uap->id; 104 pmap_pinit2(vmspace_pmap(ve->vmspace)); 105 RB_INSERT(vmspace_rb_tree, &vc->vc_root, ve); 106 return (0); 107 } 108 109 /* 110 * vmspace_destroy (void *id) 111 * 112 * Destroy a VMSPACE. 113 */ 114 int 115 sys_vmspace_destroy(struct vmspace_destroy_args *uap) 116 { 117 struct vkernel_common *vc; 118 struct vmspace_entry *ve; 119 struct vkernel *vk; 120 121 if ((vk = curproc->p_vkernel) == NULL) 122 return (EINVAL); 123 vc = vk->vk_common; 124 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 125 return (ENOENT); 126 if (ve->refs) 127 return (EBUSY); 128 vmspace_entry_delete(ve, vc); 129 return(0); 130 } 131 132 /* 133 * vmspace_ctl (void *id, int cmd, void *ctx, int ctx_bytes, int timeout_us) 134 * 135 * Transfer control to a VMSPACE. Control is returned after the specified 136 * number of microseconds or if a page fault, signal, trap, or system call 137 * occurs. The context is updated as appropriate. 
138 */ 139 int 140 sys_vmspace_ctl(struct vmspace_ctl_args *uap) 141 { 142 struct vkernel_common *vc; 143 struct vmspace_entry *ve; 144 struct vkernel *vk; 145 struct proc *p; 146 int framesz; 147 int error; 148 149 if ((vk = curproc->p_vkernel) == NULL) 150 return (EINVAL); 151 vc = vk->vk_common; 152 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 153 return (ENOENT); 154 155 switch(uap->cmd) { 156 case VMSPACE_CTL_RUN: 157 /* 158 * Save the caller's register context, swap VM spaces, and 159 * install the passed register context. Return with 160 * EJUSTRETURN so the syscall code doesn't adjust the context. 161 */ 162 p = curproc; 163 ++ve->refs; 164 framesz = sizeof(struct trapframe); 165 vk->vk_current = ve; 166 vk->vk_save_vmspace = p->p_vmspace; 167 vk->vk_user_frame = uap->ctx; 168 bcopy(uap->sysmsg_frame, &vk->vk_save_frame, framesz); 169 error = copyin(uap->ctx, uap->sysmsg_frame, framesz); 170 if (error == 0) 171 error = cpu_sanitize_frame(uap->sysmsg_frame); 172 if (error) { 173 bcopy(&vk->vk_save_frame, uap->sysmsg_frame, framesz); 174 vk->vk_current = NULL; 175 vk->vk_save_vmspace = NULL; 176 --ve->refs; 177 } else { 178 pmap_deactivate(p); 179 p->p_vmspace = ve->vmspace; 180 pmap_activate(p); 181 error = EJUSTRETURN; 182 } 183 break; 184 default: 185 error = EOPNOTSUPP; 186 break; 187 } 188 return(error); 189 } 190 191 /* 192 * vmspace_mmap(id, addr, len, prot, flags, fd, offset) 193 * 194 * map memory within a VMSPACE. This function is just like a normal mmap() 195 * but operates on the vmspace's memory map. Most callers use this to create 196 * a MAP_VPAGETABLE mapping. 
197 */ 198 int 199 sys_vmspace_mmap(struct vmspace_mmap_args *uap) 200 { 201 struct vkernel_common *vc; 202 struct vmspace_entry *ve; 203 struct vkernel *vk; 204 int error; 205 206 if ((vk = curproc->p_vkernel) == NULL) 207 return (EINVAL); 208 vc = vk->vk_common; 209 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 210 return (ENOENT); 211 error = kern_mmap(ve->vmspace, uap->addr, uap->len, 212 uap->prot, uap->flags, 213 uap->fd, uap->offset, &uap->sysmsg_resultp); 214 return (error); 215 } 216 217 /* 218 * vmspace_munmap(id, addr, len) 219 * 220 * unmap memory within a VMSPACE. 221 */ 222 int 223 sys_vmspace_munmap(struct vmspace_munmap_args *uap) 224 { 225 struct vkernel_common *vc; 226 struct vmspace_entry *ve; 227 struct vkernel *vk; 228 vm_offset_t addr; 229 vm_size_t size, pageoff; 230 vm_map_t map; 231 232 if ((vk = curproc->p_vkernel) == NULL) 233 return (EINVAL); 234 vc = vk->vk_common; 235 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 236 return (ENOENT); 237 238 /* 239 * Copied from sys_munmap() 240 */ 241 addr = (vm_offset_t)uap->addr; 242 size = uap->len; 243 244 pageoff = (addr & PAGE_MASK); 245 addr -= pageoff; 246 size += pageoff; 247 size = (vm_size_t)round_page(size); 248 if (addr + size < addr) 249 return (EINVAL); 250 if (size == 0) 251 return (0); 252 253 if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) 254 return (EINVAL); 255 #ifndef i386 256 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) 257 return (EINVAL); 258 #endif 259 map = &ve->vmspace->vm_map; 260 if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) 261 return (EINVAL); 262 vm_map_remove(map, addr, addr + size); 263 return (0); 264 } 265 266 /* 267 * vmspace_pread(id, buf, nbyte, flags, offset) 268 * 269 * Read data from a vmspace. The number of bytes read is returned or 270 * -1 if an unrecoverable error occured. 
If the number of bytes read is 271 * less then the request size, a page fault occured in the VMSPACE which 272 * the caller must resolve in order to proceed. 273 */ 274 int 275 sys_vmspace_pread(struct vmspace_pread_args *uap) 276 { 277 struct vkernel_common *vc; 278 struct vmspace_entry *ve; 279 struct vkernel *vk; 280 281 if ((vk = curproc->p_vkernel) == NULL) 282 return (EINVAL); 283 vc = vk->vk_common; 284 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 285 return (ENOENT); 286 return (EINVAL); 287 } 288 289 /* 290 * vmspace_pwrite(id, buf, nbyte, flags, offset) 291 * 292 * Write data to a vmspace. The number of bytes written is returned or 293 * -1 if an unrecoverable error occured. If the number of bytes written is 294 * less then the request size, a page fault occured in the VMSPACE which 295 * the caller must resolve in order to proceed. 296 */ 297 int 298 sys_vmspace_pwrite(struct vmspace_pwrite_args *uap) 299 { 300 struct vkernel_common *vc; 301 struct vmspace_entry *ve; 302 struct vkernel *vk; 303 304 if ((vk = curproc->p_vkernel) == NULL) 305 return (EINVAL); 306 vc = vk->vk_common; 307 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 308 return (ENOENT); 309 return (EINVAL); 310 } 311 312 /* 313 * vmspace_mcontrol(id, addr, len, behav, value) 314 * 315 * madvise/mcontrol support for a vmspace. 
316 */ 317 int 318 sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap) 319 { 320 struct vkernel_common *vc; 321 struct vmspace_entry *ve; 322 struct vkernel *vk; 323 vm_offset_t start, end; 324 325 if ((vk = curproc->p_vkernel) == NULL) 326 return (EINVAL); 327 vc = vk->vk_common; 328 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 329 return (ENOENT); 330 331 /* 332 * This code is basically copied from sys_mcontrol() 333 */ 334 if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) 335 return (EINVAL); 336 337 if (VM_MAXUSER_ADDRESS > 0 && 338 ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS) 339 return (EINVAL); 340 #ifndef i386 341 if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS) 342 return (EINVAL); 343 #endif 344 if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) 345 return (EINVAL); 346 347 start = trunc_page((vm_offset_t) uap->addr); 348 end = round_page((vm_offset_t) uap->addr + uap->len); 349 350 return (vm_map_madvise(&ve->vmspace->vm_map, start, end, 351 uap->behav, uap->value)); 352 } 353 354 /* 355 * Red black tree functions 356 */ 357 static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *); 358 RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare); 359 360 /* a->start is address, and the only field has to be initialized */ 361 static int 362 rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b) 363 { 364 if ((char *)a->id < (char *)b->id) 365 return(-1); 366 else if ((char *)a->id > (char *)b->id) 367 return(1); 368 return(0); 369 } 370 371 static 372 int 373 rb_vmspace_delete(struct vmspace_entry *ve, void *data) 374 { 375 struct vkernel_common *vc = data; 376 377 KKASSERT(ve->refs == 0); 378 vmspace_entry_delete(ve, vc); 379 return(0); 380 } 381 382 /* 383 * Remove a vmspace_entry from the RB tree and destroy it. We have to clean 384 * up the pmap, the vm_map, then destroy the vmspace. 
385 */ 386 static 387 void 388 vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_common *vc) 389 { 390 RB_REMOVE(vmspace_rb_tree, &vc->vc_root, ve); 391 392 pmap_remove_pages(vmspace_pmap(ve->vmspace), 393 VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); 394 vm_map_remove(&ve->vmspace->vm_map, 395 VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); 396 vmspace_free(ve->vmspace); 397 kfree(ve, M_VKERNEL); 398 } 399 400 401 static 402 struct vmspace_entry * 403 vkernel_find_vmspace(struct vkernel_common *vc, void *id) 404 { 405 struct vmspace_entry *ve; 406 struct vmspace_entry key; 407 408 key.id = id; 409 ve = RB_FIND(vmspace_rb_tree, &vc->vc_root, &key); 410 return (ve); 411 } 412 413 /* 414 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing 415 * a vkernel process. 416 */ 417 void 418 vkernel_inherit(struct proc *p1, struct proc *p2) 419 { 420 struct vkernel_common *vc; 421 struct vkernel *vk; 422 423 vk = p1->p_vkernel; 424 vc = vk->vk_common; 425 KKASSERT(vc->vc_refs > 0); 426 atomic_add_int(&vc->vc_refs, 1); 427 vk = kmalloc(sizeof(*vk), M_VKERNEL, M_WAITOK|M_ZERO); 428 p2->p_vkernel = vk; 429 vk->vk_common = vc; 430 } 431 432 void 433 vkernel_exit(struct proc *p) 434 { 435 struct vkernel_common *vc; 436 struct vmspace_entry *ve; 437 struct vkernel *vk; 438 int freeme = 0; 439 440 vk = p->p_vkernel; 441 p->p_vkernel = NULL; 442 vc = vk->vk_common; 443 vk->vk_common = NULL; 444 445 /* 446 * Restore the original VM context if we are killed while running 447 * a different one. 
448 */ 449 if ((ve = vk->vk_current) != NULL) { 450 printf("killed with active VC\n"); 451 vk->vk_current = NULL; 452 pmap_deactivate(p); 453 p->p_vmspace = vk->vk_save_vmspace; 454 pmap_activate(p); 455 vk->vk_save_vmspace = NULL; 456 KKASSERT(ve->refs > 0); 457 --ve->refs; 458 } 459 460 /* 461 * Dereference the common area 462 */ 463 KKASSERT(vc->vc_refs > 0); 464 spin_lock_wr(&vc->vc_spin); 465 if (--vc->vc_refs == 0) 466 freeme = 1; 467 spin_unlock_wr(&vc->vc_spin); 468 469 if (freeme) { 470 RB_SCAN(vmspace_rb_tree, &vc->vc_root, NULL, 471 rb_vmspace_delete, vc); 472 kfree(vc, M_VKERNEL); 473 } 474 kfree(vk, M_VKERNEL); 475 } 476 477 /* 478 * A VM space under virtual kernel control trapped out or made a system call 479 * or otherwise needs to return control to the virtual kernel context. 480 */ 481 int 482 vkernel_trap(struct proc *p, struct trapframe *frame) 483 { 484 struct vmspace_entry *ve; 485 struct vkernel *vk; 486 int error; 487 488 printf("trap for vkernel type %d wm=%d\n", 489 frame->tf_trapno & 0x7FFFFFFF, 490 ((frame->tf_trapno & 0x80000000) ? 1 : 0)); 491 492 /* 493 * Which vmspace entry was running? 494 */ 495 vk = p->p_vkernel; 496 ve = vk->vk_current; 497 vk->vk_current = NULL; 498 KKASSERT(ve != NULL); 499 500 /* 501 * Switch the process context back to the virtual kernel's VM space. 502 */ 503 pmap_deactivate(p); 504 p->p_vmspace = vk->vk_save_vmspace; 505 pmap_activate(p); 506 vk->vk_save_vmspace = NULL; 507 KKASSERT(ve->refs > 0); 508 --ve->refs; 509 510 /* 511 * Copy the trapframe to the virtual kernel's userspace, then 512 * restore virtual kernel's original syscall trap frame so we 513 * can 'return' from the system call that ran the custom VM space. 514 */ 515 error = copyout(frame, vk->vk_user_frame, sizeof(*frame)); 516 bcopy(&vk->vk_save_frame, frame, sizeof(*frame)); 517 return(error); 518 } 519 520