1 /* $NetBSD: uvm_mmap.c,v 1.185 2023/11/21 14:35:36 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993 The Regents of the University of California.
6 * Copyright (c) 1988 University of Utah.
7 *
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * the Systems Programming Group of the University of Utah Computer
12 * Science Department.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
39 * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
40 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
41 */
42
43 /*
44 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
45 * function.
46 */
47
48 #include <sys/cdefs.h>
49 __KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.185 2023/11/21 14:35:36 riastradh Exp $");
50
51 #include "opt_compat_netbsd.h"
52 #include "opt_pax.h"
53
54 #include <sys/param.h>
55 #include <sys/types.h>
56 #include <sys/file.h>
57 #include <sys/filedesc.h>
58 #include <sys/resourcevar.h>
59 #include <sys/mman.h>
60 #include <sys/pax.h>
61
62 #include <sys/syscallargs.h>
63
64 #include <uvm/uvm.h>
65 #include <uvm/uvm_device.h>
66
67 static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
68 int, int, struct uvm_object *, voff_t, vsize_t);
69
70 static int
range_test(const struct vm_map * map,vaddr_t addr,vsize_t size,bool ismmap)71 range_test(const struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
72 {
73 vaddr_t vm_min_address = vm_map_min(map);
74 vaddr_t vm_max_address = vm_map_max(map);
75 vaddr_t eaddr = addr + size;
76 int res = 0;
77
78 if (addr < vm_min_address)
79 return EINVAL;
80 if (eaddr > vm_max_address)
81 return ismmap ? EFBIG : EINVAL;
82 if (addr > eaddr) /* no wrapping! */
83 return ismmap ? EOVERFLOW : EINVAL;
84
85 #ifdef MD_MMAP_RANGE_TEST
86 res = MD_MMAP_RANGE_TEST(addr, eaddr);
87 #endif
88
89 return res;
90 }
91
92 /*
93 * align the address to a page boundary, and adjust the size accordingly
94 */
95 static int
round_and_check(const struct vm_map * map,vaddr_t * addr,vsize_t * size)96 round_and_check(const struct vm_map *map, vaddr_t *addr, vsize_t *size)
97 {
98 const vsize_t pageoff = (vsize_t)(*addr & PAGE_MASK);
99
100 *addr -= pageoff;
101
102 if (*size != 0) {
103 *size += pageoff;
104 *size = (vsize_t)round_page(*size);
105 } else if (*addr + *size < *addr) {
106 return ENOMEM;
107 }
108
109 return range_test(map, *addr, *size, false);
110 }
111
112 /*
113 * sys_mincore: determine if pages are in core or not.
114 */
115
116 /* ARGSUSED */
117 int
sys_mincore(struct lwp * l,const struct sys_mincore_args * uap,register_t * retval)118 sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
119 register_t *retval)
120 {
121 /* {
122 syscallarg(void *) addr;
123 syscallarg(size_t) len;
124 syscallarg(char *) vec;
125 } */
126 struct proc *p = l->l_proc;
127 struct vm_page *pg;
128 char *vec, pgi;
129 struct uvm_object *uobj;
130 struct vm_amap *amap;
131 struct vm_anon *anon;
132 struct vm_map_entry *entry;
133 vaddr_t start, end, lim;
134 struct vm_map *map;
135 vsize_t len;
136 int error = 0;
137 size_t npgs;
138
139 map = &p->p_vmspace->vm_map;
140
141 start = (vaddr_t)SCARG(uap, addr);
142 len = SCARG(uap, len);
143 vec = SCARG(uap, vec);
144
145 if (start & PAGE_MASK)
146 return EINVAL;
147 len = round_page(len);
148 end = start + len;
149 if (end <= start)
150 return EINVAL;
151
152 /*
153 * Lock down vec, so our returned status isn't outdated by
154 * storing the status byte for a page.
155 */
156
157 npgs = len >> PAGE_SHIFT;
158 error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
159 if (error) {
160 return error;
161 }
162 vm_map_lock_read(map);
163
164 if (uvm_map_lookup_entry(map, start, &entry) == false) {
165 error = ENOMEM;
166 goto out;
167 }
168
169 for (/* nothing */;
170 entry != &map->header && entry->start < end;
171 entry = entry->next) {
172 KASSERT(!UVM_ET_ISSUBMAP(entry));
173 KASSERT(start >= entry->start);
174
175 /* Make sure there are no holes. */
176 if (entry->end < end &&
177 (entry->next == &map->header ||
178 entry->next->start > entry->end)) {
179 error = ENOMEM;
180 goto out;
181 }
182
183 lim = end < entry->end ? end : entry->end;
184
185 /*
186 * Special case for objects with no "real" pages. Those
187 * are always considered resident (mapped devices).
188 */
189
190 if (UVM_ET_ISOBJ(entry)) {
191 KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
192 if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
193 for (/* nothing */; start < lim;
194 start += PAGE_SIZE, vec++)
195 ustore_char(vec, 1);
196 continue;
197 }
198 }
199
200 amap = entry->aref.ar_amap; /* upper layer */
201 uobj = entry->object.uvm_obj; /* lower layer */
202
203 if (amap != NULL)
204 amap_lock(amap, RW_READER);
205 if (uobj != NULL)
206 rw_enter(uobj->vmobjlock, RW_READER);
207
208 for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
209 pgi = 0;
210 if (amap != NULL) {
211 /* Check the upper layer first. */
212 anon = amap_lookup(&entry->aref,
213 start - entry->start);
214 /* Don't need to lock anon here. */
215 if (anon != NULL && anon->an_page != NULL) {
216
217 /*
218 * Anon has the page for this entry
219 * offset.
220 */
221
222 pgi = 1;
223 }
224 }
225 if (uobj != NULL && pgi == 0) {
226 /* Check the lower layer. */
227 pg = uvm_pagelookup(uobj,
228 entry->offset + (start - entry->start));
229 if (pg != NULL) {
230
231 /*
232 * Object has the page for this entry
233 * offset.
234 */
235
236 pgi = 1;
237 }
238 }
239 (void) ustore_char(vec, pgi);
240 }
241 if (uobj != NULL)
242 rw_exit(uobj->vmobjlock);
243 if (amap != NULL)
244 amap_unlock(amap);
245 }
246
247 out:
248 vm_map_unlock_read(map);
249 uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
250 return error;
251 }
252
253 /*
254 * sys_mmap: mmap system call.
255 *
256 * => file offset and address may not be page aligned
257 * - if MAP_FIXED, offset and address must have remainder mod PAGE_SIZE
258 * - if address isn't page aligned the mapping starts at trunc_page(addr)
259 * and the return value is adjusted up by the page offset.
260 */
261
262 int
sys_mmap(struct lwp * l,const struct sys_mmap_args * uap,register_t * retval)263 sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
264 {
265 /* {
266 syscallarg(void *) addr;
267 syscallarg(size_t) len;
268 syscallarg(int) prot;
269 syscallarg(int) flags;
270 syscallarg(int) fd;
271 syscallarg(long) pad;
272 syscallarg(off_t) pos;
273 } */
274 struct proc *p = l->l_proc;
275 vaddr_t addr;
276 off_t pos;
277 vsize_t size, pageoff;
278 vm_prot_t prot, maxprot, extraprot;
279 int flags, fd, advice;
280 vaddr_t defaddr = 0; /* XXXGCC */
281 bool addrhint = false;
282 struct file *fp = NULL;
283 struct uvm_object *uobj;
284 int error;
285 vaddr_t orig_addr;
286
287 /*
288 * first, extract syscall args from the uap.
289 */
290
291 addr = (vaddr_t)SCARG(uap, addr);
292 size = (vsize_t)SCARG(uap, len);
293 prot = SCARG(uap, prot) & VM_PROT_ALL;
294 extraprot = PROT_MPROTECT_EXTRACT(SCARG(uap, prot));
295 flags = SCARG(uap, flags);
296 fd = SCARG(uap, fd);
297 pos = SCARG(uap, pos);
298
299 orig_addr = addr;
300
301 if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
302 return EINVAL;
303
304 if (size == 0 && (flags & MAP_ANON) == 0)
305 return EINVAL;
306
307 /*
308 * Align file position and save offset into page. Adjust size
309 * so that it is an integral multiple of the page size.
310 */
311 pageoff = pos & PAGE_MASK;
312 pos -= pageoff;
313 KASSERT(PAGE_MASK <= __type_max(vsize_t));
314 KASSERT((__type_max(vsize_t) - PAGE_SIZE + 1) % PAGE_SIZE == 0);
315 if (size > __type_max(vsize_t) - PAGE_SIZE + 1 - pageoff)
316 return ENOMEM;
317 /*
318 * size + pageoff <= VSIZE_MAX + 1 - PAGE_SIZE, and the
319 * right-hand side is an integral multiple of the page size, so
320 * round_page(size + pageoff) <= VSIZE_MAX + 1 - PAGE_SIZE.
321 */
322 size = round_page(size + pageoff);
323
324 /*
325 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
326 */
327 if (flags & MAP_FIXED) {
328 /* ensure address and file offset are aligned properly */
329 addr -= pageoff;
330 if (addr & PAGE_MASK)
331 return EINVAL;
332
333 error = range_test(&p->p_vmspace->vm_map, addr, size, true);
334 if (error) {
335 return error;
336 }
337 } else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
338 /*
339 * not fixed: make sure we skip over the largest
340 * possible heap for non-topdown mapping arrangements.
341 * we will refine our guess later (e.g. to account for
342 * VAC, etc)
343 */
344
345 defaddr = p->p_emul->e_vm_default_addr(p,
346 (vaddr_t)p->p_vmspace->vm_daddr, size,
347 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
348
349 if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
350 addr = MAX(addr, defaddr);
351 else
352 addr = MIN(addr, defaddr);
353
354 /*
355 * If addr is nonzero and not the default, then the
356 * address is a hint.
357 */
358 addrhint = (addr != 0 && addr != defaddr);
359 }
360
361 /*
362 * check for file mappings (i.e. not anonymous) and verify file.
363 */
364
365 advice = UVM_ADV_NORMAL;
366 if ((flags & MAP_ANON) == 0) {
367 KASSERT(size != 0);
368
369 if ((fp = fd_getfile(fd)) == NULL)
370 return EBADF;
371
372 if (fp->f_ops->fo_mmap == NULL) {
373 error = ENODEV;
374 goto out;
375 }
376 error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
377 &advice, &uobj, &maxprot);
378 if (error) {
379 goto out;
380 }
381 if (uobj == NULL) {
382 flags |= MAP_ANON;
383 fd_putfile(fd);
384 fp = NULL;
385 goto is_anon;
386 }
387 } else { /* MAP_ANON case */
388 /*
389 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
390 */
391 if (fd != -1)
392 return EINVAL;
393
394 is_anon: /* label for SunOS style /dev/zero */
395 uobj = NULL;
396 maxprot = VM_PROT_ALL;
397 pos = 0;
398 }
399
400 maxprot = PAX_MPROTECT_MAXPROTECT(l, prot, extraprot, maxprot);
401 if (((prot | extraprot) & maxprot) != (prot | extraprot)) {
402 error = EACCES;
403 goto out;
404 }
405 if ((error = PAX_MPROTECT_VALIDATE(l, prot)))
406 goto out;
407
408 pax_aslr_mmap(l, &addr, orig_addr, flags);
409
410 /*
411 * Now let kernel internal function uvm_mmap do the work.
412 *
413 * If the user provided a hint, take a reference to uobj in
414 * case the first attempt to satisfy the hint fails, so we can
415 * try again with the default address.
416 */
417 if (addrhint) {
418 if (uobj)
419 (*uobj->pgops->pgo_reference)(uobj);
420 }
421 error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
422 flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
423 if (addrhint) {
424 if (error) {
425 addr = defaddr;
426 pax_aslr_mmap(l, &addr, orig_addr, flags);
427 error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size,
428 prot, maxprot, flags, advice, uobj, pos,
429 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
430 } else if (uobj) {
431 /* Release the exta reference we took. */
432 (*uobj->pgops->pgo_detach)(uobj);
433 }
434 }
435
436 /* remember to add offset */
437 *retval = (register_t)(addr + pageoff);
438
439 out:
440 if (fp != NULL)
441 fd_putfile(fd);
442
443 return error;
444 }
445
446 /*
447 * sys___msync13: the msync system call (a front-end for flush)
448 */
449
450 int
sys___msync13(struct lwp * l,const struct sys___msync13_args * uap,register_t * retval)451 sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
452 register_t *retval)
453 {
454 /* {
455 syscallarg(void *) addr;
456 syscallarg(size_t) len;
457 syscallarg(int) flags;
458 } */
459 struct proc *p = l->l_proc;
460 vaddr_t addr;
461 vsize_t size;
462 struct vm_map *map;
463 int error, flags, uvmflags;
464 bool rv;
465
466 /*
467 * extract syscall args from the uap
468 */
469
470 addr = (vaddr_t)SCARG(uap, addr);
471 size = (vsize_t)SCARG(uap, len);
472 flags = SCARG(uap, flags);
473
474 /* sanity check flags */
475 if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
476 (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
477 (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
478 return EINVAL;
479 if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
480 flags |= MS_SYNC;
481
482 /*
483 * get map
484 */
485 map = &p->p_vmspace->vm_map;
486
487 if (round_and_check(map, &addr, &size))
488 return ENOMEM;
489
490 /*
491 * XXXCDC: do we really need this semantic?
492 *
493 * XXX Gak! If size is zero we are supposed to sync "all modified
494 * pages with the region containing addr". Unfortunately, we
495 * don't really keep track of individual mmaps so we approximate
496 * by flushing the range of the map entry containing addr.
497 * This can be incorrect if the region splits or is coalesced
498 * with a neighbor.
499 */
500
501 if (size == 0) {
502 struct vm_map_entry *entry;
503
504 vm_map_lock_read(map);
505 rv = uvm_map_lookup_entry(map, addr, &entry);
506 if (rv == true) {
507 addr = entry->start;
508 size = entry->end - entry->start;
509 }
510 vm_map_unlock_read(map);
511 if (rv == false)
512 return EINVAL;
513 }
514
515 /*
516 * translate MS_ flags into PGO_ flags
517 */
518
519 uvmflags = PGO_CLEANIT;
520 if (flags & MS_INVALIDATE)
521 uvmflags |= PGO_FREE;
522 if (flags & MS_SYNC)
523 uvmflags |= PGO_SYNCIO;
524
525 error = uvm_map_clean(map, addr, addr+size, uvmflags);
526 return error;
527 }
528
529 /*
530 * sys_munmap: unmap a users memory
531 */
532
533 int
sys_munmap(struct lwp * l,const struct sys_munmap_args * uap,register_t * retval)534 sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
535 {
536 /* {
537 syscallarg(void *) addr;
538 syscallarg(size_t) len;
539 } */
540 struct proc *p = l->l_proc;
541 vaddr_t addr;
542 vsize_t size;
543 struct vm_map *map;
544 struct vm_map_entry *dead_entries;
545
546 /*
547 * get syscall args.
548 */
549
550 addr = (vaddr_t)SCARG(uap, addr);
551 size = (vsize_t)SCARG(uap, len);
552
553 map = &p->p_vmspace->vm_map;
554
555 if (round_and_check(map, &addr, &size))
556 return EINVAL;
557
558 if (size == 0)
559 return 0;
560
561 vm_map_lock(map);
562 #if 0
563 /*
564 * interesting system call semantic: make sure entire range is
565 * allocated before allowing an unmap.
566 */
567 if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
568 vm_map_unlock(map);
569 return EINVAL;
570 }
571 #endif
572 uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
573 vm_map_unlock(map);
574 if (dead_entries != NULL)
575 uvm_unmap_detach(dead_entries, 0);
576 return 0;
577 }
578
579 /*
580 * sys_mprotect: the mprotect system call
581 */
582
583 int
sys_mprotect(struct lwp * l,const struct sys_mprotect_args * uap,register_t * retval)584 sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
585 register_t *retval)
586 {
587 /* {
588 syscallarg(void *) addr;
589 syscallarg(size_t) len;
590 syscallarg(int) prot;
591 } */
592 struct proc *p = l->l_proc;
593 vaddr_t addr;
594 vsize_t size;
595 vm_prot_t prot;
596 int error;
597
598 /*
599 * extract syscall args from uap
600 */
601
602 addr = (vaddr_t)SCARG(uap, addr);
603 size = (vsize_t)SCARG(uap, len);
604 prot = SCARG(uap, prot) & VM_PROT_ALL;
605
606 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
607 return EINVAL;
608
609 error = uvm_map_protect_user(l, addr, addr + size, prot);
610 return error;
611 }
612
613 /*
614 * sys_minherit: the minherit system call
615 */
616
617 int
sys_minherit(struct lwp * l,const struct sys_minherit_args * uap,register_t * retval)618 sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
619 register_t *retval)
620 {
621 /* {
622 syscallarg(void *) addr;
623 syscallarg(int) len;
624 syscallarg(int) inherit;
625 } */
626 struct proc *p = l->l_proc;
627 vaddr_t addr;
628 vsize_t size;
629 vm_inherit_t inherit;
630 int error;
631
632 addr = (vaddr_t)SCARG(uap, addr);
633 size = (vsize_t)SCARG(uap, len);
634 inherit = SCARG(uap, inherit);
635
636 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
637 return EINVAL;
638
639 error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
640 inherit);
641 return error;
642 }
643
644 /*
645 * sys_madvise: give advice about memory usage.
646 */
647
648 /* ARGSUSED */
649 int
sys_madvise(struct lwp * l,const struct sys_madvise_args * uap,register_t * retval)650 sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
651 register_t *retval)
652 {
653 /* {
654 syscallarg(void *) addr;
655 syscallarg(size_t) len;
656 syscallarg(int) behav;
657 } */
658 struct proc *p = l->l_proc;
659 vaddr_t addr;
660 vsize_t size;
661 int advice, error;
662
663 addr = (vaddr_t)SCARG(uap, addr);
664 size = (vsize_t)SCARG(uap, len);
665 advice = SCARG(uap, behav);
666
667 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
668 return EINVAL;
669
670 switch (advice) {
671 case MADV_NORMAL:
672 case MADV_RANDOM:
673 case MADV_SEQUENTIAL:
674 error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
675 advice);
676 break;
677
678 case MADV_WILLNEED:
679
680 /*
681 * Activate all these pages, pre-faulting them in if
682 * necessary.
683 */
684 error = uvm_map_willneed(&p->p_vmspace->vm_map,
685 addr, addr + size);
686 break;
687
688 case MADV_DONTNEED:
689
690 /*
691 * Deactivate all these pages. We don't need them
692 * any more. We don't, however, toss the data in
693 * the pages.
694 */
695
696 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
697 PGO_DEACTIVATE);
698 break;
699
700 case MADV_FREE:
701
702 /*
703 * These pages contain no valid data, and may be
704 * garbage-collected. Toss all resources, including
705 * any swap space in use.
706 */
707
708 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
709 PGO_FREE);
710 break;
711
712 case MADV_SPACEAVAIL:
713
714 /*
715 * XXXMRG What is this? I think it's:
716 *
717 * Ensure that we have allocated backing-store
718 * for these pages.
719 *
720 * This is going to require changes to the page daemon,
721 * as it will free swap space allocated to pages in core.
722 * There's also what to do for device/file/anonymous memory.
723 */
724
725 return EINVAL;
726
727 default:
728 return EINVAL;
729 }
730
731 return error;
732 }
733
734 /*
735 * sys_mlock: memory lock
736 */
737
738 int
sys_mlock(struct lwp * l,const struct sys_mlock_args * uap,register_t * retval)739 sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
740 {
741 /* {
742 syscallarg(const void *) addr;
743 syscallarg(size_t) len;
744 } */
745 struct proc *p = l->l_proc;
746 vaddr_t addr;
747 vsize_t size;
748 int error;
749
750 /*
751 * extract syscall args from uap
752 */
753
754 addr = (vaddr_t)SCARG(uap, addr);
755 size = (vsize_t)SCARG(uap, len);
756
757 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
758 return ENOMEM;
759
760 if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
761 return EAGAIN;
762
763 if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
764 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
765 return EAGAIN;
766
767 error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
768 0);
769 if (error == EFAULT)
770 error = ENOMEM;
771 return error;
772 }
773
774 /*
775 * sys_munlock: unlock wired pages
776 */
777
778 int
sys_munlock(struct lwp * l,const struct sys_munlock_args * uap,register_t * retval)779 sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
780 register_t *retval)
781 {
782 /* {
783 syscallarg(const void *) addr;
784 syscallarg(size_t) len;
785 } */
786 struct proc *p = l->l_proc;
787 vaddr_t addr;
788 vsize_t size;
789
790 /*
791 * extract syscall args from uap
792 */
793
794 addr = (vaddr_t)SCARG(uap, addr);
795 size = (vsize_t)SCARG(uap, len);
796
797 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
798 return ENOMEM;
799
800 if (uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true, 0))
801 return ENOMEM;
802
803 return 0;
804 }
805
806 /*
807 * sys_mlockall: lock all pages mapped into an address space.
808 */
809
810 int
sys_mlockall(struct lwp * l,const struct sys_mlockall_args * uap,register_t * retval)811 sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
812 register_t *retval)
813 {
814 /* {
815 syscallarg(int) flags;
816 } */
817 struct proc *p = l->l_proc;
818 int error, flags;
819
820 flags = SCARG(uap, flags);
821
822 if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
823 return EINVAL;
824
825 error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
826 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
827 return error;
828 }
829
830 /*
831 * sys_munlockall: unlock all pages mapped into an address space.
832 */
833
834 int
sys_munlockall(struct lwp * l,const void * v,register_t * retval)835 sys_munlockall(struct lwp *l, const void *v, register_t *retval)
836 {
837 struct proc *p = l->l_proc;
838
839 (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
840 return 0;
841 }
842
843 /*
844 * uvm_mmap: internal version of mmap
845 *
846 * - used by sys_mmap and various framebuffers
847 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
848 * - caller must page-align the file offset
849 *
850 * XXX This appears to leak the uobj in various error branches? Need
851 * to clean up the contract around uobj reference.
852 */
853
854 static int
uvm_mmap(struct vm_map * map,vaddr_t * addr,vsize_t size,vm_prot_t prot,vm_prot_t maxprot,int flags,int advice,struct uvm_object * uobj,voff_t foff,vsize_t locklimit)855 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
856 vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
857 voff_t foff, vsize_t locklimit)
858 {
859 vaddr_t align = 0;
860 int error;
861 uvm_flag_t uvmflag = 0;
862
863 /*
864 * check params
865 */
866
867 if (size == 0)
868 return 0;
869 if (foff & PAGE_MASK)
870 return EINVAL;
871 if ((prot & maxprot) != prot)
872 return EINVAL;
873
874 /*
875 * for non-fixed mappings, round off the suggested address.
876 * for fixed mappings, check alignment.
877 */
878
879 if ((flags & MAP_FIXED) == 0) {
880 *addr = round_page(*addr);
881 } else {
882 if (*addr & PAGE_MASK)
883 return EINVAL;
884 uvmflag |= UVM_FLAG_FIXED | UVM_FLAG_UNMAP;
885 }
886
887 /*
888 * Try to see if any requested alignment can even be attemped.
889 * Make sure we can express the alignment (asking for a >= 4GB
890 * alignment on an ILP32 architecure make no sense) and the
891 * alignment is at least for a page sized quanitiy. If the
892 * request was for a fixed mapping, make sure supplied address
893 * adheres to the request alignment.
894 */
895 align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
896 if (align) {
897 if (align >= sizeof(vaddr_t) * NBBY)
898 return EINVAL;
899 align = 1UL << align;
900 if (align < PAGE_SIZE)
901 return EINVAL;
902 if (align >= vm_map_max(map))
903 return ENOMEM;
904 if (flags & MAP_FIXED) {
905 if ((*addr & (align-1)) != 0)
906 return EINVAL;
907 align = 0;
908 }
909 }
910
911 /*
912 * check resource limits
913 */
914
915 if (!VM_MAP_IS_KERNEL(map) &&
916 (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
917 curproc->p_rlimit[RLIMIT_AS].rlim_cur))
918 return ENOMEM;
919
920 /*
921 * handle anon vs. non-anon mappings. for non-anon mappings attach
922 * to underlying vm object.
923 */
924
925 if (flags & MAP_ANON) {
926 KASSERT(uobj == NULL);
927 foff = UVM_UNKNOWN_OFFSET;
928 if ((flags & MAP_SHARED) == 0)
929 /* XXX: defer amap create */
930 uvmflag |= UVM_FLAG_COPYONW;
931 else
932 /* shared: create amap now */
933 uvmflag |= UVM_FLAG_OVERLAY;
934
935 } else {
936 KASSERT(uobj != NULL);
937 if ((flags & MAP_SHARED) == 0) {
938 uvmflag |= UVM_FLAG_COPYONW;
939 }
940 }
941
942 uvmflag = UVM_MAPFLAG(prot, maxprot,
943 (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
944 uvmflag);
945 error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
946 if (error) {
947 if (uobj)
948 uobj->pgops->pgo_detach(uobj);
949 return error;
950 }
951
952 /*
953 * POSIX 1003.1b -- if our address space was configured
954 * to lock all future mappings, wire the one we just made.
955 *
956 * Also handle the MAP_WIRED flag here.
957 */
958
959 if (prot == VM_PROT_NONE) {
960
961 /*
962 * No more work to do in this case.
963 */
964
965 return 0;
966 }
967 if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
968 vm_map_lock(map);
969 if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
970 (locklimit != 0 &&
971 size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
972 locklimit)) {
973 vm_map_unlock(map);
974 uvm_unmap(map, *addr, *addr + size);
975 return ENOMEM;
976 }
977
978 /*
979 * uvm_map_pageable() always returns the map unlocked.
980 */
981
982 error = uvm_map_pageable(map, *addr, *addr + size,
983 false, UVM_LK_ENTER);
984 if (error) {
985 uvm_unmap(map, *addr, *addr + size);
986 return error;
987 }
988 return 0;
989 }
990 return 0;
991 }
992
993 vaddr_t
uvm_default_mapaddr(struct proc * p,vaddr_t base,vsize_t sz,int topdown)994 uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
995 {
996
997 if (topdown)
998 return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
999 else
1000 return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
1001 }
1002
1003 int
uvm_mmap_dev(struct proc * p,void ** addrp,size_t len,dev_t dev,off_t off)1004 uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
1005 off_t off)
1006 {
1007 struct uvm_object *uobj;
1008 int error, flags, prot;
1009
1010 KASSERT(len > 0);
1011
1012 flags = MAP_SHARED;
1013 prot = VM_PROT_READ | VM_PROT_WRITE;
1014 if (*addrp)
1015 flags |= MAP_FIXED;
1016 else
1017 *addrp = (void *)p->p_emul->e_vm_default_addr(p,
1018 (vaddr_t)p->p_vmspace->vm_daddr, len,
1019 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
1020
1021 uobj = udv_attach(dev, prot, off, len);
1022 if (uobj == NULL)
1023 return EINVAL;
1024
1025 error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
1026 (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
1027 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1028 return error;
1029 }
1030
1031 int
uvm_mmap_anon(struct proc * p,void ** addrp,size_t len)1032 uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
1033 {
1034 int error, flags, prot;
1035
1036 flags = MAP_PRIVATE | MAP_ANON;
1037 prot = VM_PROT_READ | VM_PROT_WRITE;
1038 if (*addrp)
1039 flags |= MAP_FIXED;
1040 else
1041 *addrp = (void *)p->p_emul->e_vm_default_addr(p,
1042 (vaddr_t)p->p_vmspace->vm_daddr, len,
1043 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
1044
1045 error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
1046 (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
1047 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1048 return error;
1049 }
1050