/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 02/19/95
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>

#ifdef DEBUG
int mmapdebug = 0;
#define MDB_FOLLOW	0x01
#define MDB_SYNC	0x02
#define MDB_MAPIT	0x04
#endif
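/*
 * Note: mmapdebug is a bitmask of the MDB_* values above; it can be
 * set nonzero at runtime (e.g. patched from a kernel debugger) to
 * trace syscall entry (MDB_FOLLOW), msync activity (MDB_SYNC), and
 * final mapping decisions (MDB_MAPIT).
 */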

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args /* {
		syscallarg(int) incr;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/* ARGSUSED */
int
compat_43_getpagesize(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifdef COMPAT_43
int
compat_43_mmap(p, uap, retval)
	struct proc *p;
	register struct compat_43_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pos;
	} */ *uap;
	register_t *retval;
{
	struct mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define OMAP_ANON	0x0002
#define OMAP_COPY	0x0020
#define OMAP_SHARED	0x0010
#define OMAP_FIXED	0x0100
#define OMAP_INHERIT	0x0800
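	/*
	 * The low three bits of the old-style prot argument used a
	 * different encoding (0x1 exec, 0x2 write, 0x4 read, as the
	 * table layout above shows), so cvtbsdprot[] is indexed by
	 * those bits to yield the equivalent new PROT_* combination;
	 * e.g. an old prot of 0x6 (write|read) converts to
	 * PROT_WRITE|PROT_READ.
	 */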

	SCARG(&nargs, addr) = SCARG(uap, addr);
	SCARG(&nargs, len) = SCARG(uap, len);
	SCARG(&nargs, prot) = cvtbsdprot[SCARG(uap, prot)&0x7];
	SCARG(&nargs, flags) = 0;
	if (SCARG(uap, flags) & OMAP_ANON)
		SCARG(&nargs, flags) |= MAP_ANON;
	if (SCARG(uap, flags) & OMAP_COPY)
		SCARG(&nargs, flags) |= MAP_COPY;
	if (SCARG(uap, flags) & OMAP_SHARED)
		SCARG(&nargs, flags) |= MAP_SHARED;
	else
		SCARG(&nargs, flags) |= MAP_PRIVATE;
	if (SCARG(uap, flags) & OMAP_FIXED)
		SCARG(&nargs, flags) |= MAP_FIXED;
	if (SCARG(uap, flags) & OMAP_INHERIT)
		SCARG(&nargs, flags) |= MAP_INHERIT;
	SCARG(&nargs, fd) = SCARG(uap, fd);
	SCARG(&nargs, pos) = SCARG(uap, pos);
	return (mmap(p, &nargs, retval));
}
#endif

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr, pos;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	pos = SCARG(uap, pos);
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
		    p->p_pid, SCARG(uap, addr), SCARG(uap, len), prot,
		    flags, SCARG(uap, fd), pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned.
	 * Size is implicitly rounded to a page boundary.
	 *
	 * XXX most (all?) vendors require that the file offset be
	 * page aligned as well.  However, we already have applications
	 * (e.g. nlist) that rely on unrestricted alignment.  Since we
	 * support it, let it happen.
	 */
	addr = (vm_offset_t) SCARG(uap, addr);
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
#if 0
	    ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
#endif
	    (ssize_t)SCARG(uap, len) < 0 ||
	    ((flags & MAP_ANON) && SCARG(uap, fd) != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(SCARG(uap, len));
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);
	}
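	/*
	 * Example of the wrap check above: with a 32-bit vm_offset_t,
	 * addr = 0xfffff000 and size = 0x2000 wrap to addr + size =
	 * 0x1000, which would slip past the VM_MAXUSER_ADDRESS test
	 * but is caught by addr > addr + size.  (Illustrative values
	 * only.)
	 */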
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping file, get fp for validation.
		 * Obtain vnode and make sure it is of appropriate type.
		 */
		if (((unsigned)SCARG(uap, fd)) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon
		 * memory (ala SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?
			 * What if proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t)vp;
		}
	}
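	/*
	 * At this point prot/maxprot are settled.  For example, a
	 * MAP_SHARED mapping of a descriptor opened read-only ends up
	 * with maxprot = VM_PROT_READ|VM_PROT_EXECUTE, so a request
	 * for PROT_WRITE has already failed with EACCES above and a
	 * later mprotect() adding write access will fail as well.
	 */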
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);
	if (error == 0)
		*retval = (register_t)addr;
	return (error);
}

int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;
	int rv;
	boolean_t syncio, invalidate;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		    p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	if (((vm_offset_t)SCARG(uap, addr) & PAGE_MASK) ||
	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages within the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (!rv)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing address range [%x-%x)\n",
		    addr, addr+size);
#endif
	/*
	 * Could pass this in as a third flag argument to implement
	 * Sun's MS_ASYNC.
	 */
	syncio = TRUE;
	/*
	 * XXX bummer, gotta flush all cached pages to ensure
	 * consistency with the file system cache.  Otherwise, we could
	 * pass this in to implement Sun's MS_INVALIDATE.
	 */
	invalidate = TRUE;
	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}

int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		    p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif

	addr = (vm_offset_t) SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
		return (EINVAL);
	size = (vm_size_t) round_page(SCARG(uap, len));
	if (size == 0)
		return (0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 * XXX this seemed overly restrictive, so we relaxed it.
	 */
#if 0
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
#endif
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr+size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
#endif

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		    p->p_pid, SCARG(uap, addr), SCARG(uap, len),
		    SCARG(uap, prot));
#endif

	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
		return (EINVAL);
	size = (vm_size_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

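	/*
	 * The FALSE argument below asks vm_map_protect() to change the
	 * current protection only; the maximum protection recorded in
	 * the map entries is left alone, so this can succeed only for
	 * protections within the established maximum (cf. the TRUE
	 * case in vm_mmap() below).
	 */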
	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) behav;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(char *) vec;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		    p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) ||
	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
	size = round_page((vm_size_t)SCARG(uap, len));
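	/*
	 * Two limits apply here: a global cap on total wired pages
	 * (vm_page_max_wired) and, where the pmap can report per-map
	 * wirings, the per-process RLIMIT_MEMLOCK resource limit.
	 * Without pmap_wired_count() we fall back to requiring
	 * superuser privilege.
	 */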
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		    p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) ||
	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
#ifndef pmap_wired_count
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	size = round_page((vm_size_t)SCARG(uap, len));

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}
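	/*
	 * Note that for MAP_FIXED we deallocated any existing mapping
	 * in the range up front, so a fixed mapping silently replaces
	 * whatever was there; for non-fixed requests the hint is just
	 * rounded and the map is left to find space.
	 */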

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existence of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
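	/*
	 * For character devices the pager is keyed by the device
	 * number rather than the vnode, hence the handle swap above.
	 */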
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL,
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			    curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			    vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
			    fitit, pager, foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
			    addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
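			/*
			 * Sketch of the copy-on-write dance below: the
			 * file is mapped into a scratch map (tmap) at
			 * VM_MIN_ADDRESS and then vm_map_copy()ed into
			 * the real map at *addr, which is what actually
			 * gives the mapping COW semantics.  The scratch
			 * map is torn down once the copy has been made.
			 */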
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
			    VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
			    TRUE, pager, foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that, we need to guarantee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
			    FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
				    &tentry, &tobject, &toffset,
				    &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			    curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different than prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}