/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 02/19/95
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>

#ifdef DEBUG
int mmapdebug = 0;
#define MDB_FOLLOW	0x01
#define MDB_SYNC	0x02
#define MDB_MAPIT	0x04
#endif
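
/*
 * mmapdebug is a bit mask of the MDB_* values above: MDB_FOLLOW
 * traces system call entry, MDB_SYNC traces msync(), and MDB_MAPIT
 * traces the mapping work done in vm_mmap().  It is normally zero
 * and is meant to be set by hand (e.g. from the kernel debugger).
 */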

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args /* {
		syscallarg(int) incr;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/* ARGSUSED */
int
compat_43_getpagesize(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifdef COMPAT_43
int
compat_43_mmap(p, uap, retval)
	struct proc *p;
	register struct compat_43_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pos;
	} */ *uap;
	register_t *retval;
{
	struct mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
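	/*
	 * The table above decodes the old 4.3BSD-style protection value:
	 * bit 0 requests execute, bit 1 write, and bit 2 read permission,
	 * so indexing with the low three bits yields the equivalent
	 * PROT_* combination.
	 */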
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800
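	/*
	 * For example, an old binary passing OMAP_SHARED|OMAP_FIXED
	 * (0x0110) is translated below into MAP_SHARED|MAP_FIXED.
	 */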

	SCARG(&nargs, addr) = SCARG(uap, addr);
	SCARG(&nargs, len) = SCARG(uap, len);
	SCARG(&nargs, prot) = cvtbsdprot[SCARG(uap, prot)&0x7];
	SCARG(&nargs, flags) = 0;
	if (SCARG(uap, flags) & OMAP_ANON)
		SCARG(&nargs, flags) |= MAP_ANON;
	if (SCARG(uap, flags) & OMAP_COPY)
		SCARG(&nargs, flags) |= MAP_COPY;
	if (SCARG(uap, flags) & OMAP_SHARED)
		SCARG(&nargs, flags) |= MAP_SHARED;
	else
		SCARG(&nargs, flags) |= MAP_PRIVATE;
	if (SCARG(uap, flags) & OMAP_FIXED)
		SCARG(&nargs, flags) |= MAP_FIXED;
	if (SCARG(uap, flags) & OMAP_INHERIT)
		SCARG(&nargs, flags) |= MAP_INHERIT;
	SCARG(&nargs, fd) = SCARG(uap, fd);
	SCARG(&nargs, pos) = SCARG(uap, pos);
	return (mmap(p, &nargs, retval));
}
#endif

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr, pos;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	pos = SCARG(uap, pos);
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len), prot,
		       flags, SCARG(uap, fd), pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned.
	 * Size is implicitly rounded to a page boundary.
	 *
	 * XXX most (all?) vendors require that the file offset be
	 * page aligned as well.  However, we already have applications
	 * (e.g. nlist) that rely on unrestricted alignment.  Since we
	 * support it, let it happen.
	 */
	addr = (vm_offset_t) SCARG(uap, addr);
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
#if 0
	    ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
#endif
	    (ssize_t)SCARG(uap, len) < 0 || ((flags & MAP_ANON) && SCARG(uap, fd) != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(SCARG(uap, len));
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);
	}
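	/*
	 * Wrap example on a 32-bit address space: addr = 0xfffff000
	 * with size = 0x2000 gives addr + size == 0x1000, which the
	 * addr > addr + size test above rejects.
	 */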
21158081Smckusick 	/*
21267182Shibler 	 * XXX for non-fixed mappings where no hint is provided or
21367182Shibler 	 * the hint would fall in the potential heap space,
21467182Shibler 	 * place it after the end of the largest possible heap.
21550261Shibler 	 *
21650261Shibler 	 * There should really be a pmap call to determine a reasonable
21750261Shibler 	 * location.
21850261Shibler 	 */
21967182Shibler 	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
22050261Shibler 		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
22158598Shibler 	if (flags & MAP_ANON) {
22258878Smckusick 		/*
22358878Smckusick 		 * Mapping blank space is trivial.
22458878Smckusick 		 */
22554300Smckusick 		handle = NULL;
22658598Shibler 		maxprot = VM_PROT_ALL;
22767182Shibler 		pos = 0;
22858598Shibler 	} else {
22945749Smckusick 		/*
23054300Smckusick 		 * Mapping file, get fp for validation.
23158878Smckusick 		 * Obtain vnode and make sure it is of appropriate type.
23245749Smckusick 		 */
233*68361Scgd 		if (((unsigned)SCARG(uap, fd)) >= fdp->fd_nfiles ||
234*68361Scgd 		    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
23558878Smckusick 			return (EBADF);
23645749Smckusick 		if (fp->f_type != DTYPE_VNODE)
23758878Smckusick 			return (EINVAL);
23845749Smckusick 		vp = (struct vnode *)fp->f_data;
23945749Smckusick 		if (vp->v_type != VREG && vp->v_type != VCHR)
24058878Smckusick 			return (EINVAL);
24145749Smckusick 		/*
24264854Shibler 		 * XXX hack to handle use of /dev/zero to map anon
24364854Shibler 		 * memory (ala SunOS).
24458598Shibler 		 */
24564854Shibler 		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
24664854Shibler 			handle = NULL;
24764854Shibler 			maxprot = VM_PROT_ALL;
24864854Shibler 			flags |= MAP_ANON;
24964854Shibler 		} else {
25064854Shibler 			/*
25164854Shibler 			 * Ensure that file and memory protections are
25264854Shibler 			 * compatible.  Note that we only worry about
25364854Shibler 			 * writability if mapping is shared; in this case,
25464854Shibler 			 * current and max prot are dictated by the open file.
25564854Shibler 			 * XXX use the vnode instead?  Problem is: what
25664854Shibler 			 * credentials do we use for determination?
25764854Shibler 			 * What if proc does a setuid?
25864854Shibler 			 */
25964854Shibler 			maxprot = VM_PROT_EXECUTE;	/* ??? */
26064854Shibler 			if (fp->f_flag & FREAD)
26164854Shibler 				maxprot |= VM_PROT_READ;
26264854Shibler 			else if (prot & PROT_READ)
26364854Shibler 				return (EACCES);
26464854Shibler 			if (flags & MAP_SHARED) {
26564854Shibler 				if (fp->f_flag & FWRITE)
26664854Shibler 					maxprot |= VM_PROT_WRITE;
26764854Shibler 				else if (prot & PROT_WRITE)
26864854Shibler 					return (EACCES);
26964854Shibler 			} else
27058878Smckusick 				maxprot |= VM_PROT_WRITE;
27164854Shibler 			handle = (caddr_t)vp;
27264854Shibler 		}
27354300Smckusick 	}
27458598Shibler 	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
27567243Shibler 	    flags, handle, pos);
27645749Smckusick 	if (error == 0)
277*68361Scgd 		*retval = (register_t)addr;
27858878Smckusick 	return (error);
27945749Smckusick }
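
/*
 * Illustrative sketch (not part of the original file): from user level
 * the handler above is typically reached as
 *
 *	fd = open(path, O_RDONLY);
 *	addr = mmap((caddr_t)0, len, PROT_READ, MAP_PRIVATE, fd, (off_t)0);
 *
 * with the chosen address handed back through *retval.
 */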

int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;
	int rv;
	boolean_t syncio, invalidate;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	if (((vm_offset_t)SCARG(uap, addr) & PAGE_MASK) ||
	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (!rv)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing address range [%x-%x)\n",
		       addr, addr+size);
#endif
	/*
	 * Could pass this in as a third flag argument to implement
	 * Sun's MS_ASYNC.
	 */
	syncio = TRUE;
	/*
	 * XXX bummer, gotta flush all cached pages to ensure
	 * consistency with the file system cache.  Otherwise, we could
	 * pass this in to implement Sun's MS_INVALIDATE.
	 */
	invalidate = TRUE;
	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}

int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif

	addr = (vm_offset_t) SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
		return(EINVAL);
	size = (vm_size_t) round_page(SCARG(uap, len));
	if (size == 0)
		return(0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 * XXX this seemed overly restrictive, so we relaxed it.
	 */
#if 0
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return(EINVAL);
#endif
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr+size);
	return(0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
#endif

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len), SCARG(uap, prot));
#endif

	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
		return(EINVAL);
	size = (vm_size_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) behav;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(char *) vec;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
	size = round_page((vm_size_t)SCARG(uap, len));
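	/*
	 * Two limits guard wiring: the system-wide cap vm_page_max_wired
	 * (checked in pages; atop() converts the byte count) and, where
	 * the pmap exports a wired count, the per-process RLIMIT_MEMLOCK
	 * limit (checked in bytes via ptoa()).  Lacking pmap_wired_count()
	 * we instead require super-user privilege.
	 */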
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
#ifndef pmap_wired_count
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	size = round_page((vm_size_t)SCARG(uap, len));

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
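/*
 * For example, the mmap() system call above arrives here as
 *
 *	vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
 *	    flags, handle, pos);
 *
 * with handle either NULL (anonymous memory) or the backing vnode.
 */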
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existence of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL;
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			       vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
						    fitit, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW: you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
					 addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
					     VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
						    TRUE, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that, we must guarantee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
					 FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
					      &tentry, &tobject, &toffset,
					      &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
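				/*
				 * (The lookup with VM_PROT_WRITE behaves
				 * like a write fault, which is what forces
				 * the shadow object into the map entry;
				 * the lookup results themselves are
				 * discarded.)
				 */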
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different than prot, we must set both explicitly.
	 */
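	/*
	 * (The final argument of vm_map_protect selects whether the
	 * maximum (TRUE) or current (FALSE) protection is changed.)
	 */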
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}