xref: /onnv-gate/usr/src/uts/common/os/urw.c (revision 5331:3047ad28a67b)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate 
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/*	  All Rights Reserved   */
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
300Sstevel@tonic-gate 
310Sstevel@tonic-gate #include <sys/atomic.h>
320Sstevel@tonic-gate #include <sys/errno.h>
330Sstevel@tonic-gate #include <sys/stat.h>
340Sstevel@tonic-gate #include <sys/modctl.h>
350Sstevel@tonic-gate #include <sys/conf.h>
360Sstevel@tonic-gate #include <sys/systm.h>
370Sstevel@tonic-gate #include <sys/ddi.h>
380Sstevel@tonic-gate #include <sys/sunddi.h>
390Sstevel@tonic-gate #include <sys/cpuvar.h>
400Sstevel@tonic-gate #include <sys/kmem.h>
410Sstevel@tonic-gate #include <sys/strsubr.h>
420Sstevel@tonic-gate #include <sys/sysmacros.h>
430Sstevel@tonic-gate #include <sys/frame.h>
440Sstevel@tonic-gate #include <sys/stack.h>
450Sstevel@tonic-gate #include <sys/proc.h>
460Sstevel@tonic-gate #include <sys/priv.h>
470Sstevel@tonic-gate #include <sys/policy.h>
480Sstevel@tonic-gate #include <sys/ontrap.h>
490Sstevel@tonic-gate #include <sys/vmsystm.h>
500Sstevel@tonic-gate #include <sys/prsystm.h>
510Sstevel@tonic-gate 
520Sstevel@tonic-gate #include <vm/as.h>
530Sstevel@tonic-gate #include <vm/seg.h>
540Sstevel@tonic-gate #include <vm/seg_dev.h>
550Sstevel@tonic-gate #include <vm/seg_vn.h>
560Sstevel@tonic-gate #include <vm/seg_spt.h>
570Sstevel@tonic-gate #include <vm/seg_kmem.h>
580Sstevel@tonic-gate 
/*
 * Segment driver operation vectors that this file compares against
 * seg->s_ops to recognize segdev and ISM (spt) shared-memory segments.
 */
extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */
610Sstevel@tonic-gate 
620Sstevel@tonic-gate static int
page_valid(struct seg * seg,caddr_t addr)630Sstevel@tonic-gate page_valid(struct seg *seg, caddr_t addr)
640Sstevel@tonic-gate {
650Sstevel@tonic-gate 	struct segvn_data *svd;
660Sstevel@tonic-gate 	vnode_t *vp;
670Sstevel@tonic-gate 	vattr_t vattr;
680Sstevel@tonic-gate 
690Sstevel@tonic-gate 	/*
700Sstevel@tonic-gate 	 * Fail if the page doesn't map to a page in the underlying
710Sstevel@tonic-gate 	 * mapped file, if an underlying mapped file exists.
720Sstevel@tonic-gate 	 */
730Sstevel@tonic-gate 	vattr.va_mask = AT_SIZE;
740Sstevel@tonic-gate 	if (seg->s_ops == &segvn_ops &&
750Sstevel@tonic-gate 	    SEGOP_GETVP(seg, addr, &vp) == 0 &&
760Sstevel@tonic-gate 	    vp != NULL && vp->v_type == VREG &&
77*5331Samw 	    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
780Sstevel@tonic-gate 		u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
790Sstevel@tonic-gate 		u_offset_t offset = SEGOP_GETOFFSET(seg, addr);
800Sstevel@tonic-gate 
810Sstevel@tonic-gate 		if (offset >= size)
820Sstevel@tonic-gate 			return (0);
830Sstevel@tonic-gate 	}
840Sstevel@tonic-gate 
850Sstevel@tonic-gate 	/*
860Sstevel@tonic-gate 	 * Fail if this is an ISM shared segment and the address is
870Sstevel@tonic-gate 	 * not within the real size of the spt segment that backs it.
880Sstevel@tonic-gate 	 */
890Sstevel@tonic-gate 	if (seg->s_ops == &segspt_shmops &&
900Sstevel@tonic-gate 	    addr >= seg->s_base + spt_realsize(seg))
910Sstevel@tonic-gate 		return (0);
920Sstevel@tonic-gate 
930Sstevel@tonic-gate 	/*
940Sstevel@tonic-gate 	 * Fail if the segment is mapped from /dev/null.
950Sstevel@tonic-gate 	 * The key is that the mapping comes from segdev and the
960Sstevel@tonic-gate 	 * type is neither MAP_SHARED nor MAP_PRIVATE.
970Sstevel@tonic-gate 	 */
980Sstevel@tonic-gate 	if (seg->s_ops == &segdev_ops &&
990Sstevel@tonic-gate 	    ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
1000Sstevel@tonic-gate 		return (0);
1010Sstevel@tonic-gate 
1020Sstevel@tonic-gate 	/*
1030Sstevel@tonic-gate 	 * Fail if the page is a MAP_NORESERVE page that has
1040Sstevel@tonic-gate 	 * not actually materialized.
1050Sstevel@tonic-gate 	 * We cheat by knowing that segvn is the only segment
1060Sstevel@tonic-gate 	 * driver that supports MAP_NORESERVE.
1070Sstevel@tonic-gate 	 */
1080Sstevel@tonic-gate 	if (seg->s_ops == &segvn_ops &&
1090Sstevel@tonic-gate 	    (svd = (struct segvn_data *)seg->s_data) != NULL &&
1100Sstevel@tonic-gate 	    (svd->vp == NULL || svd->vp->v_type != VREG) &&
1110Sstevel@tonic-gate 	    (svd->flags & MAP_NORESERVE)) {
1120Sstevel@tonic-gate 		/*
1130Sstevel@tonic-gate 		 * Guilty knowledge here.  We know that
1140Sstevel@tonic-gate 		 * segvn_incore returns more than just the
1150Sstevel@tonic-gate 		 * low-order bit that indicates the page is
1160Sstevel@tonic-gate 		 * actually in memory.  If any bits are set,
1170Sstevel@tonic-gate 		 * then there is backing store for the page.
1180Sstevel@tonic-gate 		 */
1190Sstevel@tonic-gate 		char incore = 0;
1200Sstevel@tonic-gate 		(void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
1210Sstevel@tonic-gate 		if (incore == 0)
1220Sstevel@tonic-gate 			return (0);
1230Sstevel@tonic-gate 	}
1240Sstevel@tonic-gate 	return (1);
1250Sstevel@tonic-gate }
1260Sstevel@tonic-gate 
1270Sstevel@tonic-gate /*
1280Sstevel@tonic-gate  * Map address "addr" in address space "as" into a kernel virtual address.
1290Sstevel@tonic-gate  * The memory is guaranteed to be resident and locked down.
1300Sstevel@tonic-gate  */
1310Sstevel@tonic-gate static caddr_t
mapin(struct as * as,caddr_t addr,int writing)1320Sstevel@tonic-gate mapin(struct as *as, caddr_t addr, int writing)
1330Sstevel@tonic-gate {
1340Sstevel@tonic-gate 	page_t *pp;
1350Sstevel@tonic-gate 	caddr_t kaddr;
1360Sstevel@tonic-gate 	pfn_t pfnum;
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate 	/*
1390Sstevel@tonic-gate 	 * NB: Because of past mistakes, we have bits being returned
1400Sstevel@tonic-gate 	 * by getpfnum that are actually the page type bits of the pte.
1410Sstevel@tonic-gate 	 * When the object we are trying to map is a memory page with
1420Sstevel@tonic-gate 	 * a page structure everything is ok and we can use the optimal
1430Sstevel@tonic-gate 	 * method, ppmapin.  Otherwise, we have to do something special.
1440Sstevel@tonic-gate 	 */
1450Sstevel@tonic-gate 	pfnum = hat_getpfnum(as->a_hat, addr);
1460Sstevel@tonic-gate 	if (pf_is_memory(pfnum)) {
1470Sstevel@tonic-gate 		pp = page_numtopp_nolock(pfnum);
1480Sstevel@tonic-gate 		if (pp != NULL) {
1490Sstevel@tonic-gate 			ASSERT(PAGE_LOCKED(pp));
1500Sstevel@tonic-gate 			kaddr = ppmapin(pp, writing ?
1510Sstevel@tonic-gate 				(PROT_READ | PROT_WRITE) : PROT_READ,
1520Sstevel@tonic-gate 				(caddr_t)-1);
1530Sstevel@tonic-gate 			return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
1540Sstevel@tonic-gate 		}
1550Sstevel@tonic-gate 	}
1560Sstevel@tonic-gate 
1570Sstevel@tonic-gate 	/*
1580Sstevel@tonic-gate 	 * Oh well, we didn't have a page struct for the object we were
1590Sstevel@tonic-gate 	 * trying to map in; ppmapin doesn't handle devices, but allocating a
1600Sstevel@tonic-gate 	 * heap address allows ppmapout to free virutal space when done.
1610Sstevel@tonic-gate 	 */
1620Sstevel@tonic-gate 	kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
1630Sstevel@tonic-gate 
1640Sstevel@tonic-gate 	hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
1650Sstevel@tonic-gate 		writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);
1660Sstevel@tonic-gate 
1670Sstevel@tonic-gate 	return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
1680Sstevel@tonic-gate }
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate /*ARGSUSED*/
1710Sstevel@tonic-gate static void
mapout(struct as * as,caddr_t addr,caddr_t vaddr,int writing)1720Sstevel@tonic-gate mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
1730Sstevel@tonic-gate {
1740Sstevel@tonic-gate 	vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
1750Sstevel@tonic-gate 	ppmapout(vaddr);
1760Sstevel@tonic-gate }
1770Sstevel@tonic-gate 
1780Sstevel@tonic-gate /*
179*5331Samw  * Perform I/O to a given process. This will return EIO if we detect
1800Sstevel@tonic-gate  * corrupt memory and ENXIO if there is no such mapped address in the
1810Sstevel@tonic-gate  * user process's address space.
1820Sstevel@tonic-gate  */
1830Sstevel@tonic-gate static int
urw(proc_t * p,int writing,void * buf,size_t len,uintptr_t a)1840Sstevel@tonic-gate urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
1850Sstevel@tonic-gate {
1860Sstevel@tonic-gate 	caddr_t addr = (caddr_t)a;
1870Sstevel@tonic-gate 	caddr_t page;
1880Sstevel@tonic-gate 	caddr_t vaddr;
1890Sstevel@tonic-gate 	struct seg *seg;
1900Sstevel@tonic-gate 	int error = 0;
1910Sstevel@tonic-gate 	int err = 0;
1920Sstevel@tonic-gate 	uint_t prot;
1930Sstevel@tonic-gate 	uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
1940Sstevel@tonic-gate 	int protchanged;
1950Sstevel@tonic-gate 	on_trap_data_t otd;
1960Sstevel@tonic-gate 	int retrycnt;
1970Sstevel@tonic-gate 	struct as *as = p->p_as;
1980Sstevel@tonic-gate 	enum seg_rw rw;
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate 	/*
2010Sstevel@tonic-gate 	 * Locate segment containing address of interest.
2020Sstevel@tonic-gate 	 */
2030Sstevel@tonic-gate 	page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
2040Sstevel@tonic-gate 	retrycnt = 0;
2050Sstevel@tonic-gate 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2060Sstevel@tonic-gate retry:
2070Sstevel@tonic-gate 	if ((seg = as_segat(as, page)) == NULL ||
2080Sstevel@tonic-gate 	    !page_valid(seg, page)) {
2090Sstevel@tonic-gate 		AS_LOCK_EXIT(as, &as->a_lock);
2100Sstevel@tonic-gate 		return (ENXIO);
2110Sstevel@tonic-gate 	}
2120Sstevel@tonic-gate 	SEGOP_GETPROT(seg, page, 0, &prot);
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate 	protchanged = 0;
2150Sstevel@tonic-gate 	if ((prot & prot_rw) == 0) {
2160Sstevel@tonic-gate 		protchanged = 1;
2170Sstevel@tonic-gate 		err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);
2180Sstevel@tonic-gate 
2190Sstevel@tonic-gate 		if (err == IE_RETRY) {
2200Sstevel@tonic-gate 			protchanged = 0;
2210Sstevel@tonic-gate 			ASSERT(retrycnt == 0);
2220Sstevel@tonic-gate 			retrycnt++;
2230Sstevel@tonic-gate 			goto retry;
2240Sstevel@tonic-gate 		}
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate 		if (err != 0) {
2270Sstevel@tonic-gate 			AS_LOCK_EXIT(as, &as->a_lock);
2280Sstevel@tonic-gate 			return (ENXIO);
2290Sstevel@tonic-gate 		}
2300Sstevel@tonic-gate 	}
2310Sstevel@tonic-gate 
2320Sstevel@tonic-gate 	/*
2330Sstevel@tonic-gate 	 * segvn may do a copy-on-write for F_SOFTLOCK/S_READ case to break
2340Sstevel@tonic-gate 	 * sharing to avoid a copy on write of a softlocked page by another
2350Sstevel@tonic-gate 	 * thread. But since we locked the address space as a writer no other
2360Sstevel@tonic-gate 	 * thread can cause a copy on write. S_READ_NOCOW is passed as the
2370Sstevel@tonic-gate 	 * access type to tell segvn that it's ok not to do a copy-on-write
2380Sstevel@tonic-gate 	 * for this SOFTLOCK fault.
2390Sstevel@tonic-gate 	 */
2400Sstevel@tonic-gate 	if (writing)
2410Sstevel@tonic-gate 		rw = S_WRITE;
2420Sstevel@tonic-gate 	else if (seg->s_ops == &segvn_ops)
2430Sstevel@tonic-gate 		rw = S_READ_NOCOW;
2440Sstevel@tonic-gate 	else
2450Sstevel@tonic-gate 		rw = S_READ;
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate 	if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
2480Sstevel@tonic-gate 		if (protchanged)
2490Sstevel@tonic-gate 			(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
2500Sstevel@tonic-gate 		AS_LOCK_EXIT(as, &as->a_lock);
2510Sstevel@tonic-gate 		return (ENXIO);
2520Sstevel@tonic-gate 	}
2530Sstevel@tonic-gate 	CPU_STATS_ADD_K(vm, softlock, 1);
2540Sstevel@tonic-gate 
2550Sstevel@tonic-gate 	/*
2560Sstevel@tonic-gate 	 * Make sure we're not trying to read or write off the end of the page.
2570Sstevel@tonic-gate 	 */
2580Sstevel@tonic-gate 	ASSERT(len <= page + PAGESIZE - addr);
2590Sstevel@tonic-gate 
2600Sstevel@tonic-gate 	/*
2610Sstevel@tonic-gate 	 * Map in the locked page, copy to our local buffer,
2620Sstevel@tonic-gate 	 * then map the page out and unlock it.
2630Sstevel@tonic-gate 	 */
2640Sstevel@tonic-gate 	vaddr = mapin(as, addr, writing);
2650Sstevel@tonic-gate 
2660Sstevel@tonic-gate 	/*
2670Sstevel@tonic-gate 	 * Since we are copying memory on behalf of the user process,
2680Sstevel@tonic-gate 	 * protect against memory error correction faults.
2690Sstevel@tonic-gate 	 */
2700Sstevel@tonic-gate 	if (!on_trap(&otd, OT_DATA_EC)) {
2710Sstevel@tonic-gate 		if (seg->s_ops == &segdev_ops) {
2720Sstevel@tonic-gate 			/*
2730Sstevel@tonic-gate 			 * Device memory can behave strangely; invoke
2740Sstevel@tonic-gate 			 * a segdev-specific copy operation instead.
2750Sstevel@tonic-gate 			 */
2760Sstevel@tonic-gate 			if (writing) {
2770Sstevel@tonic-gate 				if (segdev_copyto(seg, addr, buf, vaddr, len))
2780Sstevel@tonic-gate 					error = ENXIO;
2790Sstevel@tonic-gate 			} else {
2800Sstevel@tonic-gate 				if (segdev_copyfrom(seg, addr, vaddr, buf, len))
2810Sstevel@tonic-gate 					error = ENXIO;
2820Sstevel@tonic-gate 			}
2830Sstevel@tonic-gate 		} else {
2840Sstevel@tonic-gate 			if (writing)
2850Sstevel@tonic-gate 				bcopy(buf, vaddr, len);
2860Sstevel@tonic-gate 			else
2870Sstevel@tonic-gate 				bcopy(vaddr, buf, len);
2880Sstevel@tonic-gate 		}
2890Sstevel@tonic-gate 	} else {
2900Sstevel@tonic-gate 		error = EIO;
2910Sstevel@tonic-gate 	}
2920Sstevel@tonic-gate 	no_trap();
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate 	/*
2950Sstevel@tonic-gate 	 * If we're writing to an executable page, we may need to sychronize
2960Sstevel@tonic-gate 	 * the I$ with the modifications we made through the D$.
2970Sstevel@tonic-gate 	 */
2980Sstevel@tonic-gate 	if (writing && (prot & PROT_EXEC))
2990Sstevel@tonic-gate 		sync_icache(vaddr, (uint_t)len);
3000Sstevel@tonic-gate 
3010Sstevel@tonic-gate 	mapout(as, addr, vaddr, writing);
3020Sstevel@tonic-gate 
3030Sstevel@tonic-gate 	if (rw == S_READ_NOCOW)
3040Sstevel@tonic-gate 		rw = S_READ;
3050Sstevel@tonic-gate 
3060Sstevel@tonic-gate 	(void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);
3070Sstevel@tonic-gate 
3080Sstevel@tonic-gate 	if (protchanged)
3090Sstevel@tonic-gate 		(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
3100Sstevel@tonic-gate 
3110Sstevel@tonic-gate 	AS_LOCK_EXIT(as, &as->a_lock);
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	return (error);
3140Sstevel@tonic-gate }
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate int
uread(proc_t * p,void * buf,size_t len,uintptr_t a)3170Sstevel@tonic-gate uread(proc_t *p, void *buf, size_t len, uintptr_t a)
3180Sstevel@tonic-gate {
3190Sstevel@tonic-gate 	return (urw(p, 0, buf, len, a));
3200Sstevel@tonic-gate }
3210Sstevel@tonic-gate 
3220Sstevel@tonic-gate int
uwrite(proc_t * p,void * buf,size_t len,uintptr_t a)3230Sstevel@tonic-gate uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
3240Sstevel@tonic-gate {
3250Sstevel@tonic-gate 	return (urw(p, 1, buf, len, a));
3260Sstevel@tonic-gate }
327