1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/atomic.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/policy.h>
#include <sys/ontrap.h>
#include <sys/vmsystm.h>
#include <sys/prsystm.h>

#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/seg_spt.h>
#include <vm/seg_kmem.h>

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

/*
 * Decide whether the page containing "addr" within segment "seg" is a
 * legitimate target for process I/O.  Returns 1 if the page is valid,
 * 0 if it should be rejected.  Each test below identifies a class of
 * address that is mapped but has no usable backing.
 */
static int
page_valid(struct seg *seg, caddr_t addr)
{
	struct segvn_data *svd;
	vnode_t *vp;
	vattr_t vattr;

	/*
	 * Fail if the page doesn't map to a page in the underlying
	 * mapped file, if an underlying mapped file exists.
	 * (The segment is segvn-backed by a regular file, but "addr"
	 * lies at an offset beyond the file's page-rounded size.)
	 */
	vattr.va_mask = AT_SIZE;
	if (seg->s_ops == &segvn_ops &&
	    SEGOP_GETVP(seg, addr, &vp) == 0 &&
	    vp != NULL && vp->v_type == VREG &&
	    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
		u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
		u_offset_t offset = SEGOP_GETOFFSET(seg, addr);

		if (offset >= size)
			return (0);
	}

	/*
	 * Fail if this is an ISM shared segment and the address is
	 * not within the real size of the spt segment that backs it.
	 */
	if (seg->s_ops == &segspt_shmops &&
	    addr >= seg->s_base + spt_realsize(seg))
		return (0);

	/*
	 * Fail if the segment is mapped from /dev/null.
	 * The key is that the mapping comes from segdev and the
	 * type is neither MAP_SHARED nor MAP_PRIVATE.
	 */
	if (seg->s_ops == &segdev_ops &&
	    ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
		return (0);

	/*
	 * Fail if the page is a MAP_NORESERVE page that has
	 * not actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	if (seg->s_ops == &segvn_ops &&
	    (svd = (struct segvn_data *)seg->s_data) != NULL &&
	    (svd->vp == NULL || svd->vp->v_type != VREG) &&
	    (svd->flags & MAP_NORESERVE)) {
		/*
		 * Guilty knowledge here.  We know that
		 * segvn_incore returns more than just the
		 * low-order bit that indicates the page is
		 * actually in memory.  If any bits are set,
		 * then there is backing store for the page.
		 */
		char incore = 0;
		(void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
		if (incore == 0)
			return (0);
	}
	return (1);
}

/*
 * Map address "addr" in address space "as" into a kernel virtual address.
 * The memory is guaranteed to be resident and locked down.
 * Returns a kernel virtual address carrying the same intra-page offset
 * as "addr"; undo the mapping with mapout().
 */
static caddr_t
mapin(struct as *as, caddr_t addr, int writing)
{
	page_t *pp;
	caddr_t kaddr;
	pfn_t pfnum;

	/*
	 * NB: Because of past mistakes, we have bits being returned
	 * by getpfnum that are actually the page type bits of the pte.
	 * When the object we are trying to map is a memory page with
	 * a page structure everything is ok and we can use the optimal
	 * method, ppmapin.  Otherwise, we have to do something special.
	 */
	pfnum = hat_getpfnum(as->a_hat, addr);
	if (pf_is_memory(pfnum)) {
		pp = page_numtopp_nolock(pfnum);
		if (pp != NULL) {
			/* caller must already hold the page locked */
			ASSERT(PAGE_LOCKED(pp));
			kaddr = ppmapin(pp, writing ?
			    (PROT_READ | PROT_WRITE) : PROT_READ,
			    (caddr_t)-1);
			return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
		}
	}

	/*
	 * Oh well, we didn't have a page struct for the object we were
	 * trying to map in; ppmapin doesn't handle devices, but allocating a
	 * heap address allows ppmapout to free virtual space when done.
	 */
	kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
	    writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);

	return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
}

/*
 * Undo a mapin(): release the kernel virtual mapping for "vaddr".
 * "vaddr" may point anywhere within the mapped page; it is truncated
 * to its page base before ppmapout.  "as", "addr" and "writing" are
 * unused here (present for symmetry with mapin).
 */
/*ARGSUSED*/
static void
mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
{
	vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
	ppmapout(vaddr);
}

/*
 * Perform I/O to a given process.  This will return EIO if we detect
 * corrupt memory and ENXIO if there is no such mapped address in the
 * user process's address space.
 */
static int
urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
{
	caddr_t addr = (caddr_t)a;	/* user address of interest */
	caddr_t page;			/* page-aligned base of addr */
	caddr_t vaddr;			/* kernel mapping of the page */
	struct seg *seg;
	int error = 0;
	int err = 0;
	uint_t prot;			/* original page protections */
	uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
	int protchanged;		/* must we restore prot on exit? */
	on_trap_data_t otd;
	int retrycnt;
	struct as *as = p->p_as;
	enum seg_rw rw;

	/*
	 * Locate segment containing address of interest.
	 */
	page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
	retrycnt = 0;
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
retry:
	if ((seg = as_segat(as, page)) == NULL ||
	    !page_valid(seg, page)) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	SEGOP_GETPROT(seg, page, 0, &prot);

	/*
	 * If the page lacks the needed permission, grant it temporarily;
	 * the original protections are restored before returning.
	 */
	protchanged = 0;
	if ((prot & prot_rw) == 0) {
		protchanged = 1;
		err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);

		/*
		 * IE_RETRY means the segment driver wants the lookup
		 * redone from scratch; the ASSERT documents the
		 * expectation that this happens at most once.
		 */
		if (err == IE_RETRY) {
			protchanged = 0;
			ASSERT(retrycnt == 0);
			retrycnt++;
			goto retry;
		}

		if (err != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENXIO);
		}
	}

	/*
	 * segvn may do a copy-on-write for F_SOFTLOCK/S_READ case to break
	 * sharing to avoid a copy on write of a softlocked page by another
	 * thread.  But since we locked the address space as a writer no other
	 * thread can cause a copy on write.  S_READ_NOCOW is passed as the
	 * access type to tell segvn that it's ok not to do a copy-on-write
	 * for this SOFTLOCK fault.
	 */
	if (writing)
		rw = S_WRITE;
	else if (seg->s_ops == &segvn_ops)
		rw = S_READ_NOCOW;
	else
		rw = S_READ;

	/* Lock the page down for the duration of the copy. */
	if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
		if (protchanged)
			(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	CPU_STATS_ADD_K(vm, softlock, 1);

	/*
	 * Make sure we're not trying to read or write off the end of the page.
	 */
	ASSERT(len <= page + PAGESIZE - addr);

	/*
	 * Map in the locked page, copy to our local buffer,
	 * then map the page out and unlock it.
	 */
	vaddr = mapin(as, addr, writing);

	/*
	 * Since we are copying memory on behalf of the user process,
	 * protect against memory error correction faults.
	 */
	if (!on_trap(&otd, OT_DATA_EC)) {
		if (seg->s_ops == &segdev_ops) {
			/*
			 * Device memory can behave strangely; invoke
			 * a segdev-specific copy operation instead.
			 */
			if (writing) {
				if (segdev_copyto(seg, addr, buf, vaddr, len))
					error = ENXIO;
			} else {
				if (segdev_copyfrom(seg, addr, vaddr, buf, len))
					error = ENXIO;
			}
		} else {
			if (writing)
				bcopy(buf, vaddr, len);
			else
				bcopy(vaddr, buf, len);
		}
	} else {
		/* an ECC (or similar) trap fired during the copy */
		error = EIO;
	}
	no_trap();

	/*
	 * If we're writing to an executable page, we may need to synchronize
	 * the I$ with the modifications we made through the D$.
	 */
	if (writing && (prot & PROT_EXEC))
		sync_icache(vaddr, (uint_t)len);

	mapout(as, addr, vaddr, writing);

	/* S_READ_NOCOW was only for the SOFTLOCK; unlock with plain S_READ */
	if (rw == S_READ_NOCOW)
		rw = S_READ;

	(void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);

	/* Put back the original protections if we loosened them above. */
	if (protchanged)
		(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);

	AS_LOCK_EXIT(as, &as->a_lock);

	return (error);
}

/*
 * Read "len" bytes at user address "a" in process "p" into "buf".
 * Thin wrapper around urw(); see urw() for error semantics.
 */
int
uread(proc_t *p, void *buf, size_t len, uintptr_t a)
{
	return (urw(p, 0, buf, len, a));
}

/*
 * Write "len" bytes from "buf" to user address "a" in process "p".
 * Thin wrapper around urw(); see urw() for error semantics.
 */
int
uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
{
	return (urw(p, 1, buf, len, a));
}