/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * VM - shared or copy-on-write from a vnode/anonymous memory.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/debug.h>
#include <sys/cred.h>
#include <sys/vmsystm.h>
#include <sys/tuneable.h>
#include <sys/bitmap.h>
#include <sys/swap.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/vm.h>
#include <sys/dumphdr.h>
#include <sys/lgrp.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/pvn.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/vpage.h>

/*
 * Private seg op routines.
 */
static int segvn_dup(struct seg *seg, struct seg *newseg);
static int segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
static void segvn_free(struct seg *seg);
static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
    caddr_t addr, size_t len, enum fault_type type,
    enum seg_rw rw);
static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
static int segvn_setprot(struct seg *seg, caddr_t addr,
    size_t len, uint_t prot);
static int segvn_checkprot(struct seg *seg, caddr_t addr,
    size_t len, uint_t prot);
static int segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
static size_t segvn_swapout(struct seg *seg);
static int segvn_sync(struct seg *seg, caddr_t addr, size_t len,
    int attr, uint_t flags);
static size_t segvn_incore(struct seg *seg, caddr_t addr, size_t len,
    char *vec);
static int segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
    int attr, int op, ulong_t *lockmap, size_t pos);
static int segvn_getprot(struct seg *seg, caddr_t addr, size_t len,
    uint_t *protv);
static u_offset_t segvn_getoffset(struct seg *seg, caddr_t addr);
static int segvn_gettype(struct seg *seg, caddr_t addr);
static int segvn_getvp(struct seg *seg, caddr_t addr,
    struct vnode **vpp);
static int segvn_advise(struct seg *seg, caddr_t addr, size_t len,
    uint_t behav);
static void segvn_dump(struct seg *seg);
static int segvn_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp,
    enum lock_type type, enum seg_rw rw);
static int segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
    uint_t szc);
static int segvn_getmemid(struct seg *seg, caddr_t addr,
    memid_t *memidp);
static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t);

struct seg_ops segvn_ops = {
    segvn_dup,
    segvn_unmap,
    segvn_free,
    segvn_fault,
    segvn_faulta,
    segvn_setprot,
    segvn_checkprot,
    segvn_kluster,
    segvn_swapout,
    segvn_sync,
    segvn_incore,
    segvn_lockop,
    segvn_getprot,
    segvn_getoffset,
    segvn_gettype,
    segvn_getvp,
    segvn_advise,
    segvn_dump,
    segvn_pagelock,
    segvn_setpagesize,
    segvn_getmemid,
    segvn_getpolicy,
};

/*
 * Common zfod structures, provided as a shorthand for others to use.
 */
static segvn_crargs_t zfod_segvn_crargs =
    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
static segvn_crargs_t kzfod_segvn_crargs =
    SEGVN_ZFOD_ARGS(PROT_ZFOD & ~PROT_USER,
    PROT_ALL & ~PROT_USER);
static segvn_crargs_t stack_noexec_crargs =
    SEGVN_ZFOD_ARGS(PROT_ZFOD & ~PROT_EXEC, PROT_ALL);

caddr_t zfod_argsp = (caddr_t)&zfod_segvn_crargs;	/* user zfod argsp */
caddr_t kzfod_argsp = (caddr_t)&kzfod_segvn_crargs;	/* kernel zfod argsp */
caddr_t stack_exec_argsp = (caddr_t)&zfod_segvn_crargs;	/* executable stack */
caddr_t stack_noexec_argsp = (caddr_t)&stack_noexec_crargs; /* noexec stack */
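
/*
 * Illustrative note (added; not part of the original source): callers
 * typically hand one of the argsp pointers above to as_map() together with
 * segvn_create, e.g.
 *
 *     error = as_map(as, addr, len, segvn_create, zfod_argsp);
 *
 * which creates a zero-fill-on-demand segment with the protections encoded
 * in the corresponding segvn_crargs structure.
 */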

#define vpgtob(n) ((n) * sizeof (struct vpage))	/* For brevity */

size_t segvn_comb_thrshld = UINT_MAX;	/* patchable -- see 1196681 */

static int segvn_concat(struct seg *, struct seg *, int);
static int segvn_extend_prev(struct seg *, struct seg *,
    struct segvn_crargs *, size_t);
static int segvn_extend_next(struct seg *, struct seg *,
    struct segvn_crargs *, size_t);
static void segvn_softunlock(struct seg *, caddr_t, size_t, enum seg_rw);
static void segvn_pagelist_rele(page_t **);
static void segvn_setvnode_mpss(vnode_t *);
static void segvn_relocate_pages(page_t **, page_t *);
static int segvn_full_szcpages(page_t **, uint_t, int *, uint_t *);
static int segvn_fill_vp_pages(struct segvn_data *, vnode_t *, u_offset_t,
    uint_t, page_t **, page_t **, uint_t *, int *);
static faultcode_t segvn_fault_vnodepages(struct hat *, struct seg *, caddr_t,
    caddr_t, enum fault_type, enum seg_rw, caddr_t, caddr_t, int);
static faultcode_t segvn_fault_anonpages(struct hat *, struct seg *, caddr_t,
    caddr_t, enum fault_type, enum seg_rw, caddr_t, caddr_t, int);
static faultcode_t segvn_faultpage(struct hat *, struct seg *, caddr_t,
    u_offset_t, struct vpage *, page_t **, uint_t,
    enum fault_type, enum seg_rw, int);
static void segvn_vpage(struct seg *);

static void segvn_purge(struct seg *seg);
static int segvn_reclaim(struct seg *, caddr_t, size_t, struct page **,
    enum seg_rw);

static int sameprot(struct seg *, caddr_t, size_t);

static int segvn_demote_range(struct seg *, caddr_t, size_t, int);
static int segvn_clrszc(struct seg *);
static struct seg *segvn_split_seg(struct seg *, caddr_t);
static int segvn_claim_pages(struct seg *, struct vpage *, u_offset_t,
    ulong_t, uint_t);

static struct kmem_cache *segvn_cache;

#ifdef VM_STATS
static struct segvnvmstats_str {
    ulong_t fill_vp_pages[31];
    ulong_t fltvnpages[49];
    ulong_t fullszcpages[10];
    ulong_t relocatepages[3];
    ulong_t fltanpages[17];
    ulong_t pagelock[3];
    ulong_t demoterange[3];
} segvnvmstats;
#endif /* VM_STATS */

#define SDR_RANGE	1	/* demote entire range */
#define SDR_END		2	/* demote non aligned ends only */

#define CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr) {	\
    if ((len) != 0) {							\
        lpgaddr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);		\
        ASSERT(lpgaddr >= (seg)->s_base);				\
        lpgeaddr = (caddr_t)P2ROUNDUP((uintptr_t)((addr) +		\
            (len)), pgsz);						\
        ASSERT(lpgeaddr > lpgaddr);					\
        ASSERT(lpgeaddr <= (seg)->s_base + (seg)->s_size);		\
    } else {								\
        lpgeaddr = lpgaddr = (addr);					\
    }									\
}
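
/*
 * Worked example (illustrative only, added): with pgsz = 4M,
 * addr = 0x10405000 and len = 0x3000, CALC_LPG_REGION() yields
 * lpgaddr = 0x10400000 (addr rounded down to the large-page boundary) and
 * lpgeaddr = 0x10800000 (addr + len rounded up), i.e. the smallest
 * pgsz-aligned region covering [addr, addr + len).
 */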

/*ARGSUSED*/
static int
segvn_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
    struct segvn_data *svd = buf;

    rw_init(&svd->lock, NULL, RW_DEFAULT, NULL);
    mutex_init(&svd->segp_slock, NULL, MUTEX_DEFAULT, NULL);
    return (0);
}

/*ARGSUSED1*/
static void
segvn_cache_destructor(void *buf, void *cdrarg)
{
    struct segvn_data *svd = buf;

    rw_destroy(&svd->lock);
    mutex_destroy(&svd->segp_slock);
}

/*
 * Patching this variable to non-zero allows the system to run with
 * stacks marked as "not executable".  It's a bit of a kludge, but is
 * provided as a tweakable for platforms that export those ABIs
 * (e.g. sparc V8) that have executable stacks enabled by default.
 * There are also some restrictions for platforms that don't actually
 * implement 'noexec' protections.
 *
 * Once enabled, the system is (therefore) unable to provide a fully
 * ABI-compliant execution environment, though practically speaking,
 * most everything works.  The exceptions are generally some interpreters
 * and debuggers that create executable code on the stack and jump
 * into it (without explicitly mprotecting the address range to include
 * PROT_EXEC).
 *
 * One important class of applications that are disabled are those
 * that have been transformed into malicious agents using one of the
 * numerous "buffer overflow" attacks.  See 4007890.
 */
int noexec_user_stack = 0;
int noexec_user_stack_log = 1;
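
/*
 * A minimal sketch of how this tunable is usually set (assuming the
 * standard /etc/system mechanism; not part of the original source):
 *
 *     set noexec_user_stack = 1
 *     set noexec_user_stack_log = 0
 *
 * The second line suppresses the message otherwise logged when a program
 * attempts to execute code on its stack.
 */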

int segvn_lpg_disable = 0;
uint_t segvn_maxpgszc = 0;

ulong_t segvn_fltvnpages_clrszc_err;
ulong_t segvn_setpgsz_align_err;
ulong_t segvn_setpgsz_getattr_err;
ulong_t segvn_setpgsz_eof_err;
ulong_t segvn_faultvnmpss_align_err1;
ulong_t segvn_faultvnmpss_align_err2;
ulong_t segvn_faultvnmpss_align_err3;
ulong_t segvn_faultvnmpss_align_err4;
ulong_t segvn_faultvnmpss_align_err5;
ulong_t segvn_vmpss_pageio_deadlk_err;

/*
 * Initialize segvn data structures
 */
void
segvn_init(void)
{
    uint_t maxszc;
    uint_t szc;
    size_t pgsz;

    segvn_cache = kmem_cache_create("segvn_cache",
        sizeof (struct segvn_data), 0,
        segvn_cache_constructor, segvn_cache_destructor, NULL,
        NULL, NULL, 0);

    if (segvn_lpg_disable != 0)
        return;
    szc = maxszc = page_num_pagesizes() - 1;
    if (szc == 0) {
        segvn_lpg_disable = 1;
        return;
    }
    if (page_get_pagesize(0) != PAGESIZE) {
        panic("segvn_init: bad szc 0");
        /*NOTREACHED*/
    }
    while (szc != 0) {
        pgsz = page_get_pagesize(szc);
        if (pgsz <= PAGESIZE || !IS_P2ALIGNED(pgsz, pgsz)) {
            panic("segvn_init: bad szc %d", szc);
            /*NOTREACHED*/
        }
        szc--;
    }
    if (segvn_maxpgszc == 0 || segvn_maxpgszc > maxszc)
        segvn_maxpgszc = maxszc;
}

#define SEGVN_PAGEIO	((void *)0x1)
#define SEGVN_NOPAGEIO	((void *)0x2)
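
/*
 * Descriptive note (added): segvn_setvnode_mpss() probes whether the
 * filesystem backing vp can be used with VOP_PAGEIO() and caches the answer
 * in v_mpssdata for the life of the vnode.  The degenerate VOP_PAGEIO()
 * call below (NULL page list, zero length) appears to be a capability
 * probe: a filesystem that implements pageio rejects the bogus arguments
 * with EINVAL, while ENOSYS (or any other error) marks the vnode
 * SEGVN_NOPAGEIO.
 */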

static void
segvn_setvnode_mpss(vnode_t *vp)
{
    int err;

    ASSERT(vp->v_mpssdata == NULL ||
        vp->v_mpssdata == SEGVN_PAGEIO ||
        vp->v_mpssdata == SEGVN_NOPAGEIO);

    if (vp->v_mpssdata == NULL) {
        if (vn_vmpss_usepageio(vp)) {
            err = VOP_PAGEIO(vp, (page_t *)NULL,
                (u_offset_t)0, 0, 0, CRED());
        } else {
            err = ENOSYS;
        }
        /*
         * set v_mpssdata just once per vnode life
         * so that it never changes.
         */
        mutex_enter(&vp->v_lock);
        if (vp->v_mpssdata == NULL) {
            if (err == EINVAL) {
                vp->v_mpssdata = SEGVN_PAGEIO;
            } else {
                vp->v_mpssdata = SEGVN_NOPAGEIO;
            }
        }
        mutex_exit(&vp->v_lock);
    }
}

int
segvn_create(struct seg *seg, void *argsp)
{
    struct segvn_crargs *a = (struct segvn_crargs *)argsp;
    struct segvn_data *svd;
    size_t swresv = 0;
    struct cred *cred;
    struct anon_map *amp;
    int error = 0;
    size_t pgsz;
    lgrp_mem_policy_t mpolicy = LGRP_MEM_POLICY_DEFAULT;

    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

    if (a->type != MAP_PRIVATE && a->type != MAP_SHARED) {
        panic("segvn_create type");
        /*NOTREACHED*/
    }

    /*
     * Check arguments.  If a shared anon structure is given then
     * it is illegal to also specify a vp.
     */
    if (a->amp != NULL && a->vp != NULL) {
        panic("segvn_create anon_map");
        /*NOTREACHED*/
    }

    /* MAP_NORESERVE on a MAP_SHARED segment is meaningless. */
    if (a->type == MAP_SHARED)
        a->flags &= ~MAP_NORESERVE;

    if (a->szc != 0) {
        if (segvn_lpg_disable != 0 || a->amp != NULL ||
            (a->type == MAP_SHARED && a->vp == NULL) ||
            (a->flags & MAP_NORESERVE) || seg->s_as == &kas) {
            a->szc = 0;
        } else {
            if (a->szc > segvn_maxpgszc)
                a->szc = segvn_maxpgszc;
            pgsz = page_get_pagesize(a->szc);
            if (!IS_P2ALIGNED(seg->s_base, pgsz) ||
                !IS_P2ALIGNED(seg->s_size, pgsz)) {
                a->szc = 0;
            } else if (a->vp != NULL) {
                extern struct vnode kvp;
                if (IS_SWAPFSVP(a->vp) || a->vp == &kvp) {
                    /*
                     * paranoid check.
                     * hat_page_demote() is not supported
                     * on swapfs pages.
                     */
                    a->szc = 0;
                } else if (map_addr_vacalign_check(seg->s_base,
                    a->offset & PAGEMASK)) {
                    a->szc = 0;
                }
            }
        }
    }

    /*
     * If segment may need private pages, reserve them now.
     */
    if (!(a->flags & MAP_NORESERVE) && ((a->vp == NULL && a->amp == NULL) ||
        (a->type == MAP_PRIVATE && (a->prot & PROT_WRITE)))) {
        if (anon_resv(seg->s_size) == 0)
            return (EAGAIN);
        swresv = seg->s_size;
        TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u",
            seg, swresv, 1);
    }
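
    /*
     * Descriptive note (added): from this point swresv records how much
     * anon/swap space was reserved above, so the reservation can be
     * released again on the VOP_ADDMAP() failure path below or when the
     * segment is later unmapped.
     */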

    /*
     * Reserve any mapping structures that may be required.
     */
    hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);

    if (a->cred) {
        cred = a->cred;
        crhold(cred);
    } else {
        crhold(cred = CRED());
    }

    /* Inform the vnode of the new mapping */
    if (a->vp) {
        error = VOP_ADDMAP(a->vp, a->offset & PAGEMASK,
            seg->s_as, seg->s_base, seg->s_size, a->prot,
            a->maxprot, a->type, cred);
        if (error) {
            if (swresv != 0) {
                anon_unresv(swresv);
                TRACE_3(TR_FAC_VM, TR_ANON_PROC,
                    "anon proc:%p %lu %u",
                    seg, swresv, 0);
            }
            crfree(cred);
            hat_unload(seg->s_as->a_hat, seg->s_base,
                seg->s_size, HAT_UNLOAD_UNMAP);
            return (error);
        }
    }

    /*
     * If more than one segment in the address space, and
     * they're adjacent virtually, try to concatenate them.
     * Don't concatenate if an explicit anon_map structure
     * was supplied (e.g., SystemV shared memory).
     */
    if (a->amp == NULL) {
        struct seg *pseg, *nseg;
        struct segvn_data *psvd, *nsvd;
        lgrp_mem_policy_t ppolicy, npolicy;
        uint_t lgrp_mem_policy_flags = 0;
        extern lgrp_mem_policy_t lgrp_mem_default_policy;

        /*
         * Memory policy flags (lgrp_mem_policy_flags) is valid when
         * extending stack/heap segments.
         */
        if ((a->vp == NULL) && (a->type == MAP_PRIVATE) &&
            !(a->flags & MAP_NORESERVE) && (seg->s_as != &kas)) {
            lgrp_mem_policy_flags = a->lgrp_mem_policy_flags;
        } else {
            /*
             * Get policy when not extending it from another segment
             */
            mpolicy = lgrp_mem_policy_default(seg->s_size, a->type);
        }

        /*
         * First, try to concatenate the previous and new segments
         */
        pseg = AS_SEGPREV(seg->s_as, seg);
        if (pseg != NULL &&
            pseg->s_base + pseg->s_size == seg->s_base &&
            pseg->s_ops == &segvn_ops) {
            /*
             * Get memory allocation policy from previous segment.
             * When extension is specified (e.g. for heap) apply
             * this policy to the new segment regardless of the
             * outcome of segment concatenation.  Extension occurs
             * for non-default policy otherwise default policy is
             * used and is based on extended segment size.
             */
            psvd = (struct segvn_data *)pseg->s_data;
            ppolicy = psvd->policy_info.mem_policy;
            if (lgrp_mem_policy_flags ==
                LGRP_MP_FLAG_EXTEND_UP) {
                if (ppolicy != lgrp_mem_default_policy) {
                    mpolicy = ppolicy;
                } else {
                    mpolicy = lgrp_mem_policy_default(
                        pseg->s_size + seg->s_size,
                        a->type);
                }
            }

            if (mpolicy == ppolicy &&
                (pseg->s_size + seg->s_size <=
                segvn_comb_thrshld || psvd->amp == NULL) &&
                segvn_extend_prev(pseg, seg, a, swresv) == 0) {
                /*
                 * success! now try to concatenate
                 * with following seg
                 */
                crfree(cred);
                nseg = AS_SEGNEXT(pseg->s_as, pseg);
                if (nseg != NULL &&
                    nseg != pseg &&
                    nseg->s_ops == &segvn_ops &&
                    pseg->s_base + pseg->s_size ==
                    nseg->s_base)
                    (void) segvn_concat(pseg, nseg, 0);
                ASSERT(pseg->s_szc == 0 ||
                    (a->szc == pseg->s_szc &&
                    IS_P2ALIGNED(pseg->s_base, pgsz) &&
                    IS_P2ALIGNED(pseg->s_size, pgsz)));
                return (0);
            }
        }

        /*
         * Failed, so try to concatenate with following seg
         */
        nseg = AS_SEGNEXT(seg->s_as, seg);
        if (nseg != NULL &&
            seg->s_base + seg->s_size == nseg->s_base &&
            nseg->s_ops == &segvn_ops) {
            /*
             * Get memory allocation policy from next segment.
             * When extension is specified (e.g. for stack) apply
             * this policy to the new segment regardless of the
             * outcome of segment concatenation.  Extension occurs
             * for non-default policy otherwise default policy is
             * used and is based on extended segment size.
             */
            nsvd = (struct segvn_data *)nseg->s_data;
            npolicy = nsvd->policy_info.mem_policy;
            if (lgrp_mem_policy_flags ==
                LGRP_MP_FLAG_EXTEND_DOWN) {
                if (npolicy != lgrp_mem_default_policy) {
                    mpolicy = npolicy;
                } else {
                    mpolicy = lgrp_mem_policy_default(
                        nseg->s_size + seg->s_size,
                        a->type);
                }
            }

            if (mpolicy == npolicy &&
                segvn_extend_next(seg, nseg, a, swresv) == 0) {
                crfree(cred);
                ASSERT(nseg->s_szc == 0 ||
                    (a->szc == nseg->s_szc &&
                    IS_P2ALIGNED(nseg->s_base, pgsz) &&
                    IS_P2ALIGNED(nseg->s_size, pgsz)));
                return (0);
            }
        }
    }

    if (a->vp != NULL) {
        VN_HOLD(a->vp);
        if (a->type == MAP_SHARED)
            lgrp_shm_policy_init(NULL, a->vp);
    }
    svd = kmem_cache_alloc(segvn_cache, KM_SLEEP);

    seg->s_ops = &segvn_ops;
    seg->s_data = (void *)svd;
    seg->s_szc = a->szc;

    svd->vp = a->vp;
    /*
     * Anonymous mappings have no backing file so the offset is meaningless.
     */
    svd->offset = a->vp ? (a->offset & PAGEMASK) : 0;
    svd->prot = a->prot;
    svd->maxprot = a->maxprot;
    svd->pageprot = 0;
    svd->type = a->type;
    svd->vpage = NULL;
    svd->cred = cred;
    svd->advice = MADV_NORMAL;
    svd->pageadvice = 0;
    svd->flags = (ushort_t)a->flags;
    svd->softlockcnt = 0;
    if (a->szc != 0 && a->vp != NULL) {
        segvn_setvnode_mpss(a->vp);
    }

    amp = a->amp;
    if ((svd->amp = amp) == NULL) {
        svd->anon_index = 0;
        if (svd->type == MAP_SHARED) {
            svd->swresv = 0;
            /*
             * Shared mappings to a vp need no other setup.
             * If we have a shared mapping to an anon_map object
             * which hasn't been allocated yet, allocate the
             * struct now so that it will be properly shared
             * by remembering the swap reservation there.
             */
            if (a->vp == NULL) {
                svd->amp = anonmap_alloc(seg->s_size, swresv);
                svd->amp->a_szc = seg->s_szc;
            }
        } else {
            /*
             * Private mapping (with or without a vp).
             * Allocate anon_map when needed.
             */
            svd->swresv = swresv;
        }
    } else {
        pgcnt_t anon_num;

        /*
         * Mapping to an existing anon_map structure without a vp.
         * For now we will ensure that the segment size isn't larger
         * than the size - offset gives us.  Later on we may wish to
         * have the anon array dynamically allocated itself so that
         * we don't always have to allocate all the anon pointer slots.
         * This of course involves adding extra code to check that we
         * aren't trying to use an anon pointer slot beyond the end
         * of the currently allocated anon array.
         */
        if ((amp->size - a->offset) < seg->s_size) {
            panic("segvn_create anon_map size");
            /*NOTREACHED*/
        }

        anon_num = btopr(a->offset);

        if (a->type == MAP_SHARED) {
            /*
             * SHARED mapping to a given anon_map.
             */
            ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
            amp->refcnt++;
            ANON_LOCK_EXIT(&amp->a_rwlock);
            svd->anon_index = anon_num;
            svd->swresv = 0;
        } else {
            /*
             * PRIVATE mapping to a given anon_map.
             * Make sure that all the needed anon
             * structures are created (so that we will
             * share the underlying pages if nothing
             * is written by this mapping) and then
             * duplicate the anon array as is done
             * when a privately mapped segment is dup'ed.
             */
            struct anon *ap;
            caddr_t addr;
            caddr_t eaddr;
            ulong_t anon_idx;
            int hat_flag = HAT_LOAD;

            if (svd->flags & MAP_TEXT) {
                hat_flag |= HAT_LOAD_TEXT;
            }

            svd->amp = anonmap_alloc(seg->s_size, 0);
            svd->amp->a_szc = seg->s_szc;
            svd->anon_index = 0;
            svd->swresv = swresv;

            /*
             * Prevent 2 threads from allocating anon
             * slots simultaneously.
             */
            ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
            eaddr = seg->s_base + seg->s_size;

            for (anon_idx = anon_num, addr = seg->s_base;
                addr < eaddr; addr += PAGESIZE, anon_idx++) {
                page_t *pp;

                if ((ap = anon_get_ptr(amp->ahp,
                    anon_idx)) != NULL)
                    continue;

                /*
                 * Allocate the anon struct now.
                 * Might as well load up translation
                 * to the page while we're at it...
                 */
                pp = anon_zero(seg, addr, &ap, cred);
                if (ap == NULL || pp == NULL) {
                    panic("segvn_create anon_zero");
                    /*NOTREACHED*/
                }

                /*
                 * Re-acquire the anon_map lock and
                 * initialize the anon array entry.
                 */
                ASSERT(anon_get_ptr(amp->ahp,
                    anon_idx) == NULL);
                (void) anon_set_ptr(amp->ahp, anon_idx, ap,
                    ANON_SLEEP);

                ASSERT(seg->s_szc == 0);
                ASSERT(!IS_VMODSORT(pp->p_vnode));

                hat_memload(seg->s_as->a_hat, addr, pp,
                    svd->prot & ~PROT_WRITE, hat_flag);

                page_unlock(pp);
            }
            ASSERT(seg->s_szc == 0);
            anon_dup(amp->ahp, anon_num, svd->amp->ahp,
                0, seg->s_size);
            ANON_LOCK_EXIT(&amp->a_rwlock);
        }
    }

    /*
     * Set default memory allocation policy for segment
     *
     * Always set policy for private memory at least for initialization
     * even if this is a shared memory segment
     */
    (void) lgrp_privm_policy_set(mpolicy, &svd->policy_info, seg->s_size);

    if (svd->type == MAP_SHARED)
        (void) lgrp_shm_policy_set(mpolicy, svd->amp, svd->anon_index,
            svd->vp, svd->offset, seg->s_size);

    return (0);
}

/*
 * Concatenate two existing segments, if possible.
 * Return 0 on success, -1 if two segments are not compatible
 * or -2 on memory allocation failure.
 * If private == 1 then try and concat segments with private pages.
 */
static int
segvn_concat(struct seg *seg1, struct seg *seg2, int private)
{
    struct segvn_data *svd1 = seg1->s_data;
    struct segvn_data *svd2 = seg2->s_data;
    struct anon_map *amp1 = svd1->amp;
    struct anon_map *amp2 = svd2->amp;
    struct vpage *vpage1 = svd1->vpage;
    struct vpage *vpage2 = svd2->vpage, *nvpage = NULL;
    size_t size, nvpsize;
    pgcnt_t npages1, npages2;

    ASSERT(seg1->s_as && seg2->s_as && seg1->s_as == seg2->s_as);
    ASSERT(AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock));
    ASSERT(seg1->s_ops == seg2->s_ops);

    /* both segments exist, try to merge them */
#define incompat(x)	(svd1->x != svd2->x)
    if (incompat(vp) || incompat(maxprot) ||
        (!svd1->pageadvice && !svd2->pageadvice && incompat(advice)) ||
        (!svd1->pageprot && !svd2->pageprot && incompat(prot)) ||
        incompat(type) || incompat(cred) || incompat(flags) ||
        seg1->s_szc != seg2->s_szc || incompat(policy_info.mem_policy) ||
        (svd2->softlockcnt > 0))
        return (-1);
#undef incompat

    /*
     * vp == NULL implies zfod, offset doesn't matter
     */
    if (svd1->vp != NULL &&
        svd1->offset + seg1->s_size != svd2->offset) {
        return (-1);
    }

    /*
     * Fail early if we're not supposed to concatenate
     * private pages.
     */
    if ((private == 0 || svd1->type != MAP_PRIVATE) &&
        (amp1 != NULL || amp2 != NULL)) {
        return (-1);
    }

    /*
     * If either seg has vpages, create a new merged vpage array.
     */
    if (vpage1 != NULL || vpage2 != NULL) {
        struct vpage *vp;

        npages1 = seg_pages(seg1);
        npages2 = seg_pages(seg2);
        nvpsize = vpgtob(npages1 + npages2);

        if ((nvpage = kmem_zalloc(nvpsize, KM_NOSLEEP)) == NULL) {
            return (-2);
        }
        if (vpage1 != NULL) {
            bcopy(vpage1, nvpage, vpgtob(npages1));
        }
        if (vpage2 != NULL) {
            bcopy(vpage2, nvpage + npages1, vpgtob(npages2));
        }
        for (vp = nvpage; vp < nvpage + npages1; vp++) {
            if (svd2->pageprot && !svd1->pageprot) {
                VPP_SETPROT(vp, svd1->prot);
            }
            if (svd2->pageadvice && !svd1->pageadvice) {
                VPP_SETADVICE(vp, svd1->advice);
            }
        }
        for (vp = nvpage + npages1;
            vp < nvpage + npages1 + npages2; vp++) {
            if (svd1->pageprot && !svd2->pageprot) {
                VPP_SETPROT(vp, svd2->prot);
            }
            if (svd1->pageadvice && !svd2->pageadvice) {
                VPP_SETADVICE(vp, svd2->advice);
            }
        }
    }

    /*
     * If either segment has private pages, create a new merged anon
     * array.
     */
    if (amp1 != NULL || amp2 != NULL) {
        struct anon_hdr *nahp;
        struct anon_map *namp = NULL;
        size_t asize = seg1->s_size + seg2->s_size;

        if ((nahp = anon_create(btop(asize), ANON_NOSLEEP)) == NULL) {
            if (nvpage != NULL) {
                kmem_free(nvpage, nvpsize);
            }
            return (-2);
        }
        if (amp1 != NULL) {
            /*
             * XXX anon rwlock is not really needed because
             * this is a private segment and we are writers.
             */
            ANON_LOCK_ENTER(&amp1->a_rwlock, RW_WRITER);
            ASSERT(amp1->refcnt == 1);
            if (anon_copy_ptr(amp1->ahp, svd1->anon_index,
                nahp, 0, btop(seg1->s_size), ANON_NOSLEEP)) {
                anon_release(nahp, btop(asize));
                ANON_LOCK_EXIT(&amp1->a_rwlock);
                if (nvpage != NULL) {
                    kmem_free(nvpage, nvpsize);
                }
                return (-2);
            }
        }
        if (amp2 != NULL) {
            ANON_LOCK_ENTER(&amp2->a_rwlock, RW_WRITER);
            ASSERT(amp2->refcnt == 1);
            if (anon_copy_ptr(amp2->ahp, svd2->anon_index,
                nahp, btop(seg1->s_size), btop(seg2->s_size),
                ANON_NOSLEEP)) {
                anon_release(nahp, btop(asize));
                ANON_LOCK_EXIT(&amp2->a_rwlock);
                if (amp1 != NULL) {
                    ANON_LOCK_EXIT(&amp1->a_rwlock);
                }
                if (nvpage != NULL) {
                    kmem_free(nvpage, nvpsize);
                }
                return (-2);
            }
        }
        if (amp1 != NULL) {
            namp = amp1;
            anon_release(amp1->ahp, btop(amp1->size));
        }
        if (amp2 != NULL) {
            if (namp == NULL) {
                ASSERT(amp1 == NULL);
                namp = amp2;
                anon_release(amp2->ahp, btop(amp2->size));
            } else {
                amp2->refcnt--;
                ANON_LOCK_EXIT(&amp2->a_rwlock);
                anonmap_free(amp2);
            }
            svd2->amp = NULL; /* needed for seg_free */
        }
        namp->ahp = nahp;
        namp->size = asize;
        svd1->amp = namp;
        svd1->anon_index = 0;
        ANON_LOCK_EXIT(&namp->a_rwlock);
    }
    /*
     * Now free the old vpage structures.
     */
    if (nvpage != NULL) {
        if (vpage1 != NULL) {
            kmem_free(vpage1, vpgtob(npages1));
        }
        if (vpage2 != NULL) {
            svd2->vpage = NULL;
            kmem_free(vpage2, vpgtob(npages2));
        }
        if (svd2->pageprot) {
            svd1->pageprot = 1;
        }
        if (svd2->pageadvice) {
            svd1->pageadvice = 1;
        }
        svd1->vpage = nvpage;
    }

    /* all looks ok, merge segments */
    svd1->swresv += svd2->swresv;
    svd2->swresv = 0;	/* so seg_free doesn't release swap space */
    size = seg2->s_size;
    seg_free(seg2);
    seg1->s_size += size;
    return (0);
}

/*
 * Extend the previous segment (seg1) to include the
 * new segment (seg2 + a), if possible.
 * Return 0 on success.
 */
static int
segvn_extend_prev(struct seg *seg1, struct seg *seg2, struct segvn_crargs *a,
    size_t swresv)
{
    struct segvn_data *svd1 = (struct segvn_data *)seg1->s_data;
    size_t size;
    struct anon_map *amp1;
    struct vpage *new_vpage;

    /*
     * We don't need any segment level locks for "segvn" data
     * since the address space is "write" locked.
     */
    ASSERT(seg1->s_as && AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock));

    /* second segment is new, try to extend first */
    /* XXX - should also check cred */
    if (svd1->vp != a->vp || svd1->maxprot != a->maxprot ||
        (!svd1->pageprot && (svd1->prot != a->prot)) ||
        svd1->type != a->type || svd1->flags != a->flags ||
        seg1->s_szc != a->szc)
        return (-1);

    /* vp == NULL implies zfod, offset doesn't matter */
    if (svd1->vp != NULL &&
        svd1->offset + seg1->s_size != (a->offset & PAGEMASK))
        return (-1);

    amp1 = svd1->amp;
    if (amp1) {
        pgcnt_t newpgs;

        /*
         * Segment has private pages, can data structures
         * be expanded?
         *
         * Acquire the anon_map lock to prevent it from changing,
         * if it is shared.  This ensures that the anon_map
         * will not change while a thread which has a read/write
         * lock on an address space references it.
         * XXX - Don't need the anon_map lock at all if "refcnt"
         * is 1.
         *
         * Can't grow a MAP_SHARED segment with an anonmap because
         * there may be existing anon slots where we want to extend
         * the segment and we wouldn't know what to do with them
         * (e.g., for tmpfs right thing is to just leave them there,
         * for /dev/zero they should be cleared out).
         */
        if (svd1->type == MAP_SHARED)
            return (-1);

        ANON_LOCK_ENTER(&amp1->a_rwlock, RW_WRITER);
        if (amp1->refcnt > 1) {
            ANON_LOCK_EXIT(&amp1->a_rwlock);
            return (-1);
        }
        newpgs = anon_grow(amp1->ahp, &svd1->anon_index,
            btop(seg1->s_size), btop(seg2->s_size), ANON_NOSLEEP);

        if (newpgs == 0) {
            ANON_LOCK_EXIT(&amp1->a_rwlock);
            return (-1);
        }
        amp1->size = ptob(newpgs);
        ANON_LOCK_EXIT(&amp1->a_rwlock);
    }
    if (svd1->vpage != NULL) {
        new_vpage =
            kmem_zalloc(vpgtob(seg_pages(seg1) + seg_pages(seg2)),
            KM_NOSLEEP);
        if (new_vpage == NULL)
            return (-1);
        bcopy(svd1->vpage, new_vpage, vpgtob(seg_pages(seg1)));
        kmem_free(svd1->vpage, vpgtob(seg_pages(seg1)));
        svd1->vpage = new_vpage;
        if (svd1->pageprot) {
            struct vpage *vp, *evp;

            vp = new_vpage + seg_pages(seg1);
            evp = vp + seg_pages(seg2);
            for (; vp < evp; vp++)
                VPP_SETPROT(vp, a->prot);
        }
    }
    size = seg2->s_size;
    seg_free(seg2);
    seg1->s_size += size;
    svd1->swresv += swresv;
    return (0);
}

/*
 * Extend the next segment (seg2) to include the
 * new segment (seg1 + a), if possible.
 * Return 0 on success.
 */
static int
segvn_extend_next(
    struct seg *seg1,
    struct seg *seg2,
    struct segvn_crargs *a,
    size_t swresv)
{
    struct segvn_data *svd2 = (struct segvn_data *)seg2->s_data;
    size_t size;
    struct anon_map *amp2;
    struct vpage *new_vpage;

    /*
     * We don't need any segment level locks for "segvn" data
     * since the address space is "write" locked.
     */
    ASSERT(seg2->s_as && AS_WRITE_HELD(seg2->s_as, &seg2->s_as->a_lock));

    /* first segment is new, try to extend second */
    /* XXX - should also check cred */
    if (svd2->vp != a->vp || svd2->maxprot != a->maxprot ||
        (!svd2->pageprot && (svd2->prot != a->prot)) ||
        svd2->type != a->type || svd2->flags != a->flags ||
        seg2->s_szc != a->szc)
        return (-1);
    /* vp == NULL implies zfod, offset doesn't matter */
    if (svd2->vp != NULL &&
        (a->offset & PAGEMASK) + seg1->s_size != svd2->offset)
        return (-1);

    amp2 = svd2->amp;
    if (amp2) {
        pgcnt_t newpgs;

        /*
         * Segment has private pages, can data structures
         * be expanded?
         *
         * Acquire the anon_map lock to prevent it from changing,
         * if it is shared.  This ensures that the anon_map
         * will not change while a thread which has a read/write
         * lock on an address space references it.
         *
         * XXX - Don't need the anon_map lock at all if "refcnt"
         * is 1.
         */
        if (svd2->type == MAP_SHARED)
            return (-1);

        ANON_LOCK_ENTER(&amp2->a_rwlock, RW_WRITER);
        if (amp2->refcnt > 1) {
            ANON_LOCK_EXIT(&amp2->a_rwlock);
            return (-1);
        }
        newpgs = anon_grow(amp2->ahp, &svd2->anon_index,
            btop(seg2->s_size), btop(seg1->s_size),
            ANON_NOSLEEP | ANON_GROWDOWN);

        if (newpgs == 0) {
            ANON_LOCK_EXIT(&amp2->a_rwlock);
            return (-1);
        }
        amp2->size = ptob(newpgs);
        ANON_LOCK_EXIT(&amp2->a_rwlock);
    }
    if (svd2->vpage != NULL) {
        new_vpage =
            kmem_zalloc(vpgtob(seg_pages(seg1) + seg_pages(seg2)),
            KM_NOSLEEP);
        if (new_vpage == NULL) {
            /* Not merging segments so adjust anon_index back */
            if (amp2)
                svd2->anon_index += seg_pages(seg1);
            return (-1);
        }
        bcopy(svd2->vpage, new_vpage + seg_pages(seg1),
            vpgtob(seg_pages(seg2)));
        kmem_free(svd2->vpage, vpgtob(seg_pages(seg2)));
        svd2->vpage = new_vpage;
        if (svd2->pageprot) {
            struct vpage *vp, *evp;

            vp = new_vpage;
            evp = vp + seg_pages(seg1);
            for (; vp < evp; vp++)
                VPP_SETPROT(vp, a->prot);
        }
    }
    size = seg1->s_size;
    seg_free(seg1);
    seg2->s_size += size;
    seg2->s_base -= size;
    svd2->offset -= size;
    svd2->swresv += swresv;
    return (0);
}

static int
segvn_dup(struct seg *seg, struct seg *newseg)
{
    struct segvn_data *svd = (struct segvn_data *)seg->s_data;
    struct segvn_data *newsvd;
    pgcnt_t npages = seg_pages(seg);
    int error = 0;
    uint_t prot;
    size_t len;

    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

    /*
     * If segment has anon reserved, reserve more for the new seg.
     * For a MAP_NORESERVE segment swresv will be a count of all the
     * allocated anon slots; thus we reserve for the child as many slots
     * as the parent has allocated.  This semantic prevents the child or
     * parent from dying during a copy-on-write fault caused by trying
     * to write a shared pre-existing anon page.
     */
    if ((len = svd->swresv) != 0) {
        if (anon_resv(svd->swresv) == 0)
            return (ENOMEM);

        TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u",
            seg, len, 0);
    }

    newsvd = kmem_cache_alloc(segvn_cache, KM_SLEEP);

    newseg->s_ops = &segvn_ops;
    newseg->s_data = (void *)newsvd;
    newseg->s_szc = seg->s_szc;

    if ((newsvd->vp = svd->vp) != NULL) {
        VN_HOLD(svd->vp);
        if (svd->type == MAP_SHARED)
            lgrp_shm_policy_init(NULL, svd->vp);
    }
    newsvd->offset = svd->offset;
    newsvd->prot = svd->prot;
    newsvd->maxprot = svd->maxprot;
    newsvd->pageprot = svd->pageprot;
    newsvd->type = svd->type;
    newsvd->cred = svd->cred;
    crhold(newsvd->cred);
    newsvd->advice = svd->advice;
    newsvd->pageadvice = svd->pageadvice;
    newsvd->swresv = svd->swresv;
    newsvd->flags = svd->flags;
    newsvd->softlockcnt = 0;
    newsvd->policy_info = svd->policy_info;
    if ((newsvd->amp = svd->amp) == NULL) {
        /*
         * Not attaching to a shared anon object.
1192*0Sstevel@tonic-gate */ 1193*0Sstevel@tonic-gate newsvd->anon_index = 0; 1194*0Sstevel@tonic-gate } else { 1195*0Sstevel@tonic-gate struct anon_map *amp; 1196*0Sstevel@tonic-gate 1197*0Sstevel@tonic-gate amp = svd->amp; 1198*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) { 1199*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); 1200*0Sstevel@tonic-gate amp->refcnt++; 1201*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 1202*0Sstevel@tonic-gate newsvd->anon_index = svd->anon_index; 1203*0Sstevel@tonic-gate } else { 1204*0Sstevel@tonic-gate int reclaim = 1; 1205*0Sstevel@tonic-gate 1206*0Sstevel@tonic-gate /* 1207*0Sstevel@tonic-gate * Allocate and initialize new anon_map structure. 1208*0Sstevel@tonic-gate */ 1209*0Sstevel@tonic-gate newsvd->amp = anonmap_alloc(newseg->s_size, 0); 1210*0Sstevel@tonic-gate newsvd->amp->a_szc = newseg->s_szc; 1211*0Sstevel@tonic-gate newsvd->anon_index = 0; 1212*0Sstevel@tonic-gate 1213*0Sstevel@tonic-gate /* 1214*0Sstevel@tonic-gate * We don't have to acquire the anon_map lock 1215*0Sstevel@tonic-gate * for the new segment (since it belongs to an 1216*0Sstevel@tonic-gate * address space that is still not associated 1217*0Sstevel@tonic-gate * with any process), or the segment in the old 1218*0Sstevel@tonic-gate * address space (since all threads in it 1219*0Sstevel@tonic-gate * are stopped while duplicating the address space). 1220*0Sstevel@tonic-gate */ 1221*0Sstevel@tonic-gate 1222*0Sstevel@tonic-gate /* 1223*0Sstevel@tonic-gate * The goal of the following code is to make sure that 1224*0Sstevel@tonic-gate * softlocked pages do not end up as copy on write 1225*0Sstevel@tonic-gate * pages. This would cause problems where one 1226*0Sstevel@tonic-gate * thread writes to a page that is COW and a different 1227*0Sstevel@tonic-gate * thread in the same process has softlocked it. The 1228*0Sstevel@tonic-gate * softlock lock would move away from this process 1229*0Sstevel@tonic-gate * because the write would cause this process to get 1230*0Sstevel@tonic-gate * a copy (without the softlock). 1231*0Sstevel@tonic-gate * 1232*0Sstevel@tonic-gate * The strategy here is to just break the 1233*0Sstevel@tonic-gate * sharing on pages that could possibly be 1234*0Sstevel@tonic-gate * softlocked. 1235*0Sstevel@tonic-gate */ 1236*0Sstevel@tonic-gate retry: 1237*0Sstevel@tonic-gate if (svd->softlockcnt) { 1238*0Sstevel@tonic-gate struct anon *ap, *newap; 1239*0Sstevel@tonic-gate size_t i; 1240*0Sstevel@tonic-gate uint_t vpprot; 1241*0Sstevel@tonic-gate page_t *anon_pl[1+1], *pp; 1242*0Sstevel@tonic-gate caddr_t addr; 1243*0Sstevel@tonic-gate ulong_t anon_idx = 0; 1244*0Sstevel@tonic-gate 1245*0Sstevel@tonic-gate /* 1246*0Sstevel@tonic-gate * The softlock count might be non zero 1247*0Sstevel@tonic-gate * because some pages are still stuck in the 1248*0Sstevel@tonic-gate * cache for lazy reclaim. Flush the cache 1249*0Sstevel@tonic-gate * now. This should drop the count to zero. 1250*0Sstevel@tonic-gate * [or there is really I/O going on to these 1251*0Sstevel@tonic-gate * pages]. Note, we have the writers lock so 1252*0Sstevel@tonic-gate * nothing gets inserted during the flush.
1253*0Sstevel@tonic-gate */ 1254*0Sstevel@tonic-gate if (reclaim == 1) { 1255*0Sstevel@tonic-gate segvn_purge(seg); 1256*0Sstevel@tonic-gate reclaim = 0; 1257*0Sstevel@tonic-gate goto retry; 1258*0Sstevel@tonic-gate } 1259*0Sstevel@tonic-gate i = btopr(seg->s_size); 1260*0Sstevel@tonic-gate addr = seg->s_base; 1261*0Sstevel@tonic-gate /* 1262*0Sstevel@tonic-gate * XXX break cow sharing using PAGESIZE 1263*0Sstevel@tonic-gate * pages. They will be relocated into larger 1264*0Sstevel@tonic-gate * pages at fault time. 1265*0Sstevel@tonic-gate */ 1266*0Sstevel@tonic-gate while (i-- > 0) { 1267*0Sstevel@tonic-gate if (ap = anon_get_ptr(amp->ahp, 1268*0Sstevel@tonic-gate anon_idx)) { 1269*0Sstevel@tonic-gate error = anon_getpage(&ap, 1270*0Sstevel@tonic-gate &vpprot, anon_pl, PAGESIZE, 1271*0Sstevel@tonic-gate seg, addr, S_READ, 1272*0Sstevel@tonic-gate svd->cred); 1273*0Sstevel@tonic-gate if (error) { 1274*0Sstevel@tonic-gate newsvd->vpage = NULL; 1275*0Sstevel@tonic-gate goto out; 1276*0Sstevel@tonic-gate } 1277*0Sstevel@tonic-gate /* 1278*0Sstevel@tonic-gate * prot need not be computed 1279*0Sstevel@tonic-gate * below 'cause anon_private is 1280*0Sstevel@tonic-gate * going to ignore it anyway 1281*0Sstevel@tonic-gate * as child doesn't inherit 1282*0Sstevel@tonic-gate * pagelock from parent. 1283*0Sstevel@tonic-gate */ 1284*0Sstevel@tonic-gate prot = svd->pageprot ? 1285*0Sstevel@tonic-gate VPP_PROT( 1286*0Sstevel@tonic-gate &svd->vpage[ 1287*0Sstevel@tonic-gate seg_page(seg, addr)]) 1288*0Sstevel@tonic-gate : svd->prot; 1289*0Sstevel@tonic-gate pp = anon_private(&newap, 1290*0Sstevel@tonic-gate newseg, addr, prot, 1291*0Sstevel@tonic-gate anon_pl[0], 0, 1292*0Sstevel@tonic-gate newsvd->cred); 1293*0Sstevel@tonic-gate if (pp == NULL) { 1294*0Sstevel@tonic-gate /* no mem abort */ 1295*0Sstevel@tonic-gate newsvd->vpage = NULL; 1296*0Sstevel@tonic-gate error = ENOMEM; 1297*0Sstevel@tonic-gate goto out; 1298*0Sstevel@tonic-gate } 1299*0Sstevel@tonic-gate (void) anon_set_ptr( 1300*0Sstevel@tonic-gate newsvd->amp->ahp, anon_idx, 1301*0Sstevel@tonic-gate newap, ANON_SLEEP); 1302*0Sstevel@tonic-gate page_unlock(pp); 1303*0Sstevel@tonic-gate } 1304*0Sstevel@tonic-gate addr += PAGESIZE; 1305*0Sstevel@tonic-gate anon_idx++; 1306*0Sstevel@tonic-gate } 1307*0Sstevel@tonic-gate } else { /* common case */ 1308*0Sstevel@tonic-gate if (seg->s_szc != 0) { 1309*0Sstevel@tonic-gate /* 1310*0Sstevel@tonic-gate * If at least one of anon slots of a 1311*0Sstevel@tonic-gate * large page exists then make sure 1312*0Sstevel@tonic-gate * all anon slots of a large page 1313*0Sstevel@tonic-gate * exist to avoid partial cow sharing 1314*0Sstevel@tonic-gate * of a large page in the future. 1315*0Sstevel@tonic-gate */ 1316*0Sstevel@tonic-gate anon_dup_fill_holes(amp->ahp, 1317*0Sstevel@tonic-gate svd->anon_index, newsvd->amp->ahp, 1318*0Sstevel@tonic-gate 0, seg->s_size, seg->s_szc, 1319*0Sstevel@tonic-gate svd->vp != NULL); 1320*0Sstevel@tonic-gate } else { 1321*0Sstevel@tonic-gate anon_dup(amp->ahp, svd->anon_index, 1322*0Sstevel@tonic-gate newsvd->amp->ahp, 0, seg->s_size); 1323*0Sstevel@tonic-gate } 1324*0Sstevel@tonic-gate 1325*0Sstevel@tonic-gate hat_clrattr(seg->s_as->a_hat, seg->s_base, 1326*0Sstevel@tonic-gate seg->s_size, PROT_WRITE); 1327*0Sstevel@tonic-gate } 1328*0Sstevel@tonic-gate } 1329*0Sstevel@tonic-gate } 1330*0Sstevel@tonic-gate /* 1331*0Sstevel@tonic-gate * If necessary, create a vpage structure for the new segment. 1332*0Sstevel@tonic-gate * Do not copy any page lock indications. 
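 *
 * (Rationale, restated: page_pp_lock() claims recorded in the
 * PPLOCK bit belong to the parent's mapping and are not inherited
 * across segvn_dup(), so each copied entry has that bit cleared
 * with VPP_CLRPPLOCK() in the loop below.)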
1333*0Sstevel@tonic-gate */ 1334*0Sstevel@tonic-gate if (svd->vpage != NULL) { 1335*0Sstevel@tonic-gate uint_t i; 1336*0Sstevel@tonic-gate struct vpage *ovp = svd->vpage; 1337*0Sstevel@tonic-gate struct vpage *nvp; 1338*0Sstevel@tonic-gate 1339*0Sstevel@tonic-gate nvp = newsvd->vpage = 1340*0Sstevel@tonic-gate kmem_alloc(vpgtob(npages), KM_SLEEP); 1341*0Sstevel@tonic-gate for (i = 0; i < npages; i++) { 1342*0Sstevel@tonic-gate *nvp = *ovp++; 1343*0Sstevel@tonic-gate VPP_CLRPPLOCK(nvp++); 1344*0Sstevel@tonic-gate } 1345*0Sstevel@tonic-gate } else 1346*0Sstevel@tonic-gate newsvd->vpage = NULL; 1347*0Sstevel@tonic-gate 1348*0Sstevel@tonic-gate /* Inform the vnode of the new mapping */ 1349*0Sstevel@tonic-gate if (newsvd->vp != NULL) { 1350*0Sstevel@tonic-gate error = VOP_ADDMAP(newsvd->vp, (offset_t)newsvd->offset, 1351*0Sstevel@tonic-gate newseg->s_as, newseg->s_base, newseg->s_size, newsvd->prot, 1352*0Sstevel@tonic-gate newsvd->maxprot, newsvd->type, newsvd->cred); 1353*0Sstevel@tonic-gate } 1354*0Sstevel@tonic-gate out: 1355*0Sstevel@tonic-gate return (error); 1356*0Sstevel@tonic-gate } 1357*0Sstevel@tonic-gate 1358*0Sstevel@tonic-gate 1359*0Sstevel@tonic-gate /* 1360*0Sstevel@tonic-gate * callback function used by segvn_unmap to invoke free_vp_pages() for only 1361*0Sstevel@tonic-gate * those pages actually processed by the HAT 1362*0Sstevel@tonic-gate */ 1363*0Sstevel@tonic-gate extern int free_pages; 1364*0Sstevel@tonic-gate 1365*0Sstevel@tonic-gate static void 1366*0Sstevel@tonic-gate segvn_hat_unload_callback(hat_callback_t *cb) 1367*0Sstevel@tonic-gate { 1368*0Sstevel@tonic-gate struct seg *seg = cb->hcb_data; 1369*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 1370*0Sstevel@tonic-gate size_t len; 1371*0Sstevel@tonic-gate u_offset_t off; 1372*0Sstevel@tonic-gate 1373*0Sstevel@tonic-gate ASSERT(svd->vp != NULL); 1374*0Sstevel@tonic-gate ASSERT(cb->hcb_end_addr > cb->hcb_start_addr); 1375*0Sstevel@tonic-gate ASSERT(cb->hcb_start_addr >= seg->s_base); 1376*0Sstevel@tonic-gate 1377*0Sstevel@tonic-gate len = cb->hcb_end_addr - cb->hcb_start_addr; 1378*0Sstevel@tonic-gate off = cb->hcb_start_addr - seg->s_base; 1379*0Sstevel@tonic-gate free_vp_pages(svd->vp, svd->offset + off, len); 1380*0Sstevel@tonic-gate } 1381*0Sstevel@tonic-gate 1382*0Sstevel@tonic-gate 1383*0Sstevel@tonic-gate static int 1384*0Sstevel@tonic-gate segvn_unmap(struct seg *seg, caddr_t addr, size_t len) 1385*0Sstevel@tonic-gate { 1386*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 1387*0Sstevel@tonic-gate struct segvn_data *nsvd; 1388*0Sstevel@tonic-gate struct seg *nseg; 1389*0Sstevel@tonic-gate struct anon_map *amp; 1390*0Sstevel@tonic-gate pgcnt_t opages; /* old segment size in pages */ 1391*0Sstevel@tonic-gate pgcnt_t npages; /* new segment size in pages */ 1392*0Sstevel@tonic-gate pgcnt_t dpages; /* pages being deleted (unmapped) */ 1393*0Sstevel@tonic-gate hat_callback_t callback; /* used for free_vp_pages() */ 1394*0Sstevel@tonic-gate hat_callback_t *cbp = NULL; 1395*0Sstevel@tonic-gate caddr_t nbase; 1396*0Sstevel@tonic-gate size_t nsize; 1397*0Sstevel@tonic-gate size_t oswresv; 1398*0Sstevel@tonic-gate int reclaim = 1; 1399*0Sstevel@tonic-gate 1400*0Sstevel@tonic-gate /* 1401*0Sstevel@tonic-gate * We don't need any segment level locks for "segvn" data 1402*0Sstevel@tonic-gate * since the address space is "write" locked. 
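 *
 * (Informally: this is why svd fields are read and modified in this
 * routine without taking SEGVN_LOCK; fault paths that could race
 * with the unmap hold the same address space lock as readers, so
 * they are excluded while the "write" lock is held.)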
1403*0Sstevel@tonic-gate */ 1404*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 1405*0Sstevel@tonic-gate 1406*0Sstevel@tonic-gate /* 1407*0Sstevel@tonic-gate * Fail the unmap if pages are SOFTLOCKed through this mapping. 1408*0Sstevel@tonic-gate * softlockcnt is protected from change by the as write lock. 1409*0Sstevel@tonic-gate */ 1410*0Sstevel@tonic-gate retry: 1411*0Sstevel@tonic-gate if (svd->softlockcnt > 0) { 1412*0Sstevel@tonic-gate /* 1413*0Sstevel@tonic-gate * since we do have the writers lock nobody can fill 1414*0Sstevel@tonic-gate * the cache during the purge. The flush either succeeds 1415*0Sstevel@tonic-gate * or we still have pending I/Os. 1416*0Sstevel@tonic-gate */ 1417*0Sstevel@tonic-gate if (reclaim == 1) { 1418*0Sstevel@tonic-gate segvn_purge(seg); 1419*0Sstevel@tonic-gate reclaim = 0; 1420*0Sstevel@tonic-gate goto retry; 1421*0Sstevel@tonic-gate } 1422*0Sstevel@tonic-gate return (EAGAIN); 1423*0Sstevel@tonic-gate } 1424*0Sstevel@tonic-gate 1425*0Sstevel@tonic-gate /* 1426*0Sstevel@tonic-gate * Check for bad sizes 1427*0Sstevel@tonic-gate */ 1428*0Sstevel@tonic-gate if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size || 1429*0Sstevel@tonic-gate (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET)) { 1430*0Sstevel@tonic-gate panic("segvn_unmap"); 1431*0Sstevel@tonic-gate /*NOTREACHED*/ 1432*0Sstevel@tonic-gate } 1433*0Sstevel@tonic-gate 1434*0Sstevel@tonic-gate if (seg->s_szc != 0) { 1435*0Sstevel@tonic-gate size_t pgsz = page_get_pagesize(seg->s_szc); 1436*0Sstevel@tonic-gate int err; 1437*0Sstevel@tonic-gate if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) { 1438*0Sstevel@tonic-gate ASSERT(seg->s_base != addr || seg->s_size != len); 1439*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.demoterange[0]); 1440*0Sstevel@tonic-gate err = segvn_demote_range(seg, addr, len, SDR_END); 1441*0Sstevel@tonic-gate if (err == 0) { 1442*0Sstevel@tonic-gate return (IE_RETRY); 1443*0Sstevel@tonic-gate } 1444*0Sstevel@tonic-gate return (err); 1445*0Sstevel@tonic-gate } 1446*0Sstevel@tonic-gate } 1447*0Sstevel@tonic-gate 1448*0Sstevel@tonic-gate /* Inform the vnode of the unmapping. */ 1449*0Sstevel@tonic-gate if (svd->vp) { 1450*0Sstevel@tonic-gate int error; 1451*0Sstevel@tonic-gate 1452*0Sstevel@tonic-gate error = VOP_DELMAP(svd->vp, 1453*0Sstevel@tonic-gate (offset_t)svd->offset + (uintptr_t)(addr - seg->s_base), 1454*0Sstevel@tonic-gate seg->s_as, addr, len, svd->prot, svd->maxprot, 1455*0Sstevel@tonic-gate svd->type, svd->cred); 1456*0Sstevel@tonic-gate 1457*0Sstevel@tonic-gate if (error == EAGAIN) 1458*0Sstevel@tonic-gate return (error); 1459*0Sstevel@tonic-gate } 1460*0Sstevel@tonic-gate /* 1461*0Sstevel@tonic-gate * Remove any page locks set through this mapping. 1462*0Sstevel@tonic-gate */ 1463*0Sstevel@tonic-gate (void) segvn_lockop(seg, addr, len, 0, MC_UNLOCK, NULL, 0); 1464*0Sstevel@tonic-gate 1465*0Sstevel@tonic-gate /* 1466*0Sstevel@tonic-gate * Unload any hardware translations in the range to be taken out. 1467*0Sstevel@tonic-gate * Use a callback to invoke free_vp_pages() effectively. 
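 *
 * (The callback is only installed when this segment maps a vnode
 * and free_pages is enabled; segvn_hat_unload_callback() above then
 * calls free_vp_pages() just for the subranges the HAT reports it
 * actually had mapped, rather than for the whole [addr, addr + len)
 * range.)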
1468*0Sstevel@tonic-gate */ 1469*0Sstevel@tonic-gate if (svd->vp != NULL && free_pages != 0) { 1470*0Sstevel@tonic-gate callback.hcb_data = seg; 1471*0Sstevel@tonic-gate callback.hcb_function = segvn_hat_unload_callback; 1472*0Sstevel@tonic-gate cbp = &callback; 1473*0Sstevel@tonic-gate } 1474*0Sstevel@tonic-gate hat_unload_callback(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP, cbp); 1475*0Sstevel@tonic-gate 1476*0Sstevel@tonic-gate /* 1477*0Sstevel@tonic-gate * Check for entire segment 1478*0Sstevel@tonic-gate */ 1479*0Sstevel@tonic-gate if (addr == seg->s_base && len == seg->s_size) { 1480*0Sstevel@tonic-gate seg_free(seg); 1481*0Sstevel@tonic-gate return (0); 1482*0Sstevel@tonic-gate } 1483*0Sstevel@tonic-gate 1484*0Sstevel@tonic-gate opages = seg_pages(seg); 1485*0Sstevel@tonic-gate dpages = btop(len); 1486*0Sstevel@tonic-gate npages = opages - dpages; 1487*0Sstevel@tonic-gate amp = svd->amp; 1488*0Sstevel@tonic-gate 1489*0Sstevel@tonic-gate /* 1490*0Sstevel@tonic-gate * Check for beginning of segment 1491*0Sstevel@tonic-gate */ 1492*0Sstevel@tonic-gate if (addr == seg->s_base) { 1493*0Sstevel@tonic-gate if (svd->vpage != NULL) { 1494*0Sstevel@tonic-gate size_t nbytes; 1495*0Sstevel@tonic-gate struct vpage *ovpage; 1496*0Sstevel@tonic-gate 1497*0Sstevel@tonic-gate ovpage = svd->vpage; /* keep pointer to vpage */ 1498*0Sstevel@tonic-gate 1499*0Sstevel@tonic-gate nbytes = vpgtob(npages); 1500*0Sstevel@tonic-gate svd->vpage = kmem_alloc(nbytes, KM_SLEEP); 1501*0Sstevel@tonic-gate bcopy(&ovpage[dpages], svd->vpage, nbytes); 1502*0Sstevel@tonic-gate 1503*0Sstevel@tonic-gate /* free up old vpage */ 1504*0Sstevel@tonic-gate kmem_free(ovpage, vpgtob(opages)); 1505*0Sstevel@tonic-gate } 1506*0Sstevel@tonic-gate if (amp != NULL) { 1507*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); 1508*0Sstevel@tonic-gate if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) { 1509*0Sstevel@tonic-gate /* 1510*0Sstevel@tonic-gate * Free up now unused parts of anon_map array.
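 *
 * Worked example (illustrative numbers only): unmapping the first
 * 3 pages of an 8-page private segment frees anon slots
 * [anon_index, anon_index + 2] here; svd->anon_index is then
 * advanced by dpages (3) below, so the surviving 5 pages keep
 * their existing slots.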
1511*0Sstevel@tonic-gate */ 1512*0Sstevel@tonic-gate if (seg->s_szc != 0) { 1513*0Sstevel@tonic-gate anon_free_pages(amp->ahp, 1514*0Sstevel@tonic-gate svd->anon_index, len, seg->s_szc); 1515*0Sstevel@tonic-gate } else { 1516*0Sstevel@tonic-gate anon_free(amp->ahp, svd->anon_index, 1517*0Sstevel@tonic-gate len); 1518*0Sstevel@tonic-gate } 1519*0Sstevel@tonic-gate 1520*0Sstevel@tonic-gate /* 1521*0Sstevel@tonic-gate * Unreserve swap space for the unmapped chunk 1522*0Sstevel@tonic-gate * of this segment in case it's MAP_SHARED 1523*0Sstevel@tonic-gate */ 1524*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) { 1525*0Sstevel@tonic-gate anon_unresv(len); 1526*0Sstevel@tonic-gate amp->swresv -= len; 1527*0Sstevel@tonic-gate } 1528*0Sstevel@tonic-gate } 1529*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 1530*0Sstevel@tonic-gate svd->anon_index += dpages; 1531*0Sstevel@tonic-gate } 1532*0Sstevel@tonic-gate if (svd->vp != NULL) 1533*0Sstevel@tonic-gate svd->offset += len; 1534*0Sstevel@tonic-gate 1535*0Sstevel@tonic-gate if (svd->swresv) { 1536*0Sstevel@tonic-gate if (svd->flags & MAP_NORESERVE) { 1537*0Sstevel@tonic-gate ASSERT(amp); 1538*0Sstevel@tonic-gate oswresv = svd->swresv; 1539*0Sstevel@tonic-gate 1540*0Sstevel@tonic-gate svd->swresv = ptob(anon_pages(amp->ahp, 1541*0Sstevel@tonic-gate svd->anon_index, npages)); 1542*0Sstevel@tonic-gate anon_unresv(oswresv - svd->swresv); 1543*0Sstevel@tonic-gate } else { 1544*0Sstevel@tonic-gate anon_unresv(len); 1545*0Sstevel@tonic-gate svd->swresv -= len; 1546*0Sstevel@tonic-gate } 1547*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u", 1548*0Sstevel@tonic-gate seg, len, 0); 1549*0Sstevel@tonic-gate } 1550*0Sstevel@tonic-gate 1551*0Sstevel@tonic-gate seg->s_base += len; 1552*0Sstevel@tonic-gate seg->s_size -= len; 1553*0Sstevel@tonic-gate return (0); 1554*0Sstevel@tonic-gate } 1555*0Sstevel@tonic-gate 1556*0Sstevel@tonic-gate /* 1557*0Sstevel@tonic-gate * Check for end of segment 1558*0Sstevel@tonic-gate */ 1559*0Sstevel@tonic-gate if (addr + len == seg->s_base + seg->s_size) { 1560*0Sstevel@tonic-gate if (svd->vpage != NULL) { 1561*0Sstevel@tonic-gate size_t nbytes; 1562*0Sstevel@tonic-gate struct vpage *ovpage; 1563*0Sstevel@tonic-gate 1564*0Sstevel@tonic-gate ovpage = svd->vpage; /* keep pointer to vpage */ 1565*0Sstevel@tonic-gate 1566*0Sstevel@tonic-gate nbytes = vpgtob(npages); 1567*0Sstevel@tonic-gate svd->vpage = kmem_alloc(nbytes, KM_SLEEP); 1568*0Sstevel@tonic-gate bcopy(ovpage, svd->vpage, nbytes); 1569*0Sstevel@tonic-gate 1570*0Sstevel@tonic-gate /* free up old vpage */ 1571*0Sstevel@tonic-gate kmem_free(ovpage, vpgtob(opages)); 1572*0Sstevel@tonic-gate 1573*0Sstevel@tonic-gate } 1574*0Sstevel@tonic-gate if (amp != NULL) { 1575*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); 1576*0Sstevel@tonic-gate if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) { 1577*0Sstevel@tonic-gate /* 1578*0Sstevel@tonic-gate * Free up now unused parts of anon_map array 1579*0Sstevel@tonic-gate */ 1580*0Sstevel@tonic-gate if (seg->s_szc != 0) { 1581*0Sstevel@tonic-gate ulong_t an_idx = svd->anon_index + 1582*0Sstevel@tonic-gate npages; 1583*0Sstevel@tonic-gate anon_free_pages(amp->ahp, an_idx, 1584*0Sstevel@tonic-gate len, seg->s_szc); 1585*0Sstevel@tonic-gate } else { 1586*0Sstevel@tonic-gate anon_free(amp->ahp, 1587*0Sstevel@tonic-gate svd->anon_index + npages, len); 1588*0Sstevel@tonic-gate } 1589*0Sstevel@tonic-gate /* 1590*0Sstevel@tonic-gate * Unreserve swap space for the unmapped chunk 1591*0Sstevel@tonic-gate * of
this segment in case it's MAP_SHARED 1592*0Sstevel@tonic-gate */ 1593*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) { 1594*0Sstevel@tonic-gate anon_unresv(len); 1595*0Sstevel@tonic-gate amp->swresv -= len; 1596*0Sstevel@tonic-gate } 1597*0Sstevel@tonic-gate } 1598*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 1599*0Sstevel@tonic-gate } 1600*0Sstevel@tonic-gate 1601*0Sstevel@tonic-gate if (svd->swresv) { 1602*0Sstevel@tonic-gate if (svd->flags & MAP_NORESERVE) { 1603*0Sstevel@tonic-gate ASSERT(amp); 1604*0Sstevel@tonic-gate oswresv = svd->swresv; 1605*0Sstevel@tonic-gate svd->swresv = ptob(anon_pages(amp->ahp, 1606*0Sstevel@tonic-gate svd->anon_index, npages)); 1607*0Sstevel@tonic-gate anon_unresv(oswresv - svd->swresv); 1608*0Sstevel@tonic-gate } else { 1609*0Sstevel@tonic-gate anon_unresv(len); 1610*0Sstevel@tonic-gate svd->swresv -= len; 1611*0Sstevel@tonic-gate } 1612*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_ANON_PROC, 1613*0Sstevel@tonic-gate "anon proc:%p %lu %u", seg, len, 0); 1614*0Sstevel@tonic-gate } 1615*0Sstevel@tonic-gate 1616*0Sstevel@tonic-gate seg->s_size -= len; 1617*0Sstevel@tonic-gate return (0); 1618*0Sstevel@tonic-gate } 1619*0Sstevel@tonic-gate 1620*0Sstevel@tonic-gate /* 1621*0Sstevel@tonic-gate * The section to go is in the middle of the segment, 1622*0Sstevel@tonic-gate * have to make it into two segments. nseg is made for 1623*0Sstevel@tonic-gate * the high end while seg is cut down at the low end. 1624*0Sstevel@tonic-gate */ 1625*0Sstevel@tonic-gate nbase = addr + len; /* new seg base */ 1626*0Sstevel@tonic-gate nsize = (seg->s_base + seg->s_size) - nbase; /* new seg size */ 1627*0Sstevel@tonic-gate seg->s_size = addr - seg->s_base; /* shrink old seg */ 1628*0Sstevel@tonic-gate nseg = seg_alloc(seg->s_as, nbase, nsize); 1629*0Sstevel@tonic-gate if (nseg == NULL) { 1630*0Sstevel@tonic-gate panic("segvn_unmap seg_alloc"); 1631*0Sstevel@tonic-gate /*NOTREACHED*/ 1632*0Sstevel@tonic-gate } 1633*0Sstevel@tonic-gate nseg->s_ops = seg->s_ops; 1634*0Sstevel@tonic-gate nsvd = kmem_cache_alloc(segvn_cache, KM_SLEEP); 1635*0Sstevel@tonic-gate nseg->s_data = (void *)nsvd; 1636*0Sstevel@tonic-gate nseg->s_szc = seg->s_szc; 1637*0Sstevel@tonic-gate *nsvd = *svd; 1638*0Sstevel@tonic-gate nsvd->offset = svd->offset + (uintptr_t)(nseg->s_base - seg->s_base); 1639*0Sstevel@tonic-gate nsvd->swresv = 0; 1640*0Sstevel@tonic-gate nsvd->softlockcnt = 0; 1641*0Sstevel@tonic-gate 1642*0Sstevel@tonic-gate if (svd->vp != NULL) { 1643*0Sstevel@tonic-gate VN_HOLD(nsvd->vp); 1644*0Sstevel@tonic-gate if (nsvd->type == MAP_SHARED) 1645*0Sstevel@tonic-gate lgrp_shm_policy_init(NULL, nsvd->vp); 1646*0Sstevel@tonic-gate } 1647*0Sstevel@tonic-gate crhold(svd->cred); 1648*0Sstevel@tonic-gate 1649*0Sstevel@tonic-gate if (svd->vpage == NULL) { 1650*0Sstevel@tonic-gate nsvd->vpage = NULL; 1651*0Sstevel@tonic-gate } else { 1652*0Sstevel@tonic-gate /* need to split vpage into two arrays */ 1653*0Sstevel@tonic-gate size_t nbytes; 1654*0Sstevel@tonic-gate struct vpage *ovpage; 1655*0Sstevel@tonic-gate 1656*0Sstevel@tonic-gate ovpage = svd->vpage; /* keep pointer to vpage */ 1657*0Sstevel@tonic-gate 1658*0Sstevel@tonic-gate npages = seg_pages(seg); /* seg has shrunk */ 1659*0Sstevel@tonic-gate nbytes = vpgtob(npages); 1660*0Sstevel@tonic-gate svd->vpage = kmem_alloc(nbytes, KM_SLEEP); 1661*0Sstevel@tonic-gate 1662*0Sstevel@tonic-gate bcopy(ovpage, svd->vpage, nbytes); 1663*0Sstevel@tonic-gate 1664*0Sstevel@tonic-gate npages = seg_pages(nseg); 1665*0Sstevel@tonic-gate nbytes = vpgtob(npages);
1666*0Sstevel@tonic-gate nsvd->vpage = kmem_alloc(nbytes, KM_SLEEP); 1667*0Sstevel@tonic-gate 1668*0Sstevel@tonic-gate bcopy(&ovpage[opages - npages], nsvd->vpage, nbytes); 1669*0Sstevel@tonic-gate 1670*0Sstevel@tonic-gate /* free up old vpage */ 1671*0Sstevel@tonic-gate kmem_free(ovpage, vpgtob(opages)); 1672*0Sstevel@tonic-gate } 1673*0Sstevel@tonic-gate 1674*0Sstevel@tonic-gate if (amp == NULL) { 1675*0Sstevel@tonic-gate nsvd->amp = NULL; 1676*0Sstevel@tonic-gate nsvd->anon_index = 0; 1677*0Sstevel@tonic-gate } else { 1678*0Sstevel@tonic-gate /* 1679*0Sstevel@tonic-gate * Need to create a new anon map for the new segment. 1680*0Sstevel@tonic-gate * We'll also allocate a new smaller array for the old 1681*0Sstevel@tonic-gate * smaller segment to save space. 1682*0Sstevel@tonic-gate */ 1683*0Sstevel@tonic-gate opages = btop((uintptr_t)(addr - seg->s_base)); 1684*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); 1685*0Sstevel@tonic-gate if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) { 1686*0Sstevel@tonic-gate /* 1687*0Sstevel@tonic-gate * Free up now unused parts of anon_map array 1688*0Sstevel@tonic-gate */ 1689*0Sstevel@tonic-gate if (seg->s_szc != 0) { 1690*0Sstevel@tonic-gate ulong_t an_idx = svd->anon_index + opages; 1691*0Sstevel@tonic-gate anon_free_pages(amp->ahp, an_idx, len, 1692*0Sstevel@tonic-gate seg->s_szc); 1693*0Sstevel@tonic-gate } else { 1694*0Sstevel@tonic-gate anon_free(amp->ahp, svd->anon_index + opages, 1695*0Sstevel@tonic-gate len); 1696*0Sstevel@tonic-gate } 1697*0Sstevel@tonic-gate 1698*0Sstevel@tonic-gate /* 1699*0Sstevel@tonic-gate * Unreserve swap space for the unmapped chunk 1700*0Sstevel@tonic-gate * of this segment in case it's MAP_SHARED 1701*0Sstevel@tonic-gate */ 1702*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) { 1703*0Sstevel@tonic-gate anon_unresv(len); 1704*0Sstevel@tonic-gate amp->swresv -= len; 1705*0Sstevel@tonic-gate } 1706*0Sstevel@tonic-gate } 1707*0Sstevel@tonic-gate 1708*0Sstevel@tonic-gate nsvd->anon_index = svd->anon_index + 1709*0Sstevel@tonic-gate btop((uintptr_t)(nseg->s_base - seg->s_base)); 1710*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) { 1711*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0); 1712*0Sstevel@tonic-gate amp->refcnt++; 1713*0Sstevel@tonic-gate nsvd->amp = amp; 1714*0Sstevel@tonic-gate } else { 1715*0Sstevel@tonic-gate struct anon_map *namp; 1716*0Sstevel@tonic-gate struct anon_hdr *nahp; 1717*0Sstevel@tonic-gate 1718*0Sstevel@tonic-gate ASSERT(svd->type == MAP_PRIVATE); 1719*0Sstevel@tonic-gate nahp = anon_create(btop(seg->s_size), ANON_SLEEP); 1720*0Sstevel@tonic-gate namp = anonmap_alloc(nseg->s_size, 0); 1721*0Sstevel@tonic-gate namp->a_szc = seg->s_szc; 1722*0Sstevel@tonic-gate (void) anon_copy_ptr(amp->ahp, svd->anon_index, nahp, 1723*0Sstevel@tonic-gate 0, btop(seg->s_size), ANON_SLEEP); 1724*0Sstevel@tonic-gate (void) anon_copy_ptr(amp->ahp, nsvd->anon_index, 1725*0Sstevel@tonic-gate namp->ahp, 0, btop(nseg->s_size), ANON_SLEEP); 1726*0Sstevel@tonic-gate anon_release(amp->ahp, btop(amp->size)); 1727*0Sstevel@tonic-gate svd->anon_index = 0; 1728*0Sstevel@tonic-gate nsvd->anon_index = 0; 1729*0Sstevel@tonic-gate amp->ahp = nahp; 1730*0Sstevel@tonic-gate amp->size = seg->s_size; 1731*0Sstevel@tonic-gate nsvd->amp = namp; 1732*0Sstevel@tonic-gate } 1733*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 1734*0Sstevel@tonic-gate } 1735*0Sstevel@tonic-gate if (svd->swresv) { 1736*0Sstevel@tonic-gate if (svd->flags & MAP_NORESERVE) { 1737*0Sstevel@tonic-gate ASSERT(amp); 1738*0Sstevel@tonic-gate oswresv =
svd->swresv; 1739*0Sstevel@tonic-gate svd->swresv = ptob(anon_pages(amp->ahp, 1740*0Sstevel@tonic-gate svd->anon_index, btop(seg->s_size))); 1741*0Sstevel@tonic-gate nsvd->swresv = ptob(anon_pages(nsvd->amp->ahp, 1742*0Sstevel@tonic-gate nsvd->anon_index, btop(nseg->s_size))); 1743*0Sstevel@tonic-gate ASSERT(oswresv >= (svd->swresv + nsvd->swresv)); 1744*0Sstevel@tonic-gate anon_unresv(oswresv - (svd->swresv + nsvd->swresv)); 1745*0Sstevel@tonic-gate } else { 1746*0Sstevel@tonic-gate if (seg->s_size + nseg->s_size + len != svd->swresv) { 1747*0Sstevel@tonic-gate panic("segvn_unmap: " 1748*0Sstevel@tonic-gate "cannot split swap reservation"); 1749*0Sstevel@tonic-gate /*NOTREACHED*/ 1750*0Sstevel@tonic-gate } 1751*0Sstevel@tonic-gate anon_unresv(len); 1752*0Sstevel@tonic-gate svd->swresv = seg->s_size; 1753*0Sstevel@tonic-gate nsvd->swresv = nseg->s_size; 1754*0Sstevel@tonic-gate } 1755*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u", 1756*0Sstevel@tonic-gate seg, len, 0); 1757*0Sstevel@tonic-gate } 1758*0Sstevel@tonic-gate 1759*0Sstevel@tonic-gate return (0); /* I'm glad that's all over with! */ 1760*0Sstevel@tonic-gate } 1761*0Sstevel@tonic-gate 1762*0Sstevel@tonic-gate static void 1763*0Sstevel@tonic-gate segvn_free(struct seg *seg) 1764*0Sstevel@tonic-gate { 1765*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 1766*0Sstevel@tonic-gate pgcnt_t npages = seg_pages(seg); 1767*0Sstevel@tonic-gate struct anon_map *amp; 1768*0Sstevel@tonic-gate size_t len; 1769*0Sstevel@tonic-gate 1770*0Sstevel@tonic-gate /* 1771*0Sstevel@tonic-gate * We don't need any segment level locks for "segvn" data 1772*0Sstevel@tonic-gate * since the address space is "write" locked. 1773*0Sstevel@tonic-gate */ 1774*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 1775*0Sstevel@tonic-gate 1776*0Sstevel@tonic-gate /* 1777*0Sstevel@tonic-gate * Be sure to unlock pages. XXX Why do things get free'ed instead 1778*0Sstevel@tonic-gate * of unmapped? XXX 1779*0Sstevel@tonic-gate */ 1780*0Sstevel@tonic-gate (void) segvn_lockop(seg, seg->s_base, seg->s_size, 1781*0Sstevel@tonic-gate 0, MC_UNLOCK, NULL, 0); 1782*0Sstevel@tonic-gate 1783*0Sstevel@tonic-gate /* 1784*0Sstevel@tonic-gate * Deallocate the vpage and anon pointers if necessary and possible. 1785*0Sstevel@tonic-gate */ 1786*0Sstevel@tonic-gate if (svd->vpage != NULL) { 1787*0Sstevel@tonic-gate kmem_free(svd->vpage, vpgtob(npages)); 1788*0Sstevel@tonic-gate svd->vpage = NULL; 1789*0Sstevel@tonic-gate } 1790*0Sstevel@tonic-gate if ((amp = svd->amp) != NULL) { 1791*0Sstevel@tonic-gate /* 1792*0Sstevel@tonic-gate * If there are no more references to this anon_map 1793*0Sstevel@tonic-gate * structure, then deallocate the structure after freeing 1794*0Sstevel@tonic-gate * up all the anon slot pointers that we can. 1795*0Sstevel@tonic-gate */ 1796*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); 1797*0Sstevel@tonic-gate if (--amp->refcnt == 0) { 1798*0Sstevel@tonic-gate if (svd->type == MAP_PRIVATE) { 1799*0Sstevel@tonic-gate /* 1800*0Sstevel@tonic-gate * Private - we only need to anon_free 1801*0Sstevel@tonic-gate * the part that this segment refers to.
1802*0Sstevel@tonic-gate */ 1803*0Sstevel@tonic-gate if (seg->s_szc != 0) { 1804*0Sstevel@tonic-gate anon_free_pages(amp->ahp, 1805*0Sstevel@tonic-gate svd->anon_index, seg->s_size, 1806*0Sstevel@tonic-gate seg->s_szc); 1807*0Sstevel@tonic-gate } else { 1808*0Sstevel@tonic-gate anon_free(amp->ahp, svd->anon_index, 1809*0Sstevel@tonic-gate seg->s_size); 1810*0Sstevel@tonic-gate } 1811*0Sstevel@tonic-gate } else { 1812*0Sstevel@tonic-gate /* 1813*0Sstevel@tonic-gate * Shared - anon_free the entire 1814*0Sstevel@tonic-gate * anon_map's worth of stuff and 1815*0Sstevel@tonic-gate * release any swap reservation. 1816*0Sstevel@tonic-gate */ 1817*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0); 1818*0Sstevel@tonic-gate anon_free(amp->ahp, 0, amp->size); 1819*0Sstevel@tonic-gate if ((len = amp->swresv) != 0) { 1820*0Sstevel@tonic-gate anon_unresv(len); 1821*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_ANON_PROC, 1822*0Sstevel@tonic-gate "anon proc:%p %lu %u", 1823*0Sstevel@tonic-gate seg, len, 0); 1824*0Sstevel@tonic-gate } 1825*0Sstevel@tonic-gate } 1826*0Sstevel@tonic-gate svd->amp = NULL; 1827*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 1828*0Sstevel@tonic-gate anonmap_free(amp); 1829*0Sstevel@tonic-gate } else if (svd->type == MAP_PRIVATE) { 1830*0Sstevel@tonic-gate /* 1831*0Sstevel@tonic-gate * We had a private mapping which still has 1832*0Sstevel@tonic-gate * a held anon_map so just free up all the 1833*0Sstevel@tonic-gate * anon slot pointers that we were using. 1834*0Sstevel@tonic-gate */ 1835*0Sstevel@tonic-gate if (seg->s_szc != 0) { 1836*0Sstevel@tonic-gate anon_free_pages(amp->ahp, svd->anon_index, 1837*0Sstevel@tonic-gate seg->s_size, seg->s_szc); 1838*0Sstevel@tonic-gate } else { 1839*0Sstevel@tonic-gate anon_free(amp->ahp, svd->anon_index, 1840*0Sstevel@tonic-gate seg->s_size); 1841*0Sstevel@tonic-gate } 1842*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 1843*0Sstevel@tonic-gate } else { 1844*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 1845*0Sstevel@tonic-gate } 1846*0Sstevel@tonic-gate } 1847*0Sstevel@tonic-gate 1848*0Sstevel@tonic-gate /* 1849*0Sstevel@tonic-gate * Release swap reservation. 1850*0Sstevel@tonic-gate */ 1851*0Sstevel@tonic-gate if ((len = svd->swresv) != 0) { 1852*0Sstevel@tonic-gate anon_unresv(svd->swresv); 1853*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u", 1854*0Sstevel@tonic-gate seg, len, 0); 1855*0Sstevel@tonic-gate svd->swresv = 0; 1856*0Sstevel@tonic-gate } 1857*0Sstevel@tonic-gate /* 1858*0Sstevel@tonic-gate * Release claim on vnode, credentials, and finally free the 1859*0Sstevel@tonic-gate * private data. 1860*0Sstevel@tonic-gate */ 1861*0Sstevel@tonic-gate if (svd->vp != NULL) { 1862*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) 1863*0Sstevel@tonic-gate lgrp_shm_policy_fini(NULL, svd->vp); 1864*0Sstevel@tonic-gate VN_RELE(svd->vp); 1865*0Sstevel@tonic-gate svd->vp = NULL; 1866*0Sstevel@tonic-gate } 1867*0Sstevel@tonic-gate crfree(svd->cred); 1868*0Sstevel@tonic-gate svd->cred = NULL; 1869*0Sstevel@tonic-gate 1870*0Sstevel@tonic-gate seg->s_data = NULL; 1871*0Sstevel@tonic-gate kmem_cache_free(segvn_cache, svd); 1872*0Sstevel@tonic-gate } 1873*0Sstevel@tonic-gate 1874*0Sstevel@tonic-gate /* 1875*0Sstevel@tonic-gate * Do a F_SOFTUNLOCK call over the range requested. The range must have 1876*0Sstevel@tonic-gate * already been F_SOFTLOCK'ed. 1877*0Sstevel@tonic-gate * Caller must always match addr and len of a softunlock with a previous 1878*0Sstevel@tonic-gate * softlock with exactly the same addr and len.
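 *
 * Illustration (values made up): an F_SOFTLOCK fault over 4 pages
 * raises svd->softlockcnt by 4 and takes 4 pages out of availrmem,
 * one page at a time; the matching softunlock below returns exactly
 * btop(len) == 4 to both counters, so a call with a mismatched
 * length would leave the counts permanently skewed.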
1879*0Sstevel@tonic-gate */ 1880*0Sstevel@tonic-gate static void 1881*0Sstevel@tonic-gate segvn_softunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) 1882*0Sstevel@tonic-gate { 1883*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 1884*0Sstevel@tonic-gate page_t *pp; 1885*0Sstevel@tonic-gate caddr_t adr; 1886*0Sstevel@tonic-gate struct vnode *vp; 1887*0Sstevel@tonic-gate u_offset_t offset; 1888*0Sstevel@tonic-gate ulong_t anon_index; 1889*0Sstevel@tonic-gate struct anon_map *amp; 1890*0Sstevel@tonic-gate struct anon *ap = NULL; 1891*0Sstevel@tonic-gate 1892*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 1893*0Sstevel@tonic-gate ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); 1894*0Sstevel@tonic-gate 1895*0Sstevel@tonic-gate if ((amp = svd->amp) != NULL) 1896*0Sstevel@tonic-gate anon_index = svd->anon_index + seg_page(seg, addr); 1897*0Sstevel@tonic-gate 1898*0Sstevel@tonic-gate hat_unlock(seg->s_as->a_hat, addr, len); 1899*0Sstevel@tonic-gate for (adr = addr; adr < addr + len; adr += PAGESIZE) { 1900*0Sstevel@tonic-gate if (amp != NULL) { 1901*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); 1902*0Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, anon_index++)) 1903*0Sstevel@tonic-gate != NULL) { 1904*0Sstevel@tonic-gate swap_xlate(ap, &vp, &offset); 1905*0Sstevel@tonic-gate } else { 1906*0Sstevel@tonic-gate vp = svd->vp; 1907*0Sstevel@tonic-gate offset = svd->offset + 1908*0Sstevel@tonic-gate (uintptr_t)(adr - seg->s_base); 1909*0Sstevel@tonic-gate } 1910*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 1911*0Sstevel@tonic-gate } else { 1912*0Sstevel@tonic-gate vp = svd->vp; 1913*0Sstevel@tonic-gate offset = svd->offset + 1914*0Sstevel@tonic-gate (uintptr_t)(adr - seg->s_base); 1915*0Sstevel@tonic-gate } 1916*0Sstevel@tonic-gate 1917*0Sstevel@tonic-gate /* 1918*0Sstevel@tonic-gate * Use page_find() instead of page_lookup() to 1919*0Sstevel@tonic-gate * find the page since we know that it is locked. 1920*0Sstevel@tonic-gate */ 1921*0Sstevel@tonic-gate pp = page_find(vp, offset); 1922*0Sstevel@tonic-gate if (pp == NULL) { 1923*0Sstevel@tonic-gate panic( 1924*0Sstevel@tonic-gate "segvn_softunlock: addr %p, ap %p, vp %p, off %llx", 1925*0Sstevel@tonic-gate (void *)adr, (void *)ap, (void *)vp, offset); 1926*0Sstevel@tonic-gate /*NOTREACHED*/ 1927*0Sstevel@tonic-gate } 1928*0Sstevel@tonic-gate 1929*0Sstevel@tonic-gate if (rw == S_WRITE) { 1930*0Sstevel@tonic-gate hat_setrefmod(pp); 1931*0Sstevel@tonic-gate if (seg->s_as->a_vbits) 1932*0Sstevel@tonic-gate hat_setstat(seg->s_as, adr, PAGESIZE, 1933*0Sstevel@tonic-gate P_REF | P_MOD); 1934*0Sstevel@tonic-gate } else if (rw != S_OTHER) { 1935*0Sstevel@tonic-gate hat_setref(pp); 1936*0Sstevel@tonic-gate if (seg->s_as->a_vbits) 1937*0Sstevel@tonic-gate hat_setstat(seg->s_as, adr, PAGESIZE, P_REF); 1938*0Sstevel@tonic-gate } 1939*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_SEGVN_FAULT, 1940*0Sstevel@tonic-gate "segvn_fault:pp %p vp %p offset %llx", pp, vp, offset); 1941*0Sstevel@tonic-gate page_unlock(pp); 1942*0Sstevel@tonic-gate } 1943*0Sstevel@tonic-gate mutex_enter(&freemem_lock); /* for availrmem */ 1944*0Sstevel@tonic-gate availrmem += btop(len); 1945*0Sstevel@tonic-gate segvn_pages_locked -= btop(len); 1946*0Sstevel@tonic-gate svd->softlockcnt -= btop(len); 1947*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 1948*0Sstevel@tonic-gate if (svd->softlockcnt == 0) { 1949*0Sstevel@tonic-gate /* 1950*0Sstevel@tonic-gate * All SOFTLOCKS are gone.
Wakeup any waiting 1951*0Sstevel@tonic-gate * unmappers so they can try again to unmap. 1952*0Sstevel@tonic-gate * Check for waiters first without the mutex 1953*0Sstevel@tonic-gate * held so we don't always grab the mutex on 1954*0Sstevel@tonic-gate * softunlocks. 1955*0Sstevel@tonic-gate */ 1956*0Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 1957*0Sstevel@tonic-gate mutex_enter(&seg->s_as->a_contents); 1958*0Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 1959*0Sstevel@tonic-gate AS_CLRUNMAPWAIT(seg->s_as); 1960*0Sstevel@tonic-gate cv_broadcast(&seg->s_as->a_cv); 1961*0Sstevel@tonic-gate } 1962*0Sstevel@tonic-gate mutex_exit(&seg->s_as->a_contents); 1963*0Sstevel@tonic-gate } 1964*0Sstevel@tonic-gate } 1965*0Sstevel@tonic-gate } 1966*0Sstevel@tonic-gate 1967*0Sstevel@tonic-gate #define PAGE_HANDLED ((page_t *)-1) 1968*0Sstevel@tonic-gate 1969*0Sstevel@tonic-gate /* 1970*0Sstevel@tonic-gate * Release all the pages in the NULL terminated ppp list 1971*0Sstevel@tonic-gate * which haven't already been converted to PAGE_HANDLED. 1972*0Sstevel@tonic-gate */ 1973*0Sstevel@tonic-gate static void 1974*0Sstevel@tonic-gate segvn_pagelist_rele(page_t **ppp) 1975*0Sstevel@tonic-gate { 1976*0Sstevel@tonic-gate for (; *ppp != NULL; ppp++) { 1977*0Sstevel@tonic-gate if (*ppp != PAGE_HANDLED) 1978*0Sstevel@tonic-gate page_unlock(*ppp); 1979*0Sstevel@tonic-gate } 1980*0Sstevel@tonic-gate } 1981*0Sstevel@tonic-gate 1982*0Sstevel@tonic-gate static int stealcow = 1; 1983*0Sstevel@tonic-gate 1984*0Sstevel@tonic-gate /* 1985*0Sstevel@tonic-gate * Workaround for viking chip bug. See bug id 1220902. 1986*0Sstevel@tonic-gate * To fix this down in pagefault() would require importing so 1987*0Sstevel@tonic-gate * much as and segvn code as to be unmaintainable. 1988*0Sstevel@tonic-gate */ 1989*0Sstevel@tonic-gate int enable_mbit_wa = 0; 1990*0Sstevel@tonic-gate 1991*0Sstevel@tonic-gate /* 1992*0Sstevel@tonic-gate * Handles all the dirty work of getting the right 1993*0Sstevel@tonic-gate * anonymous pages and loading up the translations. 1994*0Sstevel@tonic-gate * This routine is called only from segvn_fault() 1995*0Sstevel@tonic-gate * when looping over the range of addresses requested. 
1996*0Sstevel@tonic-gate * 1997*0Sstevel@tonic-gate * The basic algorithm here is: 1998*0Sstevel@tonic-gate * If this is an anon_zero case 1999*0Sstevel@tonic-gate * Call anon_zero to allocate page 2000*0Sstevel@tonic-gate * Load up translation 2001*0Sstevel@tonic-gate * Return 2002*0Sstevel@tonic-gate * endif 2003*0Sstevel@tonic-gate * If this is an anon page 2004*0Sstevel@tonic-gate * Use anon_getpage to get the page 2005*0Sstevel@tonic-gate * else 2006*0Sstevel@tonic-gate * Find page in pl[] list passed in 2007*0Sstevel@tonic-gate * endif 2008*0Sstevel@tonic-gate * If not a cow 2009*0Sstevel@tonic-gate * Load up the translation to the page 2010*0Sstevel@tonic-gate * return 2011*0Sstevel@tonic-gate * endif 2012*0Sstevel@tonic-gate * Call anon_private to handle cow 2013*0Sstevel@tonic-gate * Load up (writable) translation to new page 2014*0Sstevel@tonic-gate */ 2015*0Sstevel@tonic-gate static faultcode_t 2016*0Sstevel@tonic-gate segvn_faultpage( 2017*0Sstevel@tonic-gate struct hat *hat, /* the hat to use for mapping */ 2018*0Sstevel@tonic-gate struct seg *seg, /* seg_vn of interest */ 2019*0Sstevel@tonic-gate caddr_t addr, /* address in as */ 2020*0Sstevel@tonic-gate u_offset_t off, /* offset in vp */ 2021*0Sstevel@tonic-gate struct vpage *vpage, /* pointer to vpage for vp, off */ 2022*0Sstevel@tonic-gate page_t *pl[], /* object source page pointer */ 2023*0Sstevel@tonic-gate uint_t vpprot, /* access allowed to object pages */ 2024*0Sstevel@tonic-gate enum fault_type type, /* type of fault */ 2025*0Sstevel@tonic-gate enum seg_rw rw, /* type of access at fault */ 2026*0Sstevel@tonic-gate int brkcow) /* we may need to break cow */ 2027*0Sstevel@tonic-gate { 2028*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 2029*0Sstevel@tonic-gate page_t *pp, **ppp; 2030*0Sstevel@tonic-gate uint_t pageflags = 0; 2031*0Sstevel@tonic-gate page_t *anon_pl[1 + 1]; 2032*0Sstevel@tonic-gate page_t *opp = NULL; /* original page */ 2033*0Sstevel@tonic-gate uint_t prot; 2034*0Sstevel@tonic-gate int err; 2035*0Sstevel@tonic-gate int cow; 2036*0Sstevel@tonic-gate int claim; 2037*0Sstevel@tonic-gate int steal = 0; 2038*0Sstevel@tonic-gate ulong_t anon_index; 2039*0Sstevel@tonic-gate struct anon *ap, *oldap; 2040*0Sstevel@tonic-gate struct anon_map *amp; 2041*0Sstevel@tonic-gate int hat_flag = (type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD; 2042*0Sstevel@tonic-gate int anon_lock = 0; 2043*0Sstevel@tonic-gate anon_sync_obj_t cookie; 2044*0Sstevel@tonic-gate 2045*0Sstevel@tonic-gate if (svd->flags & MAP_TEXT) { 2046*0Sstevel@tonic-gate hat_flag |= HAT_LOAD_TEXT; 2047*0Sstevel@tonic-gate } 2048*0Sstevel@tonic-gate 2049*0Sstevel@tonic-gate ASSERT(SEGVN_READ_HELD(seg->s_as, &svd->lock)); 2050*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0); 2051*0Sstevel@tonic-gate 2052*0Sstevel@tonic-gate /* 2053*0Sstevel@tonic-gate * Initialize protection value for this page. 2054*0Sstevel@tonic-gate * If we have per page protection values check it now. 
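 *
 * (Example, illustrative: an S_WRITE fault on a page whose per-page
 * protections were reduced to PROT_READ maps rw to protchk ==
 * PROT_WRITE below, fails the (prot & protchk) test and returns
 * FC_PROT instead of loading a translation.)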
2055*0Sstevel@tonic-gate */ 2056*0Sstevel@tonic-gate if (svd->pageprot) { 2057*0Sstevel@tonic-gate uint_t protchk; 2058*0Sstevel@tonic-gate 2059*0Sstevel@tonic-gate switch (rw) { 2060*0Sstevel@tonic-gate case S_READ: 2061*0Sstevel@tonic-gate protchk = PROT_READ; 2062*0Sstevel@tonic-gate break; 2063*0Sstevel@tonic-gate case S_WRITE: 2064*0Sstevel@tonic-gate protchk = PROT_WRITE; 2065*0Sstevel@tonic-gate break; 2066*0Sstevel@tonic-gate case S_EXEC: 2067*0Sstevel@tonic-gate protchk = PROT_EXEC; 2068*0Sstevel@tonic-gate break; 2069*0Sstevel@tonic-gate case S_OTHER: 2070*0Sstevel@tonic-gate default: 2071*0Sstevel@tonic-gate protchk = PROT_READ | PROT_WRITE | PROT_EXEC; 2072*0Sstevel@tonic-gate break; 2073*0Sstevel@tonic-gate } 2074*0Sstevel@tonic-gate 2075*0Sstevel@tonic-gate prot = VPP_PROT(vpage); 2076*0Sstevel@tonic-gate if ((prot & protchk) == 0) 2077*0Sstevel@tonic-gate return (FC_PROT); /* illegal access type */ 2078*0Sstevel@tonic-gate } else { 2079*0Sstevel@tonic-gate prot = svd->prot; 2080*0Sstevel@tonic-gate } 2081*0Sstevel@tonic-gate 2082*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 2083*0Sstevel@tonic-gate mutex_enter(&freemem_lock); 2084*0Sstevel@tonic-gate if (availrmem <= tune.t_minarmem) { 2085*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 2086*0Sstevel@tonic-gate return (FC_MAKE_ERR(ENOMEM)); /* out of real memory */ 2087*0Sstevel@tonic-gate } else { 2088*0Sstevel@tonic-gate svd->softlockcnt++; 2089*0Sstevel@tonic-gate availrmem--; 2090*0Sstevel@tonic-gate segvn_pages_locked++; 2091*0Sstevel@tonic-gate } 2092*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 2093*0Sstevel@tonic-gate } 2094*0Sstevel@tonic-gate 2095*0Sstevel@tonic-gate /* 2096*0Sstevel@tonic-gate * Always acquire the anon array lock to prevent 2 threads from 2097*0Sstevel@tonic-gate * allocating separate anon slots for the same "addr". 2098*0Sstevel@tonic-gate */ 2099*0Sstevel@tonic-gate 2100*0Sstevel@tonic-gate if ((amp = svd->amp) != NULL) { 2101*0Sstevel@tonic-gate ASSERT(RW_READ_HELD(&amp->a_rwlock)); 2102*0Sstevel@tonic-gate anon_index = svd->anon_index + seg_page(seg, addr); 2103*0Sstevel@tonic-gate anon_array_enter(amp, anon_index, &cookie); 2104*0Sstevel@tonic-gate anon_lock = 1; 2105*0Sstevel@tonic-gate } 2106*0Sstevel@tonic-gate 2107*0Sstevel@tonic-gate if (svd->vp == NULL && amp != NULL) { 2108*0Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, anon_index)) == NULL) { 2109*0Sstevel@tonic-gate /* 2110*0Sstevel@tonic-gate * Allocate a (normally) writable anonymous page of 2111*0Sstevel@tonic-gate * zeroes. If no advance reservations, reserve now. 2112*0Sstevel@tonic-gate */ 2113*0Sstevel@tonic-gate if (svd->flags & MAP_NORESERVE) { 2114*0Sstevel@tonic-gate if (anon_resv(ptob(1))) { 2115*0Sstevel@tonic-gate svd->swresv += ptob(1); 2116*0Sstevel@tonic-gate } else { 2117*0Sstevel@tonic-gate err = ENOMEM; 2118*0Sstevel@tonic-gate goto out; 2119*0Sstevel@tonic-gate } 2120*0Sstevel@tonic-gate } 2121*0Sstevel@tonic-gate if ((pp = anon_zero(seg, addr, &ap, 2122*0Sstevel@tonic-gate svd->cred)) == NULL) { 2123*0Sstevel@tonic-gate err = ENOMEM; 2124*0Sstevel@tonic-gate goto out; /* out of swap space */ 2125*0Sstevel@tonic-gate } 2126*0Sstevel@tonic-gate /* 2127*0Sstevel@tonic-gate * Re-acquire the anon_map lock and 2128*0Sstevel@tonic-gate * initialize the anon array entry.
2129*0Sstevel@tonic-gate */ 2130*0Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, anon_index, ap, 2131*0Sstevel@tonic-gate ANON_SLEEP); 2132*0Sstevel@tonic-gate if (enable_mbit_wa) { 2133*0Sstevel@tonic-gate if (rw == S_WRITE) 2134*0Sstevel@tonic-gate hat_setmod(pp); 2135*0Sstevel@tonic-gate else if (!hat_ismod(pp)) 2136*0Sstevel@tonic-gate prot &= ~PROT_WRITE; 2137*0Sstevel@tonic-gate } 2138*0Sstevel@tonic-gate /* 2139*0Sstevel@tonic-gate * If AS_PAGLCK is set in a_flags (via memcntl(2) 2140*0Sstevel@tonic-gate * with MC_LOCKAS, MCL_FUTURE) and this is a 2141*0Sstevel@tonic-gate * MAP_NORESERVE segment, we may need to 2142*0Sstevel@tonic-gate * permanently lock the page as it is being faulted 2143*0Sstevel@tonic-gate * for the first time. The following text applies 2144*0Sstevel@tonic-gate * only to MAP_NORESERVE segments: 2145*0Sstevel@tonic-gate * 2146*0Sstevel@tonic-gate * As per memcntl(2), if this segment was created 2147*0Sstevel@tonic-gate * after MCL_FUTURE was applied (a "future" 2148*0Sstevel@tonic-gate * segment), its pages must be locked. If this 2149*0Sstevel@tonic-gate * segment existed at MCL_FUTURE application (a 2150*0Sstevel@tonic-gate * "past" segment), the interface is unclear. 2151*0Sstevel@tonic-gate * 2152*0Sstevel@tonic-gate * We decide to lock only if vpage is present: 2153*0Sstevel@tonic-gate * 2154*0Sstevel@tonic-gate * - "future" segments will have a vpage array (see 2155*0Sstevel@tonic-gate * as_map), and so will be locked as required 2156*0Sstevel@tonic-gate * 2157*0Sstevel@tonic-gate * - "past" segments may not have a vpage array, 2158*0Sstevel@tonic-gate * depending on whether events (such as 2159*0Sstevel@tonic-gate * mprotect) have occurred. Locking if vpage 2160*0Sstevel@tonic-gate * exists will preserve legacy behavior. Not 2161*0Sstevel@tonic-gate * locking if vpage is absent, will not break 2162*0Sstevel@tonic-gate * the interface or legacy behavior. Note that 2163*0Sstevel@tonic-gate * allocating vpage here if it's absent requires 2164*0Sstevel@tonic-gate * upgrading the segvn reader lock, the cost of 2165*0Sstevel@tonic-gate * which does not seem worthwhile. 2166*0Sstevel@tonic-gate */ 2167*0Sstevel@tonic-gate if (AS_ISPGLCK(seg->s_as) && vpage != NULL && 2168*0Sstevel@tonic-gate (svd->flags & MAP_NORESERVE)) { 2169*0Sstevel@tonic-gate claim = VPP_PROT(vpage) & PROT_WRITE; 2170*0Sstevel@tonic-gate ASSERT(svd->type == MAP_PRIVATE); 2171*0Sstevel@tonic-gate if (page_pp_lock(pp, claim, 0)) 2172*0Sstevel@tonic-gate VPP_SETPPLOCK(vpage); 2173*0Sstevel@tonic-gate } 2174*0Sstevel@tonic-gate 2175*0Sstevel@tonic-gate 2176*0Sstevel@tonic-gate /* 2177*0Sstevel@tonic-gate * Handle pages that have been marked for migration 2178*0Sstevel@tonic-gate */ 2179*0Sstevel@tonic-gate if (lgrp_optimizations()) 2180*0Sstevel@tonic-gate page_migrate(seg, addr, &pp, 1); 2181*0Sstevel@tonic-gate hat_memload(hat, addr, pp, prot, hat_flag); 2182*0Sstevel@tonic-gate 2183*0Sstevel@tonic-gate if (!(hat_flag & HAT_LOAD_LOCK)) 2184*0Sstevel@tonic-gate page_unlock(pp); 2185*0Sstevel@tonic-gate 2186*0Sstevel@tonic-gate anon_array_exit(&cookie); 2187*0Sstevel@tonic-gate return (0); 2188*0Sstevel@tonic-gate } 2189*0Sstevel@tonic-gate } 2190*0Sstevel@tonic-gate 2191*0Sstevel@tonic-gate /* 2192*0Sstevel@tonic-gate * Obtain the page structure via anon_getpage() if it is 2193*0Sstevel@tonic-gate * a private copy of an object (the result of a previous 2194*0Sstevel@tonic-gate * copy-on-write). 
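 *
 * (If no anon slot exists yet for this index, opp stays NULL and
 * the original object page is picked up from the pl[] list in the
 * search that follows instead.)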
2195*0Sstevel@tonic-gate */ 2196*0Sstevel@tonic-gate if (amp != NULL) { 2197*0Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, anon_index)) != NULL) { 2198*0Sstevel@tonic-gate err = anon_getpage(&ap, &vpprot, anon_pl, PAGESIZE, 2199*0Sstevel@tonic-gate seg, addr, rw, svd->cred); 2200*0Sstevel@tonic-gate if (err) 2201*0Sstevel@tonic-gate goto out; 2202*0Sstevel@tonic-gate 2203*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) { 2204*0Sstevel@tonic-gate /* 2205*0Sstevel@tonic-gate * If this is a shared mapping to an 2206*0Sstevel@tonic-gate * anon_map, then ignore the write 2207*0Sstevel@tonic-gate * permissions returned by anon_getpage(). 2208*0Sstevel@tonic-gate * They apply to the private mappings 2209*0Sstevel@tonic-gate * of this anon_map. 2210*0Sstevel@tonic-gate */ 2211*0Sstevel@tonic-gate vpprot |= PROT_WRITE; 2212*0Sstevel@tonic-gate } 2213*0Sstevel@tonic-gate opp = anon_pl[0]; 2214*0Sstevel@tonic-gate } 2215*0Sstevel@tonic-gate } 2216*0Sstevel@tonic-gate 2217*0Sstevel@tonic-gate /* 2218*0Sstevel@tonic-gate * Search the pl[] list passed in if it is from the 2219*0Sstevel@tonic-gate * original object (i.e., not a private copy). 2220*0Sstevel@tonic-gate */ 2221*0Sstevel@tonic-gate if (opp == NULL) { 2222*0Sstevel@tonic-gate /* 2223*0Sstevel@tonic-gate * Find original page. We must be bringing it in 2224*0Sstevel@tonic-gate * from the list in pl[]. 2225*0Sstevel@tonic-gate */ 2226*0Sstevel@tonic-gate for (ppp = pl; (opp = *ppp) != NULL; ppp++) { 2227*0Sstevel@tonic-gate if (opp == PAGE_HANDLED) 2228*0Sstevel@tonic-gate continue; 2229*0Sstevel@tonic-gate ASSERT(opp->p_vnode == svd->vp); /* XXX */ 2230*0Sstevel@tonic-gate if (opp->p_offset == off) 2231*0Sstevel@tonic-gate break; 2232*0Sstevel@tonic-gate } 2233*0Sstevel@tonic-gate if (opp == NULL) { 2234*0Sstevel@tonic-gate panic("segvn_faultpage not found"); 2235*0Sstevel@tonic-gate /*NOTREACHED*/ 2236*0Sstevel@tonic-gate } 2237*0Sstevel@tonic-gate *ppp = PAGE_HANDLED; 2238*0Sstevel@tonic-gate 2239*0Sstevel@tonic-gate } 2240*0Sstevel@tonic-gate 2241*0Sstevel@tonic-gate ASSERT(PAGE_LOCKED(opp)); 2242*0Sstevel@tonic-gate 2243*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_SEGVN_FAULT, 2244*0Sstevel@tonic-gate "segvn_fault:pp %p vp %p offset %llx", 2245*0Sstevel@tonic-gate opp, NULL, 0); 2246*0Sstevel@tonic-gate 2247*0Sstevel@tonic-gate /* 2248*0Sstevel@tonic-gate * The fault is treated as a copy-on-write fault if a 2249*0Sstevel@tonic-gate * write occurs on a private segment and the object 2250*0Sstevel@tonic-gate * page (i.e., mapping) is write protected. We assume 2251*0Sstevel@tonic-gate * that fatal protection checks have already been made. 2252*0Sstevel@tonic-gate */ 2253*0Sstevel@tonic-gate 2254*0Sstevel@tonic-gate cow = brkcow && ((vpprot & PROT_WRITE) == 0); 2255*0Sstevel@tonic-gate 2256*0Sstevel@tonic-gate /* 2257*0Sstevel@tonic-gate * If not a copy-on-write case load the translation 2258*0Sstevel@tonic-gate * and return. 
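 *
 * (Sketch of the mod-bit handling below: for VMODSORT vnodes, or
 * with enable_mbit_wa set, a write fault marks the page modified
 * via hat_setmod() up front, while a read/exec fault on a
 * not-yet-modified page withholds PROT_WRITE so that the first
 * real write faults again and sets the mod bit then.)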
2259*0Sstevel@tonic-gate */ 2260*0Sstevel@tonic-gate if (cow == 0) { 2261*0Sstevel@tonic-gate if (IS_VMODSORT(opp->p_vnode) || enable_mbit_wa) { 2262*0Sstevel@tonic-gate if (rw == S_WRITE) 2263*0Sstevel@tonic-gate hat_setmod(opp); 2264*0Sstevel@tonic-gate else if (rw != S_OTHER && !hat_ismod(opp)) 2265*0Sstevel@tonic-gate prot &= ~PROT_WRITE; 2266*0Sstevel@tonic-gate } 2267*0Sstevel@tonic-gate 2268*0Sstevel@tonic-gate /* 2269*0Sstevel@tonic-gate * Handle pages that have been marked for migration 2270*0Sstevel@tonic-gate */ 2271*0Sstevel@tonic-gate if (lgrp_optimizations()) 2272*0Sstevel@tonic-gate page_migrate(seg, addr, &opp, 1); 2273*0Sstevel@tonic-gate 2274*0Sstevel@tonic-gate hat_memload(hat, addr, opp, prot & vpprot, hat_flag); 2275*0Sstevel@tonic-gate 2276*0Sstevel@tonic-gate if (!(hat_flag & HAT_LOAD_LOCK)) 2277*0Sstevel@tonic-gate page_unlock(opp); 2278*0Sstevel@tonic-gate 2279*0Sstevel@tonic-gate if (anon_lock) { 2280*0Sstevel@tonic-gate anon_array_exit(&cookie); 2281*0Sstevel@tonic-gate } 2282*0Sstevel@tonic-gate return (0); 2283*0Sstevel@tonic-gate } 2284*0Sstevel@tonic-gate 2285*0Sstevel@tonic-gate hat_setref(opp); 2286*0Sstevel@tonic-gate 2287*0Sstevel@tonic-gate ASSERT(amp != NULL && anon_lock); 2288*0Sstevel@tonic-gate 2289*0Sstevel@tonic-gate /* 2290*0Sstevel@tonic-gate * Steal the page only if it isn't a private page 2291*0Sstevel@tonic-gate * since stealing a private page is not worth the effort. 2292*0Sstevel@tonic-gate */ 2293*0Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, anon_index)) == NULL) 2294*0Sstevel@tonic-gate steal = 1; 2295*0Sstevel@tonic-gate 2296*0Sstevel@tonic-gate /* 2297*0Sstevel@tonic-gate * Steal the original page if the following conditions are true: 2298*0Sstevel@tonic-gate * 2299*0Sstevel@tonic-gate * We are low on memory, the page is not private, page is not 2300*0Sstevel@tonic-gate * shared, not modified, not `locked' or if we have it `locked' 2301*0Sstevel@tonic-gate * (i.e., p_cowcnt == 1 and p_lckcnt == 0, which also implies 2302*0Sstevel@tonic-gate * that the page is not shared) and if it doesn't have any 2303*0Sstevel@tonic-gate * translations. page_struct_lock isn't needed to look at p_cowcnt 2304*0Sstevel@tonic-gate * and p_lckcnt because we first get exclusive lock on page. 2305*0Sstevel@tonic-gate */ 2306*0Sstevel@tonic-gate (void) hat_pagesync(opp, HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD); 2307*0Sstevel@tonic-gate 2308*0Sstevel@tonic-gate if (stealcow && freemem < minfree && steal && 2309*0Sstevel@tonic-gate page_tryupgrade(opp) && !hat_ismod(opp) && 2310*0Sstevel@tonic-gate ((opp->p_lckcnt == 0 && opp->p_cowcnt == 0) || 2311*0Sstevel@tonic-gate (opp->p_lckcnt == 0 && opp->p_cowcnt == 1 && 2312*0Sstevel@tonic-gate vpage != NULL && VPP_ISPPLOCK(vpage)))) { 2313*0Sstevel@tonic-gate /* 2314*0Sstevel@tonic-gate * Check if this page has other translations 2315*0Sstevel@tonic-gate * after unloading our translation. 2316*0Sstevel@tonic-gate */ 2317*0Sstevel@tonic-gate if (hat_page_is_mapped(opp)) { 2318*0Sstevel@tonic-gate hat_unload(seg->s_as->a_hat, addr, PAGESIZE, 2319*0Sstevel@tonic-gate HAT_UNLOAD); 2320*0Sstevel@tonic-gate } 2321*0Sstevel@tonic-gate 2322*0Sstevel@tonic-gate /* 2323*0Sstevel@tonic-gate * hat_unload() might sync back someone else's recent 2324*0Sstevel@tonic-gate * modification, so check again. 
2325*0Sstevel@tonic-gate */ 2326*0Sstevel@tonic-gate if (!hat_ismod(opp) && !hat_page_is_mapped(opp)) 2327*0Sstevel@tonic-gate pageflags |= STEAL_PAGE; 2328*0Sstevel@tonic-gate } 2329*0Sstevel@tonic-gate 2330*0Sstevel@tonic-gate /* 2331*0Sstevel@tonic-gate * If we have a vpage pointer, see if it indicates that we have 2332*0Sstevel@tonic-gate * ``locked'' the page we map -- if so, tell anon_private to 2333*0Sstevel@tonic-gate * transfer the locking resource to the new page. 2334*0Sstevel@tonic-gate * 2335*0Sstevel@tonic-gate * See Statement at the beginning of segvn_lockop regarding 2336*0Sstevel@tonic-gate * the way lockcnts/cowcnts are handled during COW. 2337*0Sstevel@tonic-gate * 2338*0Sstevel@tonic-gate */ 2339*0Sstevel@tonic-gate if (vpage != NULL && VPP_ISPPLOCK(vpage)) 2340*0Sstevel@tonic-gate pageflags |= LOCK_PAGE; 2341*0Sstevel@tonic-gate 2342*0Sstevel@tonic-gate /* 2343*0Sstevel@tonic-gate * Allocate a private page and perform the copy. 2344*0Sstevel@tonic-gate * For MAP_NORESERVE reserve swap space now, unless this 2345*0Sstevel@tonic-gate * is a cow fault on an existing anon page in which case 2346*0Sstevel@tonic-gate * MAP_NORESERVE will have made advance reservations. 2347*0Sstevel@tonic-gate */ 2348*0Sstevel@tonic-gate if ((svd->flags & MAP_NORESERVE) && (ap == NULL)) { 2349*0Sstevel@tonic-gate if (anon_resv(ptob(1))) { 2350*0Sstevel@tonic-gate svd->swresv += ptob(1); 2351*0Sstevel@tonic-gate } else { 2352*0Sstevel@tonic-gate page_unlock(opp); 2353*0Sstevel@tonic-gate err = ENOMEM; 2354*0Sstevel@tonic-gate goto out; 2355*0Sstevel@tonic-gate } 2356*0Sstevel@tonic-gate } 2357*0Sstevel@tonic-gate oldap = ap; 2358*0Sstevel@tonic-gate pp = anon_private(&ap, seg, addr, prot, opp, pageflags, svd->cred); 2359*0Sstevel@tonic-gate if (pp == NULL) { 2360*0Sstevel@tonic-gate err = ENOMEM; /* out of swap space */ 2361*0Sstevel@tonic-gate goto out; 2362*0Sstevel@tonic-gate } 2363*0Sstevel@tonic-gate 2364*0Sstevel@tonic-gate /* 2365*0Sstevel@tonic-gate * If we copied away from an anonymous page, then 2366*0Sstevel@tonic-gate * we are one step closer to freeing up an anon slot. 2367*0Sstevel@tonic-gate * 2368*0Sstevel@tonic-gate * NOTE: The original anon slot must be released while 2369*0Sstevel@tonic-gate * holding the "anon_map" lock. This is necessary to prevent 2370*0Sstevel@tonic-gate * other threads from obtaining a pointer to the anon slot 2371*0Sstevel@tonic-gate * which may be freed if its "refcnt" is 1. 
2372*0Sstevel@tonic-gate */ 2373*0Sstevel@tonic-gate if (oldap != NULL) 2374*0Sstevel@tonic-gate anon_decref(oldap); 2375*0Sstevel@tonic-gate 2376*0Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP); 2377*0Sstevel@tonic-gate 2378*0Sstevel@tonic-gate ASSERT(!IS_VMODSORT(pp->p_vnode)); 2379*0Sstevel@tonic-gate if (enable_mbit_wa) { 2380*0Sstevel@tonic-gate if (rw == S_WRITE) 2381*0Sstevel@tonic-gate hat_setmod(pp); 2382*0Sstevel@tonic-gate else if (!hat_ismod(pp)) 2383*0Sstevel@tonic-gate prot &= ~PROT_WRITE; 2384*0Sstevel@tonic-gate } 2385*0Sstevel@tonic-gate 2386*0Sstevel@tonic-gate 2387*0Sstevel@tonic-gate /* 2388*0Sstevel@tonic-gate * Handle pages that have been marked for migration 2389*0Sstevel@tonic-gate */ 2390*0Sstevel@tonic-gate if (lgrp_optimizations()) 2391*0Sstevel@tonic-gate page_migrate(seg, addr, &pp, 1); 2392*0Sstevel@tonic-gate hat_memload(hat, addr, pp, prot, hat_flag); 2393*0Sstevel@tonic-gate 2394*0Sstevel@tonic-gate if (!(hat_flag & HAT_LOAD_LOCK)) 2395*0Sstevel@tonic-gate page_unlock(pp); 2396*0Sstevel@tonic-gate 2397*0Sstevel@tonic-gate ASSERT(anon_lock); 2398*0Sstevel@tonic-gate anon_array_exit(&cookie); 2399*0Sstevel@tonic-gate return (0); 2400*0Sstevel@tonic-gate out: 2401*0Sstevel@tonic-gate if (anon_lock) 2402*0Sstevel@tonic-gate anon_array_exit(&cookie); 2403*0Sstevel@tonic-gate 2404*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 2405*0Sstevel@tonic-gate mutex_enter(&freemem_lock); 2406*0Sstevel@tonic-gate availrmem++; 2407*0Sstevel@tonic-gate segvn_pages_locked--; 2408*0Sstevel@tonic-gate svd->softlockcnt--; 2409*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 2410*0Sstevel@tonic-gate } 2411*0Sstevel@tonic-gate return (FC_MAKE_ERR(err)); 2412*0Sstevel@tonic-gate } 2413*0Sstevel@tonic-gate 2414*0Sstevel@tonic-gate /* 2415*0Sstevel@tonic-gate * relocate a bunch of smaller targ pages into one large repl page. all targ 2416*0Sstevel@tonic-gate * pages must be complete pages smaller than replacement pages. 2417*0Sstevel@tonic-gate * it's assumed that no page's szc can change since they are all PAGESIZE or 2418*0Sstevel@tonic-gate * complete large pages locked SHARED. 
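 * For example (an illustrative sketch only, not a case taken from this file): with a replacement page made of four constituent pages, targ[] could hold one two-constituent large page followed by two PAGESIZE pages; each chunk is page_relocate()'d into the matching portion of the replacement and i advances by curnpgs.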
2419*0Sstevel@tonic-gate */ 2420*0Sstevel@tonic-gate static void 2421*0Sstevel@tonic-gate segvn_relocate_pages(page_t **targ, page_t *replacement) 2422*0Sstevel@tonic-gate { 2423*0Sstevel@tonic-gate page_t *pp; 2424*0Sstevel@tonic-gate pgcnt_t repl_npgs, curnpgs; 2425*0Sstevel@tonic-gate pgcnt_t i; 2426*0Sstevel@tonic-gate uint_t repl_szc = replacement->p_szc; 2427*0Sstevel@tonic-gate page_t *first_repl = replacement; 2428*0Sstevel@tonic-gate page_t *repl; 2429*0Sstevel@tonic-gate spgcnt_t npgs; 2430*0Sstevel@tonic-gate 2431*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.relocatepages[0]); 2432*0Sstevel@tonic-gate 2433*0Sstevel@tonic-gate ASSERT(repl_szc != 0); 2434*0Sstevel@tonic-gate npgs = repl_npgs = page_get_pagecnt(repl_szc); 2435*0Sstevel@tonic-gate 2436*0Sstevel@tonic-gate i = 0; 2437*0Sstevel@tonic-gate while (repl_npgs) { 2438*0Sstevel@tonic-gate spgcnt_t nreloc; 2439*0Sstevel@tonic-gate int err; 2440*0Sstevel@tonic-gate ASSERT(replacement != NULL); 2441*0Sstevel@tonic-gate pp = targ[i]; 2442*0Sstevel@tonic-gate ASSERT(pp->p_szc < repl_szc); 2443*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 2444*0Sstevel@tonic-gate ASSERT(!PP_ISFREE(pp)); 2445*0Sstevel@tonic-gate curnpgs = page_get_pagecnt(pp->p_szc); 2446*0Sstevel@tonic-gate if (curnpgs == 1) { 2447*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.relocatepages[1]); 2448*0Sstevel@tonic-gate repl = replacement; 2449*0Sstevel@tonic-gate page_sub(&replacement, repl); 2450*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(repl)); 2451*0Sstevel@tonic-gate ASSERT(!PP_ISFREE(repl)); 2452*0Sstevel@tonic-gate ASSERT(repl->p_szc == repl_szc); 2453*0Sstevel@tonic-gate } else { 2454*0Sstevel@tonic-gate page_t *repl_savepp; 2455*0Sstevel@tonic-gate int j; 2456*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.relocatepages[2]); 2457*0Sstevel@tonic-gate repl_savepp = replacement; 2458*0Sstevel@tonic-gate for (j = 0; j < curnpgs; j++) { 2459*0Sstevel@tonic-gate repl = replacement; 2460*0Sstevel@tonic-gate page_sub(&replacement, repl); 2461*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(repl)); 2462*0Sstevel@tonic-gate ASSERT(!PP_ISFREE(repl)); 2463*0Sstevel@tonic-gate ASSERT(repl->p_szc == repl_szc); 2464*0Sstevel@tonic-gate ASSERT(page_pptonum(targ[i + j]) == 2465*0Sstevel@tonic-gate page_pptonum(targ[i]) + j); 2466*0Sstevel@tonic-gate } 2467*0Sstevel@tonic-gate repl = repl_savepp; 2468*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(page_pptonum(repl), curnpgs)); 2469*0Sstevel@tonic-gate } 2470*0Sstevel@tonic-gate err = page_relocate(&pp, &repl, 0, 1, &nreloc, NULL); 2471*0Sstevel@tonic-gate if (err || nreloc != curnpgs) { 2472*0Sstevel@tonic-gate panic("segvn_relocate_pages: " 2473*0Sstevel@tonic-gate "page_relocate failed err=%d curnpgs=%ld " 2474*0Sstevel@tonic-gate "nreloc=%ld", err, curnpgs, nreloc); 2475*0Sstevel@tonic-gate } 2476*0Sstevel@tonic-gate ASSERT(curnpgs <= repl_npgs); 2477*0Sstevel@tonic-gate repl_npgs -= curnpgs; 2478*0Sstevel@tonic-gate i += curnpgs; 2479*0Sstevel@tonic-gate } 2480*0Sstevel@tonic-gate ASSERT(replacement == NULL); 2481*0Sstevel@tonic-gate 2482*0Sstevel@tonic-gate repl = first_repl; 2483*0Sstevel@tonic-gate repl_npgs = npgs; 2484*0Sstevel@tonic-gate for (i = 0; i < repl_npgs; i++) { 2485*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(repl)); 2486*0Sstevel@tonic-gate ASSERT(!PP_ISFREE(repl)); 2487*0Sstevel@tonic-gate targ[i] = repl; 2488*0Sstevel@tonic-gate page_downgrade(targ[i]); 2489*0Sstevel@tonic-gate repl = page_next(repl); 2490*0Sstevel@tonic-gate } 2491*0Sstevel@tonic-gate } 2492*0Sstevel@tonic-gate 2493*0Sstevel@tonic-gate /* 2494*0Sstevel@tonic-gate * 
Check if all pages in ppa array are complete smaller than szc pages and 2495*0Sstevel@tonic-gate * their roots will still be aligned relative to their current size if the 2496*0Sstevel@tonic-gate * entire ppa array is relocated into one szc page. If these conditions are 2497*0Sstevel@tonic-gate * not met return 0. 2498*0Sstevel@tonic-gate * 2499*0Sstevel@tonic-gate * If all pages are properly aligned attempt to upgrade their locks 2500*0Sstevel@tonic-gate * to exclusive mode. If it fails set *upgrdfail to 1 and return 0. 2501*0Sstevel@tonic-gate * upgrdfail was set to 0 by caller. 2502*0Sstevel@tonic-gate * 2503*0Sstevel@tonic-gate * Return 1 if all pages are aligned and locked exclusively. 2504*0Sstevel@tonic-gate * 2505*0Sstevel@tonic-gate * If all pages in ppa array happen to be physically contiguous to make one 2506*0Sstevel@tonic-gate * szc page and all exclusive locks are successfully obtained promote the page 2507*0Sstevel@tonic-gate * size to szc and set *pszc to szc. Return 1 with pages locked shared. 2508*0Sstevel@tonic-gate */ 2509*0Sstevel@tonic-gate static int 2510*0Sstevel@tonic-gate segvn_full_szcpages(page_t **ppa, uint_t szc, int *upgrdfail, uint_t *pszc) 2511*0Sstevel@tonic-gate { 2512*0Sstevel@tonic-gate page_t *pp; 2513*0Sstevel@tonic-gate pfn_t pfn; 2514*0Sstevel@tonic-gate pgcnt_t totnpgs = page_get_pagecnt(szc); 2515*0Sstevel@tonic-gate pfn_t first_pfn; 2516*0Sstevel@tonic-gate int contig = 1; 2517*0Sstevel@tonic-gate pgcnt_t i; 2518*0Sstevel@tonic-gate pgcnt_t j; 2519*0Sstevel@tonic-gate uint_t curszc; 2520*0Sstevel@tonic-gate pgcnt_t curnpgs; 2521*0Sstevel@tonic-gate int root = 0; 2522*0Sstevel@tonic-gate 2523*0Sstevel@tonic-gate ASSERT(szc > 0); 2524*0Sstevel@tonic-gate 2525*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[0]); 2526*0Sstevel@tonic-gate 2527*0Sstevel@tonic-gate for (i = 0; i < totnpgs; i++) { 2528*0Sstevel@tonic-gate pp = ppa[i]; 2529*0Sstevel@tonic-gate ASSERT(PAGE_SHARED(pp)); 2530*0Sstevel@tonic-gate ASSERT(!PP_ISFREE(pp)); 2531*0Sstevel@tonic-gate pfn = page_pptonum(pp); 2532*0Sstevel@tonic-gate if (i == 0) { 2533*0Sstevel@tonic-gate if (!IS_P2ALIGNED(pfn, totnpgs)) { 2534*0Sstevel@tonic-gate contig = 0; 2535*0Sstevel@tonic-gate } else { 2536*0Sstevel@tonic-gate first_pfn = pfn; 2537*0Sstevel@tonic-gate } 2538*0Sstevel@tonic-gate } else if (contig && pfn != first_pfn + i) { 2539*0Sstevel@tonic-gate contig = 0; 2540*0Sstevel@tonic-gate } 2541*0Sstevel@tonic-gate if (pp->p_szc == 0) { 2542*0Sstevel@tonic-gate if (root) { 2543*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[1]); 2544*0Sstevel@tonic-gate return (0); 2545*0Sstevel@tonic-gate } 2546*0Sstevel@tonic-gate } else if (!root) { 2547*0Sstevel@tonic-gate if ((curszc = pp->p_szc) >= szc) { 2548*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[2]); 2549*0Sstevel@tonic-gate return (0); 2550*0Sstevel@tonic-gate } 2551*0Sstevel@tonic-gate if (curszc == 0) { 2552*0Sstevel@tonic-gate /* 2553*0Sstevel@tonic-gate * p_szc changed means we don't have all pages 2554*0Sstevel@tonic-gate * locked. return failure. 
2555*0Sstevel@tonic-gate */ 2556*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[3]); 2557*0Sstevel@tonic-gate return (0); 2558*0Sstevel@tonic-gate } 2559*0Sstevel@tonic-gate curnpgs = page_get_pagecnt(curszc); 2560*0Sstevel@tonic-gate if (!IS_P2ALIGNED(pfn, curnpgs) || 2561*0Sstevel@tonic-gate !IS_P2ALIGNED(i, curnpgs)) { 2562*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[4]); 2563*0Sstevel@tonic-gate return (0); 2564*0Sstevel@tonic-gate } 2565*0Sstevel@tonic-gate root = 1; 2566*0Sstevel@tonic-gate } else { 2567*0Sstevel@tonic-gate ASSERT(i > 0); 2568*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[5]); 2569*0Sstevel@tonic-gate if (pp->p_szc != curszc) { 2570*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[6]); 2571*0Sstevel@tonic-gate return (0); 2572*0Sstevel@tonic-gate } 2573*0Sstevel@tonic-gate if (pfn - 1 != page_pptonum(ppa[i - 1])) { 2574*0Sstevel@tonic-gate panic("segvn_full_szcpages: " 2575*0Sstevel@tonic-gate "large page not physically contiguous"); 2576*0Sstevel@tonic-gate } 2577*0Sstevel@tonic-gate if (P2PHASE(pfn, curnpgs) == curnpgs - 1) { 2578*0Sstevel@tonic-gate root = 0; 2579*0Sstevel@tonic-gate } 2580*0Sstevel@tonic-gate } 2581*0Sstevel@tonic-gate } 2582*0Sstevel@tonic-gate 2583*0Sstevel@tonic-gate for (i = 0; i < totnpgs; i++) { 2584*0Sstevel@tonic-gate ASSERT(ppa[i]->p_szc < szc); 2585*0Sstevel@tonic-gate if (!page_tryupgrade(ppa[i])) { 2586*0Sstevel@tonic-gate for (j = 0; j < i; j++) { 2587*0Sstevel@tonic-gate page_downgrade(ppa[j]); 2588*0Sstevel@tonic-gate } 2589*0Sstevel@tonic-gate *pszc = ppa[i]->p_szc; 2590*0Sstevel@tonic-gate *upgrdfail = 1; 2591*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[7]); 2592*0Sstevel@tonic-gate return (0); 2593*0Sstevel@tonic-gate } 2594*0Sstevel@tonic-gate } 2595*0Sstevel@tonic-gate 2596*0Sstevel@tonic-gate /* 2597*0Sstevel@tonic-gate * When a page is put on a free cachelist its szc is set to 0. If the file 2598*0Sstevel@tonic-gate * system reclaimed pages from the cachelist, targ pages will be physically 2599*0Sstevel@tonic-gate * contiguous with 0 p_szc. In this case just upgrade szc of targ 2600*0Sstevel@tonic-gate * pages without any relocations. 2601*0Sstevel@tonic-gate * To avoid any hat issues with previous small mappings 2602*0Sstevel@tonic-gate * hat_pageunload() the target pages first. 2603*0Sstevel@tonic-gate */ 2604*0Sstevel@tonic-gate if (contig) { 2605*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[8]); 2606*0Sstevel@tonic-gate for (i = 0; i < totnpgs; i++) { 2607*0Sstevel@tonic-gate (void) hat_pageunload(ppa[i], HAT_FORCE_PGUNLOAD); 2608*0Sstevel@tonic-gate } 2609*0Sstevel@tonic-gate for (i = 0; i < totnpgs; i++) { 2610*0Sstevel@tonic-gate ppa[i]->p_szc = szc; 2611*0Sstevel@tonic-gate } 2612*0Sstevel@tonic-gate for (i = 0; i < totnpgs; i++) { 2613*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(ppa[i])); 2614*0Sstevel@tonic-gate page_downgrade(ppa[i]); 2615*0Sstevel@tonic-gate } 2616*0Sstevel@tonic-gate if (pszc != NULL) { 2617*0Sstevel@tonic-gate *pszc = szc; 2618*0Sstevel@tonic-gate } 2619*0Sstevel@tonic-gate } 2620*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fullszcpages[9]); 2621*0Sstevel@tonic-gate return (1); 2622*0Sstevel@tonic-gate } 2623*0Sstevel@tonic-gate 2624*0Sstevel@tonic-gate /* 2625*0Sstevel@tonic-gate * Create physically contiguous pages for [vp, off] - [vp, off + 2626*0Sstevel@tonic-gate * page_size(szc)) range and for private segment return them in ppa array. 2627*0Sstevel@tonic-gate * Pages are created either via IO or relocations.
2628*0Sstevel@tonic-gate * 2629*0Sstevel@tonic-gate * Return 1 on success and 0 on failure. 2630*0Sstevel@tonic-gate * 2631*0Sstevel@tonic-gate * If physically contiguous pages already exist for this range return 1 without 2632*0Sstevel@tonic-gate * filling ppa array. Caller initializes ppa[0] as NULL to detect that ppa 2633*0Sstevel@tonic-gate * array wasn't filled. In this case caller fills ppa array via VOP_GETPAGE(). 2634*0Sstevel@tonic-gate */ 2635*0Sstevel@tonic-gate 2636*0Sstevel@tonic-gate static int 2637*0Sstevel@tonic-gate segvn_fill_vp_pages(struct segvn_data *svd, vnode_t *vp, u_offset_t off, 2638*0Sstevel@tonic-gate uint_t szc, page_t **ppa, page_t **ppplist, uint_t *ret_pszc, 2639*0Sstevel@tonic-gate int *downsize) 2640*0Sstevel@tonic-gate 2641*0Sstevel@tonic-gate { 2642*0Sstevel@tonic-gate page_t *pplist = *ppplist; 2643*0Sstevel@tonic-gate size_t pgsz = page_get_pagesize(szc); 2644*0Sstevel@tonic-gate pgcnt_t pages = btop(pgsz); 2645*0Sstevel@tonic-gate ulong_t start_off = off; 2646*0Sstevel@tonic-gate u_offset_t eoff = off + pgsz; 2647*0Sstevel@tonic-gate spgcnt_t nreloc; 2648*0Sstevel@tonic-gate u_offset_t io_off = off; 2649*0Sstevel@tonic-gate size_t io_len; 2650*0Sstevel@tonic-gate page_t *io_pplist = NULL; 2651*0Sstevel@tonic-gate page_t *done_pplist = NULL; 2652*0Sstevel@tonic-gate pgcnt_t pgidx = 0; 2653*0Sstevel@tonic-gate page_t *pp; 2654*0Sstevel@tonic-gate page_t *newpp; 2655*0Sstevel@tonic-gate page_t *targpp; 2656*0Sstevel@tonic-gate int io_err = 0; 2657*0Sstevel@tonic-gate int i; 2658*0Sstevel@tonic-gate pfn_t pfn; 2659*0Sstevel@tonic-gate ulong_t ppages; 2660*0Sstevel@tonic-gate page_t *targ_pplist = NULL; 2661*0Sstevel@tonic-gate page_t *repl_pplist = NULL; 2662*0Sstevel@tonic-gate page_t *tmp_pplist; 2663*0Sstevel@tonic-gate int nios = 0; 2664*0Sstevel@tonic-gate uint_t pszc; 2665*0Sstevel@tonic-gate struct vattr va; 2666*0Sstevel@tonic-gate 2667*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[0]); 2668*0Sstevel@tonic-gate 2669*0Sstevel@tonic-gate ASSERT(szc != 0); 2670*0Sstevel@tonic-gate ASSERT(pplist->p_szc == szc); 2671*0Sstevel@tonic-gate 2672*0Sstevel@tonic-gate /* 2673*0Sstevel@tonic-gate * downsize will be set to 1 only if we fail to lock pages. this will 2674*0Sstevel@tonic-gate * allow subsequent faults to try to relocate the page again. If we 2675*0Sstevel@tonic-gate * fail due to misalignment don't downsize and let the caller map the 2676*0Sstevel@tonic-gate * whole region with small mappings to avoid more faults into the area 2677*0Sstevel@tonic-gate * where we can't get large pages anyway. 2678*0Sstevel@tonic-gate */ 2679*0Sstevel@tonic-gate *downsize = 0; 2680*0Sstevel@tonic-gate 2681*0Sstevel@tonic-gate while (off < eoff) { 2682*0Sstevel@tonic-gate newpp = pplist; 2683*0Sstevel@tonic-gate ASSERT(newpp != NULL); 2684*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(newpp)); 2685*0Sstevel@tonic-gate ASSERT(!PP_ISFREE(newpp)); 2686*0Sstevel@tonic-gate /* 2687*0Sstevel@tonic-gate * we pass NULL for nrelocp to page_lookup_create() 2688*0Sstevel@tonic-gate * so that it doesn't relocate. We relocate here 2689*0Sstevel@tonic-gate * later only after we make sure we can lock all 2690*0Sstevel@tonic-gate * pages in the range we handle and they are all 2691*0Sstevel@tonic-gate * aligned.
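 * (If page_lookup_create() finds an existing page instead, newpp stays on pplist and the existing page is examined below.)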
2692*0Sstevel@tonic-gate */ 2693*0Sstevel@tonic-gate pp = page_lookup_create(vp, off, SE_SHARED, newpp, NULL, 0); 2694*0Sstevel@tonic-gate ASSERT(pp != NULL); 2695*0Sstevel@tonic-gate ASSERT(!PP_ISFREE(pp)); 2696*0Sstevel@tonic-gate ASSERT(pp->p_vnode == vp); 2697*0Sstevel@tonic-gate ASSERT(pp->p_offset == off); 2698*0Sstevel@tonic-gate if (pp == newpp) { 2699*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[1]); 2700*0Sstevel@tonic-gate page_sub(&pplist, pp); 2701*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 2702*0Sstevel@tonic-gate ASSERT(page_iolock_assert(pp)); 2703*0Sstevel@tonic-gate page_list_concat(&io_pplist, &pp); 2704*0Sstevel@tonic-gate off += PAGESIZE; 2705*0Sstevel@tonic-gate continue; 2706*0Sstevel@tonic-gate } 2707*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[2]); 2708*0Sstevel@tonic-gate pfn = page_pptonum(pp); 2709*0Sstevel@tonic-gate pszc = pp->p_szc; 2710*0Sstevel@tonic-gate if (pszc >= szc && targ_pplist == NULL && io_pplist == NULL && 2711*0Sstevel@tonic-gate IS_P2ALIGNED(pfn, pages)) { 2712*0Sstevel@tonic-gate ASSERT(repl_pplist == NULL); 2713*0Sstevel@tonic-gate ASSERT(done_pplist == NULL); 2714*0Sstevel@tonic-gate ASSERT(pplist == *ppplist); 2715*0Sstevel@tonic-gate page_unlock(pp); 2716*0Sstevel@tonic-gate page_free_replacement_page(pplist); 2717*0Sstevel@tonic-gate page_create_putback(pages); 2718*0Sstevel@tonic-gate *ppplist = NULL; 2719*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[3]); 2720*0Sstevel@tonic-gate return (1); 2721*0Sstevel@tonic-gate } 2722*0Sstevel@tonic-gate if (pszc >= szc) { 2723*0Sstevel@tonic-gate page_unlock(pp); 2724*0Sstevel@tonic-gate segvn_faultvnmpss_align_err1++; 2725*0Sstevel@tonic-gate goto out; 2726*0Sstevel@tonic-gate } 2727*0Sstevel@tonic-gate ppages = page_get_pagecnt(pszc); 2728*0Sstevel@tonic-gate if (!IS_P2ALIGNED(pfn, ppages)) { 2729*0Sstevel@tonic-gate ASSERT(pszc > 0); 2730*0Sstevel@tonic-gate /* 2731*0Sstevel@tonic-gate * sizing down to pszc won't help. 2732*0Sstevel@tonic-gate */ 2733*0Sstevel@tonic-gate page_unlock(pp); 2734*0Sstevel@tonic-gate segvn_faultvnmpss_align_err2++; 2735*0Sstevel@tonic-gate goto out; 2736*0Sstevel@tonic-gate } 2737*0Sstevel@tonic-gate pfn = page_pptonum(newpp); 2738*0Sstevel@tonic-gate if (!IS_P2ALIGNED(pfn, ppages)) { 2739*0Sstevel@tonic-gate ASSERT(pszc > 0); 2740*0Sstevel@tonic-gate /* 2741*0Sstevel@tonic-gate * sizing down to pszc won't help. 2742*0Sstevel@tonic-gate */ 2743*0Sstevel@tonic-gate page_unlock(pp); 2744*0Sstevel@tonic-gate segvn_faultvnmpss_align_err3++; 2745*0Sstevel@tonic-gate goto out; 2746*0Sstevel@tonic-gate } 2747*0Sstevel@tonic-gate if (!PAGE_EXCL(pp)) { 2748*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[4]); 2749*0Sstevel@tonic-gate page_unlock(pp); 2750*0Sstevel@tonic-gate *downsize = 1; 2751*0Sstevel@tonic-gate *ret_pszc = pp->p_szc; 2752*0Sstevel@tonic-gate goto out; 2753*0Sstevel@tonic-gate } 2754*0Sstevel@tonic-gate targpp = pp; 2755*0Sstevel@tonic-gate if (io_pplist != NULL) { 2756*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[5]); 2757*0Sstevel@tonic-gate io_len = off - io_off; 2758*0Sstevel@tonic-gate /* 2759*0Sstevel@tonic-gate * Some file systems like NFS don't check EOF 2760*0Sstevel@tonic-gate * conditions in VOP_PAGEIO(). Check it here 2761*0Sstevel@tonic-gate * now that pages are locked SE_EXCL. 
Any file 2762*0Sstevel@tonic-gate * truncation will wait until the pages are 2763*0Sstevel@tonic-gate * unlocked so no need to worry that file will 2764*0Sstevel@tonic-gate * be truncated after we check its size here. 2765*0Sstevel@tonic-gate * XXX fix NFS to remove this check. 2766*0Sstevel@tonic-gate */ 2767*0Sstevel@tonic-gate va.va_mask = AT_SIZE; 2768*0Sstevel@tonic-gate if (VOP_GETATTR(vp, &va, ATTR_HINT, svd->cred) != 0) { 2769*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[6]); 2770*0Sstevel@tonic-gate page_unlock(targpp); 2771*0Sstevel@tonic-gate goto out; 2772*0Sstevel@tonic-gate } 2773*0Sstevel@tonic-gate if (btopr(va.va_size) < btopr(io_off + io_len)) { 2774*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[7]); 2775*0Sstevel@tonic-gate *downsize = 1; 2776*0Sstevel@tonic-gate *ret_pszc = 0; 2777*0Sstevel@tonic-gate page_unlock(targpp); 2778*0Sstevel@tonic-gate goto out; 2779*0Sstevel@tonic-gate } 2780*0Sstevel@tonic-gate io_err = VOP_PAGEIO(vp, io_pplist, io_off, io_len, 2781*0Sstevel@tonic-gate B_READ, svd->cred); 2782*0Sstevel@tonic-gate if (io_err) { 2783*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[8]); 2784*0Sstevel@tonic-gate page_unlock(targpp); 2785*0Sstevel@tonic-gate if (io_err == EDEADLK) { 2786*0Sstevel@tonic-gate segvn_vmpss_pageio_deadlk_err++; 2787*0Sstevel@tonic-gate } 2788*0Sstevel@tonic-gate goto out; 2789*0Sstevel@tonic-gate } 2790*0Sstevel@tonic-gate nios++; 2791*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[9]); 2792*0Sstevel@tonic-gate while (io_pplist != NULL) { 2793*0Sstevel@tonic-gate pp = io_pplist; 2794*0Sstevel@tonic-gate page_sub(&io_pplist, pp); 2795*0Sstevel@tonic-gate ASSERT(page_iolock_assert(pp)); 2796*0Sstevel@tonic-gate page_io_unlock(pp); 2797*0Sstevel@tonic-gate pgidx = (pp->p_offset - start_off) >> 2798*0Sstevel@tonic-gate PAGESHIFT; 2799*0Sstevel@tonic-gate ASSERT(pgidx < pages); 2800*0Sstevel@tonic-gate ppa[pgidx] = pp; 2801*0Sstevel@tonic-gate page_list_concat(&done_pplist, &pp); 2802*0Sstevel@tonic-gate } 2803*0Sstevel@tonic-gate } 2804*0Sstevel@tonic-gate pp = targpp; 2805*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 2806*0Sstevel@tonic-gate ASSERT(pp->p_szc <= pszc); 2807*0Sstevel@tonic-gate if (pszc != 0 && !group_page_trylock(pp, SE_EXCL)) { 2808*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[10]); 2809*0Sstevel@tonic-gate page_unlock(pp); 2810*0Sstevel@tonic-gate *downsize = 1; 2811*0Sstevel@tonic-gate *ret_pszc = pp->p_szc; 2812*0Sstevel@tonic-gate goto out; 2813*0Sstevel@tonic-gate } 2814*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[11]); 2815*0Sstevel@tonic-gate /* 2816*0Sstevel@tonic-gate * page szc could have changed before the entire group was 2817*0Sstevel@tonic-gate * locked. reread page szc.
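 * (p_szc may have been lowered by a concurrent demotion before all of the group's locks were acquired, so the value read earlier can be stale.)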
2818*0Sstevel@tonic-gate */ 2819*0Sstevel@tonic-gate pszc = pp->p_szc; 2820*0Sstevel@tonic-gate ppages = page_get_pagecnt(pszc); 2821*0Sstevel@tonic-gate 2822*0Sstevel@tonic-gate /* link just the roots */ 2823*0Sstevel@tonic-gate page_list_concat(&targ_pplist, &pp); 2824*0Sstevel@tonic-gate page_sub(&pplist, newpp); 2825*0Sstevel@tonic-gate page_list_concat(&repl_pplist, &newpp); 2826*0Sstevel@tonic-gate off += PAGESIZE; 2827*0Sstevel@tonic-gate while (--ppages != 0) { 2828*0Sstevel@tonic-gate newpp = pplist; 2829*0Sstevel@tonic-gate page_sub(&pplist, newpp); 2830*0Sstevel@tonic-gate off += PAGESIZE; 2831*0Sstevel@tonic-gate } 2832*0Sstevel@tonic-gate io_off = off; 2833*0Sstevel@tonic-gate } 2834*0Sstevel@tonic-gate if (io_pplist != NULL) { 2835*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[12]); 2836*0Sstevel@tonic-gate io_len = eoff - io_off; 2837*0Sstevel@tonic-gate va.va_mask = AT_SIZE; 2838*0Sstevel@tonic-gate if (VOP_GETATTR(vp, &va, ATTR_HINT, svd->cred) != 0) { 2839*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[13]); 2840*0Sstevel@tonic-gate goto out; 2841*0Sstevel@tonic-gate } 2842*0Sstevel@tonic-gate if (btopr(va.va_size) < btopr(io_off + io_len)) { 2843*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[14]); 2844*0Sstevel@tonic-gate *downsize = 1; 2845*0Sstevel@tonic-gate *ret_pszc = 0; 2846*0Sstevel@tonic-gate goto out; 2847*0Sstevel@tonic-gate } 2848*0Sstevel@tonic-gate io_err = VOP_PAGEIO(vp, io_pplist, io_off, io_len, 2849*0Sstevel@tonic-gate B_READ, svd->cred); 2850*0Sstevel@tonic-gate if (io_err) { 2851*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[15]); 2852*0Sstevel@tonic-gate if (io_err == EDEADLK) { 2853*0Sstevel@tonic-gate segvn_vmpss_pageio_deadlk_err++; 2854*0Sstevel@tonic-gate } 2855*0Sstevel@tonic-gate goto out; 2856*0Sstevel@tonic-gate } 2857*0Sstevel@tonic-gate nios++; 2858*0Sstevel@tonic-gate while (io_pplist != NULL) { 2859*0Sstevel@tonic-gate pp = io_pplist; 2860*0Sstevel@tonic-gate page_sub(&io_pplist, pp); 2861*0Sstevel@tonic-gate ASSERT(page_iolock_assert(pp)); 2862*0Sstevel@tonic-gate page_io_unlock(pp); 2863*0Sstevel@tonic-gate pgidx = (pp->p_offset - start_off) >> PAGESHIFT; 2864*0Sstevel@tonic-gate ASSERT(pgidx < pages); 2865*0Sstevel@tonic-gate ppa[pgidx] = pp; 2866*0Sstevel@tonic-gate } 2867*0Sstevel@tonic-gate } 2868*0Sstevel@tonic-gate /* 2869*0Sstevel@tonic-gate * we're now bound to succeed or panic. 2870*0Sstevel@tonic-gate * remove pages from done_pplist. it's not needed anymore. 
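 * All required IO has completed and every remaining target page is held EXCL, so the only work left is the page_relocate() loop below, which panics on failure instead of returning an error.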
2871*0Sstevel@tonic-gate */ 2872*0Sstevel@tonic-gate while (done_pplist != NULL) { 2873*0Sstevel@tonic-gate pp = done_pplist; 2874*0Sstevel@tonic-gate page_sub(&done_pplist, pp); 2875*0Sstevel@tonic-gate } 2876*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[16]); 2877*0Sstevel@tonic-gate ASSERT(pplist == NULL); 2878*0Sstevel@tonic-gate *ppplist = NULL; 2879*0Sstevel@tonic-gate while (targ_pplist != NULL) { 2880*0Sstevel@tonic-gate int ret; 2881*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[17]); 2882*0Sstevel@tonic-gate ASSERT(repl_pplist); 2883*0Sstevel@tonic-gate pp = targ_pplist; 2884*0Sstevel@tonic-gate page_sub(&targ_pplist, pp); 2885*0Sstevel@tonic-gate pgidx = (pp->p_offset - start_off) >> PAGESHIFT; 2886*0Sstevel@tonic-gate newpp = repl_pplist; 2887*0Sstevel@tonic-gate page_sub(&repl_pplist, newpp); 2888*0Sstevel@tonic-gate #ifdef DEBUG 2889*0Sstevel@tonic-gate pfn = page_pptonum(pp); 2890*0Sstevel@tonic-gate pszc = pp->p_szc; 2891*0Sstevel@tonic-gate ppages = page_get_pagecnt(pszc); 2892*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pfn, ppages)); 2893*0Sstevel@tonic-gate pfn = page_pptonum(newpp); 2894*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pfn, ppages)); 2895*0Sstevel@tonic-gate ASSERT(P2PHASE(pfn, pages) == pgidx); 2896*0Sstevel@tonic-gate #endif 2897*0Sstevel@tonic-gate nreloc = 0; 2898*0Sstevel@tonic-gate ret = page_relocate(&pp, &newpp, 0, 1, &nreloc, NULL); 2899*0Sstevel@tonic-gate if (ret != 0 || nreloc == 0) { 2900*0Sstevel@tonic-gate panic("segvn_fill_vp_pages: " 2901*0Sstevel@tonic-gate "page_relocate failed"); 2902*0Sstevel@tonic-gate } 2903*0Sstevel@tonic-gate pp = newpp; 2904*0Sstevel@tonic-gate while (nreloc-- != 0) { 2905*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 2906*0Sstevel@tonic-gate ASSERT(pp->p_vnode == vp); 2907*0Sstevel@tonic-gate ASSERT(pgidx == 2908*0Sstevel@tonic-gate ((pp->p_offset - start_off) >> PAGESHIFT)); 2909*0Sstevel@tonic-gate ppa[pgidx++] = pp; 2910*0Sstevel@tonic-gate pp = page_next(pp); 2911*0Sstevel@tonic-gate } 2912*0Sstevel@tonic-gate } 2913*0Sstevel@tonic-gate 2914*0Sstevel@tonic-gate if (svd->type == MAP_PRIVATE) { 2915*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[18]); 2916*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 2917*0Sstevel@tonic-gate ASSERT(ppa[i] != NULL); 2918*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(ppa[i])); 2919*0Sstevel@tonic-gate ASSERT(ppa[i]->p_vnode == vp); 2920*0Sstevel@tonic-gate ASSERT(ppa[i]->p_offset == 2921*0Sstevel@tonic-gate start_off + (i << PAGESHIFT)); 2922*0Sstevel@tonic-gate page_downgrade(ppa[i]); 2923*0Sstevel@tonic-gate } 2924*0Sstevel@tonic-gate ppa[pages] = NULL; 2925*0Sstevel@tonic-gate } else { 2926*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[19]); 2927*0Sstevel@tonic-gate /* 2928*0Sstevel@tonic-gate * the caller will still call VOP_GETPAGE() for shared segments 2929*0Sstevel@tonic-gate * to check FS write permissions. For private segments we map 2930*0Sstevel@tonic-gate * file read only anyway. so no VOP_GETPAGE is needed. 
2931*0Sstevel@tonic-gate */ 2932*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 2933*0Sstevel@tonic-gate ASSERT(ppa[i] != NULL); 2934*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(ppa[i])); 2935*0Sstevel@tonic-gate ASSERT(ppa[i]->p_vnode == vp); 2936*0Sstevel@tonic-gate ASSERT(ppa[i]->p_offset == 2937*0Sstevel@tonic-gate start_off + (i << PAGESHIFT)); 2938*0Sstevel@tonic-gate page_unlock(ppa[i]); 2939*0Sstevel@tonic-gate } 2940*0Sstevel@tonic-gate ppa[0] = NULL; 2941*0Sstevel@tonic-gate } 2942*0Sstevel@tonic-gate 2943*0Sstevel@tonic-gate return (1); 2944*0Sstevel@tonic-gate out: 2945*0Sstevel@tonic-gate /* 2946*0Sstevel@tonic-gate * Do the cleanup. Unlock target pages we didn't relocate. They are 2947*0Sstevel@tonic-gate * linked on targ_pplist by root pages. reassemble unused replacement 2948*0Sstevel@tonic-gate * and io pages back to pplist. 2949*0Sstevel@tonic-gate */ 2950*0Sstevel@tonic-gate if (io_pplist != NULL) { 2951*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[20]); 2952*0Sstevel@tonic-gate pp = io_pplist; 2953*0Sstevel@tonic-gate do { 2954*0Sstevel@tonic-gate ASSERT(pp->p_vnode == vp); 2955*0Sstevel@tonic-gate ASSERT(pp->p_offset == io_off); 2956*0Sstevel@tonic-gate ASSERT(page_iolock_assert(pp)); 2957*0Sstevel@tonic-gate page_io_unlock(pp); 2958*0Sstevel@tonic-gate page_hashout(pp, NULL); 2959*0Sstevel@tonic-gate io_off += PAGESIZE; 2960*0Sstevel@tonic-gate } while ((pp = pp->p_next) != io_pplist); 2961*0Sstevel@tonic-gate page_list_concat(&io_pplist, &pplist); 2962*0Sstevel@tonic-gate pplist = io_pplist; 2963*0Sstevel@tonic-gate } 2964*0Sstevel@tonic-gate tmp_pplist = NULL; 2965*0Sstevel@tonic-gate while (targ_pplist != NULL) { 2966*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[21]); 2967*0Sstevel@tonic-gate pp = targ_pplist; 2968*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 2969*0Sstevel@tonic-gate page_sub(&targ_pplist, pp); 2970*0Sstevel@tonic-gate 2971*0Sstevel@tonic-gate pszc = pp->p_szc; 2972*0Sstevel@tonic-gate ppages = page_get_pagecnt(pszc); 2973*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(page_pptonum(pp), ppages)); 2974*0Sstevel@tonic-gate 2975*0Sstevel@tonic-gate if (pszc != 0) { 2976*0Sstevel@tonic-gate group_page_unlock(pp); 2977*0Sstevel@tonic-gate } 2978*0Sstevel@tonic-gate page_unlock(pp); 2979*0Sstevel@tonic-gate 2980*0Sstevel@tonic-gate pp = repl_pplist; 2981*0Sstevel@tonic-gate ASSERT(pp != NULL); 2982*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 2983*0Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 2984*0Sstevel@tonic-gate page_sub(&repl_pplist, pp); 2985*0Sstevel@tonic-gate 2986*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(page_pptonum(pp), ppages)); 2987*0Sstevel@tonic-gate 2988*0Sstevel@tonic-gate /* relink replacement page */ 2989*0Sstevel@tonic-gate page_list_concat(&tmp_pplist, &pp); 2990*0Sstevel@tonic-gate while (--ppages != 0) { 2991*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[22]); 2992*0Sstevel@tonic-gate pp = page_next(pp); 2993*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 2994*0Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 2995*0Sstevel@tonic-gate page_list_concat(&tmp_pplist, &pp); 2996*0Sstevel@tonic-gate } 2997*0Sstevel@tonic-gate } 2998*0Sstevel@tonic-gate if (tmp_pplist != NULL) { 2999*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[23]); 3000*0Sstevel@tonic-gate page_list_concat(&tmp_pplist, &pplist); 3001*0Sstevel@tonic-gate pplist = tmp_pplist; 3002*0Sstevel@tonic-gate } 3003*0Sstevel@tonic-gate /* 3004*0Sstevel@tonic-gate * at this point all pages are either on done_pplist or 3005*0Sstevel@tonic-gate * 
pplist. They can't be all on done_pplist otherwise 3006*0Sstevel@tonic-gate * we'd've been done. 3007*0Sstevel@tonic-gate */ 3008*0Sstevel@tonic-gate ASSERT(pplist != NULL); 3009*0Sstevel@tonic-gate if (nios != 0) { 3010*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[24]); 3011*0Sstevel@tonic-gate pp = pplist; 3012*0Sstevel@tonic-gate do { 3013*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[25]); 3014*0Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 3015*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 3016*0Sstevel@tonic-gate ASSERT(pp->p_vnode != vp); 3017*0Sstevel@tonic-gate pp->p_szc = 0; 3018*0Sstevel@tonic-gate } while ((pp = pp->p_next) != pplist); 3019*0Sstevel@tonic-gate 3020*0Sstevel@tonic-gate pp = done_pplist; 3021*0Sstevel@tonic-gate do { 3022*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[26]); 3023*0Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 3024*0Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 3025*0Sstevel@tonic-gate ASSERT(pp->p_vnode == vp); 3026*0Sstevel@tonic-gate pp->p_szc = 0; 3027*0Sstevel@tonic-gate } while ((pp = pp->p_next) != done_pplist); 3028*0Sstevel@tonic-gate 3029*0Sstevel@tonic-gate while (pplist != NULL) { 3030*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[27]); 3031*0Sstevel@tonic-gate pp = pplist; 3032*0Sstevel@tonic-gate page_sub(&pplist, pp); 3033*0Sstevel@tonic-gate page_free(pp, 0); 3034*0Sstevel@tonic-gate } 3035*0Sstevel@tonic-gate 3036*0Sstevel@tonic-gate while (done_pplist != NULL) { 3037*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[28]); 3038*0Sstevel@tonic-gate pp = done_pplist; 3039*0Sstevel@tonic-gate page_sub(&done_pplist, pp); 3040*0Sstevel@tonic-gate page_unlock(pp); 3041*0Sstevel@tonic-gate } 3042*0Sstevel@tonic-gate *ppplist = NULL; 3043*0Sstevel@tonic-gate return (0); 3044*0Sstevel@tonic-gate } 3045*0Sstevel@tonic-gate ASSERT(pplist == *ppplist); 3046*0Sstevel@tonic-gate if (io_err) { 3047*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[29]); 3048*0Sstevel@tonic-gate /* 3049*0Sstevel@tonic-gate * don't downsize on io error. 3050*0Sstevel@tonic-gate * see if vop_getpage succeeds. 3051*0Sstevel@tonic-gate * pplist may still be used in this case 3052*0Sstevel@tonic-gate * for relocations. 
3053*0Sstevel@tonic-gate */ 3054*0Sstevel@tonic-gate return (0); 3055*0Sstevel@tonic-gate } 3056*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fill_vp_pages[30]); 3057*0Sstevel@tonic-gate page_free_replacement_page(pplist); 3058*0Sstevel@tonic-gate page_create_putback(pages); 3059*0Sstevel@tonic-gate *ppplist = NULL; 3060*0Sstevel@tonic-gate return (0); 3061*0Sstevel@tonic-gate } 3062*0Sstevel@tonic-gate 3063*0Sstevel@tonic-gate int segvn_anypgsz = 0; 3064*0Sstevel@tonic-gate 3065*0Sstevel@tonic-gate #define SEGVN_RESTORE_SOFTLOCK(type, pages) \ 3066*0Sstevel@tonic-gate if ((type) == F_SOFTLOCK) { \ 3067*0Sstevel@tonic-gate mutex_enter(&freemem_lock); \ 3068*0Sstevel@tonic-gate availrmem += (pages); \ 3069*0Sstevel@tonic-gate segvn_pages_locked -= (pages); \ 3070*0Sstevel@tonic-gate svd->softlockcnt -= (pages); \ 3071*0Sstevel@tonic-gate mutex_exit(&freemem_lock); \ 3072*0Sstevel@tonic-gate } 3073*0Sstevel@tonic-gate 3074*0Sstevel@tonic-gate #define SEGVN_UPDATE_MODBITS(ppa, pages, rw, prot, vpprot) \ 3075*0Sstevel@tonic-gate if (IS_VMODSORT((ppa)[0]->p_vnode)) { \ 3076*0Sstevel@tonic-gate if ((rw) == S_WRITE) { \ 3077*0Sstevel@tonic-gate for (i = 0; i < (pages); i++) { \ 3078*0Sstevel@tonic-gate ASSERT((ppa)[i]->p_vnode == \ 3079*0Sstevel@tonic-gate (ppa)[0]->p_vnode); \ 3080*0Sstevel@tonic-gate hat_setmod((ppa)[i]); \ 3081*0Sstevel@tonic-gate } \ 3082*0Sstevel@tonic-gate } else if ((rw) != S_OTHER && \ 3083*0Sstevel@tonic-gate ((prot) & (vpprot) & PROT_WRITE)) { \ 3084*0Sstevel@tonic-gate for (i = 0; i < (pages); i++) { \ 3085*0Sstevel@tonic-gate ASSERT((ppa)[i]->p_vnode == \ 3086*0Sstevel@tonic-gate (ppa)[0]->p_vnode); \ 3087*0Sstevel@tonic-gate if (!hat_ismod((ppa)[i])) { \ 3088*0Sstevel@tonic-gate prot &= ~PROT_WRITE; \ 3089*0Sstevel@tonic-gate break; \ 3090*0Sstevel@tonic-gate } \ 3091*0Sstevel@tonic-gate } \ 3092*0Sstevel@tonic-gate } \ 3093*0Sstevel@tonic-gate } 3094*0Sstevel@tonic-gate 3095*0Sstevel@tonic-gate #ifdef VM_STATS 3096*0Sstevel@tonic-gate 3097*0Sstevel@tonic-gate #define SEGVN_VMSTAT_FLTVNPAGES(idx) \ 3098*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltvnpages[(idx)]); 3099*0Sstevel@tonic-gate 3100*0Sstevel@tonic-gate #else /* VM_STATS */ 3101*0Sstevel@tonic-gate 3102*0Sstevel@tonic-gate #define SEGVN_VMSTAT_FLTVNPAGES(idx) 3103*0Sstevel@tonic-gate 3104*0Sstevel@tonic-gate #endif 3105*0Sstevel@tonic-gate 3106*0Sstevel@tonic-gate static faultcode_t 3107*0Sstevel@tonic-gate segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, 3108*0Sstevel@tonic-gate caddr_t lpgeaddr, enum fault_type type, enum seg_rw rw, caddr_t addr, 3109*0Sstevel@tonic-gate caddr_t eaddr, int brkcow) 3110*0Sstevel@tonic-gate { 3111*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 3112*0Sstevel@tonic-gate struct anon_map *amp = svd->amp; 3113*0Sstevel@tonic-gate uchar_t segtype = svd->type; 3114*0Sstevel@tonic-gate uint_t szc = seg->s_szc; 3115*0Sstevel@tonic-gate size_t pgsz = page_get_pagesize(szc); 3116*0Sstevel@tonic-gate size_t maxpgsz = pgsz; 3117*0Sstevel@tonic-gate pgcnt_t pages = btop(pgsz); 3118*0Sstevel@tonic-gate pgcnt_t maxpages = pages; 3119*0Sstevel@tonic-gate size_t ppasize = (pages + 1) * sizeof (page_t *); 3120*0Sstevel@tonic-gate caddr_t a = lpgaddr; 3121*0Sstevel@tonic-gate caddr_t maxlpgeaddr = lpgeaddr; 3122*0Sstevel@tonic-gate u_offset_t off = svd->offset + (uintptr_t)(a - seg->s_base); 3123*0Sstevel@tonic-gate ulong_t aindx = svd->anon_index + seg_page(seg, a); 3124*0Sstevel@tonic-gate struct vpage *vpage = (svd->vpage != NULL) 
? 3125*0Sstevel@tonic-gate &svd->vpage[seg_page(seg, a)] : NULL; 3126*0Sstevel@tonic-gate vnode_t *vp = svd->vp; 3127*0Sstevel@tonic-gate page_t **ppa; 3128*0Sstevel@tonic-gate uint_t pszc; 3129*0Sstevel@tonic-gate size_t ppgsz; 3130*0Sstevel@tonic-gate pgcnt_t ppages; 3131*0Sstevel@tonic-gate faultcode_t err = 0; 3132*0Sstevel@tonic-gate int ierr; 3133*0Sstevel@tonic-gate int vop_size_err = 0; 3134*0Sstevel@tonic-gate uint_t protchk, prot, vpprot; 3135*0Sstevel@tonic-gate ulong_t i; 3136*0Sstevel@tonic-gate int hat_flag = (type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD; 3137*0Sstevel@tonic-gate anon_sync_obj_t an_cookie; 3138*0Sstevel@tonic-gate enum seg_rw arw; 3139*0Sstevel@tonic-gate int alloc_failed = 0; 3140*0Sstevel@tonic-gate int adjszc_chk; 3141*0Sstevel@tonic-gate struct vattr va; 3142*0Sstevel@tonic-gate int xhat = 0; 3143*0Sstevel@tonic-gate page_t *pplist; 3144*0Sstevel@tonic-gate pfn_t pfn; 3145*0Sstevel@tonic-gate int physcontig; 3146*0Sstevel@tonic-gate int upgrdfail; 3147*0Sstevel@tonic-gate int segvn_anypgsz_vnode = 0; /* for now map vnode with 2 page sizes */ 3148*0Sstevel@tonic-gate 3149*0Sstevel@tonic-gate ASSERT(szc != 0); 3150*0Sstevel@tonic-gate ASSERT(vp != NULL); 3151*0Sstevel@tonic-gate ASSERT(brkcow == 0 || amp != NULL); 3152*0Sstevel@tonic-gate ASSERT(enable_mbit_wa == 0); /* no mbit simulations with large pages */ 3153*0Sstevel@tonic-gate ASSERT(!(svd->flags & MAP_NORESERVE)); 3154*0Sstevel@tonic-gate ASSERT(type != F_SOFTUNLOCK); 3155*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(a, maxpgsz)); 3156*0Sstevel@tonic-gate ASSERT(amp == NULL || IS_P2ALIGNED(aindx, maxpages)); 3157*0Sstevel@tonic-gate ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); 3158*0Sstevel@tonic-gate ASSERT(seg->s_szc < NBBY * sizeof (int)); 3159*0Sstevel@tonic-gate 3160*0Sstevel@tonic-gate VM_STAT_COND_ADD(type == F_SOFTLOCK, segvnvmstats.fltvnpages[0]); 3161*0Sstevel@tonic-gate VM_STAT_COND_ADD(type != F_SOFTLOCK, segvnvmstats.fltvnpages[1]); 3162*0Sstevel@tonic-gate 3163*0Sstevel@tonic-gate if (svd->flags & MAP_TEXT) { 3164*0Sstevel@tonic-gate hat_flag |= HAT_LOAD_TEXT; 3165*0Sstevel@tonic-gate } 3166*0Sstevel@tonic-gate 3167*0Sstevel@tonic-gate if (svd->pageprot) { 3168*0Sstevel@tonic-gate switch (rw) { 3169*0Sstevel@tonic-gate case S_READ: 3170*0Sstevel@tonic-gate protchk = PROT_READ; 3171*0Sstevel@tonic-gate break; 3172*0Sstevel@tonic-gate case S_WRITE: 3173*0Sstevel@tonic-gate protchk = PROT_WRITE; 3174*0Sstevel@tonic-gate break; 3175*0Sstevel@tonic-gate case S_EXEC: 3176*0Sstevel@tonic-gate protchk = PROT_EXEC; 3177*0Sstevel@tonic-gate break; 3178*0Sstevel@tonic-gate case S_OTHER: 3179*0Sstevel@tonic-gate default: 3180*0Sstevel@tonic-gate protchk = PROT_READ | PROT_WRITE | PROT_EXEC; 3181*0Sstevel@tonic-gate break; 3182*0Sstevel@tonic-gate } 3183*0Sstevel@tonic-gate } else { 3184*0Sstevel@tonic-gate prot = svd->prot; 3185*0Sstevel@tonic-gate /* caller has already done segment level protection check. 
*/ 3186*0Sstevel@tonic-gate } 3187*0Sstevel@tonic-gate 3188*0Sstevel@tonic-gate if (seg->s_as->a_hat != hat) { 3189*0Sstevel@tonic-gate xhat = 1; 3190*0Sstevel@tonic-gate } 3191*0Sstevel@tonic-gate 3192*0Sstevel@tonic-gate if (rw == S_WRITE && segtype == MAP_PRIVATE) { 3193*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(2); 3194*0Sstevel@tonic-gate arw = S_READ; 3195*0Sstevel@tonic-gate } else { 3196*0Sstevel@tonic-gate arw = rw; 3197*0Sstevel@tonic-gate } 3198*0Sstevel@tonic-gate 3199*0Sstevel@tonic-gate ppa = kmem_alloc(ppasize, KM_SLEEP); 3200*0Sstevel@tonic-gate 3201*0Sstevel@tonic-gate VM_STAT_COND_ADD(amp != NULL, segvnvmstats.fltvnpages[3]); 3202*0Sstevel@tonic-gate 3203*0Sstevel@tonic-gate for (;;) { 3204*0Sstevel@tonic-gate adjszc_chk = 0; 3205*0Sstevel@tonic-gate for (; a < lpgeaddr; a += pgsz, off += pgsz, aindx += pages) { 3206*0Sstevel@tonic-gate if (adjszc_chk) { 3207*0Sstevel@tonic-gate while (szc < seg->s_szc) { 3208*0Sstevel@tonic-gate uintptr_t e; 3209*0Sstevel@tonic-gate uint_t tszc; 3210*0Sstevel@tonic-gate tszc = segvn_anypgsz_vnode ? szc + 1 : 3211*0Sstevel@tonic-gate seg->s_szc; 3212*0Sstevel@tonic-gate ppgsz = page_get_pagesize(tszc); 3213*0Sstevel@tonic-gate if (!IS_P2ALIGNED(a, ppgsz) || 3214*0Sstevel@tonic-gate ((alloc_failed >> tszc) & 3215*0Sstevel@tonic-gate 0x1)) { 3216*0Sstevel@tonic-gate break; 3217*0Sstevel@tonic-gate } 3218*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(4); 3219*0Sstevel@tonic-gate szc = tszc; 3220*0Sstevel@tonic-gate pgsz = ppgsz; 3221*0Sstevel@tonic-gate pages = btop(pgsz); 3222*0Sstevel@tonic-gate e = P2ROUNDUP((uintptr_t)eaddr, pgsz); 3223*0Sstevel@tonic-gate lpgeaddr = (caddr_t)e; 3224*0Sstevel@tonic-gate } 3225*0Sstevel@tonic-gate } 3226*0Sstevel@tonic-gate 3227*0Sstevel@tonic-gate again: 3228*0Sstevel@tonic-gate if (IS_P2ALIGNED(a, maxpgsz) && amp != NULL) { 3229*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(aindx, maxpages)); 3230*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 3231*0Sstevel@tonic-gate anon_array_enter(amp, aindx, &an_cookie); 3232*0Sstevel@tonic-gate if (anon_get_ptr(amp->ahp, aindx) != NULL) { 3233*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(5); 3234*0Sstevel@tonic-gate if (anon_pages(amp->ahp, aindx, 3235*0Sstevel@tonic-gate maxpages) != maxpages) { 3236*0Sstevel@tonic-gate panic("segvn_fault_vnodepages:" 3237*0Sstevel@tonic-gate " empty anon slots\n"); 3238*0Sstevel@tonic-gate } 3239*0Sstevel@tonic-gate anon_array_exit(&an_cookie); 3240*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 3241*0Sstevel@tonic-gate err = segvn_fault_anonpages(hat, seg, 3242*0Sstevel@tonic-gate a, a + maxpgsz, type, rw, 3243*0Sstevel@tonic-gate MAX(a, addr), 3244*0Sstevel@tonic-gate MIN(a + maxpgsz, eaddr), brkcow); 3245*0Sstevel@tonic-gate if (err != 0) { 3246*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(6); 3247*0Sstevel@tonic-gate goto out; 3248*0Sstevel@tonic-gate } 3249*0Sstevel@tonic-gate if (szc < seg->s_szc) { 3250*0Sstevel@tonic-gate szc = seg->s_szc; 3251*0Sstevel@tonic-gate pgsz = maxpgsz; 3252*0Sstevel@tonic-gate pages = maxpages; 3253*0Sstevel@tonic-gate lpgeaddr = maxlpgeaddr; 3254*0Sstevel@tonic-gate } 3255*0Sstevel@tonic-gate goto next; 3256*0Sstevel@tonic-gate } else if (anon_pages(amp->ahp, aindx, 3257*0Sstevel@tonic-gate maxpages)) { 3258*0Sstevel@tonic-gate panic("segvn_fault_vnodepages:" 3259*0Sstevel@tonic-gate " non empty anon slots\n"); 3260*0Sstevel@tonic-gate } else { 3261*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(7); 3262*0Sstevel@tonic-gate anon_array_exit(&an_cookie); 3263*0Sstevel@tonic-gate 
ANON_LOCK_EXIT(&->a_rwlock); 3264*0Sstevel@tonic-gate } 3265*0Sstevel@tonic-gate } 3266*0Sstevel@tonic-gate ASSERT(!brkcow || IS_P2ALIGNED(a, maxpgsz)); 3267*0Sstevel@tonic-gate 3268*0Sstevel@tonic-gate if (svd->pageprot != 0 && IS_P2ALIGNED(a, maxpgsz)) { 3269*0Sstevel@tonic-gate ASSERT(vpage != NULL); 3270*0Sstevel@tonic-gate prot = VPP_PROT(vpage); 3271*0Sstevel@tonic-gate ASSERT(sameprot(seg, a, maxpgsz)); 3272*0Sstevel@tonic-gate if ((prot & protchk) == 0) { 3273*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(8); 3274*0Sstevel@tonic-gate err = FC_PROT; 3275*0Sstevel@tonic-gate goto out; 3276*0Sstevel@tonic-gate } 3277*0Sstevel@tonic-gate } 3278*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 3279*0Sstevel@tonic-gate mutex_enter(&freemem_lock); 3280*0Sstevel@tonic-gate if (availrmem < tune.t_minarmem + pages) { 3281*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 3282*0Sstevel@tonic-gate err = FC_MAKE_ERR(ENOMEM); 3283*0Sstevel@tonic-gate goto out; 3284*0Sstevel@tonic-gate } else { 3285*0Sstevel@tonic-gate availrmem -= pages; 3286*0Sstevel@tonic-gate segvn_pages_locked += pages; 3287*0Sstevel@tonic-gate svd->softlockcnt += pages; 3288*0Sstevel@tonic-gate } 3289*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 3290*0Sstevel@tonic-gate } 3291*0Sstevel@tonic-gate 3292*0Sstevel@tonic-gate pplist = NULL; 3293*0Sstevel@tonic-gate physcontig = 0; 3294*0Sstevel@tonic-gate ppa[0] = NULL; 3295*0Sstevel@tonic-gate if (!brkcow && szc && 3296*0Sstevel@tonic-gate !page_exists_physcontig(vp, off, szc, 3297*0Sstevel@tonic-gate segtype == MAP_PRIVATE ? ppa : NULL)) { 3298*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(9); 3299*0Sstevel@tonic-gate if (page_alloc_pages(seg, a, &pplist, NULL, 3300*0Sstevel@tonic-gate szc, 0)) { 3301*0Sstevel@tonic-gate SEGVN_RESTORE_SOFTLOCK(type, pages); 3302*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(10); 3303*0Sstevel@tonic-gate pszc = 0; 3304*0Sstevel@tonic-gate ierr = -1; 3305*0Sstevel@tonic-gate alloc_failed |= (1 << szc); 3306*0Sstevel@tonic-gate break; 3307*0Sstevel@tonic-gate } 3308*0Sstevel@tonic-gate if (vp->v_mpssdata == SEGVN_PAGEIO) { 3309*0Sstevel@tonic-gate int downsize; 3310*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(11); 3311*0Sstevel@tonic-gate physcontig = segvn_fill_vp_pages(svd, 3312*0Sstevel@tonic-gate vp, off, szc, ppa, &pplist, 3313*0Sstevel@tonic-gate &pszc, &downsize); 3314*0Sstevel@tonic-gate ASSERT(!physcontig || pplist == NULL); 3315*0Sstevel@tonic-gate if (!physcontig && downsize) { 3316*0Sstevel@tonic-gate SEGVN_RESTORE_SOFTLOCK(type, 3317*0Sstevel@tonic-gate pages); 3318*0Sstevel@tonic-gate ASSERT(pplist == NULL); 3319*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(12); 3320*0Sstevel@tonic-gate ierr = -1; 3321*0Sstevel@tonic-gate break; 3322*0Sstevel@tonic-gate } 3323*0Sstevel@tonic-gate ASSERT(!physcontig || 3324*0Sstevel@tonic-gate segtype == MAP_PRIVATE || 3325*0Sstevel@tonic-gate ppa[0] == NULL); 3326*0Sstevel@tonic-gate if (physcontig && ppa[0] == NULL) { 3327*0Sstevel@tonic-gate physcontig = 0; 3328*0Sstevel@tonic-gate } 3329*0Sstevel@tonic-gate } 3330*0Sstevel@tonic-gate } else if (!brkcow && szc && ppa[0] != NULL) { 3331*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(13); 3332*0Sstevel@tonic-gate ASSERT(segtype == MAP_PRIVATE); 3333*0Sstevel@tonic-gate physcontig = 1; 3334*0Sstevel@tonic-gate } 3335*0Sstevel@tonic-gate 3336*0Sstevel@tonic-gate if (!physcontig) { 3337*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(14); 3338*0Sstevel@tonic-gate ppa[0] = NULL; 3339*0Sstevel@tonic-gate ierr = VOP_GETPAGE(vp, (offset_t)off, pgsz, 3340*0Sstevel@tonic-gate 
&vpprot, ppa, pgsz, seg, a, arw, 3341*0Sstevel@tonic-gate svd->cred); 3342*0Sstevel@tonic-gate if (segtype == MAP_PRIVATE) { 3343*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(15); 3344*0Sstevel@tonic-gate vpprot &= ~PROT_WRITE; 3345*0Sstevel@tonic-gate } 3346*0Sstevel@tonic-gate } else { 3347*0Sstevel@tonic-gate ASSERT(segtype == MAP_PRIVATE); 3348*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(16); 3349*0Sstevel@tonic-gate vpprot = PROT_ALL & ~PROT_WRITE; 3350*0Sstevel@tonic-gate ierr = 0; 3351*0Sstevel@tonic-gate } 3352*0Sstevel@tonic-gate 3353*0Sstevel@tonic-gate if (ierr != 0) { 3354*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(17); 3355*0Sstevel@tonic-gate if (pplist != NULL) { 3356*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(18); 3357*0Sstevel@tonic-gate page_free_replacement_page(pplist); 3358*0Sstevel@tonic-gate page_create_putback(pages); 3359*0Sstevel@tonic-gate } 3360*0Sstevel@tonic-gate SEGVN_RESTORE_SOFTLOCK(type, pages); 3361*0Sstevel@tonic-gate if (a + pgsz <= eaddr) { 3362*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(19); 3363*0Sstevel@tonic-gate err = FC_MAKE_ERR(ierr); 3364*0Sstevel@tonic-gate goto out; 3365*0Sstevel@tonic-gate } 3366*0Sstevel@tonic-gate va.va_mask = AT_SIZE; 3367*0Sstevel@tonic-gate if (VOP_GETATTR(vp, &va, 0, svd->cred) != 0) { 3368*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(20); 3369*0Sstevel@tonic-gate err = FC_MAKE_ERR(EIO); 3370*0Sstevel@tonic-gate goto out; 3371*0Sstevel@tonic-gate } 3372*0Sstevel@tonic-gate if (btopr(va.va_size) >= btopr(off + pgsz)) { 3373*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(21); 3374*0Sstevel@tonic-gate err = FC_MAKE_ERR(EIO); 3375*0Sstevel@tonic-gate goto out; 3376*0Sstevel@tonic-gate } 3377*0Sstevel@tonic-gate if (btopr(va.va_size) < 3378*0Sstevel@tonic-gate btopr(off + (eaddr - a))) { 3379*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(22); 3380*0Sstevel@tonic-gate err = FC_MAKE_ERR(EIO); 3381*0Sstevel@tonic-gate goto out; 3382*0Sstevel@tonic-gate } 3383*0Sstevel@tonic-gate if (brkcow || type == F_SOFTLOCK) { 3384*0Sstevel@tonic-gate /* can't reduce map area */ 3385*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(23); 3386*0Sstevel@tonic-gate vop_size_err = 1; 3387*0Sstevel@tonic-gate goto out; 3388*0Sstevel@tonic-gate } 3389*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(24); 3390*0Sstevel@tonic-gate ASSERT(szc != 0); 3391*0Sstevel@tonic-gate pszc = 0; 3392*0Sstevel@tonic-gate ierr = -1; 3393*0Sstevel@tonic-gate break; 3394*0Sstevel@tonic-gate } 3395*0Sstevel@tonic-gate 3396*0Sstevel@tonic-gate if (amp != NULL) { 3397*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 3398*0Sstevel@tonic-gate anon_array_enter(amp, aindx, &an_cookie); 3399*0Sstevel@tonic-gate } 3400*0Sstevel@tonic-gate if (amp != NULL && 3401*0Sstevel@tonic-gate anon_get_ptr(amp->ahp, aindx) != NULL) { 3402*0Sstevel@tonic-gate ulong_t taindx = P2ALIGN(aindx, maxpages); 3403*0Sstevel@tonic-gate 3404*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(25); 3405*0Sstevel@tonic-gate if (anon_pages(amp->ahp, taindx, maxpages) != 3406*0Sstevel@tonic-gate maxpages) { 3407*0Sstevel@tonic-gate panic("segvn_fault_vnodepages:" 3408*0Sstevel@tonic-gate " empty anon slots\n"); 3409*0Sstevel@tonic-gate } 3410*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 3411*0Sstevel@tonic-gate page_unlock(ppa[i]); 3412*0Sstevel@tonic-gate } 3413*0Sstevel@tonic-gate anon_array_exit(&an_cookie); 3414*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 3415*0Sstevel@tonic-gate if (pplist != NULL) { 3416*0Sstevel@tonic-gate page_free_replacement_page(pplist); 3417*0Sstevel@tonic-gate 
page_create_putback(pages); 3418*0Sstevel@tonic-gate } 3419*0Sstevel@tonic-gate SEGVN_RESTORE_SOFTLOCK(type, pages); 3420*0Sstevel@tonic-gate if (szc < seg->s_szc) { 3421*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(26); 3422*0Sstevel@tonic-gate /* 3423*0Sstevel@tonic-gate * For private segments SOFTLOCK 3424*0Sstevel@tonic-gate * either always breaks cow (any rw 3425*0Sstevel@tonic-gate * type except S_READ_NOCOW) or 3426*0Sstevel@tonic-gate * address space is locked as writer 3427*0Sstevel@tonic-gate * (S_READ_NOCOW case) and anon slots 3428*0Sstevel@tonic-gate * can't show up on second check. 3429*0Sstevel@tonic-gate * Therefore if we are here for 3430*0Sstevel@tonic-gate * SOFTLOCK case it must be a cow 3431*0Sstevel@tonic-gate * break but cow break never reduces 3432*0Sstevel@tonic-gate * szc. Thus the assert below. 3433*0Sstevel@tonic-gate */ 3434*0Sstevel@tonic-gate ASSERT(!brkcow && type != F_SOFTLOCK); 3435*0Sstevel@tonic-gate pszc = seg->s_szc; 3436*0Sstevel@tonic-gate ierr = -2; 3437*0Sstevel@tonic-gate break; 3438*0Sstevel@tonic-gate } 3439*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(a, maxpgsz)); 3440*0Sstevel@tonic-gate goto again; 3441*0Sstevel@tonic-gate } 3442*0Sstevel@tonic-gate #ifdef DEBUG 3443*0Sstevel@tonic-gate if (amp != NULL) { 3444*0Sstevel@tonic-gate ulong_t taindx = P2ALIGN(aindx, maxpages); 3445*0Sstevel@tonic-gate ASSERT(!anon_pages(amp->ahp, taindx, maxpages)); 3446*0Sstevel@tonic-gate } 3447*0Sstevel@tonic-gate #endif /* DEBUG */ 3448*0Sstevel@tonic-gate 3449*0Sstevel@tonic-gate if (brkcow) { 3450*0Sstevel@tonic-gate ASSERT(amp != NULL); 3451*0Sstevel@tonic-gate ASSERT(pplist == NULL); 3452*0Sstevel@tonic-gate ASSERT(szc == seg->s_szc); 3453*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(a, maxpgsz)); 3454*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(aindx, maxpages)); 3455*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(27); 3456*0Sstevel@tonic-gate ierr = anon_map_privatepages(amp, aindx, szc, 3457*0Sstevel@tonic-gate seg, a, prot, ppa, vpage, segvn_anypgsz, 3458*0Sstevel@tonic-gate svd->cred); 3459*0Sstevel@tonic-gate if (ierr != 0) { 3460*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(28); 3461*0Sstevel@tonic-gate anon_array_exit(&an_cookie); 3462*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 3463*0Sstevel@tonic-gate SEGVN_RESTORE_SOFTLOCK(type, pages); 3464*0Sstevel@tonic-gate err = FC_MAKE_ERR(ierr); 3465*0Sstevel@tonic-gate goto out; 3466*0Sstevel@tonic-gate } 3467*0Sstevel@tonic-gate 3468*0Sstevel@tonic-gate ASSERT(!IS_VMODSORT(ppa[0]->p_vnode)); 3469*0Sstevel@tonic-gate /* 3470*0Sstevel@tonic-gate * p_szc can't be changed for locked 3471*0Sstevel@tonic-gate * swapfs pages. 3472*0Sstevel@tonic-gate */ 3473*0Sstevel@tonic-gate hat_memload_array(hat, a, pgsz, ppa, prot, 3474*0Sstevel@tonic-gate hat_flag); 3475*0Sstevel@tonic-gate 3476*0Sstevel@tonic-gate if (!(hat_flag & HAT_LOAD_LOCK)) { 3477*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(29); 3478*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 3479*0Sstevel@tonic-gate page_unlock(ppa[i]); 3480*0Sstevel@tonic-gate } 3481*0Sstevel@tonic-gate } 3482*0Sstevel@tonic-gate anon_array_exit(&an_cookie); 3483*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 3484*0Sstevel@tonic-gate goto next; 3485*0Sstevel@tonic-gate } 3486*0Sstevel@tonic-gate 3487*0Sstevel@tonic-gate pfn = page_pptonum(ppa[0]); 3488*0Sstevel@tonic-gate /* 3489*0Sstevel@tonic-gate * hat_page_demote() needs an EXCl lock on one of 3490*0Sstevel@tonic-gate * constituent page_t's and it decreases root's p_szc 3491*0Sstevel@tonic-gate * last. 
This means if root's p_szc is equal to szc and 3492*0Sstevel@tonic-gate * all its constituent pages are locked 3493*0Sstevel@tonic-gate * hat_page_demote() that could have changed p_szc to 3494*0Sstevel@tonic-gate * szc is already done and no new hat_page_demote() 3495*0Sstevel@tonic-gate * can start for this large page. 3496*0Sstevel@tonic-gate */ 3497*0Sstevel@tonic-gate 3498*0Sstevel@tonic-gate /* 3499*0Sstevel@tonic-gate * we need to make sure same mapping size is used for 3500*0Sstevel@tonic-gate * the same address range if there's a possibility the 3501*0Sstevel@tonic-gate * address is already mapped because hat layer panics 3502*0Sstevel@tonic-gate * when translation is loaded for the range already 3503*0Sstevel@tonic-gate * mapped with a different page size. We achieve it 3504*0Sstevel@tonic-gate * by always using largest page size possible subject 3505*0Sstevel@tonic-gate * to the constraints of page size, segment page size 3506*0Sstevel@tonic-gate * and page alignment. Since mappings are invalidated 3507*0Sstevel@tonic-gate * when those constraints change and make it 3508*0Sstevel@tonic-gate * impossible to use previously used mapping size no 3509*0Sstevel@tonic-gate * mapping size conflicts should happen. 3510*0Sstevel@tonic-gate */ 3511*0Sstevel@tonic-gate 3512*0Sstevel@tonic-gate chkszc: 3513*0Sstevel@tonic-gate if ((pszc = ppa[0]->p_szc) == szc && 3514*0Sstevel@tonic-gate IS_P2ALIGNED(pfn, pages)) { 3515*0Sstevel@tonic-gate 3516*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(30); 3517*0Sstevel@tonic-gate #ifdef DEBUG 3518*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 3519*0Sstevel@tonic-gate ASSERT(PAGE_LOCKED(ppa[i])); 3520*0Sstevel@tonic-gate ASSERT(!PP_ISFREE(ppa[i])); 3521*0Sstevel@tonic-gate ASSERT(page_pptonum(ppa[i]) == 3522*0Sstevel@tonic-gate pfn + i); 3523*0Sstevel@tonic-gate ASSERT(ppa[i]->p_szc == szc); 3524*0Sstevel@tonic-gate ASSERT(ppa[i]->p_vnode == vp); 3525*0Sstevel@tonic-gate ASSERT(ppa[i]->p_offset == 3526*0Sstevel@tonic-gate off + (i << PAGESHIFT)); 3527*0Sstevel@tonic-gate } 3528*0Sstevel@tonic-gate #endif 3529*0Sstevel@tonic-gate /* 3530*0Sstevel@tonic-gate * All pages are of szc we need and they are 3531*0Sstevel@tonic-gate * all locked so they can't change szc. load 3532*0Sstevel@tonic-gate * translations. 3533*0Sstevel@tonic-gate * 3534*0Sstevel@tonic-gate * if page got promoted since last check 3535*0Sstevel@tonic-gate * we don't need pplist. 3536*0Sstevel@tonic-gate */ 3537*0Sstevel@tonic-gate if (pplist != NULL) { 3538*0Sstevel@tonic-gate page_free_replacement_page(pplist); 3539*0Sstevel@tonic-gate page_create_putback(pages); 3540*0Sstevel@tonic-gate } 3541*0Sstevel@tonic-gate if (PP_ISMIGRATE(ppa[0])) { 3542*0Sstevel@tonic-gate page_migrate(seg, a, ppa, pages); 3543*0Sstevel@tonic-gate } 3544*0Sstevel@tonic-gate SEGVN_UPDATE_MODBITS(ppa, pages, rw, 3545*0Sstevel@tonic-gate prot, vpprot); 3546*0Sstevel@tonic-gate if (!xhat) { 3547*0Sstevel@tonic-gate hat_memload_array(hat, a, pgsz, ppa, 3548*0Sstevel@tonic-gate prot & vpprot, hat_flag); 3549*0Sstevel@tonic-gate } else { 3550*0Sstevel@tonic-gate /* 3551*0Sstevel@tonic-gate * avoid large xhat mappings to FS 3552*0Sstevel@tonic-gate * pages so that hat_page_demote() 3553*0Sstevel@tonic-gate * doesn't need to check for xhat 3554*0Sstevel@tonic-gate * large mappings.
3555*0Sstevel@tonic-gate */ 3556*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 3557*0Sstevel@tonic-gate hat_memload(hat, 3558*0Sstevel@tonic-gate a + (i << PAGESHIFT), 3559*0Sstevel@tonic-gate ppa[i], prot & vpprot, 3560*0Sstevel@tonic-gate hat_flag); 3561*0Sstevel@tonic-gate } 3562*0Sstevel@tonic-gate } 3563*0Sstevel@tonic-gate 3564*0Sstevel@tonic-gate if (!(hat_flag & HAT_LOAD_LOCK)) { 3565*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 3566*0Sstevel@tonic-gate page_unlock(ppa[i]); 3567*0Sstevel@tonic-gate } 3568*0Sstevel@tonic-gate } 3569*0Sstevel@tonic-gate if (amp != NULL) { 3570*0Sstevel@tonic-gate anon_array_exit(&an_cookie); 3571*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 3572*0Sstevel@tonic-gate } 3573*0Sstevel@tonic-gate goto next; 3574*0Sstevel@tonic-gate } 3575*0Sstevel@tonic-gate 3576*0Sstevel@tonic-gate /* 3577*0Sstevel@tonic-gate * See if upsize is possible. 3578*0Sstevel@tonic-gate */ 3579*0Sstevel@tonic-gate if (pszc > szc && szc < seg->s_szc && 3580*0Sstevel@tonic-gate (segvn_anypgsz_vnode || pszc >= seg->s_szc)) { 3581*0Sstevel@tonic-gate pgcnt_t aphase; 3582*0Sstevel@tonic-gate uint_t pszc1 = MIN(pszc, seg->s_szc); 3583*0Sstevel@tonic-gate ppgsz = page_get_pagesize(pszc1); 3584*0Sstevel@tonic-gate ppages = btop(ppgsz); 3585*0Sstevel@tonic-gate aphase = btop(P2PHASE((uintptr_t)a, ppgsz)); 3586*0Sstevel@tonic-gate 3587*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(31); 3588*0Sstevel@tonic-gate if (aphase != P2PHASE(pfn, ppages)) { 3589*0Sstevel@tonic-gate segvn_faultvnmpss_align_err4++; 3590*0Sstevel@tonic-gate } else if (type == F_SOFTLOCK && 3591*0Sstevel@tonic-gate a != lpgaddr && 3592*0Sstevel@tonic-gate !IS_P2ALIGNED(pfn, 3593*0Sstevel@tonic-gate page_get_pagecnt(ppa[0]->p_szc))) { 3594*0Sstevel@tonic-gate /* 3595*0Sstevel@tonic-gate * if we locked previous offsets for 3596*0Sstevel@tonic-gate * smaller szc page larger page can't 3597*0Sstevel@tonic-gate * be here since one needs excl locks 3598*0Sstevel@tonic-gate * to promote page size. 3599*0Sstevel@tonic-gate */ 3600*0Sstevel@tonic-gate panic("segvn_fault_vnodepages: " 3601*0Sstevel@tonic-gate "unexpected larger than szc page" 3602*0Sstevel@tonic-gate " found after SOFTLOCK"); 3603*0Sstevel@tonic-gate } else { 3604*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(32); 3605*0Sstevel@tonic-gate if (pplist != NULL) { 3606*0Sstevel@tonic-gate page_t *pl = pplist; 3607*0Sstevel@tonic-gate page_free_replacement_page(pl); 3608*0Sstevel@tonic-gate page_create_putback(pages); 3609*0Sstevel@tonic-gate } 3610*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 3611*0Sstevel@tonic-gate page_unlock(ppa[i]); 3612*0Sstevel@tonic-gate } 3613*0Sstevel@tonic-gate if (amp != NULL) { 3614*0Sstevel@tonic-gate anon_array_exit(&an_cookie); 3615*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 3616*0Sstevel@tonic-gate } 3617*0Sstevel@tonic-gate SEGVN_RESTORE_SOFTLOCK(type, pages); 3618*0Sstevel@tonic-gate pszc = pszc1; 3619*0Sstevel@tonic-gate ierr = -2; 3620*0Sstevel@tonic-gate break; 3621*0Sstevel@tonic-gate } 3622*0Sstevel@tonic-gate } 3623*0Sstevel@tonic-gate 3624*0Sstevel@tonic-gate /* 3625*0Sstevel@tonic-gate * check if we should use smallest mapping size. 
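			 * (In other words, fall back to PAGESIZE mappings when
			 * szc is 0, when this is an xhat, when the existing
			 * pages' pfns are not properly aligned for a szc
			 * mapping, or when segvn_full_szcpages() determines
			 * the existing pages cannot all be used at szc --
			 * see the condition below.)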
3626*0Sstevel@tonic-gate */ 3627*0Sstevel@tonic-gate upgrdfail = 0; 3628*0Sstevel@tonic-gate if (szc == 0 || xhat || 3629*0Sstevel@tonic-gate (pszc >= szc && 3630*0Sstevel@tonic-gate !IS_P2ALIGNED(pfn, pages)) || 3631*0Sstevel@tonic-gate (pszc < szc && 3632*0Sstevel@tonic-gate !segvn_full_szcpages(ppa, szc, &upgrdfail, 3633*0Sstevel@tonic-gate &pszc))) { 3634*0Sstevel@tonic-gate 3635*0Sstevel@tonic-gate if (upgrdfail) { 3636*0Sstevel@tonic-gate /* 3637*0Sstevel@tonic-gate * segvn_full_szcpages failed to lock 3638*0Sstevel@tonic-gate * all pages EXCL. Size down. 3639*0Sstevel@tonic-gate */ 3640*0Sstevel@tonic-gate ASSERT(pszc < szc); 3641*0Sstevel@tonic-gate 3642*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(33); 3643*0Sstevel@tonic-gate 3644*0Sstevel@tonic-gate if (pplist != NULL) { 3645*0Sstevel@tonic-gate page_t *pl = pplist; 3646*0Sstevel@tonic-gate page_free_replacement_page(pl); 3647*0Sstevel@tonic-gate page_create_putback(pages); 3648*0Sstevel@tonic-gate } 3649*0Sstevel@tonic-gate 3650*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 3651*0Sstevel@tonic-gate page_unlock(ppa[i]); 3652*0Sstevel@tonic-gate } 3653*0Sstevel@tonic-gate if (amp != NULL) { 3654*0Sstevel@tonic-gate anon_array_exit(&an_cookie); 3655*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 3656*0Sstevel@tonic-gate } 3657*0Sstevel@tonic-gate SEGVN_RESTORE_SOFTLOCK(type, pages); 3658*0Sstevel@tonic-gate ierr = -1; 3659*0Sstevel@tonic-gate break; 3660*0Sstevel@tonic-gate } 3661*0Sstevel@tonic-gate if (szc != 0 && !xhat) { 3662*0Sstevel@tonic-gate segvn_faultvnmpss_align_err5++; 3663*0Sstevel@tonic-gate } 3664*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(34); 3665*0Sstevel@tonic-gate if (pplist != NULL) { 3666*0Sstevel@tonic-gate page_free_replacement_page(pplist); 3667*0Sstevel@tonic-gate page_create_putback(pages); 3668*0Sstevel@tonic-gate } 3669*0Sstevel@tonic-gate SEGVN_UPDATE_MODBITS(ppa, pages, rw, 3670*0Sstevel@tonic-gate prot, vpprot); 3671*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 3672*0Sstevel@tonic-gate hat_memload(hat, a + (i << PAGESHIFT), 3673*0Sstevel@tonic-gate ppa[i], prot & vpprot, hat_flag); 3674*0Sstevel@tonic-gate } 3675*0Sstevel@tonic-gate if (!(hat_flag & HAT_LOAD_LOCK)) { 3676*0Sstevel@tonic-gate for (i = 0; i < pages; i++) { 3677*0Sstevel@tonic-gate page_unlock(ppa[i]); 3678*0Sstevel@tonic-gate } 3679*0Sstevel@tonic-gate } 3680*0Sstevel@tonic-gate if (amp != NULL) { 3681*0Sstevel@tonic-gate anon_array_exit(&an_cookie); 3682*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 3683*0Sstevel@tonic-gate } 3684*0Sstevel@tonic-gate goto next; 3685*0Sstevel@tonic-gate } 3686*0Sstevel@tonic-gate 3687*0Sstevel@tonic-gate if (pszc == szc) { 3688*0Sstevel@tonic-gate /* 3689*0Sstevel@tonic-gate * segvn_full_szcpages() upgraded pages szc. 3690*0Sstevel@tonic-gate */ 3691*0Sstevel@tonic-gate ASSERT(pszc == ppa[0]->p_szc); 3692*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pfn, pages)); 3693*0Sstevel@tonic-gate goto chkszc; 3694*0Sstevel@tonic-gate } 3695*0Sstevel@tonic-gate 3696*0Sstevel@tonic-gate if (pszc > szc) { 3697*0Sstevel@tonic-gate kmutex_t *szcmtx; 3698*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(35); 3699*0Sstevel@tonic-gate /* 3700*0Sstevel@tonic-gate * p_szc of ppa[0] can change since we haven't 3701*0Sstevel@tonic-gate * locked all constituent pages. Call 3702*0Sstevel@tonic-gate * page_lock_szc() to prevent szc changes. 
3703*0Sstevel@tonic-gate 				 * This should be a rare case that happens when
3704*0Sstevel@tonic-gate 				 * multiple segments use a different page size
3705*0Sstevel@tonic-gate 				 * to map the same file offsets.
3706*0Sstevel@tonic-gate 				 */
3707*0Sstevel@tonic-gate 				szcmtx = page_szc_lock(ppa[0]);
3708*0Sstevel@tonic-gate 				pszc = ppa[0]->p_szc;
3709*0Sstevel@tonic-gate 				ASSERT(szcmtx != NULL || pszc == 0);
3710*0Sstevel@tonic-gate 				ASSERT(ppa[0]->p_szc <= pszc);
3711*0Sstevel@tonic-gate 				if (pszc <= szc) {
3712*0Sstevel@tonic-gate 					SEGVN_VMSTAT_FLTVNPAGES(36);
3713*0Sstevel@tonic-gate 					if (szcmtx != NULL) {
3714*0Sstevel@tonic-gate 						mutex_exit(szcmtx);
3715*0Sstevel@tonic-gate 					}
3716*0Sstevel@tonic-gate 					goto chkszc;
3717*0Sstevel@tonic-gate 				}
3718*0Sstevel@tonic-gate 				if (pplist != NULL) {
3719*0Sstevel@tonic-gate 					/*
3720*0Sstevel@tonic-gate 					 * The page got promoted since the last
3721*0Sstevel@tonic-gate 					 * check. We don't need the preallocated
3722*0Sstevel@tonic-gate 					 * large page.
3723*0Sstevel@tonic-gate 					 */
3724*0Sstevel@tonic-gate 					SEGVN_VMSTAT_FLTVNPAGES(37);
3725*0Sstevel@tonic-gate 					page_free_replacement_page(pplist);
3726*0Sstevel@tonic-gate 					page_create_putback(pages);
3727*0Sstevel@tonic-gate 				}
3728*0Sstevel@tonic-gate 				SEGVN_UPDATE_MODBITS(ppa, pages, rw,
3729*0Sstevel@tonic-gate 				    prot, vpprot);
3730*0Sstevel@tonic-gate 				hat_memload_array(hat, a, pgsz, ppa,
3731*0Sstevel@tonic-gate 				    prot & vpprot, hat_flag);
3732*0Sstevel@tonic-gate 				mutex_exit(szcmtx);
3733*0Sstevel@tonic-gate 				if (!(hat_flag & HAT_LOAD_LOCK)) {
3734*0Sstevel@tonic-gate 					for (i = 0; i < pages; i++) {
3735*0Sstevel@tonic-gate 						page_unlock(ppa[i]);
3736*0Sstevel@tonic-gate 					}
3737*0Sstevel@tonic-gate 				}
3738*0Sstevel@tonic-gate 				if (amp != NULL) {
3739*0Sstevel@tonic-gate 					anon_array_exit(&an_cookie);
3740*0Sstevel@tonic-gate 					ANON_LOCK_EXIT(&amp->a_rwlock);
3741*0Sstevel@tonic-gate 				}
3742*0Sstevel@tonic-gate 				goto next;
3743*0Sstevel@tonic-gate 			}
3744*0Sstevel@tonic-gate 
3745*0Sstevel@tonic-gate 			/*
3746*0Sstevel@tonic-gate 			 * If the page got demoted since the last check
3747*0Sstevel@tonic-gate 			 * we may not have allocated a larger page.
3748*0Sstevel@tonic-gate 			 * Allocate it now.
3749*0Sstevel@tonic-gate 			 */
3750*0Sstevel@tonic-gate 			if (pplist == NULL &&
3751*0Sstevel@tonic-gate 			    page_alloc_pages(seg, a, &pplist, NULL, szc, 0)) {
3752*0Sstevel@tonic-gate 				SEGVN_VMSTAT_FLTVNPAGES(38);
3753*0Sstevel@tonic-gate 				for (i = 0; i < pages; i++) {
3754*0Sstevel@tonic-gate 					page_unlock(ppa[i]);
3755*0Sstevel@tonic-gate 				}
3756*0Sstevel@tonic-gate 				if (amp != NULL) {
3757*0Sstevel@tonic-gate 					anon_array_exit(&an_cookie);
3758*0Sstevel@tonic-gate 					ANON_LOCK_EXIT(&amp->a_rwlock);
3759*0Sstevel@tonic-gate 				}
3760*0Sstevel@tonic-gate 				SEGVN_RESTORE_SOFTLOCK(type, pages);
3761*0Sstevel@tonic-gate 				ierr = -1;
3762*0Sstevel@tonic-gate 				alloc_failed |= (1 << szc);
3763*0Sstevel@tonic-gate 				break;
3764*0Sstevel@tonic-gate 			}
3765*0Sstevel@tonic-gate 
3766*0Sstevel@tonic-gate 			SEGVN_VMSTAT_FLTVNPAGES(39);
3767*0Sstevel@tonic-gate 
3768*0Sstevel@tonic-gate 			segvn_relocate_pages(ppa, pplist);
3769*0Sstevel@tonic-gate 
3770*0Sstevel@tonic-gate 			SEGVN_UPDATE_MODBITS(ppa, pages, rw, prot, vpprot);
3771*0Sstevel@tonic-gate 			hat_memload_array(hat, a, pgsz, ppa, prot & vpprot,
3772*0Sstevel@tonic-gate 			    hat_flag);
3773*0Sstevel@tonic-gate 			if (!(hat_flag & HAT_LOAD_LOCK)) {
3774*0Sstevel@tonic-gate 				for (i = 0; i < pages; i++) {
3775*0Sstevel@tonic-gate 					ASSERT(PAGE_SHARED(ppa[i]));
3776*0Sstevel@tonic-gate 					page_unlock(ppa[i]);
3777*0Sstevel@tonic-gate 				}
3778*0Sstevel@tonic-gate 			}
3779*0Sstevel@tonic-gate 			if (amp != NULL) {
3780*0Sstevel@tonic-gate 				anon_array_exit(&an_cookie);
3781*0Sstevel@tonic-gate 				ANON_LOCK_EXIT(&amp->a_rwlock);
3782*0Sstevel@tonic-gate 			}
3783*0Sstevel@tonic-gate 
3784*0Sstevel@tonic-gate 		next:
3785*0Sstevel@tonic-gate 			if (vpage != NULL) {
3786*0Sstevel@tonic-gate 				vpage += pages;
3787*0Sstevel@tonic-gate 			}
3788*0Sstevel@tonic-gate 			adjszc_chk = 1;
3789*0Sstevel@tonic-gate 		}
3790*0Sstevel@tonic-gate 		if (a == lpgeaddr)
3791*0Sstevel@tonic-gate 			break;
3792*0Sstevel@tonic-gate 		ASSERT(a < lpgeaddr);
3793*0Sstevel@tonic-gate 		/*
3794*0Sstevel@tonic-gate 		 * ierr == -1 means we failed to map with a large page
3795*0Sstevel@tonic-gate 		 * (either due to allocation/relocation failures or
3796*0Sstevel@tonic-gate 		 * misalignment with other mappings to this file).
3797*0Sstevel@tonic-gate 		 *
3798*0Sstevel@tonic-gate 		 * ierr == -2 means some other thread allocated a large page
3799*0Sstevel@tonic-gate 		 * after we gave up to map with a large page. Retry with
3800*0Sstevel@tonic-gate 		 * a larger mapping.
3801*0Sstevel@tonic-gate 		 */
3802*0Sstevel@tonic-gate 		ASSERT(ierr == -1 || ierr == -2);
3803*0Sstevel@tonic-gate 		ASSERT(ierr == -2 || szc != 0);
3804*0Sstevel@tonic-gate 		ASSERT(ierr == -1 || szc < seg->s_szc);
3805*0Sstevel@tonic-gate 		if (ierr == -2) {
3806*0Sstevel@tonic-gate 			SEGVN_VMSTAT_FLTVNPAGES(40);
3807*0Sstevel@tonic-gate 			ASSERT(pszc > szc && pszc <= seg->s_szc);
3808*0Sstevel@tonic-gate 			szc = pszc;
3809*0Sstevel@tonic-gate 		} else if (segvn_anypgsz_vnode) {
3810*0Sstevel@tonic-gate 			SEGVN_VMSTAT_FLTVNPAGES(41);
3811*0Sstevel@tonic-gate 			szc--;
3812*0Sstevel@tonic-gate 		} else {
3813*0Sstevel@tonic-gate 			SEGVN_VMSTAT_FLTVNPAGES(42);
3814*0Sstevel@tonic-gate 			ASSERT(pszc < szc);
3815*0Sstevel@tonic-gate 			/*
3816*0Sstevel@tonic-gate 			 * Another process created a pszc large page,
3817*0Sstevel@tonic-gate 			 * but we still have to drop to szc 0.
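			 * (With segvn_anypgsz_vnode set to 0, this segment may
			 * only be mapped with its full seg->s_szc page size or
			 * with PAGESIZE, so finding an intermediate pszc page
			 * forces a retry with PAGESIZE mappings here.)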
3818*0Sstevel@tonic-gate */ 3819*0Sstevel@tonic-gate szc = 0; 3820*0Sstevel@tonic-gate } 3821*0Sstevel@tonic-gate 3822*0Sstevel@tonic-gate pgsz = page_get_pagesize(szc); 3823*0Sstevel@tonic-gate pages = btop(pgsz); 3824*0Sstevel@tonic-gate ASSERT(type != F_SOFTLOCK || ierr == -1 || 3825*0Sstevel@tonic-gate (IS_P2ALIGNED(a, pgsz) && IS_P2ALIGNED(lpgeaddr, pgsz))); 3826*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 3827*0Sstevel@tonic-gate /* 3828*0Sstevel@tonic-gate * For softlocks we cannot reduce the fault area 3829*0Sstevel@tonic-gate * (calculated based on the largest page size for this 3830*0Sstevel@tonic-gate * segment) for size down and a is already next 3831*0Sstevel@tonic-gate * page size aligned as assertted above for size 3832*0Sstevel@tonic-gate * ups. Therefore just continue in case of softlock. 3833*0Sstevel@tonic-gate */ 3834*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(43); 3835*0Sstevel@tonic-gate continue; /* keep lint happy */ 3836*0Sstevel@tonic-gate } else if (ierr == -2) { 3837*0Sstevel@tonic-gate 3838*0Sstevel@tonic-gate /* 3839*0Sstevel@tonic-gate * Size up case. Note lpgaddr may only be needed for 3840*0Sstevel@tonic-gate * softlock case so we don't adjust it here. 3841*0Sstevel@tonic-gate */ 3842*0Sstevel@tonic-gate a = (caddr_t)P2ALIGN((uintptr_t)a, pgsz); 3843*0Sstevel@tonic-gate ASSERT(a >= lpgaddr); 3844*0Sstevel@tonic-gate lpgeaddr = (caddr_t)P2ROUNDUP((uintptr_t)eaddr, pgsz); 3845*0Sstevel@tonic-gate off = svd->offset + (uintptr_t)(a - seg->s_base); 3846*0Sstevel@tonic-gate aindx = svd->anon_index + seg_page(seg, a); 3847*0Sstevel@tonic-gate vpage = (svd->vpage != NULL) ? 3848*0Sstevel@tonic-gate &svd->vpage[seg_page(seg, a)] : NULL; 3849*0Sstevel@tonic-gate } else { 3850*0Sstevel@tonic-gate /* 3851*0Sstevel@tonic-gate * Size down case. Note lpgaddr may only be needed for 3852*0Sstevel@tonic-gate * softlock case so we don't adjust it here. 3853*0Sstevel@tonic-gate */ 3854*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(a, pgsz)); 3855*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(lpgeaddr, pgsz)); 3856*0Sstevel@tonic-gate lpgeaddr = (caddr_t)P2ROUNDUP((uintptr_t)eaddr, pgsz); 3857*0Sstevel@tonic-gate ASSERT(a < lpgeaddr); 3858*0Sstevel@tonic-gate if (a < addr) { 3859*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(44); 3860*0Sstevel@tonic-gate /* 3861*0Sstevel@tonic-gate * The beginning of the large page region can 3862*0Sstevel@tonic-gate * be pulled to the right to make a smaller 3863*0Sstevel@tonic-gate * region. We haven't yet faulted a single 3864*0Sstevel@tonic-gate * page. 3865*0Sstevel@tonic-gate */ 3866*0Sstevel@tonic-gate a = (caddr_t)P2ALIGN((uintptr_t)addr, pgsz); 3867*0Sstevel@tonic-gate ASSERT(a >= lpgaddr); 3868*0Sstevel@tonic-gate off = svd->offset + 3869*0Sstevel@tonic-gate (uintptr_t)(a - seg->s_base); 3870*0Sstevel@tonic-gate aindx = svd->anon_index + seg_page(seg, a); 3871*0Sstevel@tonic-gate vpage = (svd->vpage != NULL) ? 
3872*0Sstevel@tonic-gate &svd->vpage[seg_page(seg, a)] : NULL; 3873*0Sstevel@tonic-gate } 3874*0Sstevel@tonic-gate } 3875*0Sstevel@tonic-gate } 3876*0Sstevel@tonic-gate out: 3877*0Sstevel@tonic-gate kmem_free(ppa, ppasize); 3878*0Sstevel@tonic-gate if (!err && !vop_size_err) { 3879*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(45); 3880*0Sstevel@tonic-gate return (0); 3881*0Sstevel@tonic-gate } 3882*0Sstevel@tonic-gate if (type == F_SOFTLOCK && a > lpgaddr) { 3883*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(46); 3884*0Sstevel@tonic-gate segvn_softunlock(seg, lpgaddr, a - lpgaddr, S_OTHER); 3885*0Sstevel@tonic-gate } 3886*0Sstevel@tonic-gate if (!vop_size_err) { 3887*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(47); 3888*0Sstevel@tonic-gate return (err); 3889*0Sstevel@tonic-gate } 3890*0Sstevel@tonic-gate ASSERT(brkcow || type == F_SOFTLOCK); 3891*0Sstevel@tonic-gate /* 3892*0Sstevel@tonic-gate * Large page end is mapped beyond the end of file and it's a cow 3893*0Sstevel@tonic-gate * fault or softlock so we can't reduce the map area. For now just 3894*0Sstevel@tonic-gate * demote the segment. This should really only happen if the end of 3895*0Sstevel@tonic-gate * the file changed after the mapping was established since when large 3896*0Sstevel@tonic-gate * page segments are created we make sure they don't extend beyond the 3897*0Sstevel@tonic-gate * end of the file. 3898*0Sstevel@tonic-gate */ 3899*0Sstevel@tonic-gate SEGVN_VMSTAT_FLTVNPAGES(48); 3900*0Sstevel@tonic-gate 3901*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 3902*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); 3903*0Sstevel@tonic-gate err = 0; 3904*0Sstevel@tonic-gate if (seg->s_szc != 0) { 3905*0Sstevel@tonic-gate err = segvn_clrszc(seg); 3906*0Sstevel@tonic-gate if (err != 0) { 3907*0Sstevel@tonic-gate segvn_fltvnpages_clrszc_err++; 3908*0Sstevel@tonic-gate } 3909*0Sstevel@tonic-gate } 3910*0Sstevel@tonic-gate ASSERT(err || seg->s_szc == 0); 3911*0Sstevel@tonic-gate SEGVN_LOCK_DOWNGRADE(seg->s_as, &svd->lock); 3912*0Sstevel@tonic-gate /* segvn_fault will do its job as if szc had been zero to begin with */ 3913*0Sstevel@tonic-gate return (err == 0 ? IE_RETRY : FC_MAKE_ERR(err)); 3914*0Sstevel@tonic-gate } 3915*0Sstevel@tonic-gate 3916*0Sstevel@tonic-gate /* 3917*0Sstevel@tonic-gate * This routine will attempt to fault in one large page. 3918*0Sstevel@tonic-gate * it will use smaller pages if that fails. 3919*0Sstevel@tonic-gate * It should only be called for pure anonymous segments. 3920*0Sstevel@tonic-gate */ 3921*0Sstevel@tonic-gate static faultcode_t 3922*0Sstevel@tonic-gate segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, 3923*0Sstevel@tonic-gate caddr_t lpgeaddr, enum fault_type type, enum seg_rw rw, caddr_t addr, 3924*0Sstevel@tonic-gate caddr_t eaddr, int brkcow) 3925*0Sstevel@tonic-gate { 3926*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 3927*0Sstevel@tonic-gate struct anon_map *amp = svd->amp; 3928*0Sstevel@tonic-gate uchar_t segtype = svd->type; 3929*0Sstevel@tonic-gate uint_t szc = seg->s_szc; 3930*0Sstevel@tonic-gate size_t pgsz = page_get_pagesize(szc); 3931*0Sstevel@tonic-gate size_t maxpgsz = pgsz; 3932*0Sstevel@tonic-gate pgcnt_t pages = btop(pgsz); 3933*0Sstevel@tonic-gate size_t ppasize = pages * sizeof (page_t *); 3934*0Sstevel@tonic-gate caddr_t a = lpgaddr; 3935*0Sstevel@tonic-gate ulong_t aindx = svd->anon_index + seg_page(seg, a); 3936*0Sstevel@tonic-gate struct vpage *vpage = (svd->vpage != NULL) ? 
3937*0Sstevel@tonic-gate &svd->vpage[seg_page(seg, a)] : NULL; 3938*0Sstevel@tonic-gate page_t **ppa; 3939*0Sstevel@tonic-gate uint_t ppa_szc; 3940*0Sstevel@tonic-gate faultcode_t err; 3941*0Sstevel@tonic-gate int ierr; 3942*0Sstevel@tonic-gate uint_t protchk, prot, vpprot; 3943*0Sstevel@tonic-gate int i; 3944*0Sstevel@tonic-gate int hat_flag = (type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD; 3945*0Sstevel@tonic-gate anon_sync_obj_t cookie; 3946*0Sstevel@tonic-gate 3947*0Sstevel@tonic-gate ASSERT(szc != 0); 3948*0Sstevel@tonic-gate ASSERT(amp != NULL); 3949*0Sstevel@tonic-gate ASSERT(enable_mbit_wa == 0); /* no mbit simulations with large pages */ 3950*0Sstevel@tonic-gate ASSERT(!(svd->flags & MAP_NORESERVE)); 3951*0Sstevel@tonic-gate ASSERT(type != F_SOFTUNLOCK); 3952*0Sstevel@tonic-gate ASSERT(segtype == MAP_PRIVATE); 3953*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(a, maxpgsz)); 3954*0Sstevel@tonic-gate 3955*0Sstevel@tonic-gate ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); 3956*0Sstevel@tonic-gate 3957*0Sstevel@tonic-gate VM_STAT_COND_ADD(type == F_SOFTLOCK, segvnvmstats.fltanpages[0]); 3958*0Sstevel@tonic-gate VM_STAT_COND_ADD(type != F_SOFTLOCK, segvnvmstats.fltanpages[1]); 3959*0Sstevel@tonic-gate 3960*0Sstevel@tonic-gate if (svd->flags & MAP_TEXT) { 3961*0Sstevel@tonic-gate hat_flag |= HAT_LOAD_TEXT; 3962*0Sstevel@tonic-gate } 3963*0Sstevel@tonic-gate 3964*0Sstevel@tonic-gate if (svd->pageprot) { 3965*0Sstevel@tonic-gate switch (rw) { 3966*0Sstevel@tonic-gate case S_READ: 3967*0Sstevel@tonic-gate protchk = PROT_READ; 3968*0Sstevel@tonic-gate break; 3969*0Sstevel@tonic-gate case S_WRITE: 3970*0Sstevel@tonic-gate protchk = PROT_WRITE; 3971*0Sstevel@tonic-gate break; 3972*0Sstevel@tonic-gate case S_EXEC: 3973*0Sstevel@tonic-gate protchk = PROT_EXEC; 3974*0Sstevel@tonic-gate break; 3975*0Sstevel@tonic-gate case S_OTHER: 3976*0Sstevel@tonic-gate default: 3977*0Sstevel@tonic-gate protchk = PROT_READ | PROT_WRITE | PROT_EXEC; 3978*0Sstevel@tonic-gate break; 3979*0Sstevel@tonic-gate } 3980*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[2]); 3981*0Sstevel@tonic-gate } else { 3982*0Sstevel@tonic-gate prot = svd->prot; 3983*0Sstevel@tonic-gate /* caller has already done segment level protection check. 
*/ 3984*0Sstevel@tonic-gate } 3985*0Sstevel@tonic-gate 3986*0Sstevel@tonic-gate ppa = kmem_alloc(ppasize, KM_SLEEP); 3987*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 3988*0Sstevel@tonic-gate for (;;) { 3989*0Sstevel@tonic-gate for (; a < lpgeaddr; a += pgsz, aindx += pages) { 3990*0Sstevel@tonic-gate if (svd->pageprot != 0 && IS_P2ALIGNED(a, maxpgsz)) { 3991*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[3]); 3992*0Sstevel@tonic-gate ASSERT(vpage != NULL); 3993*0Sstevel@tonic-gate prot = VPP_PROT(vpage); 3994*0Sstevel@tonic-gate ASSERT(sameprot(seg, a, maxpgsz)); 3995*0Sstevel@tonic-gate if ((prot & protchk) == 0) { 3996*0Sstevel@tonic-gate err = FC_PROT; 3997*0Sstevel@tonic-gate goto error; 3998*0Sstevel@tonic-gate } 3999*0Sstevel@tonic-gate } 4000*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 4001*0Sstevel@tonic-gate mutex_enter(&freemem_lock); 4002*0Sstevel@tonic-gate if (availrmem < tune.t_minarmem + pages) { 4003*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 4004*0Sstevel@tonic-gate err = FC_MAKE_ERR(ENOMEM); 4005*0Sstevel@tonic-gate goto error; 4006*0Sstevel@tonic-gate } else { 4007*0Sstevel@tonic-gate availrmem -= pages; 4008*0Sstevel@tonic-gate segvn_pages_locked += pages; 4009*0Sstevel@tonic-gate svd->softlockcnt += pages; 4010*0Sstevel@tonic-gate } 4011*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 4012*0Sstevel@tonic-gate } 4013*0Sstevel@tonic-gate anon_array_enter(amp, aindx, &cookie); 4014*0Sstevel@tonic-gate ppa_szc = (uint_t)-1; 4015*0Sstevel@tonic-gate ierr = anon_map_getpages(amp, aindx, szc, seg, a, 4016*0Sstevel@tonic-gate prot, &vpprot, ppa, &ppa_szc, vpage, rw, brkcow, 4017*0Sstevel@tonic-gate segvn_anypgsz, svd->cred); 4018*0Sstevel@tonic-gate if (ierr != 0) { 4019*0Sstevel@tonic-gate anon_array_exit(&cookie); 4020*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[4]); 4021*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 4022*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[5]); 4023*0Sstevel@tonic-gate mutex_enter(&freemem_lock); 4024*0Sstevel@tonic-gate availrmem += pages; 4025*0Sstevel@tonic-gate segvn_pages_locked -= pages; 4026*0Sstevel@tonic-gate svd->softlockcnt -= pages; 4027*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 4028*0Sstevel@tonic-gate } 4029*0Sstevel@tonic-gate if (ierr > 0) { 4030*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[6]); 4031*0Sstevel@tonic-gate err = FC_MAKE_ERR(ierr); 4032*0Sstevel@tonic-gate goto error; 4033*0Sstevel@tonic-gate } 4034*0Sstevel@tonic-gate break; 4035*0Sstevel@tonic-gate } 4036*0Sstevel@tonic-gate 4037*0Sstevel@tonic-gate ASSERT(!IS_VMODSORT(ppa[0]->p_vnode)); 4038*0Sstevel@tonic-gate 4039*0Sstevel@tonic-gate /* 4040*0Sstevel@tonic-gate * Handle pages that have been marked for migration 4041*0Sstevel@tonic-gate */ 4042*0Sstevel@tonic-gate if (lgrp_optimizations()) 4043*0Sstevel@tonic-gate page_migrate(seg, a, ppa, pages); 4044*0Sstevel@tonic-gate 4045*0Sstevel@tonic-gate hat_memload_array(hat, a, pgsz, ppa, 4046*0Sstevel@tonic-gate prot & vpprot, hat_flag); 4047*0Sstevel@tonic-gate 4048*0Sstevel@tonic-gate if (hat_flag & HAT_LOAD_LOCK) { 4049*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[7]); 4050*0Sstevel@tonic-gate } else { 4051*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[8]); 4052*0Sstevel@tonic-gate for (i = 0; i < pages; i++) 4053*0Sstevel@tonic-gate page_unlock(ppa[i]); 4054*0Sstevel@tonic-gate } 4055*0Sstevel@tonic-gate if (vpage != NULL) 4056*0Sstevel@tonic-gate vpage += pages; 4057*0Sstevel@tonic-gate 4058*0Sstevel@tonic-gate 
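			/*
			 * At this point the translations for this chunk have
			 * been loaded; for F_SOFTLOCK (HAT_LOAD_LOCK) the
			 * constituent pages remain locked until the matching
			 * F_SOFTUNLOCK, otherwise they were unlocked above.
			 * The anon array entry for this index can now be
			 * released before advancing to the next chunk.
			 */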
anon_array_exit(&cookie); 4059*0Sstevel@tonic-gate } 4060*0Sstevel@tonic-gate if (a == lpgeaddr) 4061*0Sstevel@tonic-gate break; 4062*0Sstevel@tonic-gate ASSERT(a < lpgeaddr); 4063*0Sstevel@tonic-gate /* 4064*0Sstevel@tonic-gate * ierr == -1 means we failed to allocate a large page. 4065*0Sstevel@tonic-gate * so do a size down operation. 4066*0Sstevel@tonic-gate * 4067*0Sstevel@tonic-gate * ierr == -2 means some other process that privately shares 4068*0Sstevel@tonic-gate * pages with this process has allocated a larger page and we 4069*0Sstevel@tonic-gate * need to retry with larger pages. So do a size up 4070*0Sstevel@tonic-gate * operation. This relies on the fact that large pages are 4071*0Sstevel@tonic-gate * never partially shared i.e. if we share any constituent 4072*0Sstevel@tonic-gate * page of a large page with another process we must share the 4073*0Sstevel@tonic-gate * entire large page. Note this cannot happen for SOFTLOCK 4074*0Sstevel@tonic-gate * case, unless current address (a) is at the beginning of the 4075*0Sstevel@tonic-gate * next page size boundary because the other process couldn't 4076*0Sstevel@tonic-gate * have relocated locked pages. 4077*0Sstevel@tonic-gate */ 4078*0Sstevel@tonic-gate ASSERT(ierr == -1 || ierr == -2); 4079*0Sstevel@tonic-gate if (segvn_anypgsz) { 4080*0Sstevel@tonic-gate ASSERT(ierr == -2 || szc != 0); 4081*0Sstevel@tonic-gate ASSERT(ierr == -1 || szc < seg->s_szc); 4082*0Sstevel@tonic-gate szc = (ierr == -1) ? szc - 1 : szc + 1; 4083*0Sstevel@tonic-gate } else { 4084*0Sstevel@tonic-gate /* 4085*0Sstevel@tonic-gate * For non COW faults and segvn_anypgsz == 0 4086*0Sstevel@tonic-gate * we need to be careful not to loop forever 4087*0Sstevel@tonic-gate * if existing page is found with szc other 4088*0Sstevel@tonic-gate * than 0 or seg->s_szc. This could be due 4089*0Sstevel@tonic-gate * to page relocations on behalf of DR or 4090*0Sstevel@tonic-gate * more likely large page creation. For this 4091*0Sstevel@tonic-gate * case simply re-size to existing page's szc 4092*0Sstevel@tonic-gate * if returned by anon_map_getpages(). 4093*0Sstevel@tonic-gate */ 4094*0Sstevel@tonic-gate if (ppa_szc == (uint_t)-1) { 4095*0Sstevel@tonic-gate szc = (ierr == -1) ? 0 : seg->s_szc; 4096*0Sstevel@tonic-gate } else { 4097*0Sstevel@tonic-gate ASSERT(ppa_szc <= seg->s_szc); 4098*0Sstevel@tonic-gate ASSERT(ierr == -2 || ppa_szc < szc); 4099*0Sstevel@tonic-gate ASSERT(ierr == -1 || ppa_szc > szc); 4100*0Sstevel@tonic-gate szc = ppa_szc; 4101*0Sstevel@tonic-gate } 4102*0Sstevel@tonic-gate } 4103*0Sstevel@tonic-gate 4104*0Sstevel@tonic-gate pgsz = page_get_pagesize(szc); 4105*0Sstevel@tonic-gate pages = btop(pgsz); 4106*0Sstevel@tonic-gate ASSERT(type != F_SOFTLOCK || ierr == -1 || 4107*0Sstevel@tonic-gate (IS_P2ALIGNED(a, pgsz) && IS_P2ALIGNED(lpgeaddr, pgsz))); 4108*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 4109*0Sstevel@tonic-gate /* 4110*0Sstevel@tonic-gate * For softlocks we cannot reduce the fault area 4111*0Sstevel@tonic-gate * (calculated based on the largest page size for this 4112*0Sstevel@tonic-gate * segment) for size down and a is already next 4113*0Sstevel@tonic-gate * page size aligned as assertted above for size 4114*0Sstevel@tonic-gate * ups. Therefore just continue in case of softlock. 
4115*0Sstevel@tonic-gate */ 4116*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[9]); 4117*0Sstevel@tonic-gate continue; /* keep lint happy */ 4118*0Sstevel@tonic-gate } else if (ierr == -2) { 4119*0Sstevel@tonic-gate 4120*0Sstevel@tonic-gate /* 4121*0Sstevel@tonic-gate * Size up case. Note lpgaddr may only be needed for 4122*0Sstevel@tonic-gate * softlock case so we don't adjust it here. 4123*0Sstevel@tonic-gate */ 4124*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[10]); 4125*0Sstevel@tonic-gate a = (caddr_t)P2ALIGN((uintptr_t)a, pgsz); 4126*0Sstevel@tonic-gate ASSERT(a >= lpgaddr); 4127*0Sstevel@tonic-gate lpgeaddr = (caddr_t)P2ROUNDUP((uintptr_t)eaddr, pgsz); 4128*0Sstevel@tonic-gate aindx = svd->anon_index + seg_page(seg, a); 4129*0Sstevel@tonic-gate vpage = (svd->vpage != NULL) ? 4130*0Sstevel@tonic-gate &svd->vpage[seg_page(seg, a)] : NULL; 4131*0Sstevel@tonic-gate } else { 4132*0Sstevel@tonic-gate /* 4133*0Sstevel@tonic-gate * Size down case. Note lpgaddr may only be needed for 4134*0Sstevel@tonic-gate * softlock case so we don't adjust it here. 4135*0Sstevel@tonic-gate */ 4136*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[11]); 4137*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(a, pgsz)); 4138*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(lpgeaddr, pgsz)); 4139*0Sstevel@tonic-gate lpgeaddr = (caddr_t)P2ROUNDUP((uintptr_t)eaddr, pgsz); 4140*0Sstevel@tonic-gate ASSERT(a < lpgeaddr); 4141*0Sstevel@tonic-gate if (a < addr) { 4142*0Sstevel@tonic-gate /* 4143*0Sstevel@tonic-gate * The beginning of the large page region can 4144*0Sstevel@tonic-gate * be pulled to the right to make a smaller 4145*0Sstevel@tonic-gate * region. We haven't yet faulted a single 4146*0Sstevel@tonic-gate * page. 4147*0Sstevel@tonic-gate */ 4148*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[12]); 4149*0Sstevel@tonic-gate a = (caddr_t)P2ALIGN((uintptr_t)addr, pgsz); 4150*0Sstevel@tonic-gate ASSERT(a >= lpgaddr); 4151*0Sstevel@tonic-gate aindx = svd->anon_index + seg_page(seg, a); 4152*0Sstevel@tonic-gate vpage = (svd->vpage != NULL) ? 4153*0Sstevel@tonic-gate &svd->vpage[seg_page(seg, a)] : NULL; 4154*0Sstevel@tonic-gate } 4155*0Sstevel@tonic-gate } 4156*0Sstevel@tonic-gate } 4157*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[13]); 4158*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 4159*0Sstevel@tonic-gate kmem_free(ppa, ppasize); 4160*0Sstevel@tonic-gate return (0); 4161*0Sstevel@tonic-gate error: 4162*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[14]); 4163*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 4164*0Sstevel@tonic-gate kmem_free(ppa, ppasize); 4165*0Sstevel@tonic-gate if (type == F_SOFTLOCK && a > lpgaddr) { 4166*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.fltanpages[15]); 4167*0Sstevel@tonic-gate segvn_softunlock(seg, lpgaddr, a - lpgaddr, S_OTHER); 4168*0Sstevel@tonic-gate } 4169*0Sstevel@tonic-gate return (err); 4170*0Sstevel@tonic-gate } 4171*0Sstevel@tonic-gate 4172*0Sstevel@tonic-gate int fltadvice = 1; /* set to free behind pages for sequential access */ 4173*0Sstevel@tonic-gate 4174*0Sstevel@tonic-gate /* 4175*0Sstevel@tonic-gate * This routine is called via a machine specific fault handling routine. 4176*0Sstevel@tonic-gate * It is also called by software routines wishing to lock or unlock 4177*0Sstevel@tonic-gate * a range of addresses. 
4178*0Sstevel@tonic-gate  *
4179*0Sstevel@tonic-gate  * Here is the basic algorithm:
4180*0Sstevel@tonic-gate  *	If unlocking
4181*0Sstevel@tonic-gate  *		Call segvn_softunlock
4182*0Sstevel@tonic-gate  *		Return
4183*0Sstevel@tonic-gate  *	endif
4184*0Sstevel@tonic-gate  *	Checking and set up work
4185*0Sstevel@tonic-gate  *	If we will need some non-anonymous pages
4186*0Sstevel@tonic-gate  *		Call VOP_GETPAGE over the range of non-anonymous pages
4187*0Sstevel@tonic-gate  *	endif
4188*0Sstevel@tonic-gate  *	Loop over all addresses requested
4189*0Sstevel@tonic-gate  *		Call segvn_faultpage passing in page list
4190*0Sstevel@tonic-gate  *		    to load up translations and handle anonymous pages
4191*0Sstevel@tonic-gate  *	endloop
4192*0Sstevel@tonic-gate  *	Load up translation to any additional pages in page list not
4193*0Sstevel@tonic-gate  *	    already handled that fit into this segment
4194*0Sstevel@tonic-gate  */
4195*0Sstevel@tonic-gate static faultcode_t
4196*0Sstevel@tonic-gate segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
4197*0Sstevel@tonic-gate     enum fault_type type, enum seg_rw rw)
4198*0Sstevel@tonic-gate {
4199*0Sstevel@tonic-gate 	struct segvn_data *svd = (struct segvn_data *)seg->s_data;
4200*0Sstevel@tonic-gate 	page_t **plp, **ppp, *pp;
4201*0Sstevel@tonic-gate 	u_offset_t off;
4202*0Sstevel@tonic-gate 	caddr_t a;
4203*0Sstevel@tonic-gate 	struct vpage *vpage;
4204*0Sstevel@tonic-gate 	uint_t vpprot, prot;
4205*0Sstevel@tonic-gate 	int err;
4206*0Sstevel@tonic-gate 	page_t *pl[PVN_GETPAGE_NUM + 1];
4207*0Sstevel@tonic-gate 	size_t plsz, pl_alloc_sz;
4208*0Sstevel@tonic-gate 	size_t page;
4209*0Sstevel@tonic-gate 	ulong_t anon_index;
4210*0Sstevel@tonic-gate 	struct anon_map *amp;
4211*0Sstevel@tonic-gate 	int dogetpage = 0;
4212*0Sstevel@tonic-gate 	caddr_t	lpgaddr, lpgeaddr;
4213*0Sstevel@tonic-gate 	size_t pgsz;
4214*0Sstevel@tonic-gate 	anon_sync_obj_t cookie;
4215*0Sstevel@tonic-gate 	int brkcow = BREAK_COW_SHARE(rw, type, svd->type);
4216*0Sstevel@tonic-gate 
4217*0Sstevel@tonic-gate 	/*
4218*0Sstevel@tonic-gate 	 * S_READ_NOCOW is like read
4219*0Sstevel@tonic-gate 	 * except caller advises no need
4220*0Sstevel@tonic-gate 	 * to copy-on-write for softlock
4221*0Sstevel@tonic-gate 	 * because it holds address space
4222*0Sstevel@tonic-gate 	 * locked as writer and thus prevents
4223*0Sstevel@tonic-gate 	 * any copy on writes of a softlocked
4224*0Sstevel@tonic-gate 	 * page by another thread.
4225*0Sstevel@tonic-gate 	 * S_READ_NOCOW vs S_READ distinction was
4226*0Sstevel@tonic-gate 	 * only needed for BREAK_COW_SHARE(). After
4227*0Sstevel@tonic-gate 	 * that we treat S_READ_NOCOW as just S_READ.
4228*0Sstevel@tonic-gate 	 */
4229*0Sstevel@tonic-gate 	if (rw == S_READ_NOCOW) {
4230*0Sstevel@tonic-gate 		rw = S_READ;
4231*0Sstevel@tonic-gate 		ASSERT(type == F_SOFTLOCK &&
4232*0Sstevel@tonic-gate 		    AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
4233*0Sstevel@tonic-gate 	}
4234*0Sstevel@tonic-gate 
4235*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
4236*0Sstevel@tonic-gate 
4237*0Sstevel@tonic-gate 	/*
4238*0Sstevel@tonic-gate 	 * First handle the easy stuff
4239*0Sstevel@tonic-gate 	 */
4240*0Sstevel@tonic-gate 	if (type == F_SOFTUNLOCK) {
4241*0Sstevel@tonic-gate 		SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
4242*0Sstevel@tonic-gate 		pgsz = (seg->s_szc == 0) ?
PAGESIZE : 4243*0Sstevel@tonic-gate page_get_pagesize(seg->s_szc); 4244*0Sstevel@tonic-gate VM_STAT_COND_ADD(pgsz > PAGESIZE, segvnvmstats.fltanpages[16]); 4245*0Sstevel@tonic-gate CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr); 4246*0Sstevel@tonic-gate segvn_softunlock(seg, lpgaddr, lpgeaddr - lpgaddr, rw); 4247*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4248*0Sstevel@tonic-gate return (0); 4249*0Sstevel@tonic-gate } 4250*0Sstevel@tonic-gate 4251*0Sstevel@tonic-gate top: 4252*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER); 4253*0Sstevel@tonic-gate 4254*0Sstevel@tonic-gate /* 4255*0Sstevel@tonic-gate * If we have the same protections for the entire segment, 4256*0Sstevel@tonic-gate * insure that the access being attempted is legitimate. 4257*0Sstevel@tonic-gate */ 4258*0Sstevel@tonic-gate 4259*0Sstevel@tonic-gate if (svd->pageprot == 0) { 4260*0Sstevel@tonic-gate uint_t protchk; 4261*0Sstevel@tonic-gate 4262*0Sstevel@tonic-gate switch (rw) { 4263*0Sstevel@tonic-gate case S_READ: 4264*0Sstevel@tonic-gate protchk = PROT_READ; 4265*0Sstevel@tonic-gate break; 4266*0Sstevel@tonic-gate case S_WRITE: 4267*0Sstevel@tonic-gate protchk = PROT_WRITE; 4268*0Sstevel@tonic-gate break; 4269*0Sstevel@tonic-gate case S_EXEC: 4270*0Sstevel@tonic-gate protchk = PROT_EXEC; 4271*0Sstevel@tonic-gate break; 4272*0Sstevel@tonic-gate case S_OTHER: 4273*0Sstevel@tonic-gate default: 4274*0Sstevel@tonic-gate protchk = PROT_READ | PROT_WRITE | PROT_EXEC; 4275*0Sstevel@tonic-gate break; 4276*0Sstevel@tonic-gate } 4277*0Sstevel@tonic-gate 4278*0Sstevel@tonic-gate if ((svd->prot & protchk) == 0) { 4279*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4280*0Sstevel@tonic-gate return (FC_PROT); /* illegal access type */ 4281*0Sstevel@tonic-gate } 4282*0Sstevel@tonic-gate } 4283*0Sstevel@tonic-gate 4284*0Sstevel@tonic-gate /* 4285*0Sstevel@tonic-gate * Check to see if we need to allocate an anon_map structure. 4286*0Sstevel@tonic-gate */ 4287*0Sstevel@tonic-gate if (svd->amp == NULL && (svd->vp == NULL || brkcow)) { 4288*0Sstevel@tonic-gate /* 4289*0Sstevel@tonic-gate * Drop the "read" lock on the segment and acquire 4290*0Sstevel@tonic-gate * the "write" version since we have to allocate the 4291*0Sstevel@tonic-gate * anon_map. 4292*0Sstevel@tonic-gate */ 4293*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4294*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); 4295*0Sstevel@tonic-gate 4296*0Sstevel@tonic-gate if (svd->amp == NULL) { 4297*0Sstevel@tonic-gate svd->amp = anonmap_alloc(seg->s_size, 0); 4298*0Sstevel@tonic-gate svd->amp->a_szc = seg->s_szc; 4299*0Sstevel@tonic-gate } 4300*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4301*0Sstevel@tonic-gate 4302*0Sstevel@tonic-gate /* 4303*0Sstevel@tonic-gate * Start all over again since segment protections 4304*0Sstevel@tonic-gate * may have changed after we dropped the "read" lock. 4305*0Sstevel@tonic-gate */ 4306*0Sstevel@tonic-gate goto top; 4307*0Sstevel@tonic-gate } 4308*0Sstevel@tonic-gate 4309*0Sstevel@tonic-gate amp = svd->amp; 4310*0Sstevel@tonic-gate 4311*0Sstevel@tonic-gate /* 4312*0Sstevel@tonic-gate * MADV_SEQUENTIAL work is ignored for large page segments. 
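	 * (The large page paths taken below, segvn_fault_anonpages() and
	 * segvn_fault_vnodepages(), do not do the MADV_SEQUENTIAL free-behind
	 * processing; that is only done in the PAGESIZE path later in this
	 * function.)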
4313*0Sstevel@tonic-gate */ 4314*0Sstevel@tonic-gate if (seg->s_szc != 0) { 4315*0Sstevel@tonic-gate pgsz = page_get_pagesize(seg->s_szc); 4316*0Sstevel@tonic-gate ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); 4317*0Sstevel@tonic-gate /* 4318*0Sstevel@tonic-gate * We may need to do relocations so purge seg_pcache to allow 4319*0Sstevel@tonic-gate * pages to be locked exclusively. 4320*0Sstevel@tonic-gate */ 4321*0Sstevel@tonic-gate if (svd->softlockcnt != 0) 4322*0Sstevel@tonic-gate segvn_purge(seg); 4323*0Sstevel@tonic-gate CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr); 4324*0Sstevel@tonic-gate if (svd->vp == NULL) { 4325*0Sstevel@tonic-gate ASSERT(svd->type == MAP_PRIVATE); 4326*0Sstevel@tonic-gate err = segvn_fault_anonpages(hat, seg, lpgaddr, 4327*0Sstevel@tonic-gate lpgeaddr, type, rw, addr, addr + len, brkcow); 4328*0Sstevel@tonic-gate } else { 4329*0Sstevel@tonic-gate err = segvn_fault_vnodepages(hat, seg, lpgaddr, 4330*0Sstevel@tonic-gate lpgeaddr, type, rw, addr, addr + len, brkcow); 4331*0Sstevel@tonic-gate if (err == IE_RETRY) { 4332*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0); 4333*0Sstevel@tonic-gate ASSERT(SEGVN_READ_HELD(seg->s_as, &svd->lock)); 4334*0Sstevel@tonic-gate goto cont; 4335*0Sstevel@tonic-gate } 4336*0Sstevel@tonic-gate } 4337*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4338*0Sstevel@tonic-gate return (err); 4339*0Sstevel@tonic-gate } 4340*0Sstevel@tonic-gate 4341*0Sstevel@tonic-gate cont: 4342*0Sstevel@tonic-gate page = seg_page(seg, addr); 4343*0Sstevel@tonic-gate if (amp != NULL) { 4344*0Sstevel@tonic-gate anon_index = svd->anon_index + page; 4345*0Sstevel@tonic-gate 4346*0Sstevel@tonic-gate if ((type == F_PROT) && (rw == S_READ) && 4347*0Sstevel@tonic-gate svd->type == MAP_PRIVATE && svd->pageprot == 0) { 4348*0Sstevel@tonic-gate size_t index = anon_index; 4349*0Sstevel@tonic-gate struct anon *ap; 4350*0Sstevel@tonic-gate 4351*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 4352*0Sstevel@tonic-gate /* 4353*0Sstevel@tonic-gate * The fast path could apply to S_WRITE also, except 4354*0Sstevel@tonic-gate * that the protection fault could be caused by lazy 4355*0Sstevel@tonic-gate * tlb flush when ro->rw. In this case, the pte is 4356*0Sstevel@tonic-gate * RW already. But RO in the other cpu's tlb causes 4357*0Sstevel@tonic-gate * the fault. Since hat_chgprot won't do anything if 4358*0Sstevel@tonic-gate * pte doesn't change, we may end up faulting 4359*0Sstevel@tonic-gate * indefinitely until the RO tlb entry gets replaced. 
4360*0Sstevel@tonic-gate */ 4361*0Sstevel@tonic-gate for (a = addr; a < addr + len; a += PAGESIZE, index++) { 4362*0Sstevel@tonic-gate anon_array_enter(amp, index, &cookie); 4363*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, index); 4364*0Sstevel@tonic-gate anon_array_exit(&cookie); 4365*0Sstevel@tonic-gate if ((ap == NULL) || (ap->an_refcnt != 1)) { 4366*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 4367*0Sstevel@tonic-gate goto slow; 4368*0Sstevel@tonic-gate } 4369*0Sstevel@tonic-gate } 4370*0Sstevel@tonic-gate hat_chgprot(seg->s_as->a_hat, addr, len, svd->prot); 4371*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 4372*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4373*0Sstevel@tonic-gate return (0); 4374*0Sstevel@tonic-gate } 4375*0Sstevel@tonic-gate } 4376*0Sstevel@tonic-gate slow: 4377*0Sstevel@tonic-gate 4378*0Sstevel@tonic-gate if (svd->vpage == NULL) 4379*0Sstevel@tonic-gate vpage = NULL; 4380*0Sstevel@tonic-gate else 4381*0Sstevel@tonic-gate vpage = &svd->vpage[page]; 4382*0Sstevel@tonic-gate 4383*0Sstevel@tonic-gate off = svd->offset + (uintptr_t)(addr - seg->s_base); 4384*0Sstevel@tonic-gate 4385*0Sstevel@tonic-gate /* 4386*0Sstevel@tonic-gate * If MADV_SEQUENTIAL has been set for the particular page we 4387*0Sstevel@tonic-gate * are faulting on, free behind all pages in the segment and put 4388*0Sstevel@tonic-gate * them on the free list. 4389*0Sstevel@tonic-gate */ 4390*0Sstevel@tonic-gate if ((page != 0) && fltadvice) { /* not if first page in segment */ 4391*0Sstevel@tonic-gate struct vpage *vpp; 4392*0Sstevel@tonic-gate ulong_t fanon_index; 4393*0Sstevel@tonic-gate size_t fpage; 4394*0Sstevel@tonic-gate u_offset_t pgoff, fpgoff; 4395*0Sstevel@tonic-gate struct vnode *fvp; 4396*0Sstevel@tonic-gate struct anon *fap = NULL; 4397*0Sstevel@tonic-gate 4398*0Sstevel@tonic-gate if (svd->advice == MADV_SEQUENTIAL || 4399*0Sstevel@tonic-gate (svd->pageadvice && 4400*0Sstevel@tonic-gate VPP_ADVICE(vpage) == MADV_SEQUENTIAL)) { 4401*0Sstevel@tonic-gate pgoff = off - PAGESIZE; 4402*0Sstevel@tonic-gate fpage = page - 1; 4403*0Sstevel@tonic-gate if (vpage != NULL) 4404*0Sstevel@tonic-gate vpp = &svd->vpage[fpage]; 4405*0Sstevel@tonic-gate if (amp != NULL) 4406*0Sstevel@tonic-gate fanon_index = svd->anon_index + fpage; 4407*0Sstevel@tonic-gate 4408*0Sstevel@tonic-gate while (pgoff > svd->offset) { 4409*0Sstevel@tonic-gate if (svd->advice != MADV_SEQUENTIAL && 4410*0Sstevel@tonic-gate (!svd->pageadvice || (vpage && 4411*0Sstevel@tonic-gate VPP_ADVICE(vpp) != MADV_SEQUENTIAL))) 4412*0Sstevel@tonic-gate break; 4413*0Sstevel@tonic-gate 4414*0Sstevel@tonic-gate /* 4415*0Sstevel@tonic-gate * If this is an anon page, we must find the 4416*0Sstevel@tonic-gate * correct <vp, offset> for it 4417*0Sstevel@tonic-gate */ 4418*0Sstevel@tonic-gate fap = NULL; 4419*0Sstevel@tonic-gate if (amp != NULL) { 4420*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, 4421*0Sstevel@tonic-gate RW_READER); 4422*0Sstevel@tonic-gate anon_array_enter(amp, fanon_index, 4423*0Sstevel@tonic-gate &cookie); 4424*0Sstevel@tonic-gate fap = anon_get_ptr(amp->ahp, 4425*0Sstevel@tonic-gate fanon_index); 4426*0Sstevel@tonic-gate if (fap != NULL) { 4427*0Sstevel@tonic-gate swap_xlate(fap, &fvp, &fpgoff); 4428*0Sstevel@tonic-gate } else { 4429*0Sstevel@tonic-gate fpgoff = pgoff; 4430*0Sstevel@tonic-gate fvp = svd->vp; 4431*0Sstevel@tonic-gate } 4432*0Sstevel@tonic-gate anon_array_exit(&cookie); 4433*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 4434*0Sstevel@tonic-gate } else { 4435*0Sstevel@tonic-gate fpgoff = 
pgoff; 4436*0Sstevel@tonic-gate fvp = svd->vp; 4437*0Sstevel@tonic-gate } 4438*0Sstevel@tonic-gate if (fvp == NULL) 4439*0Sstevel@tonic-gate break; /* XXX */ 4440*0Sstevel@tonic-gate /* 4441*0Sstevel@tonic-gate * Skip pages that are free or have an 4442*0Sstevel@tonic-gate * "exclusive" lock. 4443*0Sstevel@tonic-gate */ 4444*0Sstevel@tonic-gate pp = page_lookup_nowait(fvp, fpgoff, SE_SHARED); 4445*0Sstevel@tonic-gate if (pp == NULL) 4446*0Sstevel@tonic-gate break; 4447*0Sstevel@tonic-gate /* 4448*0Sstevel@tonic-gate * We don't need the page_struct_lock to test 4449*0Sstevel@tonic-gate * as this is only advisory; even if we 4450*0Sstevel@tonic-gate * acquire it someone might race in and lock 4451*0Sstevel@tonic-gate * the page after we unlock and before the 4452*0Sstevel@tonic-gate * PUTPAGE, then VOP_PUTPAGE will do nothing. 4453*0Sstevel@tonic-gate */ 4454*0Sstevel@tonic-gate if (pp->p_lckcnt == 0 && pp->p_cowcnt == 0) { 4455*0Sstevel@tonic-gate /* 4456*0Sstevel@tonic-gate * Hold the vnode before releasing 4457*0Sstevel@tonic-gate * the page lock to prevent it from 4458*0Sstevel@tonic-gate * being freed and re-used by some 4459*0Sstevel@tonic-gate * other thread. 4460*0Sstevel@tonic-gate */ 4461*0Sstevel@tonic-gate VN_HOLD(fvp); 4462*0Sstevel@tonic-gate page_unlock(pp); 4463*0Sstevel@tonic-gate /* 4464*0Sstevel@tonic-gate * We should build a page list 4465*0Sstevel@tonic-gate * to kluster putpages XXX 4466*0Sstevel@tonic-gate */ 4467*0Sstevel@tonic-gate (void) VOP_PUTPAGE(fvp, 4468*0Sstevel@tonic-gate (offset_t)fpgoff, PAGESIZE, 4469*0Sstevel@tonic-gate (B_DONTNEED|B_FREE|B_ASYNC), 4470*0Sstevel@tonic-gate svd->cred); 4471*0Sstevel@tonic-gate VN_RELE(fvp); 4472*0Sstevel@tonic-gate } else { 4473*0Sstevel@tonic-gate /* 4474*0Sstevel@tonic-gate * XXX - Should the loop terminate if 4475*0Sstevel@tonic-gate * the page is `locked'? 4476*0Sstevel@tonic-gate */ 4477*0Sstevel@tonic-gate page_unlock(pp); 4478*0Sstevel@tonic-gate } 4479*0Sstevel@tonic-gate --vpp; 4480*0Sstevel@tonic-gate --fanon_index; 4481*0Sstevel@tonic-gate pgoff -= PAGESIZE; 4482*0Sstevel@tonic-gate } 4483*0Sstevel@tonic-gate } 4484*0Sstevel@tonic-gate } 4485*0Sstevel@tonic-gate 4486*0Sstevel@tonic-gate plp = pl; 4487*0Sstevel@tonic-gate *plp = NULL; 4488*0Sstevel@tonic-gate pl_alloc_sz = 0; 4489*0Sstevel@tonic-gate 4490*0Sstevel@tonic-gate /* 4491*0Sstevel@tonic-gate * See if we need to call VOP_GETPAGE for 4492*0Sstevel@tonic-gate * *any* of the range being faulted on. 4493*0Sstevel@tonic-gate * We can skip all of this work if there 4494*0Sstevel@tonic-gate * was no original vnode. 4495*0Sstevel@tonic-gate */ 4496*0Sstevel@tonic-gate if (svd->vp != NULL) { 4497*0Sstevel@tonic-gate u_offset_t vp_off; 4498*0Sstevel@tonic-gate size_t vp_len; 4499*0Sstevel@tonic-gate struct anon *ap; 4500*0Sstevel@tonic-gate vnode_t *vp; 4501*0Sstevel@tonic-gate 4502*0Sstevel@tonic-gate vp_off = off; 4503*0Sstevel@tonic-gate vp_len = len; 4504*0Sstevel@tonic-gate 4505*0Sstevel@tonic-gate if (amp == NULL) 4506*0Sstevel@tonic-gate dogetpage = 1; 4507*0Sstevel@tonic-gate else { 4508*0Sstevel@tonic-gate /* 4509*0Sstevel@tonic-gate * Only acquire reader lock to prevent amp->ahp 4510*0Sstevel@tonic-gate * from being changed. 
It's ok to miss pages, 4511*0Sstevel@tonic-gate * hence we don't do anon_array_enter 4512*0Sstevel@tonic-gate */ 4513*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 4514*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index); 4515*0Sstevel@tonic-gate 4516*0Sstevel@tonic-gate if (len <= PAGESIZE) 4517*0Sstevel@tonic-gate /* inline non_anon() */ 4518*0Sstevel@tonic-gate dogetpage = (ap == NULL); 4519*0Sstevel@tonic-gate else 4520*0Sstevel@tonic-gate dogetpage = non_anon(amp->ahp, anon_index, 4521*0Sstevel@tonic-gate &vp_off, &vp_len); 4522*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 4523*0Sstevel@tonic-gate } 4524*0Sstevel@tonic-gate 4525*0Sstevel@tonic-gate if (dogetpage) { 4526*0Sstevel@tonic-gate enum seg_rw arw; 4527*0Sstevel@tonic-gate struct as *as = seg->s_as; 4528*0Sstevel@tonic-gate 4529*0Sstevel@tonic-gate if (len > ptob((sizeof (pl) / sizeof (pl[0])) - 1)) { 4530*0Sstevel@tonic-gate /* 4531*0Sstevel@tonic-gate * Page list won't fit in local array, 4532*0Sstevel@tonic-gate * allocate one of the needed size. 4533*0Sstevel@tonic-gate */ 4534*0Sstevel@tonic-gate pl_alloc_sz = 4535*0Sstevel@tonic-gate (btop(len) + 1) * sizeof (page_t *); 4536*0Sstevel@tonic-gate plp = kmem_alloc(pl_alloc_sz, KM_SLEEP); 4537*0Sstevel@tonic-gate plp[0] = NULL; 4538*0Sstevel@tonic-gate plsz = len; 4539*0Sstevel@tonic-gate } else if (rw == S_WRITE && svd->type == MAP_PRIVATE || 4540*0Sstevel@tonic-gate rw == S_OTHER || 4541*0Sstevel@tonic-gate (((size_t)(addr + PAGESIZE) < 4542*0Sstevel@tonic-gate (size_t)(seg->s_base + seg->s_size)) && 4543*0Sstevel@tonic-gate hat_probe(as->a_hat, addr + PAGESIZE))) { 4544*0Sstevel@tonic-gate /* 4545*0Sstevel@tonic-gate * Ask VOP_GETPAGE to return the exact number 4546*0Sstevel@tonic-gate * of pages if 4547*0Sstevel@tonic-gate * (a) this is a COW fault, or 4548*0Sstevel@tonic-gate * (b) this is a software fault, or 4549*0Sstevel@tonic-gate * (c) next page is already mapped. 4550*0Sstevel@tonic-gate */ 4551*0Sstevel@tonic-gate plsz = len; 4552*0Sstevel@tonic-gate } else { 4553*0Sstevel@tonic-gate /* 4554*0Sstevel@tonic-gate * Ask VOP_GETPAGE to return adjacent pages 4555*0Sstevel@tonic-gate * within the segment. 4556*0Sstevel@tonic-gate */ 4557*0Sstevel@tonic-gate plsz = MIN((size_t)PVN_GETPAGE_SZ, (size_t) 4558*0Sstevel@tonic-gate ((seg->s_base + seg->s_size) - addr)); 4559*0Sstevel@tonic-gate ASSERT((addr + plsz) <= 4560*0Sstevel@tonic-gate (seg->s_base + seg->s_size)); 4561*0Sstevel@tonic-gate } 4562*0Sstevel@tonic-gate 4563*0Sstevel@tonic-gate /* 4564*0Sstevel@tonic-gate * Need to get some non-anonymous pages. 4565*0Sstevel@tonic-gate * We need to make only one call to GETPAGE to do 4566*0Sstevel@tonic-gate * this to prevent certain deadlocking conditions 4567*0Sstevel@tonic-gate * when we are doing locking. In this case 4568*0Sstevel@tonic-gate * non_anon() should have picked up the smallest 4569*0Sstevel@tonic-gate * range which includes all the non-anonymous 4570*0Sstevel@tonic-gate * pages in the requested range. We have to 4571*0Sstevel@tonic-gate * be careful regarding which rw flag to pass in 4572*0Sstevel@tonic-gate * because on a private mapping, the underlying 4573*0Sstevel@tonic-gate * object is never allowed to be written. 
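		 * For example, a store into a MAP_PRIVATE file mapping faults
		 * with rw == S_WRITE, but the backing file itself must only be
		 * read (the modified data goes to a private anon copy), so arw
		 * is forced to S_READ below before calling VOP_GETPAGE.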
4574*0Sstevel@tonic-gate */ 4575*0Sstevel@tonic-gate if (rw == S_WRITE && svd->type == MAP_PRIVATE) { 4576*0Sstevel@tonic-gate arw = S_READ; 4577*0Sstevel@tonic-gate } else { 4578*0Sstevel@tonic-gate arw = rw; 4579*0Sstevel@tonic-gate } 4580*0Sstevel@tonic-gate vp = svd->vp; 4581*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_SEGVN_GETPAGE, 4582*0Sstevel@tonic-gate "segvn_getpage:seg %p addr %p vp %p", 4583*0Sstevel@tonic-gate seg, addr, vp); 4584*0Sstevel@tonic-gate err = VOP_GETPAGE(vp, (offset_t)vp_off, vp_len, 4585*0Sstevel@tonic-gate &vpprot, plp, plsz, seg, addr + (vp_off - off), arw, 4586*0Sstevel@tonic-gate svd->cred); 4587*0Sstevel@tonic-gate if (err) { 4588*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4589*0Sstevel@tonic-gate segvn_pagelist_rele(plp); 4590*0Sstevel@tonic-gate if (pl_alloc_sz) 4591*0Sstevel@tonic-gate kmem_free(plp, pl_alloc_sz); 4592*0Sstevel@tonic-gate return (FC_MAKE_ERR(err)); 4593*0Sstevel@tonic-gate } 4594*0Sstevel@tonic-gate if (svd->type == MAP_PRIVATE) 4595*0Sstevel@tonic-gate vpprot &= ~PROT_WRITE; 4596*0Sstevel@tonic-gate } 4597*0Sstevel@tonic-gate } 4598*0Sstevel@tonic-gate 4599*0Sstevel@tonic-gate /* 4600*0Sstevel@tonic-gate * N.B. at this time the plp array has all the needed non-anon 4601*0Sstevel@tonic-gate * pages in addition to (possibly) having some adjacent pages. 4602*0Sstevel@tonic-gate */ 4603*0Sstevel@tonic-gate 4604*0Sstevel@tonic-gate /* 4605*0Sstevel@tonic-gate * Always acquire the anon_array_lock to prevent 4606*0Sstevel@tonic-gate * 2 threads from allocating separate anon slots for 4607*0Sstevel@tonic-gate * the same "addr". 4608*0Sstevel@tonic-gate * 4609*0Sstevel@tonic-gate * If this is a copy-on-write fault and we don't already 4610*0Sstevel@tonic-gate * have the anon_array_lock, acquire it to prevent the 4611*0Sstevel@tonic-gate * fault routine from handling multiple copy-on-write faults 4612*0Sstevel@tonic-gate * on the same "addr" in the same address space. 4613*0Sstevel@tonic-gate * 4614*0Sstevel@tonic-gate * Only one thread should deal with the fault since after 4615*0Sstevel@tonic-gate * it is handled, the other threads can acquire a translation 4616*0Sstevel@tonic-gate * to the newly created private page. This prevents two or 4617*0Sstevel@tonic-gate * more threads from creating different private pages for the 4618*0Sstevel@tonic-gate * same fault. 4619*0Sstevel@tonic-gate * 4620*0Sstevel@tonic-gate * We grab "serialization" lock here if this is a MAP_PRIVATE segment 4621*0Sstevel@tonic-gate * to prevent deadlock between this thread and another thread 4622*0Sstevel@tonic-gate * which has soft-locked this page and wants to acquire serial_lock. 4623*0Sstevel@tonic-gate * ( bug 4026339 ) 4624*0Sstevel@tonic-gate * 4625*0Sstevel@tonic-gate * The fix for bug 4026339 becomes unnecessary when using the 4626*0Sstevel@tonic-gate * locking scheme with per amp rwlock and a global set of hash 4627*0Sstevel@tonic-gate * lock, anon_array_lock. If we steal a vnode page when low 4628*0Sstevel@tonic-gate * on memory and upgrad the page lock through page_rename, 4629*0Sstevel@tonic-gate * then the page is PAGE_HANDLED, nothing needs to be done 4630*0Sstevel@tonic-gate * for this page after returning from segvn_faultpage. 4631*0Sstevel@tonic-gate * 4632*0Sstevel@tonic-gate * But really, the page lock should be downgraded after 4633*0Sstevel@tonic-gate * the stolen page is page_rename'd. 
4634*0Sstevel@tonic-gate 	 */
4635*0Sstevel@tonic-gate 
4636*0Sstevel@tonic-gate 	if (amp != NULL)
4637*0Sstevel@tonic-gate 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
4638*0Sstevel@tonic-gate 
4639*0Sstevel@tonic-gate 	/*
4640*0Sstevel@tonic-gate 	 * Ok, now loop over the address range and handle faults
4641*0Sstevel@tonic-gate 	 */
4642*0Sstevel@tonic-gate 	for (a = addr; a < addr + len; a += PAGESIZE, off += PAGESIZE) {
4643*0Sstevel@tonic-gate 		err = segvn_faultpage(hat, seg, a, off, vpage, plp, vpprot,
4644*0Sstevel@tonic-gate 		    type, rw, brkcow);
4645*0Sstevel@tonic-gate 		if (err) {
4646*0Sstevel@tonic-gate 			if (amp != NULL)
4647*0Sstevel@tonic-gate 				ANON_LOCK_EXIT(&amp->a_rwlock);
4648*0Sstevel@tonic-gate 			if (type == F_SOFTLOCK && a > addr)
4649*0Sstevel@tonic-gate 				segvn_softunlock(seg, addr, (a - addr),
4650*0Sstevel@tonic-gate 				    S_OTHER);
4651*0Sstevel@tonic-gate 			SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
4652*0Sstevel@tonic-gate 			segvn_pagelist_rele(plp);
4653*0Sstevel@tonic-gate 			if (pl_alloc_sz)
4654*0Sstevel@tonic-gate 				kmem_free(plp, pl_alloc_sz);
4655*0Sstevel@tonic-gate 			return (err);
4656*0Sstevel@tonic-gate 		}
4657*0Sstevel@tonic-gate 		if (vpage) {
4658*0Sstevel@tonic-gate 			vpage++;
4659*0Sstevel@tonic-gate 		} else if (svd->vpage) {
4660*0Sstevel@tonic-gate 			page = seg_page(seg, addr);
4661*0Sstevel@tonic-gate 			vpage = &svd->vpage[++page];
4662*0Sstevel@tonic-gate 		}
4663*0Sstevel@tonic-gate 	}
4664*0Sstevel@tonic-gate 
4665*0Sstevel@tonic-gate 	/* Didn't get pages from the underlying fs so we're done */
4666*0Sstevel@tonic-gate 	if (!dogetpage)
4667*0Sstevel@tonic-gate 		goto done;
4668*0Sstevel@tonic-gate 
4669*0Sstevel@tonic-gate 	/*
4670*0Sstevel@tonic-gate 	 * Now handle any other pages in the list returned.
4671*0Sstevel@tonic-gate 	 * If the page can be used, load up the translations now.
4672*0Sstevel@tonic-gate 	 * Note that the for loop will only be entered if "plp"
4673*0Sstevel@tonic-gate 	 * is pointing to a non-NULL page pointer which means that
4674*0Sstevel@tonic-gate 	 * VOP_GETPAGE() was called and vpprot has been initialized.
4675*0Sstevel@tonic-gate 	 */
4676*0Sstevel@tonic-gate 	if (svd->pageprot == 0)
4677*0Sstevel@tonic-gate 		prot = svd->prot & vpprot;
4678*0Sstevel@tonic-gate 
4679*0Sstevel@tonic-gate 
4680*0Sstevel@tonic-gate 	/*
4681*0Sstevel@tonic-gate 	 * Large Files: diff should be unsigned value because we started
4682*0Sstevel@tonic-gate 	 * supporting > 2GB segment sizes from 2.5.1 and when a
4683*0Sstevel@tonic-gate 	 * large file of size > 2GB gets mapped to address space
4684*0Sstevel@tonic-gate 	 * the diff value can be > 2GB.
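	 * For example, with a mapping larger than 2GB starting at file offset
	 * 0, a page at file offset 2.5GB yields diff = pp->p_offset -
	 * svd->offset = 2.5GB, which fits in an unsigned size_t but would
	 * overflow a signed 32-bit type.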
4685*0Sstevel@tonic-gate */ 4686*0Sstevel@tonic-gate 4687*0Sstevel@tonic-gate for (ppp = plp; (pp = *ppp) != NULL; ppp++) { 4688*0Sstevel@tonic-gate size_t diff; 4689*0Sstevel@tonic-gate struct anon *ap; 4690*0Sstevel@tonic-gate int anon_index; 4691*0Sstevel@tonic-gate anon_sync_obj_t cookie; 4692*0Sstevel@tonic-gate int hat_flag = HAT_LOAD_ADV; 4693*0Sstevel@tonic-gate 4694*0Sstevel@tonic-gate if (svd->flags & MAP_TEXT) { 4695*0Sstevel@tonic-gate hat_flag |= HAT_LOAD_TEXT; 4696*0Sstevel@tonic-gate } 4697*0Sstevel@tonic-gate 4698*0Sstevel@tonic-gate if (pp == PAGE_HANDLED) 4699*0Sstevel@tonic-gate continue; 4700*0Sstevel@tonic-gate 4701*0Sstevel@tonic-gate if (pp->p_offset >= svd->offset && 4702*0Sstevel@tonic-gate (pp->p_offset < svd->offset + seg->s_size)) { 4703*0Sstevel@tonic-gate 4704*0Sstevel@tonic-gate diff = pp->p_offset - svd->offset; 4705*0Sstevel@tonic-gate 4706*0Sstevel@tonic-gate /* 4707*0Sstevel@tonic-gate * Large Files: Following is the assertion 4708*0Sstevel@tonic-gate * validating the above cast. 4709*0Sstevel@tonic-gate */ 4710*0Sstevel@tonic-gate ASSERT(svd->vp == pp->p_vnode); 4711*0Sstevel@tonic-gate 4712*0Sstevel@tonic-gate page = btop(diff); 4713*0Sstevel@tonic-gate if (svd->pageprot) 4714*0Sstevel@tonic-gate prot = VPP_PROT(&svd->vpage[page]) & vpprot; 4715*0Sstevel@tonic-gate 4716*0Sstevel@tonic-gate /* 4717*0Sstevel@tonic-gate * Prevent other threads in the address space from 4718*0Sstevel@tonic-gate * creating private pages (i.e., allocating anon slots) 4719*0Sstevel@tonic-gate * while we are in the process of loading translations 4720*0Sstevel@tonic-gate * to additional pages returned by the underlying 4721*0Sstevel@tonic-gate * object. 4722*0Sstevel@tonic-gate */ 4723*0Sstevel@tonic-gate if (amp != NULL) { 4724*0Sstevel@tonic-gate anon_index = svd->anon_index + page; 4725*0Sstevel@tonic-gate anon_array_enter(amp, anon_index, &cookie); 4726*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index); 4727*0Sstevel@tonic-gate } 4728*0Sstevel@tonic-gate if ((amp == NULL) || (ap == NULL)) { 4729*0Sstevel@tonic-gate if (IS_VMODSORT(pp->p_vnode) || 4730*0Sstevel@tonic-gate enable_mbit_wa) { 4731*0Sstevel@tonic-gate if (rw == S_WRITE) 4732*0Sstevel@tonic-gate hat_setmod(pp); 4733*0Sstevel@tonic-gate else if (rw != S_OTHER && 4734*0Sstevel@tonic-gate !hat_ismod(pp)) 4735*0Sstevel@tonic-gate prot &= ~PROT_WRITE; 4736*0Sstevel@tonic-gate } 4737*0Sstevel@tonic-gate /* 4738*0Sstevel@tonic-gate * Skip mapping read ahead pages marked 4739*0Sstevel@tonic-gate * for migration, so they will get migrated 4740*0Sstevel@tonic-gate * properly on fault 4741*0Sstevel@tonic-gate */ 4742*0Sstevel@tonic-gate if ((prot & PROT_READ) && !PP_ISMIGRATE(pp)) { 4743*0Sstevel@tonic-gate hat_memload(hat, seg->s_base + diff, 4744*0Sstevel@tonic-gate pp, prot, hat_flag); 4745*0Sstevel@tonic-gate } 4746*0Sstevel@tonic-gate } 4747*0Sstevel@tonic-gate if (amp != NULL) 4748*0Sstevel@tonic-gate anon_array_exit(&cookie); 4749*0Sstevel@tonic-gate } 4750*0Sstevel@tonic-gate page_unlock(pp); 4751*0Sstevel@tonic-gate } 4752*0Sstevel@tonic-gate done: 4753*0Sstevel@tonic-gate if (amp != NULL) 4754*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 4755*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4756*0Sstevel@tonic-gate if (pl_alloc_sz) 4757*0Sstevel@tonic-gate kmem_free(plp, pl_alloc_sz); 4758*0Sstevel@tonic-gate return (0); 4759*0Sstevel@tonic-gate } 4760*0Sstevel@tonic-gate 4761*0Sstevel@tonic-gate /* 4762*0Sstevel@tonic-gate * This routine is used to start I/O on pages asynchronously. 
XXX it will 4763*0Sstevel@tonic-gate * only create PAGESIZE pages. At fault time they will be relocated into 4764*0Sstevel@tonic-gate * larger pages. 4765*0Sstevel@tonic-gate */ 4766*0Sstevel@tonic-gate static faultcode_t 4767*0Sstevel@tonic-gate segvn_faulta(struct seg *seg, caddr_t addr) 4768*0Sstevel@tonic-gate { 4769*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 4770*0Sstevel@tonic-gate int err; 4771*0Sstevel@tonic-gate struct anon_map *amp; 4772*0Sstevel@tonic-gate vnode_t *vp; 4773*0Sstevel@tonic-gate 4774*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 4775*0Sstevel@tonic-gate 4776*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER); 4777*0Sstevel@tonic-gate if ((amp = svd->amp) != NULL) { 4778*0Sstevel@tonic-gate struct anon *ap; 4779*0Sstevel@tonic-gate 4780*0Sstevel@tonic-gate /* 4781*0Sstevel@tonic-gate * Reader lock to prevent amp->ahp from being changed. 4782*0Sstevel@tonic-gate * This is advisory, it's ok to miss a page, so 4783*0Sstevel@tonic-gate * we don't do anon_array_enter lock. 4784*0Sstevel@tonic-gate */ 4785*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); 4786*0Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, 4787*0Sstevel@tonic-gate svd->anon_index + seg_page(seg, addr))) != NULL) { 4788*0Sstevel@tonic-gate 4789*0Sstevel@tonic-gate err = anon_getpage(&ap, NULL, NULL, 4790*0Sstevel@tonic-gate 0, seg, addr, S_READ, svd->cred); 4791*0Sstevel@tonic-gate 4792*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 4793*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4794*0Sstevel@tonic-gate if (err) 4795*0Sstevel@tonic-gate return (FC_MAKE_ERR(err)); 4796*0Sstevel@tonic-gate return (0); 4797*0Sstevel@tonic-gate } 4798*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 4799*0Sstevel@tonic-gate } 4800*0Sstevel@tonic-gate 4801*0Sstevel@tonic-gate if (svd->vp == NULL) { 4802*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4803*0Sstevel@tonic-gate return (0); /* zfod page - do nothing now */ 4804*0Sstevel@tonic-gate } 4805*0Sstevel@tonic-gate 4806*0Sstevel@tonic-gate vp = svd->vp; 4807*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_SEGVN_GETPAGE, 4808*0Sstevel@tonic-gate "segvn_getpage:seg %p addr %p vp %p", seg, addr, vp); 4809*0Sstevel@tonic-gate err = VOP_GETPAGE(vp, 4810*0Sstevel@tonic-gate (offset_t)(svd->offset + (uintptr_t)(addr - seg->s_base)), 4811*0Sstevel@tonic-gate PAGESIZE, NULL, NULL, 0, seg, addr, 4812*0Sstevel@tonic-gate S_OTHER, svd->cred); 4813*0Sstevel@tonic-gate 4814*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4815*0Sstevel@tonic-gate if (err) 4816*0Sstevel@tonic-gate return (FC_MAKE_ERR(err)); 4817*0Sstevel@tonic-gate return (0); 4818*0Sstevel@tonic-gate } 4819*0Sstevel@tonic-gate 4820*0Sstevel@tonic-gate static int 4821*0Sstevel@tonic-gate segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 4822*0Sstevel@tonic-gate { 4823*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 4824*0Sstevel@tonic-gate struct vpage *svp, *evp; 4825*0Sstevel@tonic-gate struct vnode *vp; 4826*0Sstevel@tonic-gate size_t pgsz; 4827*0Sstevel@tonic-gate pgcnt_t pgcnt; 4828*0Sstevel@tonic-gate anon_sync_obj_t cookie; 4829*0Sstevel@tonic-gate 4830*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 4831*0Sstevel@tonic-gate 4832*0Sstevel@tonic-gate if ((svd->maxprot & prot) != prot) 4833*0Sstevel@tonic-gate return (EACCES); /* violated maxprot */ 4834*0Sstevel@tonic-gate
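	/*
	 * Editorial note, not part of the original source: the check above
	 * enforces the maximum protections fixed when the mapping was
	 * created.  As a rough userland sketch (hypothetical values), a
	 * request such as
	 *
	 *	p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	 *	mprotect(p, len, PROT_READ | PROT_WRITE);
	 *
	 * reaches this routine via as_setprot()/SEGOP_SETPROT() and can
	 * only proceed if every requested bit is within svd->maxprot;
	 * otherwise EACCES is returned before any locks are taken.
	 */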
4835*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); 4836*0Sstevel@tonic-gate 4837*0Sstevel@tonic-gate /* return if prot is the same */ 4838*0Sstevel@tonic-gate if (!svd->pageprot && svd->prot == prot) { 4839*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4840*0Sstevel@tonic-gate return (0); 4841*0Sstevel@tonic-gate } 4842*0Sstevel@tonic-gate 4843*0Sstevel@tonic-gate /* 4844*0Sstevel@tonic-gate * Since we change protections we first have to flush the cache. 4845*0Sstevel@tonic-gate * This makes sure all the pagelock calls have to recheck 4846*0Sstevel@tonic-gate * protections. 4847*0Sstevel@tonic-gate */ 4848*0Sstevel@tonic-gate if (svd->softlockcnt > 0) { 4849*0Sstevel@tonic-gate /* 4850*0Sstevel@tonic-gate * Since we do have the segvn writers lock nobody can fill 4851*0Sstevel@tonic-gate * the cache with entries belonging to this seg during 4852*0Sstevel@tonic-gate * the purge. The flush either succeeds or we still have 4853*0Sstevel@tonic-gate * pending I/Os. 4854*0Sstevel@tonic-gate */ 4855*0Sstevel@tonic-gate segvn_purge(seg); 4856*0Sstevel@tonic-gate if (svd->softlockcnt > 0) { 4857*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4858*0Sstevel@tonic-gate return (EAGAIN); 4859*0Sstevel@tonic-gate } 4860*0Sstevel@tonic-gate } 4861*0Sstevel@tonic-gate 4862*0Sstevel@tonic-gate if (seg->s_szc != 0) { 4863*0Sstevel@tonic-gate int err; 4864*0Sstevel@tonic-gate pgsz = page_get_pagesize(seg->s_szc); 4865*0Sstevel@tonic-gate pgcnt = pgsz >> PAGESHIFT; 4866*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 4867*0Sstevel@tonic-gate if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) { 4868*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4869*0Sstevel@tonic-gate ASSERT(seg->s_base != addr || seg->s_size != len); 4870*0Sstevel@tonic-gate /* 4871*0Sstevel@tonic-gate * If we are holding the as lock as a reader then 4872*0Sstevel@tonic-gate * we need to return IE_RETRY and let the as 4873*0Sstevel@tonic-gate * layer drop and re-acquire the lock as a writer. 4874*0Sstevel@tonic-gate */ 4875*0Sstevel@tonic-gate if (AS_READ_HELD(seg->s_as, &seg->s_as->a_lock)) 4876*0Sstevel@tonic-gate return (IE_RETRY); 4877*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.demoterange[1]); 4878*0Sstevel@tonic-gate err = segvn_demote_range(seg, addr, len, SDR_END); 4879*0Sstevel@tonic-gate if (err == 0) 4880*0Sstevel@tonic-gate return (IE_RETRY); 4881*0Sstevel@tonic-gate if (err == ENOMEM) 4882*0Sstevel@tonic-gate return (IE_NOMEM); 4883*0Sstevel@tonic-gate return (err); 4884*0Sstevel@tonic-gate } 4885*0Sstevel@tonic-gate } 4886*0Sstevel@tonic-gate 4887*0Sstevel@tonic-gate 4888*0Sstevel@tonic-gate /* 4889*0Sstevel@tonic-gate * If it's a private mapping and we're making it writable 4890*0Sstevel@tonic-gate * and no swap space has been reserved, have to reserve 4891*0Sstevel@tonic-gate * it all now. If it's a private mapping to a file (i.e., vp != NULL) 4892*0Sstevel@tonic-gate * and we're removing write permission on the entire segment and 4893*0Sstevel@tonic-gate * we haven't modified any pages, we can release the swap space.
4894*0Sstevel@tonic-gate */ 4895*0Sstevel@tonic-gate if (svd->type == MAP_PRIVATE) { 4896*0Sstevel@tonic-gate if (prot & PROT_WRITE) { 4897*0Sstevel@tonic-gate size_t sz; 4898*0Sstevel@tonic-gate if (svd->swresv == 0 && !(svd->flags & MAP_NORESERVE)) { 4899*0Sstevel@tonic-gate if (anon_resv(seg->s_size) == 0) { 4900*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4901*0Sstevel@tonic-gate return (IE_NOMEM); 4902*0Sstevel@tonic-gate } 4903*0Sstevel@tonic-gate sz = svd->swresv = seg->s_size; 4904*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_ANON_PROC, 4905*0Sstevel@tonic-gate "anon proc:%p %lu %u", 4906*0Sstevel@tonic-gate seg, sz, 1); 4907*0Sstevel@tonic-gate } 4908*0Sstevel@tonic-gate } else { 4909*0Sstevel@tonic-gate /* 4910*0Sstevel@tonic-gate * Swap space is released only if this segment 4911*0Sstevel@tonic-gate * does not map anonymous memory, since read faults 4912*0Sstevel@tonic-gate * on such segments still need an anon slot to read 4913*0Sstevel@tonic-gate * in the data. 4914*0Sstevel@tonic-gate */ 4915*0Sstevel@tonic-gate if (svd->swresv != 0 && svd->vp != NULL && 4916*0Sstevel@tonic-gate svd->amp == NULL && addr == seg->s_base && 4917*0Sstevel@tonic-gate len == seg->s_size && svd->pageprot == 0) { 4918*0Sstevel@tonic-gate anon_unresv(svd->swresv); 4919*0Sstevel@tonic-gate svd->swresv = 0; 4920*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_ANON_PROC, 4921*0Sstevel@tonic-gate "anon proc:%p %lu %u", 4922*0Sstevel@tonic-gate seg, 0, 0); 4923*0Sstevel@tonic-gate } 4924*0Sstevel@tonic-gate } 4925*0Sstevel@tonic-gate } 4926*0Sstevel@tonic-gate 4927*0Sstevel@tonic-gate if (addr == seg->s_base && len == seg->s_size && svd->pageprot == 0) { 4928*0Sstevel@tonic-gate if (svd->prot == prot) { 4929*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 4930*0Sstevel@tonic-gate return (0); /* all done */ 4931*0Sstevel@tonic-gate } 4932*0Sstevel@tonic-gate svd->prot = (uchar_t)prot; 4933*0Sstevel@tonic-gate } else { 4934*0Sstevel@tonic-gate struct anon *ap = NULL; 4935*0Sstevel@tonic-gate page_t *pp; 4936*0Sstevel@tonic-gate u_offset_t offset, off; 4937*0Sstevel@tonic-gate struct anon_map *amp; 4938*0Sstevel@tonic-gate ulong_t anon_idx = 0; 4939*0Sstevel@tonic-gate 4940*0Sstevel@tonic-gate /* 4941*0Sstevel@tonic-gate * A vpage structure exists or else the change does not 4942*0Sstevel@tonic-gate * involve the entire segment. Establish a vpage structure 4943*0Sstevel@tonic-gate * if none is there. Then, for each page in the range, 4944*0Sstevel@tonic-gate * adjust its individual permissions. Note that write- 4945*0Sstevel@tonic-gate * enabling a MAP_PRIVATE page can affect the claims for 4946*0Sstevel@tonic-gate * locked down memory. Overcommitting memory terminates 4947*0Sstevel@tonic-gate * the operation. 4948*0Sstevel@tonic-gate */ 4949*0Sstevel@tonic-gate segvn_vpage(seg); 4950*0Sstevel@tonic-gate if ((amp = svd->amp) != NULL) { 4951*0Sstevel@tonic-gate anon_idx = svd->anon_index + seg_page(seg, addr); 4952*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0 || 4953*0Sstevel@tonic-gate IS_P2ALIGNED(anon_idx, pgcnt)); 4954*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); 4955*0Sstevel@tonic-gate } 4956*0Sstevel@tonic-gate 4957*0Sstevel@tonic-gate offset = svd->offset + (uintptr_t)(addr - seg->s_base); 4958*0Sstevel@tonic-gate evp = &svd->vpage[seg_page(seg, addr + len)]; 4959*0Sstevel@tonic-gate 4960*0Sstevel@tonic-gate /* 4961*0Sstevel@tonic-gate * See Statement at the beginning of segvn_lockop regarding 4962*0Sstevel@tonic-gate * the way cowcnts and lckcnts are handled.
4963*0Sstevel@tonic-gate */ 4964*0Sstevel@tonic-gate for (svp = &svd->vpage[seg_page(seg, addr)]; svp < evp; svp++) { 4965*0Sstevel@tonic-gate 4966*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0 || 4967*0Sstevel@tonic-gate (svd->vp != NULL || svd->type == MAP_PRIVATE)); 4968*0Sstevel@tonic-gate 4969*0Sstevel@tonic-gate if (seg->s_szc != 0 && svd->type == MAP_PRIVATE) { 4970*0Sstevel@tonic-gate if (amp != NULL) { 4971*0Sstevel@tonic-gate anon_array_enter(amp, anon_idx, 4972*0Sstevel@tonic-gate &cookie); 4973*0Sstevel@tonic-gate } 4974*0Sstevel@tonic-gate if (IS_P2ALIGNED(anon_idx, pgcnt) && 4975*0Sstevel@tonic-gate !segvn_claim_pages(seg, svp, offset, 4976*0Sstevel@tonic-gate anon_idx, prot)) { 4977*0Sstevel@tonic-gate if (amp != NULL) { 4978*0Sstevel@tonic-gate anon_array_exit(&cookie); 4979*0Sstevel@tonic-gate } 4980*0Sstevel@tonic-gate break; 4981*0Sstevel@tonic-gate } 4982*0Sstevel@tonic-gate if (amp != NULL) { 4983*0Sstevel@tonic-gate anon_array_exit(&cookie); 4984*0Sstevel@tonic-gate } 4985*0Sstevel@tonic-gate anon_idx++; 4986*0Sstevel@tonic-gate } else { 4987*0Sstevel@tonic-gate if (amp != NULL) { 4988*0Sstevel@tonic-gate anon_array_enter(amp, anon_idx, 4989*0Sstevel@tonic-gate &cookie); 4990*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_idx++); 4991*0Sstevel@tonic-gate } 4992*0Sstevel@tonic-gate 4993*0Sstevel@tonic-gate if (VPP_ISPPLOCK(svp) && 4994*0Sstevel@tonic-gate (VPP_PROT(svp) != prot) && 4995*0Sstevel@tonic-gate (svd->type == MAP_PRIVATE)) { 4996*0Sstevel@tonic-gate 4997*0Sstevel@tonic-gate if (amp == NULL || ap == NULL) { 4998*0Sstevel@tonic-gate vp = svd->vp; 4999*0Sstevel@tonic-gate off = offset; 5000*0Sstevel@tonic-gate } else 5001*0Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 5002*0Sstevel@tonic-gate if (amp != NULL) 5003*0Sstevel@tonic-gate anon_array_exit(&cookie); 5004*0Sstevel@tonic-gate 5005*0Sstevel@tonic-gate if ((pp = page_lookup(vp, off, 5006*0Sstevel@tonic-gate SE_SHARED)) == NULL) { 5007*0Sstevel@tonic-gate panic("segvn_setprot: no page"); 5008*0Sstevel@tonic-gate /*NOTREACHED*/ 5009*0Sstevel@tonic-gate } 5010*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0); 5011*0Sstevel@tonic-gate if ((VPP_PROT(svp) ^ prot) & 5012*0Sstevel@tonic-gate PROT_WRITE) { 5013*0Sstevel@tonic-gate if (prot & PROT_WRITE) { 5014*0Sstevel@tonic-gate if (!page_addclaim(pp)) { 5015*0Sstevel@tonic-gate page_unlock(pp); 5016*0Sstevel@tonic-gate break; 5017*0Sstevel@tonic-gate } 5018*0Sstevel@tonic-gate } else { 5019*0Sstevel@tonic-gate if (!page_subclaim(pp)) { 5020*0Sstevel@tonic-gate page_unlock(pp); 5021*0Sstevel@tonic-gate break; 5022*0Sstevel@tonic-gate } 5023*0Sstevel@tonic-gate } 5024*0Sstevel@tonic-gate } 5025*0Sstevel@tonic-gate page_unlock(pp); 5026*0Sstevel@tonic-gate } else if (amp != NULL) 5027*0Sstevel@tonic-gate anon_array_exit(&cookie); 5028*0Sstevel@tonic-gate } 5029*0Sstevel@tonic-gate VPP_SETPROT(svp, prot); 5030*0Sstevel@tonic-gate offset += PAGESIZE; 5031*0Sstevel@tonic-gate } 5032*0Sstevel@tonic-gate if (amp != NULL) 5033*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 5034*0Sstevel@tonic-gate 5035*0Sstevel@tonic-gate /* 5036*0Sstevel@tonic-gate * Did we terminate prematurely? If so, simply unload 5037*0Sstevel@tonic-gate * the translations to the things we've updated so far.
5038*0Sstevel@tonic-gate */ 5039*0Sstevel@tonic-gate if (svp != evp) { 5040*0Sstevel@tonic-gate len = (svp - &svd->vpage[seg_page(seg, addr)]) * 5041*0Sstevel@tonic-gate PAGESIZE; 5042*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0 || IS_P2ALIGNED(len, pgsz)); 5043*0Sstevel@tonic-gate if (len != 0) 5044*0Sstevel@tonic-gate hat_unload(seg->s_as->a_hat, addr, 5045*0Sstevel@tonic-gate len, HAT_UNLOAD); 5046*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 5047*0Sstevel@tonic-gate return (IE_NOMEM); 5048*0Sstevel@tonic-gate } 5049*0Sstevel@tonic-gate } 5050*0Sstevel@tonic-gate 5051*0Sstevel@tonic-gate if ((prot & PROT_WRITE) != 0 || (prot & ~PROT_USER) == PROT_NONE) { 5052*0Sstevel@tonic-gate /* 5053*0Sstevel@tonic-gate * Either private or shared data with write access (in 5054*0Sstevel@tonic-gate * which case we need to throw out all former translations 5055*0Sstevel@tonic-gate * so that we get the right translations set up on fault 5056*0Sstevel@tonic-gate * and we don't allow write access to any copy-on-write pages 5057*0Sstevel@tonic-gate * that might be around or to prevent write access to pages 5058*0Sstevel@tonic-gate * representing holes in a file), or we don't have permission 5059*0Sstevel@tonic-gate * to access the memory at all (in which case we have to 5060*0Sstevel@tonic-gate * unload any current translations that might exist). 5061*0Sstevel@tonic-gate */ 5062*0Sstevel@tonic-gate hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD); 5063*0Sstevel@tonic-gate } else { 5064*0Sstevel@tonic-gate /* 5065*0Sstevel@tonic-gate * A shared mapping or a private mapping in which write 5066*0Sstevel@tonic-gate * protection is going to be denied - just change all the 5067*0Sstevel@tonic-gate * protections over the range of addresses in question. 5068*0Sstevel@tonic-gate * segvn does not support any other attributes other 5069*0Sstevel@tonic-gate * than prot so we can use hat_chgattr. 5070*0Sstevel@tonic-gate */ 5071*0Sstevel@tonic-gate hat_chgattr(seg->s_as->a_hat, addr, len, prot); 5072*0Sstevel@tonic-gate } 5073*0Sstevel@tonic-gate 5074*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 5075*0Sstevel@tonic-gate 5076*0Sstevel@tonic-gate return (0); 5077*0Sstevel@tonic-gate } 5078*0Sstevel@tonic-gate 5079*0Sstevel@tonic-gate /* 5080*0Sstevel@tonic-gate * segvn_setpagesize is called via SEGOP_SETPAGESIZE from as_setpagesize, 5081*0Sstevel@tonic-gate * to determine if the seg is capable of mapping the requested szc. 
5082*0Sstevel@tonic-gate */ 5083*0Sstevel@tonic-gate static int 5084*0Sstevel@tonic-gate segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) 5085*0Sstevel@tonic-gate { 5086*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5087*0Sstevel@tonic-gate struct segvn_data *nsvd; 5088*0Sstevel@tonic-gate struct anon_map *amp = svd->amp; 5089*0Sstevel@tonic-gate struct seg *nseg; 5090*0Sstevel@tonic-gate caddr_t eaddr = addr + len, a; 5091*0Sstevel@tonic-gate size_t pgsz = page_get_pagesize(szc); 5092*0Sstevel@tonic-gate int err; 5093*0Sstevel@tonic-gate u_offset_t off = svd->offset + (uintptr_t)(addr - seg->s_base); 5094*0Sstevel@tonic-gate extern struct vnode kvp; 5095*0Sstevel@tonic-gate 5096*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 5097*0Sstevel@tonic-gate ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size); 5098*0Sstevel@tonic-gate 5099*0Sstevel@tonic-gate if (seg->s_szc == szc || segvn_lpg_disable != 0) { 5100*0Sstevel@tonic-gate return (0); 5101*0Sstevel@tonic-gate } 5102*0Sstevel@tonic-gate 5103*0Sstevel@tonic-gate /* 5104*0Sstevel@tonic-gate * addr should always be pgsz aligned but eaddr may be misaligned if 5105*0Sstevel@tonic-gate * it's at the end of the segment. 5106*0Sstevel@tonic-gate * 5107*0Sstevel@tonic-gate * XXX we should assert this condition since as_setpagesize() logic 5108*0Sstevel@tonic-gate * guarantees it. 5109*0Sstevel@tonic-gate */ 5110*0Sstevel@tonic-gate if (!IS_P2ALIGNED(addr, pgsz) || 5111*0Sstevel@tonic-gate (!IS_P2ALIGNED(eaddr, pgsz) && 5112*0Sstevel@tonic-gate eaddr != seg->s_base + seg->s_size)) { 5113*0Sstevel@tonic-gate 5114*0Sstevel@tonic-gate segvn_setpgsz_align_err++; 5115*0Sstevel@tonic-gate return (EINVAL); 5116*0Sstevel@tonic-gate } 5117*0Sstevel@tonic-gate 5118*0Sstevel@tonic-gate if ((svd->vp == NULL && svd->type == MAP_SHARED) || 5119*0Sstevel@tonic-gate (svd->flags & MAP_NORESERVE) || seg->s_as == &kas || 5120*0Sstevel@tonic-gate szc > segvn_maxpgszc) { 5121*0Sstevel@tonic-gate return (EINVAL); 5122*0Sstevel@tonic-gate } 5123*0Sstevel@tonic-gate 5124*0Sstevel@tonic-gate /* paranoid check */ 5125*0Sstevel@tonic-gate if (svd->vp != NULL && 5126*0Sstevel@tonic-gate (IS_SWAPFSVP(svd->vp) || svd->vp == &kvp)) { 5127*0Sstevel@tonic-gate return (EINVAL); 5128*0Sstevel@tonic-gate } 5129*0Sstevel@tonic-gate 5130*0Sstevel@tonic-gate if (seg->s_szc == 0 && svd->vp != NULL && 5131*0Sstevel@tonic-gate map_addr_vacalign_check(addr, off)) { 5132*0Sstevel@tonic-gate return (EINVAL); 5133*0Sstevel@tonic-gate } 5134*0Sstevel@tonic-gate 5135*0Sstevel@tonic-gate /* 5136*0Sstevel@tonic-gate * Check that protections are the same within new page 5137*0Sstevel@tonic-gate * size boundaries. 5138*0Sstevel@tonic-gate */ 5139*0Sstevel@tonic-gate if (svd->pageprot) { 5140*0Sstevel@tonic-gate for (a = addr; a < eaddr; a += pgsz) { 5141*0Sstevel@tonic-gate if ((a + pgsz) > eaddr) { 5142*0Sstevel@tonic-gate if (!sameprot(seg, a, eaddr - a)) { 5143*0Sstevel@tonic-gate return (EINVAL); 5144*0Sstevel@tonic-gate } 5145*0Sstevel@tonic-gate } else { 5146*0Sstevel@tonic-gate if (!sameprot(seg, a, pgsz)) { 5147*0Sstevel@tonic-gate return (EINVAL); 5148*0Sstevel@tonic-gate } 5149*0Sstevel@tonic-gate } 5150*0Sstevel@tonic-gate } 5151*0Sstevel@tonic-gate } 5152*0Sstevel@tonic-gate 5153*0Sstevel@tonic-gate /* 5154*0Sstevel@tonic-gate * Since we are changing page size we first have to flush 5155*0Sstevel@tonic-gate * the cache. 
This makes sure all the pagelock calls have 5156*0Sstevel@tonic-gate * to recheck protections. 5157*0Sstevel@tonic-gate */ 5158*0Sstevel@tonic-gate if (svd->softlockcnt > 0) { 5159*0Sstevel@tonic-gate /* 5160*0Sstevel@tonic-gate * Since we do have the segvn writers lock nobody can fill 5161*0Sstevel@tonic-gate * the cache with entries belonging to this seg during 5162*0Sstevel@tonic-gate * the purge. The flush either succeeds or we still have 5163*0Sstevel@tonic-gate * pending I/Os. 5164*0Sstevel@tonic-gate */ 5165*0Sstevel@tonic-gate segvn_purge(seg); 5166*0Sstevel@tonic-gate if (svd->softlockcnt > 0) { 5167*0Sstevel@tonic-gate return (EAGAIN); 5168*0Sstevel@tonic-gate } 5169*0Sstevel@tonic-gate } 5170*0Sstevel@tonic-gate 5171*0Sstevel@tonic-gate /* 5172*0Sstevel@tonic-gate * Operation for sub range of existing segment. 5173*0Sstevel@tonic-gate */ 5174*0Sstevel@tonic-gate if (addr != seg->s_base || eaddr != (seg->s_base + seg->s_size)) { 5175*0Sstevel@tonic-gate if (szc < seg->s_szc) { 5176*0Sstevel@tonic-gate VM_STAT_ADD(segvnvmstats.demoterange[2]); 5177*0Sstevel@tonic-gate err = segvn_demote_range(seg, addr, len, SDR_RANGE); 5178*0Sstevel@tonic-gate if (err == 0) { 5179*0Sstevel@tonic-gate return (IE_RETRY); 5180*0Sstevel@tonic-gate } 5181*0Sstevel@tonic-gate if (err == ENOMEM) { 5182*0Sstevel@tonic-gate return (IE_NOMEM); 5183*0Sstevel@tonic-gate } 5184*0Sstevel@tonic-gate return (err); 5185*0Sstevel@tonic-gate } 5186*0Sstevel@tonic-gate if (addr != seg->s_base) { 5187*0Sstevel@tonic-gate nseg = segvn_split_seg(seg, addr); 5188*0Sstevel@tonic-gate if (eaddr != (nseg->s_base + nseg->s_size)) { 5189*0Sstevel@tonic-gate /* eaddr is szc aligned */ 5190*0Sstevel@tonic-gate (void) segvn_split_seg(nseg, eaddr); 5191*0Sstevel@tonic-gate } 5192*0Sstevel@tonic-gate return (IE_RETRY); 5193*0Sstevel@tonic-gate } 5194*0Sstevel@tonic-gate if (eaddr != (seg->s_base + seg->s_size)) { 5195*0Sstevel@tonic-gate /* eaddr is szc aligned */ 5196*0Sstevel@tonic-gate (void) segvn_split_seg(seg, eaddr); 5197*0Sstevel@tonic-gate } 5198*0Sstevel@tonic-gate return (IE_RETRY); 5199*0Sstevel@tonic-gate } 5200*0Sstevel@tonic-gate 5201*0Sstevel@tonic-gate /* 5202*0Sstevel@tonic-gate * Break any low level sharing and reset seg->s_szc to 0. 5203*0Sstevel@tonic-gate */ 5204*0Sstevel@tonic-gate if ((err = segvn_clrszc(seg)) != 0) { 5205*0Sstevel@tonic-gate if (err == ENOMEM) { 5206*0Sstevel@tonic-gate err = IE_NOMEM; 5207*0Sstevel@tonic-gate } 5208*0Sstevel@tonic-gate return (err); 5209*0Sstevel@tonic-gate } 5210*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0); 5211*0Sstevel@tonic-gate 5212*0Sstevel@tonic-gate /* 5213*0Sstevel@tonic-gate * If the end of the current segment is not pgsz aligned 5214*0Sstevel@tonic-gate * then attempt to concatenate with the next segment. 
5215*0Sstevel@tonic-gate */ 5216*0Sstevel@tonic-gate if (!IS_P2ALIGNED(eaddr, pgsz)) { 5217*0Sstevel@tonic-gate nseg = AS_SEGNEXT(seg->s_as, seg); 5218*0Sstevel@tonic-gate if (nseg == NULL || nseg == seg || eaddr != nseg->s_base) { 5219*0Sstevel@tonic-gate return (ENOMEM); 5220*0Sstevel@tonic-gate } 5221*0Sstevel@tonic-gate if (nseg->s_ops != &segvn_ops) { 5222*0Sstevel@tonic-gate return (EINVAL); 5223*0Sstevel@tonic-gate } 5224*0Sstevel@tonic-gate nsvd = (struct segvn_data *)nseg->s_data; 5225*0Sstevel@tonic-gate if (nsvd->softlockcnt > 0) { 5226*0Sstevel@tonic-gate segvn_purge(nseg); 5227*0Sstevel@tonic-gate if (nsvd->softlockcnt > 0) { 5228*0Sstevel@tonic-gate return (EAGAIN); 5229*0Sstevel@tonic-gate } 5230*0Sstevel@tonic-gate } 5231*0Sstevel@tonic-gate err = segvn_clrszc(nseg); 5232*0Sstevel@tonic-gate if (err == ENOMEM) { 5233*0Sstevel@tonic-gate err = IE_NOMEM; 5234*0Sstevel@tonic-gate } 5235*0Sstevel@tonic-gate if (err != 0) { 5236*0Sstevel@tonic-gate return (err); 5237*0Sstevel@tonic-gate } 5238*0Sstevel@tonic-gate err = segvn_concat(seg, nseg, 1); 5239*0Sstevel@tonic-gate if (err == -1) { 5240*0Sstevel@tonic-gate return (EINVAL); 5241*0Sstevel@tonic-gate } 5242*0Sstevel@tonic-gate if (err == -2) { 5243*0Sstevel@tonic-gate return (IE_NOMEM); 5244*0Sstevel@tonic-gate } 5245*0Sstevel@tonic-gate return (IE_RETRY); 5246*0Sstevel@tonic-gate } 5247*0Sstevel@tonic-gate 5248*0Sstevel@tonic-gate /* 5249*0Sstevel@tonic-gate * May need to re-align anon array to 5250*0Sstevel@tonic-gate * new szc. 5251*0Sstevel@tonic-gate */ 5252*0Sstevel@tonic-gate if (amp != NULL) { 5253*0Sstevel@tonic-gate pgcnt_t pgcnt = pgsz >> PAGESHIFT; 5254*0Sstevel@tonic-gate if (!IS_P2ALIGNED(svd->anon_index, pgcnt)) { 5255*0Sstevel@tonic-gate struct anon_hdr *nahp; 5256*0Sstevel@tonic-gate 5257*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); 5258*0Sstevel@tonic-gate ASSERT(amp->refcnt == 1); 5259*0Sstevel@tonic-gate nahp = anon_create(btop(amp->size), ANON_NOSLEEP); 5260*0Sstevel@tonic-gate if (nahp == NULL) { 5261*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 5262*0Sstevel@tonic-gate return (IE_NOMEM); 5263*0Sstevel@tonic-gate } 5264*0Sstevel@tonic-gate if (anon_copy_ptr(amp->ahp, svd->anon_index, 5265*0Sstevel@tonic-gate nahp, 0, btop(seg->s_size), ANON_NOSLEEP)) { 5266*0Sstevel@tonic-gate anon_release(nahp, btop(amp->size)); 5267*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 5268*0Sstevel@tonic-gate return (IE_NOMEM); 5269*0Sstevel@tonic-gate } 5270*0Sstevel@tonic-gate anon_release(amp->ahp, btop(amp->size)); 5271*0Sstevel@tonic-gate amp->ahp = nahp; 5272*0Sstevel@tonic-gate svd->anon_index = 0; 5273*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 5274*0Sstevel@tonic-gate } 5275*0Sstevel@tonic-gate } 5276*0Sstevel@tonic-gate if (svd->vp != NULL && szc != 0) { 5277*0Sstevel@tonic-gate struct vattr va; 5278*0Sstevel@tonic-gate u_offset_t eoffpage = svd->offset; 5279*0Sstevel@tonic-gate va.va_mask = AT_SIZE; 5280*0Sstevel@tonic-gate eoffpage += seg->s_size; 5281*0Sstevel@tonic-gate eoffpage = btopr(eoffpage); 5282*0Sstevel@tonic-gate if (VOP_GETATTR(svd->vp, &va, 0, svd->cred) != 0) { 5283*0Sstevel@tonic-gate segvn_setpgsz_getattr_err++; 5284*0Sstevel@tonic-gate return (EINVAL); 5285*0Sstevel@tonic-gate } 5286*0Sstevel@tonic-gate if (btopr(va.va_size) < eoffpage) { 5287*0Sstevel@tonic-gate segvn_setpgsz_eof_err++; 5288*0Sstevel@tonic-gate return (EINVAL); 5289*0Sstevel@tonic-gate } 5290*0Sstevel@tonic-gate if (amp != NULL) { 5291*0Sstevel@tonic-gate /* 5292*0Sstevel@tonic-gate *
anon_fill_cow_holes() may call VOP_GETPAGE(). 5293*0Sstevel@tonic-gate * don't take anon map lock here to avoid holding it 5294*0Sstevel@tonic-gate * across VOP_GETPAGE() calls that may call back into 5295*0Sstevel@tonic-gate * segvn for klustering checks. We don't really need 5296*0Sstevel@tonic-gate * anon map lock here since it's a private segment and 5297*0Sstevel@tonic-gate * we hold as level lock as writers. 5298*0Sstevel@tonic-gate */ 5299*0Sstevel@tonic-gate if ((err = anon_fill_cow_holes(seg, seg->s_base, 5300*0Sstevel@tonic-gate amp->ahp, svd->anon_index, svd->vp, svd->offset, 5301*0Sstevel@tonic-gate seg->s_size, szc, svd->prot, svd->vpage, 5302*0Sstevel@tonic-gate svd->cred)) != 0) { 5303*0Sstevel@tonic-gate return (EINVAL); 5304*0Sstevel@tonic-gate } 5305*0Sstevel@tonic-gate } 5306*0Sstevel@tonic-gate segvn_setvnode_mpss(svd->vp); 5307*0Sstevel@tonic-gate } 5308*0Sstevel@tonic-gate 5309*0Sstevel@tonic-gate if (amp != NULL) { 5310*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); 5311*0Sstevel@tonic-gate amp->a_szc = szc; 5312*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 5313*0Sstevel@tonic-gate } 5314*0Sstevel@tonic-gate 5315*0Sstevel@tonic-gate seg->s_szc = szc; 5316*0Sstevel@tonic-gate 5317*0Sstevel@tonic-gate return (0); 5318*0Sstevel@tonic-gate } 5319*0Sstevel@tonic-gate 5320*0Sstevel@tonic-gate static int 5321*0Sstevel@tonic-gate segvn_clrszc(struct seg *seg) 5322*0Sstevel@tonic-gate { 5323*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5324*0Sstevel@tonic-gate struct anon_map *amp = svd->amp; 5325*0Sstevel@tonic-gate size_t pgsz; 5326*0Sstevel@tonic-gate pgcnt_t pages; 5327*0Sstevel@tonic-gate int err = 0; 5328*0Sstevel@tonic-gate caddr_t a = seg->s_base; 5329*0Sstevel@tonic-gate caddr_t ea = a + seg->s_size; 5330*0Sstevel@tonic-gate ulong_t an_idx = svd->anon_index; 5331*0Sstevel@tonic-gate vnode_t *vp = svd->vp; 5332*0Sstevel@tonic-gate struct vpage *vpage = svd->vpage; 5333*0Sstevel@tonic-gate page_t *anon_pl[1 + 1], *pp; 5334*0Sstevel@tonic-gate struct anon *ap, *oldap; 5335*0Sstevel@tonic-gate uint_t prot = svd->prot, vpprot; 5336*0Sstevel@tonic-gate 5337*0Sstevel@tonic-gate ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) || 5338*0Sstevel@tonic-gate SEGVN_WRITE_HELD(seg->s_as, &svd->lock)); 5339*0Sstevel@tonic-gate ASSERT(svd->type == MAP_PRIVATE || 5340*0Sstevel@tonic-gate (vp != NULL && svd->amp == NULL)); 5341*0Sstevel@tonic-gate 5342*0Sstevel@tonic-gate if (vp == NULL && amp == NULL) { 5343*0Sstevel@tonic-gate seg->s_szc = 0; 5344*0Sstevel@tonic-gate return (0); 5345*0Sstevel@tonic-gate } 5346*0Sstevel@tonic-gate 5347*0Sstevel@tonic-gate /* 5348*0Sstevel@tonic-gate * do HAT_UNLOAD_UNMAP since we are changing the pagesize. 5349*0Sstevel@tonic-gate * unload argument is 0 when we are freeing the segment 5350*0Sstevel@tonic-gate * and unload was already done. 5351*0Sstevel@tonic-gate */ 5352*0Sstevel@tonic-gate hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size, 5353*0Sstevel@tonic-gate HAT_UNLOAD_UNMAP); 5354*0Sstevel@tonic-gate 5355*0Sstevel@tonic-gate if (amp == NULL) { 5356*0Sstevel@tonic-gate seg->s_szc = 0; 5357*0Sstevel@tonic-gate return (0); 5358*0Sstevel@tonic-gate } 5359*0Sstevel@tonic-gate 5360*0Sstevel@tonic-gate pgsz = page_get_pagesize(seg->s_szc); 5361*0Sstevel@tonic-gate pages = btop(pgsz); 5362*0Sstevel@tonic-gate 5363*0Sstevel@tonic-gate /* 5364*0Sstevel@tonic-gate * XXX anon rwlock is not really needed because this is a 5365*0Sstevel@tonic-gate * private segment and we are writers.
5366*0Sstevel@tonic-gate */ 5367*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); 5368*0Sstevel@tonic-gate 5369*0Sstevel@tonic-gate for (; a < ea; a += pgsz, an_idx += pages) { 5370*0Sstevel@tonic-gate if ((oldap = anon_get_ptr(amp->ahp, an_idx)) != NULL) { 5371*0Sstevel@tonic-gate if (svd->pageprot != 0) { 5372*0Sstevel@tonic-gate ASSERT(vpage != NULL); 5373*0Sstevel@tonic-gate prot = VPP_PROT(vpage); 5374*0Sstevel@tonic-gate ASSERT(sameprot(seg, a, pgsz)); 5375*0Sstevel@tonic-gate } 5376*0Sstevel@tonic-gate if (seg->s_szc != 0) { 5377*0Sstevel@tonic-gate ASSERT(vp == NULL || anon_pages(amp->ahp, 5378*0Sstevel@tonic-gate an_idx, pages) == pages); 5379*0Sstevel@tonic-gate if ((err = anon_map_demotepages(amp, an_idx, 5380*0Sstevel@tonic-gate seg, a, prot, vpage, svd->cred)) != 0) { 5381*0Sstevel@tonic-gate goto out; 5382*0Sstevel@tonic-gate } 5383*0Sstevel@tonic-gate } else { 5384*0Sstevel@tonic-gate if (oldap->an_refcnt == 1) { 5385*0Sstevel@tonic-gate continue; 5386*0Sstevel@tonic-gate } 5387*0Sstevel@tonic-gate if ((err = anon_getpage(&oldap, &vpprot, 5388*0Sstevel@tonic-gate anon_pl, PAGESIZE, seg, a, S_READ, 5389*0Sstevel@tonic-gate svd->cred))) { 5390*0Sstevel@tonic-gate goto out; 5391*0Sstevel@tonic-gate } 5392*0Sstevel@tonic-gate if ((pp = anon_private(&ap, seg, a, prot, 5393*0Sstevel@tonic-gate anon_pl[0], 0, svd->cred)) == NULL) { 5394*0Sstevel@tonic-gate err = ENOMEM; 5395*0Sstevel@tonic-gate goto out; 5396*0Sstevel@tonic-gate } 5397*0Sstevel@tonic-gate anon_decref(oldap); 5398*0Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, an_idx, ap, 5399*0Sstevel@tonic-gate ANON_SLEEP); 5400*0Sstevel@tonic-gate page_unlock(pp); 5401*0Sstevel@tonic-gate } 5402*0Sstevel@tonic-gate } 5403*0Sstevel@tonic-gate vpage = (vpage == NULL) ? NULL : vpage + pages; 5404*0Sstevel@tonic-gate } 5405*0Sstevel@tonic-gate 5406*0Sstevel@tonic-gate amp->a_szc = 0; 5407*0Sstevel@tonic-gate seg->s_szc = 0; 5408*0Sstevel@tonic-gate out: 5409*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 5410*0Sstevel@tonic-gate return (err); 5411*0Sstevel@tonic-gate } 5412*0Sstevel@tonic-gate 5413*0Sstevel@tonic-gate static int 5414*0Sstevel@tonic-gate segvn_claim_pages( 5415*0Sstevel@tonic-gate struct seg *seg, 5416*0Sstevel@tonic-gate struct vpage *svp, 5417*0Sstevel@tonic-gate u_offset_t off, 5418*0Sstevel@tonic-gate ulong_t anon_idx, 5419*0Sstevel@tonic-gate uint_t prot) 5420*0Sstevel@tonic-gate { 5421*0Sstevel@tonic-gate pgcnt_t pgcnt = page_get_pagecnt(seg->s_szc); 5422*0Sstevel@tonic-gate size_t ppasize = (pgcnt + 1) * sizeof (page_t *); 5423*0Sstevel@tonic-gate page_t **ppa; 5424*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5425*0Sstevel@tonic-gate struct anon_map *amp = svd->amp; 5426*0Sstevel@tonic-gate struct vpage *evp = svp + pgcnt; 5427*0Sstevel@tonic-gate caddr_t addr = ((uintptr_t)(svp - svd->vpage) << PAGESHIFT) 5428*0Sstevel@tonic-gate + seg->s_base; 5429*0Sstevel@tonic-gate struct anon *ap; 5430*0Sstevel@tonic-gate struct vnode *vp = svd->vp; 5431*0Sstevel@tonic-gate page_t *pp; 5432*0Sstevel@tonic-gate pgcnt_t pg_idx, i; 5433*0Sstevel@tonic-gate int err = 0; 5434*0Sstevel@tonic-gate anoff_t aoff; 5435*0Sstevel@tonic-gate int anon = (amp != NULL) ?
1 : 0; 5436*0Sstevel@tonic-gate 5437*0Sstevel@tonic-gate ASSERT(svd->type == MAP_PRIVATE); 5438*0Sstevel@tonic-gate ASSERT(svd->vpage != NULL); 5439*0Sstevel@tonic-gate ASSERT(seg->s_szc != 0); 5440*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 5441*0Sstevel@tonic-gate ASSERT(amp == NULL || IS_P2ALIGNED(anon_idx, pgcnt)); 5442*0Sstevel@tonic-gate ASSERT(sameprot(seg, addr, pgcnt << PAGESHIFT)); 5443*0Sstevel@tonic-gate 5444*0Sstevel@tonic-gate if (VPP_PROT(svp) == prot) 5445*0Sstevel@tonic-gate return (1); 5446*0Sstevel@tonic-gate if (!((VPP_PROT(svp) ^ prot) & PROT_WRITE)) 5447*0Sstevel@tonic-gate return (1); 5448*0Sstevel@tonic-gate 5449*0Sstevel@tonic-gate ppa = kmem_alloc(ppasize, KM_SLEEP); 5450*0Sstevel@tonic-gate if (anon && vp != NULL) { 5451*0Sstevel@tonic-gate if (anon_get_ptr(amp->ahp, anon_idx) == NULL) { 5452*0Sstevel@tonic-gate anon = 0; 5453*0Sstevel@tonic-gate ASSERT(!anon_pages(amp->ahp, anon_idx, pgcnt)); 5454*0Sstevel@tonic-gate } 5455*0Sstevel@tonic-gate ASSERT(!anon || 5456*0Sstevel@tonic-gate anon_pages(amp->ahp, anon_idx, pgcnt) == pgcnt); 5457*0Sstevel@tonic-gate } 5458*0Sstevel@tonic-gate 5459*0Sstevel@tonic-gate for (*ppa = NULL, pg_idx = 0; svp < evp; svp++, anon_idx++) { 5460*0Sstevel@tonic-gate if (!VPP_ISPPLOCK(svp)) 5461*0Sstevel@tonic-gate continue; 5462*0Sstevel@tonic-gate if (anon) { 5463*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_idx); 5464*0Sstevel@tonic-gate if (ap == NULL) { 5465*0Sstevel@tonic-gate panic("segvn_claim_pages: no anon slot"); 5466*0Sstevel@tonic-gate } 5467*0Sstevel@tonic-gate swap_xlate(ap, &vp, &aoff); 5468*0Sstevel@tonic-gate off = (u_offset_t)aoff; 5469*0Sstevel@tonic-gate } 5470*0Sstevel@tonic-gate ASSERT(vp != NULL); 5471*0Sstevel@tonic-gate if ((pp = page_lookup(vp, 5472*0Sstevel@tonic-gate (u_offset_t)off, SE_SHARED)) == NULL) { 5473*0Sstevel@tonic-gate panic("segvn_claim_pages: no page"); 5474*0Sstevel@tonic-gate } 5475*0Sstevel@tonic-gate ppa[pg_idx++] = pp; 5476*0Sstevel@tonic-gate off += PAGESIZE; 5477*0Sstevel@tonic-gate } 5478*0Sstevel@tonic-gate 5479*0Sstevel@tonic-gate if (ppa[0] == NULL) { 5480*0Sstevel@tonic-gate kmem_free(ppa, ppasize); 5481*0Sstevel@tonic-gate return (1); 5482*0Sstevel@tonic-gate } 5483*0Sstevel@tonic-gate 5484*0Sstevel@tonic-gate ASSERT(pg_idx <= pgcnt); 5485*0Sstevel@tonic-gate ppa[pg_idx] = NULL; 5486*0Sstevel@tonic-gate 5487*0Sstevel@tonic-gate if (prot & PROT_WRITE) 5488*0Sstevel@tonic-gate err = page_addclaim_pages(ppa); 5489*0Sstevel@tonic-gate else 5490*0Sstevel@tonic-gate err = page_subclaim_pages(ppa); 5491*0Sstevel@tonic-gate 5492*0Sstevel@tonic-gate for (i = 0; i < pg_idx; i++) { 5493*0Sstevel@tonic-gate ASSERT(ppa[i] != NULL); 5494*0Sstevel@tonic-gate page_unlock(ppa[i]); 5495*0Sstevel@tonic-gate } 5496*0Sstevel@tonic-gate 5497*0Sstevel@tonic-gate kmem_free(ppa, ppasize); 5498*0Sstevel@tonic-gate return (err); 5499*0Sstevel@tonic-gate } 5500*0Sstevel@tonic-gate 5501*0Sstevel@tonic-gate /* 5502*0Sstevel@tonic-gate * Returns right (upper address) segment if split occurred. 5503*0Sstevel@tonic-gate * If the address is equal to the beginning or end of its segment it returns 5504*0Sstevel@tonic-gate * the current segment.
5505*0Sstevel@tonic-gate */ 5506*0Sstevel@tonic-gate static struct seg * 5507*0Sstevel@tonic-gate segvn_split_seg(struct seg *seg, caddr_t addr) 5508*0Sstevel@tonic-gate { 5509*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5510*0Sstevel@tonic-gate struct seg *nseg; 5511*0Sstevel@tonic-gate size_t nsize; 5512*0Sstevel@tonic-gate struct segvn_data *nsvd; 5513*0Sstevel@tonic-gate 5514*0Sstevel@tonic-gate ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 5515*0Sstevel@tonic-gate ASSERT(svd->type == MAP_PRIVATE || svd->amp == NULL); 5516*0Sstevel@tonic-gate ASSERT(addr >= seg->s_base); 5517*0Sstevel@tonic-gate ASSERT(addr <= seg->s_base + seg->s_size); 5518*0Sstevel@tonic-gate 5519*0Sstevel@tonic-gate if (addr == seg->s_base || addr == seg->s_base + seg->s_size) 5520*0Sstevel@tonic-gate return (seg); 5521*0Sstevel@tonic-gate 5522*0Sstevel@tonic-gate nsize = seg->s_base + seg->s_size - addr; 5523*0Sstevel@tonic-gate seg->s_size = addr - seg->s_base; 5524*0Sstevel@tonic-gate nseg = seg_alloc(seg->s_as, addr, nsize); 5525*0Sstevel@tonic-gate ASSERT(nseg != NULL); 5526*0Sstevel@tonic-gate nseg->s_ops = seg->s_ops; 5527*0Sstevel@tonic-gate nsvd = kmem_cache_alloc(segvn_cache, KM_SLEEP); 5528*0Sstevel@tonic-gate nseg->s_data = (void *)nsvd; 5529*0Sstevel@tonic-gate nseg->s_szc = seg->s_szc; 5530*0Sstevel@tonic-gate *nsvd = *svd; 5531*0Sstevel@tonic-gate rw_init(&nsvd->lock, NULL, RW_DEFAULT, NULL); 5532*0Sstevel@tonic-gate 5533*0Sstevel@tonic-gate if (nsvd->vp != NULL) { 5534*0Sstevel@tonic-gate VN_HOLD(nsvd->vp); 5535*0Sstevel@tonic-gate nsvd->offset = svd->offset + 5536*0Sstevel@tonic-gate (uintptr_t)(nseg->s_base - seg->s_base); 5537*0Sstevel@tonic-gate if (nsvd->type == MAP_SHARED) 5538*0Sstevel@tonic-gate lgrp_shm_policy_init(NULL, nsvd->vp); 5539*0Sstevel@tonic-gate } else { 5540*0Sstevel@tonic-gate /* 5541*0Sstevel@tonic-gate * The offset for an anonymous segment has no significance in 5542*0Sstevel@tonic-gate * terms of an offset into a file. If we were to use the above 5543*0Sstevel@tonic-gate * calculation instead, the structures read out of 5544*0Sstevel@tonic-gate * /proc/<pid>/xmap would be more difficult to decipher since 5545*0Sstevel@tonic-gate * it would be unclear whether two seemingly contiguous 5546*0Sstevel@tonic-gate * prxmap_t structures represented different segments or a 5547*0Sstevel@tonic-gate * single segment that had been split up into multiple prxmap_t 5548*0Sstevel@tonic-gate * structures (e.g. if some part of the segment had not yet 5549*0Sstevel@tonic-gate * been faulted in).
5550*0Sstevel@tonic-gate */ 5551*0Sstevel@tonic-gate nsvd->offset = 0; 5552*0Sstevel@tonic-gate } 5553*0Sstevel@tonic-gate 5554*0Sstevel@tonic-gate ASSERT(svd->softlockcnt == 0); 5555*0Sstevel@tonic-gate crhold(svd->cred); 5556*0Sstevel@tonic-gate 5557*0Sstevel@tonic-gate if (svd->vpage != NULL) { 5558*0Sstevel@tonic-gate size_t bytes = vpgtob(seg_pages(seg)); 5559*0Sstevel@tonic-gate size_t nbytes = vpgtob(seg_pages(nseg)); 5560*0Sstevel@tonic-gate struct vpage *ovpage = svd->vpage; 5561*0Sstevel@tonic-gate 5562*0Sstevel@tonic-gate svd->vpage = kmem_alloc(bytes, KM_SLEEP); 5563*0Sstevel@tonic-gate bcopy(ovpage, svd->vpage, bytes); 5564*0Sstevel@tonic-gate nsvd->vpage = kmem_alloc(nbytes, KM_SLEEP); 5565*0Sstevel@tonic-gate bcopy(ovpage + seg_pages(seg), nsvd->vpage, nbytes); 5566*0Sstevel@tonic-gate kmem_free(ovpage, bytes + nbytes); 5567*0Sstevel@tonic-gate } 5568*0Sstevel@tonic-gate if (svd->amp != NULL) { 5569*0Sstevel@tonic-gate struct anon_map *oamp = svd->amp, *namp; 5570*0Sstevel@tonic-gate struct anon_hdr *nahp; 5571*0Sstevel@tonic-gate 5572*0Sstevel@tonic-gate ANON_LOCK_ENTER(&oamp->a_rwlock, RW_WRITER); 5573*0Sstevel@tonic-gate ASSERT(oamp->refcnt == 1); 5574*0Sstevel@tonic-gate nahp = anon_create(btop(seg->s_size), ANON_SLEEP); 5575*0Sstevel@tonic-gate (void) anon_copy_ptr(oamp->ahp, svd->anon_index, 5576*0Sstevel@tonic-gate nahp, 0, btop(seg->s_size), ANON_SLEEP); 5577*0Sstevel@tonic-gate 5578*0Sstevel@tonic-gate namp = anonmap_alloc(nseg->s_size, 0); 5579*0Sstevel@tonic-gate namp->a_szc = nseg->s_szc; 5580*0Sstevel@tonic-gate (void) anon_copy_ptr(oamp->ahp, 5581*0Sstevel@tonic-gate svd->anon_index + btop(seg->s_size), 5582*0Sstevel@tonic-gate namp->ahp, 0, btop(nseg->s_size), ANON_SLEEP); 5583*0Sstevel@tonic-gate anon_release(oamp->ahp, btop(oamp->size)); 5584*0Sstevel@tonic-gate oamp->ahp = nahp; 5585*0Sstevel@tonic-gate oamp->size = seg->s_size; 5586*0Sstevel@tonic-gate svd->anon_index = 0; 5587*0Sstevel@tonic-gate nsvd->amp = namp; 5588*0Sstevel@tonic-gate nsvd->anon_index = 0; 5589*0Sstevel@tonic-gate ANON_LOCK_EXIT(&oamp->a_rwlock); 5590*0Sstevel@tonic-gate } 5591*0Sstevel@tonic-gate 5592*0Sstevel@tonic-gate /* 5593*0Sstevel@tonic-gate * Split amount of swap reserve 5594*0Sstevel@tonic-gate */ 5595*0Sstevel@tonic-gate if (svd->swresv) { 5596*0Sstevel@tonic-gate /* 5597*0Sstevel@tonic-gate * For MAP_NORESERVE, only allocate swap reserve for pages 5598*0Sstevel@tonic-gate * being used. Other segments get enough to cover whole 5599*0Sstevel@tonic-gate * segment. 
5600*0Sstevel@tonic-gate */ 5601*0Sstevel@tonic-gate if (svd->flags & MAP_NORESERVE) { 5602*0Sstevel@tonic-gate size_t oswresv; 5603*0Sstevel@tonic-gate 5604*0Sstevel@tonic-gate ASSERT(svd->amp); 5605*0Sstevel@tonic-gate oswresv = svd->swresv; 5606*0Sstevel@tonic-gate svd->swresv = ptob(anon_pages(svd->amp->ahp, 5607*0Sstevel@tonic-gate svd->anon_index, btop(seg->s_size))); 5608*0Sstevel@tonic-gate nsvd->swresv = ptob(anon_pages(nsvd->amp->ahp, 5609*0Sstevel@tonic-gate nsvd->anon_index, btop(nseg->s_size))); 5610*0Sstevel@tonic-gate ASSERT(oswresv >= (svd->swresv + nsvd->swresv)); 5611*0Sstevel@tonic-gate } else { 5612*0Sstevel@tonic-gate ASSERT(svd->swresv == seg->s_size + nseg->s_size); 5613*0Sstevel@tonic-gate svd->swresv = seg->s_size; 5614*0Sstevel@tonic-gate nsvd->swresv = nseg->s_size; 5615*0Sstevel@tonic-gate } 5616*0Sstevel@tonic-gate } 5617*0Sstevel@tonic-gate 5618*0Sstevel@tonic-gate return (nseg); 5619*0Sstevel@tonic-gate } 5620*0Sstevel@tonic-gate 5621*0Sstevel@tonic-gate 5622*0Sstevel@tonic-gate /* 5623*0Sstevel@tonic-gate * called on memory operations (unmap, setprot, setpagesize) for a subset 5624*0Sstevel@tonic-gate * of a large page segment to either demote the memory range (SDR_RANGE) 5625*0Sstevel@tonic-gate * or the ends (SDR_END) by addr/len. 5626*0Sstevel@tonic-gate * 5627*0Sstevel@tonic-gate * returns 0 on success. returns errno, including ENOMEM, on failure. 5628*0Sstevel@tonic-gate */ 5629*0Sstevel@tonic-gate static int 5630*0Sstevel@tonic-gate segvn_demote_range(struct seg *seg, caddr_t addr, size_t len, int flag) 5631*0Sstevel@tonic-gate { 5632*0Sstevel@tonic-gate caddr_t eaddr = addr + len; 5633*0Sstevel@tonic-gate caddr_t lpgaddr, lpgeaddr; 5634*0Sstevel@tonic-gate struct seg *nseg; 5635*0Sstevel@tonic-gate struct seg *badseg1 = NULL; 5636*0Sstevel@tonic-gate struct seg *badseg2 = NULL; 5637*0Sstevel@tonic-gate size_t pgsz; 5638*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5639*0Sstevel@tonic-gate int err; 5640*0Sstevel@tonic-gate 5641*0Sstevel@tonic-gate ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 5642*0Sstevel@tonic-gate ASSERT(seg->s_szc != 0); 5643*0Sstevel@tonic-gate pgsz = page_get_pagesize(seg->s_szc); 5644*0Sstevel@tonic-gate ASSERT(seg->s_base != addr || seg->s_size != len); 5645*0Sstevel@tonic-gate ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size); 5646*0Sstevel@tonic-gate ASSERT(svd->softlockcnt == 0); 5647*0Sstevel@tonic-gate ASSERT(svd->type == MAP_PRIVATE || 5648*0Sstevel@tonic-gate (svd->vp != NULL && svd->amp == NULL)); 5649*0Sstevel@tonic-gate 5650*0Sstevel@tonic-gate CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr); 5651*0Sstevel@tonic-gate ASSERT(flag == SDR_RANGE || eaddr < lpgeaddr || addr > lpgaddr); 5652*0Sstevel@tonic-gate if (flag == SDR_RANGE) { 5653*0Sstevel@tonic-gate /* demote entire range */ 5654*0Sstevel@tonic-gate badseg1 = nseg = segvn_split_seg(seg, lpgaddr); 5655*0Sstevel@tonic-gate (void) segvn_split_seg(nseg, lpgeaddr); 5656*0Sstevel@tonic-gate ASSERT(badseg1->s_base == lpgaddr); 5657*0Sstevel@tonic-gate ASSERT(badseg1->s_size == lpgeaddr - lpgaddr); 5658*0Sstevel@tonic-gate } else if (addr != lpgaddr) { 5659*0Sstevel@tonic-gate ASSERT(flag == SDR_END); 5660*0Sstevel@tonic-gate badseg1 = nseg = segvn_split_seg(seg, lpgaddr); 5661*0Sstevel@tonic-gate if (eaddr != lpgeaddr && eaddr > lpgaddr + pgsz && 5662*0Sstevel@tonic-gate eaddr < lpgaddr + 2 * pgsz) { 5663*0Sstevel@tonic-gate (void) segvn_split_seg(nseg, lpgeaddr); 5664*0Sstevel@tonic-gate 
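		/*
		 * Editorial note, not part of the original source: in this
		 * SDR_END case both misaligned ends of [addr, eaddr) fall
		 * within two adjacent large pages, so the single split-off
		 * sub-segment (badseg1) spans 2 * pgsz and covers them both.
		 * A hypothetical illustration, assuming pgsz is 4M and a
		 * segment based at 0 of size 16M: for addr = 5M and
		 * eaddr = 11M, lpgaddr = 4M and lpgeaddr = 12M, and the one
		 * segment carved out here is [4M, 12M), which is what the
		 * asserts below check and segvn_clrszc() later demotes.
		 */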
ASSERT(badseg1->s_base == lpgaddr); 5665*0Sstevel@tonic-gate ASSERT(badseg1->s_size == 2 * pgsz); 5666*0Sstevel@tonic-gate } else { 5667*0Sstevel@tonic-gate nseg = segvn_split_seg(nseg, lpgaddr + pgsz); 5668*0Sstevel@tonic-gate ASSERT(badseg1->s_base == lpgaddr); 5669*0Sstevel@tonic-gate ASSERT(badseg1->s_size == pgsz); 5670*0Sstevel@tonic-gate if (eaddr != lpgeaddr && eaddr > lpgaddr + pgsz) { 5671*0Sstevel@tonic-gate ASSERT(lpgeaddr - lpgaddr > 2 * pgsz); 5672*0Sstevel@tonic-gate nseg = segvn_split_seg(nseg, lpgeaddr - pgsz); 5673*0Sstevel@tonic-gate badseg2 = nseg; 5674*0Sstevel@tonic-gate (void) segvn_split_seg(nseg, lpgeaddr); 5675*0Sstevel@tonic-gate ASSERT(badseg2->s_base == lpgeaddr - pgsz); 5676*0Sstevel@tonic-gate ASSERT(badseg2->s_size == pgsz); 5677*0Sstevel@tonic-gate } 5678*0Sstevel@tonic-gate } 5679*0Sstevel@tonic-gate } else { 5680*0Sstevel@tonic-gate ASSERT(flag == SDR_END); 5681*0Sstevel@tonic-gate ASSERT(eaddr < lpgeaddr); 5682*0Sstevel@tonic-gate badseg1 = nseg = segvn_split_seg(seg, lpgeaddr - pgsz); 5683*0Sstevel@tonic-gate (void) segvn_split_seg(nseg, lpgeaddr); 5684*0Sstevel@tonic-gate ASSERT(badseg1->s_base == lpgeaddr - pgsz); 5685*0Sstevel@tonic-gate ASSERT(badseg1->s_size == pgsz); 5686*0Sstevel@tonic-gate } 5687*0Sstevel@tonic-gate 5688*0Sstevel@tonic-gate ASSERT(badseg1 != NULL); 5689*0Sstevel@tonic-gate ASSERT(badseg1->s_szc != 0); 5690*0Sstevel@tonic-gate ASSERT(page_get_pagesize(badseg1->s_szc) == pgsz); 5691*0Sstevel@tonic-gate ASSERT(flag == SDR_RANGE || badseg1->s_size == pgsz || 5692*0Sstevel@tonic-gate badseg1->s_size == 2 * pgsz); 5693*0Sstevel@tonic-gate if (err = segvn_clrszc(badseg1)) { 5694*0Sstevel@tonic-gate return (err); 5695*0Sstevel@tonic-gate } 5696*0Sstevel@tonic-gate ASSERT(badseg1->s_szc == 0); 5697*0Sstevel@tonic-gate 5698*0Sstevel@tonic-gate if (badseg2 == NULL) 5699*0Sstevel@tonic-gate return (0); 5700*0Sstevel@tonic-gate ASSERT(badseg2->s_szc != 0); 5701*0Sstevel@tonic-gate ASSERT(page_get_pagesize(badseg2->s_szc) == pgsz); 5702*0Sstevel@tonic-gate ASSERT(badseg2->s_size == pgsz); 5703*0Sstevel@tonic-gate ASSERT(sameprot(badseg2, badseg2->s_base, badseg2->s_size)); 5704*0Sstevel@tonic-gate if (err = segvn_clrszc(badseg2)) { 5705*0Sstevel@tonic-gate return (err); 5706*0Sstevel@tonic-gate } 5707*0Sstevel@tonic-gate ASSERT(badseg2->s_szc == 0); 5708*0Sstevel@tonic-gate return (0); 5709*0Sstevel@tonic-gate } 5710*0Sstevel@tonic-gate 5711*0Sstevel@tonic-gate static int 5712*0Sstevel@tonic-gate segvn_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 5713*0Sstevel@tonic-gate { 5714*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5715*0Sstevel@tonic-gate struct vpage *vp, *evp; 5716*0Sstevel@tonic-gate 5717*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 5718*0Sstevel@tonic-gate 5719*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER); 5720*0Sstevel@tonic-gate /* 5721*0Sstevel@tonic-gate * If segment protection can be used, simply check against them. 5722*0Sstevel@tonic-gate */ 5723*0Sstevel@tonic-gate if (svd->pageprot == 0) { 5724*0Sstevel@tonic-gate int err; 5725*0Sstevel@tonic-gate 5726*0Sstevel@tonic-gate err = ((svd->prot & prot) != prot) ? EACCES : 0; 5727*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 5728*0Sstevel@tonic-gate return (err); 5729*0Sstevel@tonic-gate } 5730*0Sstevel@tonic-gate 5731*0Sstevel@tonic-gate /* 5732*0Sstevel@tonic-gate * Have to check down to the vpage level. 
5733*0Sstevel@tonic-gate */ 5734*0Sstevel@tonic-gate evp = &svd->vpage[seg_page(seg, addr + len)]; 5735*0Sstevel@tonic-gate for (vp = &svd->vpage[seg_page(seg, addr)]; vp < evp; vp++) { 5736*0Sstevel@tonic-gate if ((VPP_PROT(vp) & prot) != prot) { 5737*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 5738*0Sstevel@tonic-gate return (EACCES); 5739*0Sstevel@tonic-gate } 5740*0Sstevel@tonic-gate } 5741*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 5742*0Sstevel@tonic-gate return (0); 5743*0Sstevel@tonic-gate } 5744*0Sstevel@tonic-gate 5745*0Sstevel@tonic-gate static int 5746*0Sstevel@tonic-gate segvn_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 5747*0Sstevel@tonic-gate { 5748*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5749*0Sstevel@tonic-gate size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; 5750*0Sstevel@tonic-gate 5751*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 5752*0Sstevel@tonic-gate 5753*0Sstevel@tonic-gate if (pgno != 0) { 5754*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER); 5755*0Sstevel@tonic-gate if (svd->pageprot == 0) { 5756*0Sstevel@tonic-gate do 5757*0Sstevel@tonic-gate protv[--pgno] = svd->prot; 5758*0Sstevel@tonic-gate while (pgno != 0); 5759*0Sstevel@tonic-gate } else { 5760*0Sstevel@tonic-gate size_t pgoff = seg_page(seg, addr); 5761*0Sstevel@tonic-gate 5762*0Sstevel@tonic-gate do { 5763*0Sstevel@tonic-gate pgno--; 5764*0Sstevel@tonic-gate protv[pgno] = VPP_PROT(&svd->vpage[pgno+pgoff]); 5765*0Sstevel@tonic-gate } while (pgno != 0); 5766*0Sstevel@tonic-gate } 5767*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 5768*0Sstevel@tonic-gate } 5769*0Sstevel@tonic-gate return (0); 5770*0Sstevel@tonic-gate } 5771*0Sstevel@tonic-gate 5772*0Sstevel@tonic-gate static u_offset_t 5773*0Sstevel@tonic-gate segvn_getoffset(struct seg *seg, caddr_t addr) 5774*0Sstevel@tonic-gate { 5775*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5776*0Sstevel@tonic-gate 5777*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 5778*0Sstevel@tonic-gate 5779*0Sstevel@tonic-gate return (svd->offset + (uintptr_t)(addr - seg->s_base)); 5780*0Sstevel@tonic-gate } 5781*0Sstevel@tonic-gate 5782*0Sstevel@tonic-gate /*ARGSUSED*/ 5783*0Sstevel@tonic-gate static int 5784*0Sstevel@tonic-gate segvn_gettype(struct seg *seg, caddr_t addr) 5785*0Sstevel@tonic-gate { 5786*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5787*0Sstevel@tonic-gate 5788*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 5789*0Sstevel@tonic-gate 5790*0Sstevel@tonic-gate return (svd->type | (svd->flags & MAP_NORESERVE)); 5791*0Sstevel@tonic-gate } 5792*0Sstevel@tonic-gate 5793*0Sstevel@tonic-gate /*ARGSUSED*/ 5794*0Sstevel@tonic-gate static int 5795*0Sstevel@tonic-gate segvn_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 5796*0Sstevel@tonic-gate { 5797*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5798*0Sstevel@tonic-gate 5799*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 5800*0Sstevel@tonic-gate 5801*0Sstevel@tonic-gate *vpp = svd->vp; 5802*0Sstevel@tonic-gate return (0); 5803*0Sstevel@tonic-gate } 5804*0Sstevel@tonic-gate 5805*0Sstevel@tonic-gate /* 5806*0Sstevel@tonic-gate * Check to see if it makes sense to do kluster/read ahead to 5807*0Sstevel@tonic-gate * 
addr + delta relative to the mapping at addr. We assume here 5808*0Sstevel@tonic-gate * that delta is a signed PAGESIZE'd multiple (which can be negative). 5809*0Sstevel@tonic-gate * 5810*0Sstevel@tonic-gate * For segvn, we currently "approve" of the action if we are 5811*0Sstevel@tonic-gate * still in the segment and it maps from the same vp/off, 5812*0Sstevel@tonic-gate * or if the advice stored in segvn_data or vpages allows it. 5813*0Sstevel@tonic-gate * Currently, klustering is not allowed only if MADV_RANDOM is set. 5814*0Sstevel@tonic-gate */ 5815*0Sstevel@tonic-gate static int 5816*0Sstevel@tonic-gate segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta) 5817*0Sstevel@tonic-gate { 5818*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5819*0Sstevel@tonic-gate struct anon *oap, *ap; 5820*0Sstevel@tonic-gate ssize_t pd; 5821*0Sstevel@tonic-gate size_t page; 5822*0Sstevel@tonic-gate struct vnode *vp1, *vp2; 5823*0Sstevel@tonic-gate u_offset_t off1, off2; 5824*0Sstevel@tonic-gate struct anon_map *amp; 5825*0Sstevel@tonic-gate 5826*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 5827*0Sstevel@tonic-gate ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) || 5828*0Sstevel@tonic-gate SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); 5829*0Sstevel@tonic-gate 5830*0Sstevel@tonic-gate if (addr + delta < seg->s_base || 5831*0Sstevel@tonic-gate addr + delta >= (seg->s_base + seg->s_size)) 5832*0Sstevel@tonic-gate return (-1); /* exceeded segment bounds */ 5833*0Sstevel@tonic-gate 5834*0Sstevel@tonic-gate pd = delta / (ssize_t)PAGESIZE; /* divide to preserve sign bit */ 5835*0Sstevel@tonic-gate page = seg_page(seg, addr); 5836*0Sstevel@tonic-gate 5837*0Sstevel@tonic-gate /* 5838*0Sstevel@tonic-gate * Check to see if either of the pages addr or addr + delta 5839*0Sstevel@tonic-gate * have advice set that prevents klustering (if MADV_RANDOM advice 5840*0Sstevel@tonic-gate * is set for entire segment, or MADV_SEQUENTIAL is set and delta 5841*0Sstevel@tonic-gate * is negative). 
5842*0Sstevel@tonic-gate */ 5843*0Sstevel@tonic-gate if (svd->advice == MADV_RANDOM || 5844*0Sstevel@tonic-gate svd->advice == MADV_SEQUENTIAL && delta < 0) 5845*0Sstevel@tonic-gate return (-1); 5846*0Sstevel@tonic-gate else if (svd->pageadvice && svd->vpage) { 5847*0Sstevel@tonic-gate struct vpage *bvpp, *evpp; 5848*0Sstevel@tonic-gate 5849*0Sstevel@tonic-gate bvpp = &svd->vpage[page]; 5850*0Sstevel@tonic-gate evpp = &svd->vpage[page + pd]; 5851*0Sstevel@tonic-gate if (VPP_ADVICE(bvpp) == MADV_RANDOM || 5852*0Sstevel@tonic-gate VPP_ADVICE(evpp) == MADV_SEQUENTIAL && delta < 0) 5853*0Sstevel@tonic-gate return (-1); 5854*0Sstevel@tonic-gate if (VPP_ADVICE(bvpp) != VPP_ADVICE(evpp) && 5855*0Sstevel@tonic-gate VPP_ADVICE(evpp) == MADV_RANDOM) 5856*0Sstevel@tonic-gate return (-1); 5857*0Sstevel@tonic-gate } 5858*0Sstevel@tonic-gate 5859*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) 5860*0Sstevel@tonic-gate return (0); /* shared mapping - all ok */ 5861*0Sstevel@tonic-gate 5862*0Sstevel@tonic-gate if ((amp = svd->amp) == NULL) 5863*0Sstevel@tonic-gate return (0); /* off original vnode */ 5864*0Sstevel@tonic-gate 5865*0Sstevel@tonic-gate page += svd->anon_index; 5866*0Sstevel@tonic-gate 5867*0Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); 5868*0Sstevel@tonic-gate 5869*0Sstevel@tonic-gate oap = anon_get_ptr(amp->ahp, page); 5870*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, page + pd); 5871*0Sstevel@tonic-gate 5872*0Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 5873*0Sstevel@tonic-gate 5874*0Sstevel@tonic-gate if ((oap == NULL && ap != NULL) || (oap != NULL && ap == NULL)) { 5875*0Sstevel@tonic-gate return (-1); /* one with and one without an anon */ 5876*0Sstevel@tonic-gate } 5877*0Sstevel@tonic-gate 5878*0Sstevel@tonic-gate if (oap == NULL) { /* implies that ap == NULL */ 5879*0Sstevel@tonic-gate return (0); /* off original vnode */ 5880*0Sstevel@tonic-gate } 5881*0Sstevel@tonic-gate 5882*0Sstevel@tonic-gate /* 5883*0Sstevel@tonic-gate * Now we know we have two anon pointers - check to 5884*0Sstevel@tonic-gate * see if they happen to be properly allocated. 5885*0Sstevel@tonic-gate */ 5886*0Sstevel@tonic-gate 5887*0Sstevel@tonic-gate /* 5888*0Sstevel@tonic-gate * XXX We cheat here and don't lock the anon slots. We can't because 5889*0Sstevel@tonic-gate * we may have been called from the anon layer which might already 5890*0Sstevel@tonic-gate * have locked them. We are holding a refcnt on the slots so they 5891*0Sstevel@tonic-gate * can't disappear. The worst that will happen is we'll get the wrong 5892*0Sstevel@tonic-gate * names (vp, off) for the slots and make a poor klustering decision. 5893*0Sstevel@tonic-gate */ 5894*0Sstevel@tonic-gate swap_xlate(ap, &vp1, &off1); 5895*0Sstevel@tonic-gate swap_xlate(oap, &vp2, &off2); 5896*0Sstevel@tonic-gate 5897*0Sstevel@tonic-gate 5898*0Sstevel@tonic-gate if (!VOP_CMP(vp1, vp2) || off1 - off2 != delta) 5899*0Sstevel@tonic-gate return (-1); 5900*0Sstevel@tonic-gate return (0); 5901*0Sstevel@tonic-gate } 5902*0Sstevel@tonic-gate 5903*0Sstevel@tonic-gate /* 5904*0Sstevel@tonic-gate * Swap the pages of seg out to secondary storage, returning the 5905*0Sstevel@tonic-gate * number of bytes of storage freed. 5906*0Sstevel@tonic-gate * 5907*0Sstevel@tonic-gate * The basic idea is first to unload all translations and then to call 5908*0Sstevel@tonic-gate * VOP_PUTPAGE() for all newly-unmapped pages, to push them out to the 5909*0Sstevel@tonic-gate * swap device.
Pages to which other segments have mappings will remain 5910*0Sstevel@tonic-gate * mapped and won't be swapped. Our caller (as_swapout) has already 5911*0Sstevel@tonic-gate * performed the unloading step. 5912*0Sstevel@tonic-gate * 5913*0Sstevel@tonic-gate * The value returned is intended to correlate well with the process's 5914*0Sstevel@tonic-gate * memory requirements. However, there are some caveats: 5915*0Sstevel@tonic-gate * 1) When given a shared segment as argument, this routine will 5916*0Sstevel@tonic-gate * only succeed in swapping out pages for the last sharer of the 5917*0Sstevel@tonic-gate * segment. (Previous callers will only have decremented mapping 5918*0Sstevel@tonic-gate * reference counts.) 5919*0Sstevel@tonic-gate * 2) We assume that the hat layer maintains a large enough translation 5920*0Sstevel@tonic-gate * cache to capture process reference patterns. 5921*0Sstevel@tonic-gate */ 5922*0Sstevel@tonic-gate static size_t 5923*0Sstevel@tonic-gate segvn_swapout(struct seg *seg) 5924*0Sstevel@tonic-gate { 5925*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 5926*0Sstevel@tonic-gate struct anon_map *amp; 5927*0Sstevel@tonic-gate pgcnt_t pgcnt = 0; 5928*0Sstevel@tonic-gate pgcnt_t npages; 5929*0Sstevel@tonic-gate pgcnt_t page; 5930*0Sstevel@tonic-gate ulong_t anon_index; 5931*0Sstevel@tonic-gate 5932*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 5933*0Sstevel@tonic-gate 5934*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER); 5935*0Sstevel@tonic-gate /* 5936*0Sstevel@tonic-gate * Find pages unmapped by our caller and force them 5937*0Sstevel@tonic-gate * out to the virtual swap device. 5938*0Sstevel@tonic-gate */ 5939*0Sstevel@tonic-gate if ((amp = svd->amp) != NULL) 5940*0Sstevel@tonic-gate anon_index = svd->anon_index; 5941*0Sstevel@tonic-gate npages = seg->s_size >> PAGESHIFT; 5942*0Sstevel@tonic-gate for (page = 0; page < npages; page++) { 5943*0Sstevel@tonic-gate page_t *pp; 5944*0Sstevel@tonic-gate struct anon *ap; 5945*0Sstevel@tonic-gate struct vnode *vp; 5946*0Sstevel@tonic-gate u_offset_t off; 5947*0Sstevel@tonic-gate anon_sync_obj_t cookie; 5948*0Sstevel@tonic-gate 5949*0Sstevel@tonic-gate /* 5950*0Sstevel@tonic-gate * Obtain <vp, off> pair for the page, then look it up. 5951*0Sstevel@tonic-gate * 5952*0Sstevel@tonic-gate * Note that this code is willing to consider regular 5953*0Sstevel@tonic-gate * pages as well as anon pages. Is this appropriate here? 
5954*0Sstevel@tonic-gate */ 5955*0Sstevel@tonic-gate ap = NULL; 5956*0Sstevel@tonic-gate if (amp != NULL) { 5957*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 5958*0Sstevel@tonic-gate anon_array_enter(amp, anon_index + page, &cookie); 5959*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index + page); 5960*0Sstevel@tonic-gate if (ap != NULL) { 5961*0Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 5962*0Sstevel@tonic-gate } else { 5963*0Sstevel@tonic-gate vp = svd->vp; 5964*0Sstevel@tonic-gate off = svd->offset + ptob(page); 5965*0Sstevel@tonic-gate } 5966*0Sstevel@tonic-gate anon_array_exit(&cookie); 5967*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 5968*0Sstevel@tonic-gate } else { 5969*0Sstevel@tonic-gate vp = svd->vp; 5970*0Sstevel@tonic-gate off = svd->offset + ptob(page); 5971*0Sstevel@tonic-gate } 5972*0Sstevel@tonic-gate if (vp == NULL) { /* untouched zfod page */ 5973*0Sstevel@tonic-gate ASSERT(ap == NULL); 5974*0Sstevel@tonic-gate continue; 5975*0Sstevel@tonic-gate } 5976*0Sstevel@tonic-gate 5977*0Sstevel@tonic-gate pp = page_lookup_nowait(vp, off, SE_SHARED); 5978*0Sstevel@tonic-gate if (pp == NULL) 5979*0Sstevel@tonic-gate continue; 5980*0Sstevel@tonic-gate 5981*0Sstevel@tonic-gate 5982*0Sstevel@tonic-gate /* 5983*0Sstevel@tonic-gate * Examine the page to see whether it can be tossed out, 5984*0Sstevel@tonic-gate * keeping track of how many we've found. 5985*0Sstevel@tonic-gate */ 5986*0Sstevel@tonic-gate if (!page_tryupgrade(pp)) { 5987*0Sstevel@tonic-gate /* 5988*0Sstevel@tonic-gate * If the page has an i/o lock and no mappings, 5989*0Sstevel@tonic-gate * it's very likely that the page is being 5990*0Sstevel@tonic-gate * written out as a result of klustering. 5991*0Sstevel@tonic-gate * Assume this is so and take credit for it here. 5992*0Sstevel@tonic-gate */ 5993*0Sstevel@tonic-gate if (!page_io_trylock(pp)) { 5994*0Sstevel@tonic-gate if (!hat_page_is_mapped(pp)) 5995*0Sstevel@tonic-gate pgcnt++; 5996*0Sstevel@tonic-gate } else { 5997*0Sstevel@tonic-gate page_io_unlock(pp); 5998*0Sstevel@tonic-gate } 5999*0Sstevel@tonic-gate page_unlock(pp); 6000*0Sstevel@tonic-gate continue; 6001*0Sstevel@tonic-gate } 6002*0Sstevel@tonic-gate ASSERT(!page_iolock_assert(pp)); 6003*0Sstevel@tonic-gate 6004*0Sstevel@tonic-gate 6005*0Sstevel@tonic-gate /* 6006*0Sstevel@tonic-gate * Skip if page is locked or has mappings. 6007*0Sstevel@tonic-gate * We don't need the page_struct_lock to look at lckcnt 6008*0Sstevel@tonic-gate * and cowcnt because the page is exclusive locked. 6009*0Sstevel@tonic-gate */ 6010*0Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0 || 6011*0Sstevel@tonic-gate hat_page_is_mapped(pp)) { 6012*0Sstevel@tonic-gate page_unlock(pp); 6013*0Sstevel@tonic-gate continue; 6014*0Sstevel@tonic-gate } 6015*0Sstevel@tonic-gate 6016*0Sstevel@tonic-gate /* 6017*0Sstevel@tonic-gate * dispose skips large pages so try to demote first. 6018*0Sstevel@tonic-gate */ 6019*0Sstevel@tonic-gate if (pp->p_szc != 0 && !page_try_demote_pages(pp)) { 6020*0Sstevel@tonic-gate page_unlock(pp); 6021*0Sstevel@tonic-gate /* 6022*0Sstevel@tonic-gate * XXX should skip the remaining page_t's of this 6023*0Sstevel@tonic-gate * large page. 6024*0Sstevel@tonic-gate */ 6025*0Sstevel@tonic-gate continue; 6026*0Sstevel@tonic-gate } 6027*0Sstevel@tonic-gate 6028*0Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 6029*0Sstevel@tonic-gate 6030*0Sstevel@tonic-gate /* 6031*0Sstevel@tonic-gate * No longer mapped -- we can toss it out. 
How 6032*0Sstevel@tonic-gate * we do so depends on whether or not it's dirty. 6033*0Sstevel@tonic-gate */ 6034*0Sstevel@tonic-gate if (hat_ismod(pp) && pp->p_vnode) { 6035*0Sstevel@tonic-gate /* 6036*0Sstevel@tonic-gate * We must clean the page before it can be 6037*0Sstevel@tonic-gate * freed. Setting B_FREE will cause pvn_done 6038*0Sstevel@tonic-gate * to free the page when the i/o completes. 6039*0Sstevel@tonic-gate * XXX: This also causes it to be accounted 6040*0Sstevel@tonic-gate * as a pageout instead of a swap: need 6041*0Sstevel@tonic-gate * B_SWAPOUT bit to use instead of B_FREE. 6042*0Sstevel@tonic-gate * 6043*0Sstevel@tonic-gate * Hold the vnode before releasing the page lock 6044*0Sstevel@tonic-gate * to prevent it from being freed and re-used by 6045*0Sstevel@tonic-gate * some other thread. 6046*0Sstevel@tonic-gate */ 6047*0Sstevel@tonic-gate VN_HOLD(vp); 6048*0Sstevel@tonic-gate page_unlock(pp); 6049*0Sstevel@tonic-gate 6050*0Sstevel@tonic-gate /* 6051*0Sstevel@tonic-gate * Queue all i/o requests for the pageout thread 6052*0Sstevel@tonic-gate * to avoid saturating the pageout devices. 6053*0Sstevel@tonic-gate */ 6054*0Sstevel@tonic-gate if (!queue_io_request(vp, off)) 6055*0Sstevel@tonic-gate VN_RELE(vp); 6056*0Sstevel@tonic-gate } else { 6057*0Sstevel@tonic-gate /* 6058*0Sstevel@tonic-gate * The page was clean, free it. 6059*0Sstevel@tonic-gate * 6060*0Sstevel@tonic-gate * XXX: Can we ever encounter modified pages 6061*0Sstevel@tonic-gate * with no associated vnode here? 6062*0Sstevel@tonic-gate */ 6063*0Sstevel@tonic-gate ASSERT(pp->p_vnode != NULL); 6064*0Sstevel@tonic-gate /*LINTED: constant in conditional context*/ 6065*0Sstevel@tonic-gate VN_DISPOSE(pp, B_FREE, 0, kcred); 6066*0Sstevel@tonic-gate } 6067*0Sstevel@tonic-gate 6068*0Sstevel@tonic-gate /* 6069*0Sstevel@tonic-gate * Credit now even if i/o is in progress. 6070*0Sstevel@tonic-gate */ 6071*0Sstevel@tonic-gate pgcnt++; 6072*0Sstevel@tonic-gate } 6073*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6074*0Sstevel@tonic-gate 6075*0Sstevel@tonic-gate /* 6076*0Sstevel@tonic-gate * Wakeup pageout to initiate i/o on all queued requests. 6077*0Sstevel@tonic-gate */ 6078*0Sstevel@tonic-gate cv_signal_pageout(); 6079*0Sstevel@tonic-gate return (ptob(pgcnt)); 6080*0Sstevel@tonic-gate } 6081*0Sstevel@tonic-gate 6082*0Sstevel@tonic-gate /* 6083*0Sstevel@tonic-gate * Synchronize primary storage cache with real object in virtual memory. 6084*0Sstevel@tonic-gate * 6085*0Sstevel@tonic-gate * XXX - Anonymous pages should not be sync'ed out at all. 
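 *
 * This entry point is what an msync(3C) request on the mapping ultimately
 * reaches (via memcntl(2)/as_ctl()): MS_ASYNC maps to B_ASYNC and
 * MS_INVALIDATE to B_INVAL below.  A minimal illustrative userland sketch
 * (fd and len assumed, error handling omitted):
 *
 *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	p[0] = 1;
 *	(void) msync(p, len, MS_ASYNC);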
6086*0Sstevel@tonic-gate */ 6087*0Sstevel@tonic-gate static int 6088*0Sstevel@tonic-gate segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags) 6089*0Sstevel@tonic-gate { 6090*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 6091*0Sstevel@tonic-gate struct vpage *vpp; 6092*0Sstevel@tonic-gate page_t *pp; 6093*0Sstevel@tonic-gate u_offset_t offset; 6094*0Sstevel@tonic-gate struct vnode *vp; 6095*0Sstevel@tonic-gate u_offset_t off; 6096*0Sstevel@tonic-gate caddr_t eaddr; 6097*0Sstevel@tonic-gate int bflags; 6098*0Sstevel@tonic-gate int err = 0; 6099*0Sstevel@tonic-gate int segtype; 6100*0Sstevel@tonic-gate int pageprot; 6101*0Sstevel@tonic-gate int prot; 6102*0Sstevel@tonic-gate ulong_t anon_index; 6103*0Sstevel@tonic-gate struct anon_map *amp; 6104*0Sstevel@tonic-gate struct anon *ap; 6105*0Sstevel@tonic-gate anon_sync_obj_t cookie; 6106*0Sstevel@tonic-gate 6107*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 6108*0Sstevel@tonic-gate 6109*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER); 6110*0Sstevel@tonic-gate 6111*0Sstevel@tonic-gate if (svd->softlockcnt > 0) { 6112*0Sstevel@tonic-gate /* 6113*0Sstevel@tonic-gate * flush all pages from seg cache 6114*0Sstevel@tonic-gate * otherwise we may deadlock in swap_putpage 6115*0Sstevel@tonic-gate * for B_INVAL page (4175402). 6116*0Sstevel@tonic-gate * 6117*0Sstevel@tonic-gate * Even if we grab segvn WRITER's lock or segp_slock 6118*0Sstevel@tonic-gate * here, there might be another thread which could've 6119*0Sstevel@tonic-gate * successfully performed lookup/insert just before 6120*0Sstevel@tonic-gate * we acquired the lock here. So, grabbing either 6121*0Sstevel@tonic-gate * lock here is of not much use. Until we devise 6122*0Sstevel@tonic-gate * a strategy at upper layers to solve the 6123*0Sstevel@tonic-gate * synchronization issues completely, we expect 6124*0Sstevel@tonic-gate * applications to handle this appropriately. 6125*0Sstevel@tonic-gate */ 6126*0Sstevel@tonic-gate segvn_purge(seg); 6127*0Sstevel@tonic-gate if (svd->softlockcnt > 0) { 6128*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6129*0Sstevel@tonic-gate return (EAGAIN); 6130*0Sstevel@tonic-gate } 6131*0Sstevel@tonic-gate } 6132*0Sstevel@tonic-gate 6133*0Sstevel@tonic-gate vpp = svd->vpage; 6134*0Sstevel@tonic-gate offset = svd->offset + (uintptr_t)(addr - seg->s_base); 6135*0Sstevel@tonic-gate bflags = ((flags & MS_ASYNC) ? B_ASYNC : 0) | 6136*0Sstevel@tonic-gate ((flags & MS_INVALIDATE) ? B_INVAL : 0); 6137*0Sstevel@tonic-gate 6138*0Sstevel@tonic-gate if (attr) { 6139*0Sstevel@tonic-gate pageprot = attr & ~(SHARED|PRIVATE); 6140*0Sstevel@tonic-gate segtype = (attr & SHARED) ? MAP_SHARED : MAP_PRIVATE; 6141*0Sstevel@tonic-gate 6142*0Sstevel@tonic-gate /* 6143*0Sstevel@tonic-gate * We are done if the segment types don't match 6144*0Sstevel@tonic-gate * or if we have segment level protections and 6145*0Sstevel@tonic-gate * they don't match. 
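	 *
	 * The attr convention matches memcntl(2); an illustrative sketch
	 * such as
	 *
	 *	(void) memcntl(addr, len, MC_SYNC, (caddr_t)MS_ASYNC,
	 *	    PROT_READ | PROT_WRITE | SHARED, 0);
	 *
	 * would only sync pages of a MAP_SHARED segment whose current
	 * protection is exactly read/write; everything else is skipped below.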
6146*0Sstevel@tonic-gate */ 6147*0Sstevel@tonic-gate if (svd->type != segtype) { 6148*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6149*0Sstevel@tonic-gate return (0); 6150*0Sstevel@tonic-gate } 6151*0Sstevel@tonic-gate if (vpp == NULL) { 6152*0Sstevel@tonic-gate if (svd->prot != pageprot) { 6153*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6154*0Sstevel@tonic-gate return (0); 6155*0Sstevel@tonic-gate } 6156*0Sstevel@tonic-gate prot = svd->prot; 6157*0Sstevel@tonic-gate } else 6158*0Sstevel@tonic-gate vpp = &svd->vpage[seg_page(seg, addr)]; 6159*0Sstevel@tonic-gate 6160*0Sstevel@tonic-gate } else if (svd->vp && svd->amp == NULL && 6161*0Sstevel@tonic-gate (flags & MS_INVALIDATE) == 0) { 6162*0Sstevel@tonic-gate 6163*0Sstevel@tonic-gate /* 6164*0Sstevel@tonic-gate * No attributes, no anonymous pages and MS_INVALIDATE flag 6165*0Sstevel@tonic-gate * is not on, just use one big request. 6166*0Sstevel@tonic-gate */ 6167*0Sstevel@tonic-gate err = VOP_PUTPAGE(svd->vp, (offset_t)offset, len, 6168*0Sstevel@tonic-gate bflags, svd->cred); 6169*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6170*0Sstevel@tonic-gate return (err); 6171*0Sstevel@tonic-gate } 6172*0Sstevel@tonic-gate 6173*0Sstevel@tonic-gate if ((amp = svd->amp) != NULL) 6174*0Sstevel@tonic-gate anon_index = svd->anon_index + seg_page(seg, addr); 6175*0Sstevel@tonic-gate 6176*0Sstevel@tonic-gate for (eaddr = addr + len; addr < eaddr; addr += PAGESIZE) { 6177*0Sstevel@tonic-gate ap = NULL; 6178*0Sstevel@tonic-gate if (amp != NULL) { 6179*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 6180*0Sstevel@tonic-gate anon_array_enter(amp, anon_index, &cookie); 6181*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index++); 6182*0Sstevel@tonic-gate if (ap != NULL) { 6183*0Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 6184*0Sstevel@tonic-gate } else { 6185*0Sstevel@tonic-gate vp = svd->vp; 6186*0Sstevel@tonic-gate off = offset; 6187*0Sstevel@tonic-gate } 6188*0Sstevel@tonic-gate anon_array_exit(&cookie); 6189*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6190*0Sstevel@tonic-gate } else { 6191*0Sstevel@tonic-gate vp = svd->vp; 6192*0Sstevel@tonic-gate off = offset; 6193*0Sstevel@tonic-gate } 6194*0Sstevel@tonic-gate offset += PAGESIZE; 6195*0Sstevel@tonic-gate 6196*0Sstevel@tonic-gate if (vp == NULL) /* untouched zfod page */ 6197*0Sstevel@tonic-gate continue; 6198*0Sstevel@tonic-gate 6199*0Sstevel@tonic-gate if (attr) { 6200*0Sstevel@tonic-gate if (vpp) { 6201*0Sstevel@tonic-gate prot = VPP_PROT(vpp); 6202*0Sstevel@tonic-gate vpp++; 6203*0Sstevel@tonic-gate } 6204*0Sstevel@tonic-gate if (prot != pageprot) { 6205*0Sstevel@tonic-gate continue; 6206*0Sstevel@tonic-gate } 6207*0Sstevel@tonic-gate } 6208*0Sstevel@tonic-gate 6209*0Sstevel@tonic-gate /* 6210*0Sstevel@tonic-gate * See if any of these pages are locked -- if so, then we 6211*0Sstevel@tonic-gate * will have to truncate an invalidate request at the first 6212*0Sstevel@tonic-gate * locked one. We don't need the page_struct_lock to test 6213*0Sstevel@tonic-gate * as this is only advisory; even if we acquire it someone 6214*0Sstevel@tonic-gate * might race in and lock the page after we unlock and before 6215*0Sstevel@tonic-gate * we do the PUTPAGE, then PUTPAGE simply does nothing. 
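		 *
		 * This is also why an MS_INVALIDATE request over a range that
		 * has been mlock(3C)ed comes back EBUSY below; an illustrative
		 * sketch (p and len assumed mapped and lockable):
		 *
		 *	(void) mlock(p, len);
		 *	(void) msync(p, len, MS_INVALIDATE);
		 *
		 * The second call fails with EBUSY while the lock is held.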
6216*0Sstevel@tonic-gate */ 6217*0Sstevel@tonic-gate if (flags & MS_INVALIDATE) { 6218*0Sstevel@tonic-gate if ((pp = page_lookup(vp, off, SE_SHARED)) != NULL) { 6219*0Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 6220*0Sstevel@tonic-gate page_unlock(pp); 6221*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6222*0Sstevel@tonic-gate return (EBUSY); 6223*0Sstevel@tonic-gate } 6224*0Sstevel@tonic-gate if (ap != NULL && pp->p_szc != 0 && 6225*0Sstevel@tonic-gate page_tryupgrade(pp)) { 6226*0Sstevel@tonic-gate if (pp->p_lckcnt == 0 && 6227*0Sstevel@tonic-gate pp->p_cowcnt == 0) { 6228*0Sstevel@tonic-gate /* 6229*0Sstevel@tonic-gate * swapfs VN_DISPOSE() won't 6230*0Sstevel@tonic-gate * invalidate large pages. 6231*0Sstevel@tonic-gate * Attempt to demote. 6232*0Sstevel@tonic-gate * XXX can't help it if it 6233*0Sstevel@tonic-gate * fails. But for swapfs 6234*0Sstevel@tonic-gate * pages it is no big deal. 6235*0Sstevel@tonic-gate */ 6236*0Sstevel@tonic-gate (void) page_try_demote_pages( 6237*0Sstevel@tonic-gate pp); 6238*0Sstevel@tonic-gate } 6239*0Sstevel@tonic-gate } 6240*0Sstevel@tonic-gate page_unlock(pp); 6241*0Sstevel@tonic-gate } 6242*0Sstevel@tonic-gate } else if (svd->type == MAP_SHARED && amp != NULL) { 6243*0Sstevel@tonic-gate /* 6244*0Sstevel@tonic-gate * Avoid writting out to disk ISM's large pages 6245*0Sstevel@tonic-gate * because segspt_free_pages() relies on NULL an_pvp 6246*0Sstevel@tonic-gate * of anon slots of such pages. 6247*0Sstevel@tonic-gate */ 6248*0Sstevel@tonic-gate 6249*0Sstevel@tonic-gate ASSERT(svd->vp == NULL); 6250*0Sstevel@tonic-gate /* 6251*0Sstevel@tonic-gate * swapfs uses page_lookup_nowait if not freeing or 6252*0Sstevel@tonic-gate * invalidating and skips a page if 6253*0Sstevel@tonic-gate * page_lookup_nowait returns NULL. 6254*0Sstevel@tonic-gate */ 6255*0Sstevel@tonic-gate pp = page_lookup_nowait(vp, off, SE_SHARED); 6256*0Sstevel@tonic-gate if (pp == NULL) { 6257*0Sstevel@tonic-gate continue; 6258*0Sstevel@tonic-gate } 6259*0Sstevel@tonic-gate if (pp->p_szc != 0) { 6260*0Sstevel@tonic-gate page_unlock(pp); 6261*0Sstevel@tonic-gate continue; 6262*0Sstevel@tonic-gate } 6263*0Sstevel@tonic-gate 6264*0Sstevel@tonic-gate /* 6265*0Sstevel@tonic-gate * Note ISM pages are created large so (vp, off)'s 6266*0Sstevel@tonic-gate * page cannot suddenly become large after we unlock 6267*0Sstevel@tonic-gate * pp. 6268*0Sstevel@tonic-gate */ 6269*0Sstevel@tonic-gate page_unlock(pp); 6270*0Sstevel@tonic-gate } 6271*0Sstevel@tonic-gate /* 6272*0Sstevel@tonic-gate * XXX - Should ultimately try to kluster 6273*0Sstevel@tonic-gate * calls to VOP_PUTPAGE() for performance. 6274*0Sstevel@tonic-gate */ 6275*0Sstevel@tonic-gate VN_HOLD(vp); 6276*0Sstevel@tonic-gate err = VOP_PUTPAGE(vp, (offset_t)off, PAGESIZE, 6277*0Sstevel@tonic-gate bflags, svd->cred); 6278*0Sstevel@tonic-gate VN_RELE(vp); 6279*0Sstevel@tonic-gate if (err) 6280*0Sstevel@tonic-gate break; 6281*0Sstevel@tonic-gate } 6282*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6283*0Sstevel@tonic-gate return (err); 6284*0Sstevel@tonic-gate } 6285*0Sstevel@tonic-gate 6286*0Sstevel@tonic-gate /* 6287*0Sstevel@tonic-gate * Determine if we have data corresponding to pages in the 6288*0Sstevel@tonic-gate * primary storage virtual memory cache (i.e., "in core"). 
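 *
 * Each byte written to *vec is a combination of the SEG_PAGE_* flags.
 * For illustration, a page of a vnode-backed MAP_PRIVATE segment whose
 * anonymous copy is resident, currently mapped writable, and locked
 * (non-zero p_lckcnt) would be reported as
 *
 *	SEG_PAGE_VNODEBACKED | SEG_PAGE_INCORE | SEG_PAGE_ANON |
 *	    SEG_PAGE_SOFTLOCK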
6289*0Sstevel@tonic-gate */ 6290*0Sstevel@tonic-gate static size_t 6291*0Sstevel@tonic-gate segvn_incore(struct seg *seg, caddr_t addr, size_t len, char *vec) 6292*0Sstevel@tonic-gate { 6293*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 6294*0Sstevel@tonic-gate struct vnode *vp, *avp; 6295*0Sstevel@tonic-gate u_offset_t offset, aoffset; 6296*0Sstevel@tonic-gate size_t p, ep; 6297*0Sstevel@tonic-gate int ret; 6298*0Sstevel@tonic-gate struct vpage *vpp; 6299*0Sstevel@tonic-gate page_t *pp; 6300*0Sstevel@tonic-gate uint_t start; 6301*0Sstevel@tonic-gate struct anon_map *amp; /* XXX - for locknest */ 6302*0Sstevel@tonic-gate struct anon *ap; 6303*0Sstevel@tonic-gate uint_t attr; 6304*0Sstevel@tonic-gate anon_sync_obj_t cookie; 6305*0Sstevel@tonic-gate 6306*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 6307*0Sstevel@tonic-gate 6308*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER); 6309*0Sstevel@tonic-gate if (svd->amp == NULL && svd->vp == NULL) { 6310*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6311*0Sstevel@tonic-gate bzero(vec, btopr(len)); 6312*0Sstevel@tonic-gate return (len); /* no anonymous pages created yet */ 6313*0Sstevel@tonic-gate } 6314*0Sstevel@tonic-gate 6315*0Sstevel@tonic-gate p = seg_page(seg, addr); 6316*0Sstevel@tonic-gate ep = seg_page(seg, addr + len); 6317*0Sstevel@tonic-gate start = svd->vp ? SEG_PAGE_VNODEBACKED : 0; 6318*0Sstevel@tonic-gate 6319*0Sstevel@tonic-gate amp = svd->amp; 6320*0Sstevel@tonic-gate for (; p < ep; p++, addr += PAGESIZE) { 6321*0Sstevel@tonic-gate vpp = (svd->vpage) ? &svd->vpage[p]: NULL; 6322*0Sstevel@tonic-gate ret = start; 6323*0Sstevel@tonic-gate ap = NULL; 6324*0Sstevel@tonic-gate avp = NULL; 6325*0Sstevel@tonic-gate /* Grab the vnode/offset for the anon slot */ 6326*0Sstevel@tonic-gate if (amp != NULL) { 6327*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 6328*0Sstevel@tonic-gate anon_array_enter(amp, svd->anon_index + p, &cookie); 6329*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, svd->anon_index + p); 6330*0Sstevel@tonic-gate if (ap != NULL) { 6331*0Sstevel@tonic-gate swap_xlate(ap, &avp, &aoffset); 6332*0Sstevel@tonic-gate } 6333*0Sstevel@tonic-gate anon_array_exit(&cookie); 6334*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6335*0Sstevel@tonic-gate } 6336*0Sstevel@tonic-gate if ((avp != NULL) && page_exists(avp, aoffset)) { 6337*0Sstevel@tonic-gate /* A page exists for the anon slot */ 6338*0Sstevel@tonic-gate ret |= SEG_PAGE_INCORE; 6339*0Sstevel@tonic-gate 6340*0Sstevel@tonic-gate /* 6341*0Sstevel@tonic-gate * If page is mapped and writable 6342*0Sstevel@tonic-gate */ 6343*0Sstevel@tonic-gate attr = (uint_t)0; 6344*0Sstevel@tonic-gate if ((hat_getattr(seg->s_as->a_hat, addr, 6345*0Sstevel@tonic-gate &attr) != -1) && (attr & PROT_WRITE)) { 6346*0Sstevel@tonic-gate ret |= SEG_PAGE_ANON; 6347*0Sstevel@tonic-gate } 6348*0Sstevel@tonic-gate /* 6349*0Sstevel@tonic-gate * Don't get page_struct lock for lckcnt and cowcnt, 6350*0Sstevel@tonic-gate * since this is purely advisory. 
6351*0Sstevel@tonic-gate */ 6352*0Sstevel@tonic-gate if ((pp = page_lookup_nowait(avp, aoffset, 6353*0Sstevel@tonic-gate SE_SHARED)) != NULL) { 6354*0Sstevel@tonic-gate if (pp->p_lckcnt) 6355*0Sstevel@tonic-gate ret |= SEG_PAGE_SOFTLOCK; 6356*0Sstevel@tonic-gate if (pp->p_cowcnt) 6357*0Sstevel@tonic-gate ret |= SEG_PAGE_HASCOW; 6358*0Sstevel@tonic-gate page_unlock(pp); 6359*0Sstevel@tonic-gate } 6360*0Sstevel@tonic-gate } 6361*0Sstevel@tonic-gate 6362*0Sstevel@tonic-gate /* Gather vnode statistics */ 6363*0Sstevel@tonic-gate vp = svd->vp; 6364*0Sstevel@tonic-gate offset = svd->offset + (uintptr_t)(addr - seg->s_base); 6365*0Sstevel@tonic-gate 6366*0Sstevel@tonic-gate if (vp != NULL) { 6367*0Sstevel@tonic-gate /* 6368*0Sstevel@tonic-gate * Try to obtain a "shared" lock on the page 6369*0Sstevel@tonic-gate * without blocking. If this fails, determine 6370*0Sstevel@tonic-gate * if the page is in memory. 6371*0Sstevel@tonic-gate */ 6372*0Sstevel@tonic-gate pp = page_lookup_nowait(vp, offset, SE_SHARED); 6373*0Sstevel@tonic-gate if ((pp == NULL) && (page_exists(vp, offset))) { 6374*0Sstevel@tonic-gate /* Page is incore, and is named */ 6375*0Sstevel@tonic-gate ret |= (SEG_PAGE_INCORE | SEG_PAGE_VNODE); 6376*0Sstevel@tonic-gate } 6377*0Sstevel@tonic-gate /* 6378*0Sstevel@tonic-gate * Don't get page_struct lock for lckcnt and cowcnt, 6379*0Sstevel@tonic-gate * since this is purely advisory. 6380*0Sstevel@tonic-gate */ 6381*0Sstevel@tonic-gate if (pp != NULL) { 6382*0Sstevel@tonic-gate ret |= (SEG_PAGE_INCORE | SEG_PAGE_VNODE); 6383*0Sstevel@tonic-gate if (pp->p_lckcnt) 6384*0Sstevel@tonic-gate ret |= SEG_PAGE_SOFTLOCK; 6385*0Sstevel@tonic-gate if (pp->p_cowcnt) 6386*0Sstevel@tonic-gate ret |= SEG_PAGE_HASCOW; 6387*0Sstevel@tonic-gate page_unlock(pp); 6388*0Sstevel@tonic-gate } 6389*0Sstevel@tonic-gate } 6390*0Sstevel@tonic-gate 6391*0Sstevel@tonic-gate /* Gather virtual page information */ 6392*0Sstevel@tonic-gate if (vpp) { 6393*0Sstevel@tonic-gate if (VPP_ISPPLOCK(vpp)) 6394*0Sstevel@tonic-gate ret |= SEG_PAGE_LOCKED; 6395*0Sstevel@tonic-gate vpp++; 6396*0Sstevel@tonic-gate } 6397*0Sstevel@tonic-gate 6398*0Sstevel@tonic-gate *vec++ = (char)ret; 6399*0Sstevel@tonic-gate } 6400*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6401*0Sstevel@tonic-gate return (len); 6402*0Sstevel@tonic-gate } 6403*0Sstevel@tonic-gate 6404*0Sstevel@tonic-gate /* 6405*0Sstevel@tonic-gate * Statement for p_cowcnts/p_lckcnts. 6406*0Sstevel@tonic-gate * 6407*0Sstevel@tonic-gate * p_cowcnt is updated while mlock/munlocking MAP_PRIVATE and PROT_WRITE region 6408*0Sstevel@tonic-gate * irrespective of the following factors or anything else: 6409*0Sstevel@tonic-gate * 6410*0Sstevel@tonic-gate * (1) anon slots are populated or not 6411*0Sstevel@tonic-gate * (2) cow is broken or not 6412*0Sstevel@tonic-gate * (3) refcnt on ap is 1 or greater than 1 6413*0Sstevel@tonic-gate * 6414*0Sstevel@tonic-gate * If it's not MAP_PRIVATE and PROT_WRITE, p_lckcnt is updated during mlock 6415*0Sstevel@tonic-gate * and munlock. 
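 *
 * As a concrete illustration of the rules in this comment (a sketch,
 * assuming one anonymous page, sufficient locking privilege, and no
 * other lockers):
 *
 *	p = mmap(NULL, PAGESIZE, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);
 *	mlock(p, PAGESIZE);		(MAP_PRIVATE, PROT_WRITE: p_cowcnt++)
 *	mprotect(p, PAGESIZE, PROT_READ);
 *					(page_subclaim(): p_cowcnt--, p_lckcnt++)
 *	mprotect(p, PAGESIZE, PROT_READ | PROT_WRITE);
 *					(page_addclaim(): p_lckcnt--, p_cowcnt++)
 *	munlock(p, PAGESIZE);		(p_cowcnt--)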
6416*0Sstevel@tonic-gate * 6417*0Sstevel@tonic-gate * 6418*0Sstevel@tonic-gate * Handling p_cowcnts/p_lckcnts during copy-on-write fault: 6419*0Sstevel@tonic-gate * 6420*0Sstevel@tonic-gate * if vpage has PROT_WRITE 6421*0Sstevel@tonic-gate * transfer cowcnt on the oldpage -> cowcnt on the newpage 6422*0Sstevel@tonic-gate * else 6423*0Sstevel@tonic-gate * transfer lckcnt on the oldpage -> lckcnt on the newpage 6424*0Sstevel@tonic-gate * 6425*0Sstevel@tonic-gate * During copy-on-write, decrement p_cowcnt on the oldpage and increment 6426*0Sstevel@tonic-gate * p_cowcnt on the newpage *if* the corresponding vpage has PROT_WRITE. 6427*0Sstevel@tonic-gate * 6428*0Sstevel@tonic-gate * We may also break COW if softlocking on read access in the physio case. 6429*0Sstevel@tonic-gate * In this case, vpage may not have PROT_WRITE. So, we need to decrement 6430*0Sstevel@tonic-gate * p_lckcnt on the oldpage and increment p_lckcnt on the newpage *if* the 6431*0Sstevel@tonic-gate * vpage doesn't have PROT_WRITE. 6432*0Sstevel@tonic-gate * 6433*0Sstevel@tonic-gate * 6434*0Sstevel@tonic-gate * Handling p_cowcnts/p_lckcnts during mprotect on mlocked region: 6435*0Sstevel@tonic-gate * 6436*0Sstevel@tonic-gate * If a MAP_PRIVATE region loses PROT_WRITE, we decrement p_cowcnt and 6437*0Sstevel@tonic-gate * increment p_lckcnt by calling page_subclaim() which takes care of 6438*0Sstevel@tonic-gate * availrmem accounting and p_lckcnt overflow. 6439*0Sstevel@tonic-gate * 6440*0Sstevel@tonic-gate * If a MAP_PRIVATE region gains PROT_WRITE, we decrement p_lckcnt and 6441*0Sstevel@tonic-gate * increment p_cowcnt by calling page_addclaim() which takes care of 6442*0Sstevel@tonic-gate * availrmem availability and p_cowcnt overflow. 6443*0Sstevel@tonic-gate */ 6444*0Sstevel@tonic-gate 6445*0Sstevel@tonic-gate /* 6446*0Sstevel@tonic-gate * Lock down (or unlock) pages mapped by this segment. 6447*0Sstevel@tonic-gate * 6448*0Sstevel@tonic-gate * XXX only creates PAGESIZE pages if anon slots are not initialized. 6449*0Sstevel@tonic-gate * At fault time they will be relocated into larger pages. 
6450*0Sstevel@tonic-gate */ 6451*0Sstevel@tonic-gate static int 6452*0Sstevel@tonic-gate segvn_lockop(struct seg *seg, caddr_t addr, size_t len, 6453*0Sstevel@tonic-gate int attr, int op, ulong_t *lockmap, size_t pos) 6454*0Sstevel@tonic-gate { 6455*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 6456*0Sstevel@tonic-gate struct vpage *vpp; 6457*0Sstevel@tonic-gate struct vpage *evp; 6458*0Sstevel@tonic-gate page_t *pp; 6459*0Sstevel@tonic-gate u_offset_t offset; 6460*0Sstevel@tonic-gate u_offset_t off; 6461*0Sstevel@tonic-gate int segtype; 6462*0Sstevel@tonic-gate int pageprot; 6463*0Sstevel@tonic-gate int claim; 6464*0Sstevel@tonic-gate struct vnode *vp; 6465*0Sstevel@tonic-gate ulong_t anon_index; 6466*0Sstevel@tonic-gate struct anon_map *amp; 6467*0Sstevel@tonic-gate struct anon *ap; 6468*0Sstevel@tonic-gate struct vattr va; 6469*0Sstevel@tonic-gate anon_sync_obj_t cookie; 6470*0Sstevel@tonic-gate 6471*0Sstevel@tonic-gate /* 6472*0Sstevel@tonic-gate * Hold write lock on address space because may split or concatenate 6473*0Sstevel@tonic-gate * segments 6474*0Sstevel@tonic-gate */ 6475*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 6476*0Sstevel@tonic-gate 6477*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); 6478*0Sstevel@tonic-gate if (attr) { 6479*0Sstevel@tonic-gate pageprot = attr & ~(SHARED|PRIVATE); 6480*0Sstevel@tonic-gate segtype = attr & SHARED ? MAP_SHARED : MAP_PRIVATE; 6481*0Sstevel@tonic-gate 6482*0Sstevel@tonic-gate /* 6483*0Sstevel@tonic-gate * We are done if the segment types don't match 6484*0Sstevel@tonic-gate * or if we have segment level protections and 6485*0Sstevel@tonic-gate * they don't match. 6486*0Sstevel@tonic-gate */ 6487*0Sstevel@tonic-gate if (svd->type != segtype) { 6488*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6489*0Sstevel@tonic-gate return (0); 6490*0Sstevel@tonic-gate } 6491*0Sstevel@tonic-gate if (svd->pageprot == 0 && svd->prot != pageprot) { 6492*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6493*0Sstevel@tonic-gate return (0); 6494*0Sstevel@tonic-gate } 6495*0Sstevel@tonic-gate } 6496*0Sstevel@tonic-gate 6497*0Sstevel@tonic-gate /* 6498*0Sstevel@tonic-gate * If we're locking, then we must create a vpage structure if 6499*0Sstevel@tonic-gate * none exists. If we're unlocking, then check to see if there 6500*0Sstevel@tonic-gate * is a vpage -- if not, then we could not have locked anything. 6501*0Sstevel@tonic-gate */ 6502*0Sstevel@tonic-gate 6503*0Sstevel@tonic-gate if ((vpp = svd->vpage) == NULL) { 6504*0Sstevel@tonic-gate if (op == MC_LOCK) 6505*0Sstevel@tonic-gate segvn_vpage(seg); 6506*0Sstevel@tonic-gate else { 6507*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6508*0Sstevel@tonic-gate return (0); 6509*0Sstevel@tonic-gate } 6510*0Sstevel@tonic-gate } 6511*0Sstevel@tonic-gate 6512*0Sstevel@tonic-gate /* 6513*0Sstevel@tonic-gate * The anonymous data vector (i.e., previously 6514*0Sstevel@tonic-gate * unreferenced mapping to swap space) can be allocated 6515*0Sstevel@tonic-gate * by lazily testing for its existence. 
6516*0Sstevel@tonic-gate */ 6517*0Sstevel@tonic-gate if (op == MC_LOCK && svd->amp == NULL && svd->vp == NULL) { 6518*0Sstevel@tonic-gate svd->amp = anonmap_alloc(seg->s_size, 0); 6519*0Sstevel@tonic-gate svd->amp->a_szc = seg->s_szc; 6520*0Sstevel@tonic-gate } 6521*0Sstevel@tonic-gate 6522*0Sstevel@tonic-gate if ((amp = svd->amp) != NULL) { 6523*0Sstevel@tonic-gate anon_index = svd->anon_index + seg_page(seg, addr); 6524*0Sstevel@tonic-gate } 6525*0Sstevel@tonic-gate 6526*0Sstevel@tonic-gate offset = svd->offset + (uintptr_t)(addr - seg->s_base); 6527*0Sstevel@tonic-gate evp = &svd->vpage[seg_page(seg, addr + len)]; 6528*0Sstevel@tonic-gate 6529*0Sstevel@tonic-gate /* 6530*0Sstevel@tonic-gate * Loop over all pages in the range. Process if we're locking and 6531*0Sstevel@tonic-gate * page has not already been locked in this mapping; or if we're 6532*0Sstevel@tonic-gate * unlocking and the page has been locked. 6533*0Sstevel@tonic-gate */ 6534*0Sstevel@tonic-gate for (vpp = &svd->vpage[seg_page(seg, addr)]; vpp < evp; 6535*0Sstevel@tonic-gate vpp++, pos++, addr += PAGESIZE, offset += PAGESIZE, anon_index++) { 6536*0Sstevel@tonic-gate if ((attr == 0 || VPP_PROT(vpp) == pageprot) && 6537*0Sstevel@tonic-gate ((op == MC_LOCK && !VPP_ISPPLOCK(vpp)) || 6538*0Sstevel@tonic-gate (op == MC_UNLOCK && VPP_ISPPLOCK(vpp)))) { 6539*0Sstevel@tonic-gate 6540*0Sstevel@tonic-gate if (amp != NULL) 6541*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 6542*0Sstevel@tonic-gate /* 6543*0Sstevel@tonic-gate * If this isn't a MAP_NORESERVE segment and 6544*0Sstevel@tonic-gate * we're locking, allocate anon slots if they 6545*0Sstevel@tonic-gate * don't exist. The page is brought in later on. 6546*0Sstevel@tonic-gate */ 6547*0Sstevel@tonic-gate if (op == MC_LOCK && svd->vp == NULL && 6548*0Sstevel@tonic-gate ((svd->flags & MAP_NORESERVE) == 0) && 6549*0Sstevel@tonic-gate amp != NULL && 6550*0Sstevel@tonic-gate ((ap = anon_get_ptr(amp->ahp, anon_index)) 6551*0Sstevel@tonic-gate == NULL)) { 6552*0Sstevel@tonic-gate anon_array_enter(amp, anon_index, &cookie); 6553*0Sstevel@tonic-gate 6554*0Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, 6555*0Sstevel@tonic-gate anon_index)) == NULL) { 6556*0Sstevel@tonic-gate pp = anon_zero(seg, addr, &ap, 6557*0Sstevel@tonic-gate svd->cred); 6558*0Sstevel@tonic-gate if (pp == NULL) { 6559*0Sstevel@tonic-gate anon_array_exit(&cookie); 6560*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6561*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, 6562*0Sstevel@tonic-gate &svd->lock); 6563*0Sstevel@tonic-gate return (ENOMEM); 6564*0Sstevel@tonic-gate } 6565*0Sstevel@tonic-gate ASSERT(anon_get_ptr(amp->ahp, 6566*0Sstevel@tonic-gate anon_index) == NULL); 6567*0Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, 6568*0Sstevel@tonic-gate anon_index, ap, ANON_SLEEP); 6569*0Sstevel@tonic-gate page_unlock(pp); 6570*0Sstevel@tonic-gate } 6571*0Sstevel@tonic-gate anon_array_exit(&cookie); 6572*0Sstevel@tonic-gate } 6573*0Sstevel@tonic-gate 6574*0Sstevel@tonic-gate /* 6575*0Sstevel@tonic-gate * Get name for page, accounting for 6576*0Sstevel@tonic-gate * existence of private copy. 
6577*0Sstevel@tonic-gate */ 6578*0Sstevel@tonic-gate ap = NULL; 6579*0Sstevel@tonic-gate if (amp != NULL) { 6580*0Sstevel@tonic-gate anon_array_enter(amp, anon_index, &cookie); 6581*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index); 6582*0Sstevel@tonic-gate if (ap != NULL) { 6583*0Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 6584*0Sstevel@tonic-gate } else { 6585*0Sstevel@tonic-gate if (svd->vp == NULL && 6586*0Sstevel@tonic-gate (svd->flags & MAP_NORESERVE)) { 6587*0Sstevel@tonic-gate anon_array_exit(&cookie); 6588*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6589*0Sstevel@tonic-gate continue; 6590*0Sstevel@tonic-gate } 6591*0Sstevel@tonic-gate vp = svd->vp; 6592*0Sstevel@tonic-gate off = offset; 6593*0Sstevel@tonic-gate } 6594*0Sstevel@tonic-gate anon_array_exit(&cookie); 6595*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6596*0Sstevel@tonic-gate } else { 6597*0Sstevel@tonic-gate vp = svd->vp; 6598*0Sstevel@tonic-gate off = offset; 6599*0Sstevel@tonic-gate } 6600*0Sstevel@tonic-gate 6601*0Sstevel@tonic-gate /* 6602*0Sstevel@tonic-gate * Get page frame. It's ok if the page is 6603*0Sstevel@tonic-gate * not available when we're unlocking, as this 6604*0Sstevel@tonic-gate * may simply mean that a page we locked got 6605*0Sstevel@tonic-gate * truncated out of existence after we locked it. 6606*0Sstevel@tonic-gate * 6607*0Sstevel@tonic-gate * Invoke VOP_GETPAGE() to obtain the page struct 6608*0Sstevel@tonic-gate * since we may need to read it from disk if its 6609*0Sstevel@tonic-gate * been paged out. 6610*0Sstevel@tonic-gate */ 6611*0Sstevel@tonic-gate if (op != MC_LOCK) 6612*0Sstevel@tonic-gate pp = page_lookup(vp, off, SE_SHARED); 6613*0Sstevel@tonic-gate else { 6614*0Sstevel@tonic-gate page_t *pl[1 + 1]; 6615*0Sstevel@tonic-gate int error; 6616*0Sstevel@tonic-gate 6617*0Sstevel@tonic-gate ASSERT(vp != NULL); 6618*0Sstevel@tonic-gate 6619*0Sstevel@tonic-gate error = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE, 6620*0Sstevel@tonic-gate (uint_t *)NULL, pl, PAGESIZE, seg, addr, 6621*0Sstevel@tonic-gate S_OTHER, svd->cred); 6622*0Sstevel@tonic-gate 6623*0Sstevel@tonic-gate /* 6624*0Sstevel@tonic-gate * If the error is EDEADLK then we must bounce 6625*0Sstevel@tonic-gate * up and drop all vm subsystem locks and then 6626*0Sstevel@tonic-gate * retry the operation later 6627*0Sstevel@tonic-gate * This behavior is a temporary measure because 6628*0Sstevel@tonic-gate * ufs/sds logging is badly designed and will 6629*0Sstevel@tonic-gate * deadlock if we don't allow this bounce to 6630*0Sstevel@tonic-gate * happen. The real solution is to re-design 6631*0Sstevel@tonic-gate * the logging code to work properly. See bug 6632*0Sstevel@tonic-gate * 4125102 for details of the problem. 6633*0Sstevel@tonic-gate */ 6634*0Sstevel@tonic-gate if (error == EDEADLK) { 6635*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6636*0Sstevel@tonic-gate return (error); 6637*0Sstevel@tonic-gate } 6638*0Sstevel@tonic-gate /* 6639*0Sstevel@tonic-gate * Quit if we fail to fault in the page. Treat 6640*0Sstevel@tonic-gate * the failure as an error, unless the addr 6641*0Sstevel@tonic-gate * is mapped beyond the end of a file. 
6642*0Sstevel@tonic-gate */ 6643*0Sstevel@tonic-gate if (error && svd->vp) { 6644*0Sstevel@tonic-gate va.va_mask = AT_SIZE; 6645*0Sstevel@tonic-gate if (VOP_GETATTR(svd->vp, &va, 0, 6646*0Sstevel@tonic-gate svd->cred) != 0) { 6647*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, 6648*0Sstevel@tonic-gate &svd->lock); 6649*0Sstevel@tonic-gate return (EIO); 6650*0Sstevel@tonic-gate } 6651*0Sstevel@tonic-gate if (btopr(va.va_size) >= 6652*0Sstevel@tonic-gate btopr(off + 1)) { 6653*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, 6654*0Sstevel@tonic-gate &svd->lock); 6655*0Sstevel@tonic-gate return (EIO); 6656*0Sstevel@tonic-gate } 6657*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6658*0Sstevel@tonic-gate return (0); 6659*0Sstevel@tonic-gate } else if (error) { 6660*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6661*0Sstevel@tonic-gate return (EIO); 6662*0Sstevel@tonic-gate } 6663*0Sstevel@tonic-gate pp = pl[0]; 6664*0Sstevel@tonic-gate ASSERT(pp != NULL); 6665*0Sstevel@tonic-gate } 6666*0Sstevel@tonic-gate 6667*0Sstevel@tonic-gate /* 6668*0Sstevel@tonic-gate * See Statement at the beginning of this routine. 6669*0Sstevel@tonic-gate * 6670*0Sstevel@tonic-gate * claim is always set if MAP_PRIVATE and PROT_WRITE 6671*0Sstevel@tonic-gate * irrespective of following factors: 6672*0Sstevel@tonic-gate * 6673*0Sstevel@tonic-gate * (1) anon slots are populated or not 6674*0Sstevel@tonic-gate * (2) cow is broken or not 6675*0Sstevel@tonic-gate * (3) refcnt on ap is 1 or greater than 1 6676*0Sstevel@tonic-gate * 6677*0Sstevel@tonic-gate * See 4140683 for details 6678*0Sstevel@tonic-gate */ 6679*0Sstevel@tonic-gate claim = ((VPP_PROT(vpp) & PROT_WRITE) && 6680*0Sstevel@tonic-gate (svd->type == MAP_PRIVATE)); 6681*0Sstevel@tonic-gate 6682*0Sstevel@tonic-gate /* 6683*0Sstevel@tonic-gate * Perform page-level operation appropriate to 6684*0Sstevel@tonic-gate * operation. If locking, undo the SOFTLOCK 6685*0Sstevel@tonic-gate * performed to bring the page into memory 6686*0Sstevel@tonic-gate * after setting the lock. If unlocking, 6687*0Sstevel@tonic-gate * and no page was found, account for the claim 6688*0Sstevel@tonic-gate * separately. 6689*0Sstevel@tonic-gate */ 6690*0Sstevel@tonic-gate if (op == MC_LOCK) { 6691*0Sstevel@tonic-gate int ret = 1; /* Assume success */ 6692*0Sstevel@tonic-gate 6693*0Sstevel@tonic-gate /* 6694*0Sstevel@tonic-gate * Make sure another thread didn't lock 6695*0Sstevel@tonic-gate * the page after we released the segment 6696*0Sstevel@tonic-gate * lock. 
6697*0Sstevel@tonic-gate */ 6698*0Sstevel@tonic-gate if ((attr == 0 || VPP_PROT(vpp) == pageprot) && 6699*0Sstevel@tonic-gate !VPP_ISPPLOCK(vpp)) { 6700*0Sstevel@tonic-gate ret = page_pp_lock(pp, claim, 0); 6701*0Sstevel@tonic-gate if (ret != 0) { 6702*0Sstevel@tonic-gate VPP_SETPPLOCK(vpp); 6703*0Sstevel@tonic-gate if (lockmap != (ulong_t *)NULL) 6704*0Sstevel@tonic-gate BT_SET(lockmap, pos); 6705*0Sstevel@tonic-gate } 6706*0Sstevel@tonic-gate } 6707*0Sstevel@tonic-gate page_unlock(pp); 6708*0Sstevel@tonic-gate if (ret == 0) { 6709*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6710*0Sstevel@tonic-gate return (EAGAIN); 6711*0Sstevel@tonic-gate } 6712*0Sstevel@tonic-gate } else { 6713*0Sstevel@tonic-gate if (pp != NULL) { 6714*0Sstevel@tonic-gate if ((attr == 0 || 6715*0Sstevel@tonic-gate VPP_PROT(vpp) == pageprot) && 6716*0Sstevel@tonic-gate VPP_ISPPLOCK(vpp)) 6717*0Sstevel@tonic-gate page_pp_unlock(pp, claim, 0); 6718*0Sstevel@tonic-gate page_unlock(pp); 6719*0Sstevel@tonic-gate } 6720*0Sstevel@tonic-gate VPP_CLRPPLOCK(vpp); 6721*0Sstevel@tonic-gate } 6722*0Sstevel@tonic-gate } 6723*0Sstevel@tonic-gate } 6724*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6725*0Sstevel@tonic-gate return (0); 6726*0Sstevel@tonic-gate } 6727*0Sstevel@tonic-gate 6728*0Sstevel@tonic-gate /* 6729*0Sstevel@tonic-gate * Set advice from user for specified pages 6730*0Sstevel@tonic-gate * There are 5 types of advice: 6731*0Sstevel@tonic-gate * MADV_NORMAL - Normal (default) behavior (whatever that is) 6732*0Sstevel@tonic-gate * MADV_RANDOM - Random page references 6733*0Sstevel@tonic-gate * do not allow readahead or 'klustering' 6734*0Sstevel@tonic-gate * MADV_SEQUENTIAL - Sequential page references 6735*0Sstevel@tonic-gate * Pages previous to the one currently being 6736*0Sstevel@tonic-gate * accessed (determined by fault) are 'not needed' 6737*0Sstevel@tonic-gate * and are freed immediately 6738*0Sstevel@tonic-gate * MADV_WILLNEED - Pages are likely to be used (fault ahead in mctl) 6739*0Sstevel@tonic-gate * MADV_DONTNEED - Pages are not needed (synced out in mctl) 6740*0Sstevel@tonic-gate * MADV_FREE - Contents can be discarded 6741*0Sstevel@tonic-gate * MADV_ACCESS_DEFAULT- Default access 6742*0Sstevel@tonic-gate * MADV_ACCESS_LWP - Next LWP will access heavily 6743*0Sstevel@tonic-gate * MADV_ACCESS_MANY- Many LWPs or processes will access heavily 6744*0Sstevel@tonic-gate */ 6745*0Sstevel@tonic-gate static int 6746*0Sstevel@tonic-gate segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) 6747*0Sstevel@tonic-gate { 6748*0Sstevel@tonic-gate struct segvn_data *svd = (struct segvn_data *)seg->s_data; 6749*0Sstevel@tonic-gate size_t page; 6750*0Sstevel@tonic-gate int err = 0; 6751*0Sstevel@tonic-gate int already_set; 6752*0Sstevel@tonic-gate struct anon_map *amp; 6753*0Sstevel@tonic-gate ulong_t anon_index; 6754*0Sstevel@tonic-gate struct seg *next; 6755*0Sstevel@tonic-gate lgrp_mem_policy_t policy; 6756*0Sstevel@tonic-gate struct seg *prev; 6757*0Sstevel@tonic-gate struct vnode *vp; 6758*0Sstevel@tonic-gate 6759*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 6760*0Sstevel@tonic-gate 6761*0Sstevel@tonic-gate /* 6762*0Sstevel@tonic-gate * In case of MADV_FREE, we won't be modifying any segment private 6763*0Sstevel@tonic-gate * data structures; so, we only need to grab READER's lock 6764*0Sstevel@tonic-gate */ 6765*0Sstevel@tonic-gate if (behav != MADV_FREE) 6766*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); 
6767*0Sstevel@tonic-gate else 6768*0Sstevel@tonic-gate SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER); 6769*0Sstevel@tonic-gate 6770*0Sstevel@tonic-gate /* 6771*0Sstevel@tonic-gate * Large pages are assumed to be only turned on when accesses to the 6772*0Sstevel@tonic-gate * segment's address range have spatial and temporal locality. That 6773*0Sstevel@tonic-gate * justifies ignoring MADV_SEQUENTIAL for large page segments. 6774*0Sstevel@tonic-gate * Also, ignore advice affecting lgroup memory allocation 6775*0Sstevel@tonic-gate * if don't need to do lgroup optimizations on this system 6776*0Sstevel@tonic-gate */ 6777*0Sstevel@tonic-gate 6778*0Sstevel@tonic-gate if ((behav == MADV_SEQUENTIAL && seg->s_szc != 0) || 6779*0Sstevel@tonic-gate (!lgrp_optimizations() && (behav == MADV_ACCESS_DEFAULT || 6780*0Sstevel@tonic-gate behav == MADV_ACCESS_LWP || behav == MADV_ACCESS_MANY))) { 6781*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6782*0Sstevel@tonic-gate return (0); 6783*0Sstevel@tonic-gate } 6784*0Sstevel@tonic-gate 6785*0Sstevel@tonic-gate if (behav == MADV_SEQUENTIAL || behav == MADV_ACCESS_DEFAULT || 6786*0Sstevel@tonic-gate behav == MADV_ACCESS_LWP || behav == MADV_ACCESS_MANY) { 6787*0Sstevel@tonic-gate /* 6788*0Sstevel@tonic-gate * Since we are going to unload hat mappings 6789*0Sstevel@tonic-gate * we first have to flush the cache. Otherwise 6790*0Sstevel@tonic-gate * this might lead to system panic if another 6791*0Sstevel@tonic-gate * thread is doing physio on the range whose 6792*0Sstevel@tonic-gate * mappings are unloaded by madvise(3C). 6793*0Sstevel@tonic-gate */ 6794*0Sstevel@tonic-gate if (svd->softlockcnt > 0) { 6795*0Sstevel@tonic-gate /* 6796*0Sstevel@tonic-gate * Since we do have the segvn writers lock 6797*0Sstevel@tonic-gate * nobody can fill the cache with entries 6798*0Sstevel@tonic-gate * belonging to this seg during the purge. 6799*0Sstevel@tonic-gate * The flush either succeeds or we still 6800*0Sstevel@tonic-gate * have pending I/Os. In the later case, 6801*0Sstevel@tonic-gate * madvise(3C) fails. 6802*0Sstevel@tonic-gate */ 6803*0Sstevel@tonic-gate segvn_purge(seg); 6804*0Sstevel@tonic-gate if (svd->softlockcnt > 0) { 6805*0Sstevel@tonic-gate /* 6806*0Sstevel@tonic-gate * Since madvise(3C) is advisory and 6807*0Sstevel@tonic-gate * it's not part of UNIX98, madvise(3C) 6808*0Sstevel@tonic-gate * failure here doesn't cause any hardship. 6809*0Sstevel@tonic-gate * Note that we don't block in "as" layer. 6810*0Sstevel@tonic-gate */ 6811*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6812*0Sstevel@tonic-gate return (EAGAIN); 6813*0Sstevel@tonic-gate } 6814*0Sstevel@tonic-gate } 6815*0Sstevel@tonic-gate } 6816*0Sstevel@tonic-gate 6817*0Sstevel@tonic-gate amp = svd->amp; 6818*0Sstevel@tonic-gate vp = svd->vp; 6819*0Sstevel@tonic-gate if (behav == MADV_FREE) { 6820*0Sstevel@tonic-gate /* 6821*0Sstevel@tonic-gate * MADV_FREE is not supported for segments with 6822*0Sstevel@tonic-gate * underlying object; if anonmap is NULL, anon slots 6823*0Sstevel@tonic-gate * are not yet populated and there is nothing for 6824*0Sstevel@tonic-gate * us to do. As MADV_FREE is advisory, we don't 6825*0Sstevel@tonic-gate * return error in either case. 
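		 *
		 * An illustrative userland use (anonymous memory only, per the
		 * note above; addr and len assumed to describe such a mapping):
		 *
		 *	(void) madvise(addr, len, MADV_FREE);
		 *
		 * After the call the kernel is free to discard the pages, so
		 * their contents must be treated as undefined until rewritten.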
6826*0Sstevel@tonic-gate */ 6827*0Sstevel@tonic-gate if (vp || amp == NULL) { 6828*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6829*0Sstevel@tonic-gate return (0); 6830*0Sstevel@tonic-gate } 6831*0Sstevel@tonic-gate 6832*0Sstevel@tonic-gate page = seg_page(seg, addr); 6833*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 6834*0Sstevel@tonic-gate anon_disclaim(amp, svd->anon_index + page, len, 0); 6835*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6836*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6837*0Sstevel@tonic-gate return (0); 6838*0Sstevel@tonic-gate } 6839*0Sstevel@tonic-gate 6840*0Sstevel@tonic-gate /* 6841*0Sstevel@tonic-gate * If advice is to be applied to entire segment, 6842*0Sstevel@tonic-gate * use advice field in seg_data structure 6843*0Sstevel@tonic-gate * otherwise use appropriate vpage entry. 6844*0Sstevel@tonic-gate */ 6845*0Sstevel@tonic-gate if ((addr == seg->s_base) && (len == seg->s_size)) { 6846*0Sstevel@tonic-gate switch (behav) { 6847*0Sstevel@tonic-gate case MADV_ACCESS_LWP: 6848*0Sstevel@tonic-gate case MADV_ACCESS_MANY: 6849*0Sstevel@tonic-gate case MADV_ACCESS_DEFAULT: 6850*0Sstevel@tonic-gate /* 6851*0Sstevel@tonic-gate * Set memory allocation policy for this segment 6852*0Sstevel@tonic-gate */ 6853*0Sstevel@tonic-gate policy = lgrp_madv_to_policy(behav, len, svd->type); 6854*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) 6855*0Sstevel@tonic-gate already_set = lgrp_shm_policy_set(policy, amp, 6856*0Sstevel@tonic-gate svd->anon_index, vp, svd->offset, len); 6857*0Sstevel@tonic-gate else { 6858*0Sstevel@tonic-gate /* 6859*0Sstevel@tonic-gate * For private memory, need writers lock on 6860*0Sstevel@tonic-gate * address space because the segment may be 6861*0Sstevel@tonic-gate * split or concatenated when changing policy 6862*0Sstevel@tonic-gate */ 6863*0Sstevel@tonic-gate if (AS_READ_HELD(seg->s_as, 6864*0Sstevel@tonic-gate &seg->s_as->a_lock)) { 6865*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6866*0Sstevel@tonic-gate return (IE_RETRY); 6867*0Sstevel@tonic-gate } 6868*0Sstevel@tonic-gate 6869*0Sstevel@tonic-gate already_set = lgrp_privm_policy_set(policy, 6870*0Sstevel@tonic-gate &svd->policy_info, len); 6871*0Sstevel@tonic-gate } 6872*0Sstevel@tonic-gate 6873*0Sstevel@tonic-gate /* 6874*0Sstevel@tonic-gate * If policy set already and it shouldn't be reapplied, 6875*0Sstevel@tonic-gate * don't do anything. 6876*0Sstevel@tonic-gate */ 6877*0Sstevel@tonic-gate if (already_set && 6878*0Sstevel@tonic-gate !LGRP_MEM_POLICY_REAPPLICABLE(policy)) 6879*0Sstevel@tonic-gate break; 6880*0Sstevel@tonic-gate 6881*0Sstevel@tonic-gate /* 6882*0Sstevel@tonic-gate * Mark any existing pages in given range for 6883*0Sstevel@tonic-gate * migration 6884*0Sstevel@tonic-gate */ 6885*0Sstevel@tonic-gate page_mark_migrate(seg, addr, len, amp, svd->anon_index, 6886*0Sstevel@tonic-gate vp, svd->offset, 1); 6887*0Sstevel@tonic-gate 6888*0Sstevel@tonic-gate /* 6889*0Sstevel@tonic-gate * If same policy set already or this is a shared 6890*0Sstevel@tonic-gate * memory segment, don't need to try to concatenate 6891*0Sstevel@tonic-gate * segment with adjacent ones. 
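		 *
		 * (The policy applied here is the one requested from userland,
		 * e.g. an illustrative
		 *
		 *	(void) madvise(addr, len, MADV_ACCESS_LWP);
		 *
		 * asking that these pages be placed near the next LWP that
		 * will access them heavily.)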
6892*0Sstevel@tonic-gate */ 6893*0Sstevel@tonic-gate if (already_set || svd->type == MAP_SHARED) 6894*0Sstevel@tonic-gate break; 6895*0Sstevel@tonic-gate 6896*0Sstevel@tonic-gate /* 6897*0Sstevel@tonic-gate * Try to concatenate this segment with previous 6898*0Sstevel@tonic-gate * one and next one, since we changed policy for 6899*0Sstevel@tonic-gate * this one and it may be compatible with adjacent 6900*0Sstevel@tonic-gate * ones now. 6901*0Sstevel@tonic-gate */ 6902*0Sstevel@tonic-gate prev = AS_SEGPREV(seg->s_as, seg); 6903*0Sstevel@tonic-gate next = AS_SEGNEXT(seg->s_as, seg); 6904*0Sstevel@tonic-gate 6905*0Sstevel@tonic-gate if (next && next->s_ops == &segvn_ops && 6906*0Sstevel@tonic-gate addr + len == next->s_base) 6907*0Sstevel@tonic-gate (void) segvn_concat(seg, next, 1); 6908*0Sstevel@tonic-gate 6909*0Sstevel@tonic-gate if (prev && prev->s_ops == &segvn_ops && 6910*0Sstevel@tonic-gate addr == prev->s_base + prev->s_size) { 6911*0Sstevel@tonic-gate /* 6912*0Sstevel@tonic-gate * Drop lock for private data of current 6913*0Sstevel@tonic-gate * segment before concatenating (deleting) it 6914*0Sstevel@tonic-gate * and return IE_REATTACH to tell as_ctl() that 6915*0Sstevel@tonic-gate * current segment has changed 6916*0Sstevel@tonic-gate */ 6917*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 6918*0Sstevel@tonic-gate if (!segvn_concat(prev, seg, 1)) 6919*0Sstevel@tonic-gate err = IE_REATTACH; 6920*0Sstevel@tonic-gate 6921*0Sstevel@tonic-gate return (err); 6922*0Sstevel@tonic-gate } 6923*0Sstevel@tonic-gate break; 6924*0Sstevel@tonic-gate 6925*0Sstevel@tonic-gate case MADV_SEQUENTIAL: 6926*0Sstevel@tonic-gate /* 6927*0Sstevel@tonic-gate * unloading mapping guarantees 6928*0Sstevel@tonic-gate * detection in segvn_fault 6929*0Sstevel@tonic-gate */ 6930*0Sstevel@tonic-gate ASSERT(seg->s_szc == 0); 6931*0Sstevel@tonic-gate hat_unload(seg->s_as->a_hat, addr, len, 6932*0Sstevel@tonic-gate HAT_UNLOAD); 6933*0Sstevel@tonic-gate /* FALLTHROUGH */ 6934*0Sstevel@tonic-gate case MADV_NORMAL: 6935*0Sstevel@tonic-gate case MADV_RANDOM: 6936*0Sstevel@tonic-gate svd->advice = (uchar_t)behav; 6937*0Sstevel@tonic-gate svd->pageadvice = 0; 6938*0Sstevel@tonic-gate break; 6939*0Sstevel@tonic-gate case MADV_WILLNEED: /* handled in memcntl */ 6940*0Sstevel@tonic-gate case MADV_DONTNEED: /* handled in memcntl */ 6941*0Sstevel@tonic-gate case MADV_FREE: /* handled above */ 6942*0Sstevel@tonic-gate break; 6943*0Sstevel@tonic-gate default: 6944*0Sstevel@tonic-gate err = EINVAL; 6945*0Sstevel@tonic-gate } 6946*0Sstevel@tonic-gate } else { 6947*0Sstevel@tonic-gate caddr_t eaddr; 6948*0Sstevel@tonic-gate struct seg *new_seg; 6949*0Sstevel@tonic-gate struct segvn_data *new_svd; 6950*0Sstevel@tonic-gate u_offset_t off; 6951*0Sstevel@tonic-gate caddr_t oldeaddr; 6952*0Sstevel@tonic-gate 6953*0Sstevel@tonic-gate page = seg_page(seg, addr); 6954*0Sstevel@tonic-gate 6955*0Sstevel@tonic-gate segvn_vpage(seg); 6956*0Sstevel@tonic-gate 6957*0Sstevel@tonic-gate switch (behav) { 6958*0Sstevel@tonic-gate struct vpage *bvpp, *evpp; 6959*0Sstevel@tonic-gate 6960*0Sstevel@tonic-gate case MADV_ACCESS_LWP: 6961*0Sstevel@tonic-gate case MADV_ACCESS_MANY: 6962*0Sstevel@tonic-gate case MADV_ACCESS_DEFAULT: 6963*0Sstevel@tonic-gate /* 6964*0Sstevel@tonic-gate * Set memory allocation policy for portion of this 6965*0Sstevel@tonic-gate * segment 6966*0Sstevel@tonic-gate */ 6967*0Sstevel@tonic-gate 6968*0Sstevel@tonic-gate /* 6969*0Sstevel@tonic-gate * Align address and length of advice to page 6970*0Sstevel@tonic-gate * 
boundaries for large pages 6971*0Sstevel@tonic-gate */ 6972*0Sstevel@tonic-gate if (seg->s_szc != 0) { 6973*0Sstevel@tonic-gate size_t pgsz; 6974*0Sstevel@tonic-gate 6975*0Sstevel@tonic-gate pgsz = page_get_pagesize(seg->s_szc); 6976*0Sstevel@tonic-gate addr = (caddr_t)P2ALIGN((uintptr_t)addr, pgsz); 6977*0Sstevel@tonic-gate len = P2ROUNDUP(len, pgsz); 6978*0Sstevel@tonic-gate } 6979*0Sstevel@tonic-gate 6980*0Sstevel@tonic-gate /* 6981*0Sstevel@tonic-gate * Check to see whether policy is set already 6982*0Sstevel@tonic-gate */ 6983*0Sstevel@tonic-gate policy = lgrp_madv_to_policy(behav, len, svd->type); 6984*0Sstevel@tonic-gate 6985*0Sstevel@tonic-gate anon_index = svd->anon_index + page; 6986*0Sstevel@tonic-gate off = svd->offset + (uintptr_t)(addr - seg->s_base); 6987*0Sstevel@tonic-gate 6988*0Sstevel@tonic-gate if (svd->type == MAP_SHARED) 6989*0Sstevel@tonic-gate already_set = lgrp_shm_policy_set(policy, amp, 6990*0Sstevel@tonic-gate anon_index, vp, off, len); 6991*0Sstevel@tonic-gate else 6992*0Sstevel@tonic-gate already_set = 6993*0Sstevel@tonic-gate (policy == svd->policy_info.mem_policy); 6994*0Sstevel@tonic-gate 6995*0Sstevel@tonic-gate /* 6996*0Sstevel@tonic-gate * If policy set already and it shouldn't be reapplied, 6997*0Sstevel@tonic-gate * don't do anything. 6998*0Sstevel@tonic-gate */ 6999*0Sstevel@tonic-gate if (already_set && 7000*0Sstevel@tonic-gate !LGRP_MEM_POLICY_REAPPLICABLE(policy)) 7001*0Sstevel@tonic-gate break; 7002*0Sstevel@tonic-gate 7003*0Sstevel@tonic-gate /* 7004*0Sstevel@tonic-gate * For private memory, need writers lock on 7005*0Sstevel@tonic-gate * address space because the segment may be 7006*0Sstevel@tonic-gate * split or concatenated when changing policy 7007*0Sstevel@tonic-gate */ 7008*0Sstevel@tonic-gate if (svd->type == MAP_PRIVATE && 7009*0Sstevel@tonic-gate AS_READ_HELD(seg->s_as, &seg->s_as->a_lock)) { 7010*0Sstevel@tonic-gate SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 7011*0Sstevel@tonic-gate return (IE_RETRY); 7012*0Sstevel@tonic-gate } 7013*0Sstevel@tonic-gate 7014*0Sstevel@tonic-gate /* 7015*0Sstevel@tonic-gate * Mark any existing pages in given range for 7016*0Sstevel@tonic-gate * migration 7017*0Sstevel@tonic-gate */ 7018*0Sstevel@tonic-gate page_mark_migrate(seg, addr, len, amp, svd->anon_index, 7019*0Sstevel@tonic-gate vp, svd->offset, 1); 7020*0Sstevel@tonic-gate 7021*0Sstevel@tonic-gate /* 7022*0Sstevel@tonic-gate * Don't need to try to split or concatenate 7023*0Sstevel@tonic-gate * segments, since policy is same or this is a shared 7024*0Sstevel@tonic-gate * memory segment 7025*0Sstevel@tonic-gate */ 7026*0Sstevel@tonic-gate if (already_set || svd->type == MAP_SHARED) 7027*0Sstevel@tonic-gate break; 7028*0Sstevel@tonic-gate 7029*0Sstevel@tonic-gate /* 7030*0Sstevel@tonic-gate * Split off new segment if advice only applies to a 7031*0Sstevel@tonic-gate * portion of existing segment starting in middle 7032*0Sstevel@tonic-gate */ 7033*0Sstevel@tonic-gate new_seg = NULL; 7034*0Sstevel@tonic-gate eaddr = addr + len; 7035*0Sstevel@tonic-gate oldeaddr = seg->s_base + seg->s_size; 7036*0Sstevel@tonic-gate if (addr > seg->s_base) { 7037*0Sstevel@tonic-gate /* 7038*0Sstevel@tonic-gate * Must flush I/O page cache 7039*0Sstevel@tonic-gate * before splitting segment 7040*0Sstevel@tonic-gate */ 7041*0Sstevel@tonic-gate if (svd->softlockcnt > 0) 7042*0Sstevel@tonic-gate segvn_purge(seg); 7043*0Sstevel@tonic-gate 7044*0Sstevel@tonic-gate /* 7045*0Sstevel@tonic-gate * Split segment and return IE_REATTACH to tell 7046*0Sstevel@tonic-gate * as_ctl() that 
current segment changed 7047*0Sstevel@tonic-gate */ 7048*0Sstevel@tonic-gate new_seg = segvn_split_seg(seg, addr); 7049*0Sstevel@tonic-gate new_svd = (struct segvn_data *)new_seg->s_data; 7050*0Sstevel@tonic-gate err = IE_REATTACH; 7051*0Sstevel@tonic-gate 7052*0Sstevel@tonic-gate /* 7053*0Sstevel@tonic-gate * If new segment ends where old one 7054*0Sstevel@tonic-gate * did, try to concatenate the new 7055*0Sstevel@tonic-gate * segment with next one. 7056*0Sstevel@tonic-gate */ 7057*0Sstevel@tonic-gate if (eaddr == oldeaddr) { 7058*0Sstevel@tonic-gate /* 7059*0Sstevel@tonic-gate * Set policy for new segment 7060*0Sstevel@tonic-gate */ 7061*0Sstevel@tonic-gate (void) lgrp_privm_policy_set(policy, 7062*0Sstevel@tonic-gate &new_svd->policy_info, 7063*0Sstevel@tonic-gate new_seg->s_size); 7064*0Sstevel@tonic-gate 7065*0Sstevel@tonic-gate next = AS_SEGNEXT(new_seg->s_as, 7066*0Sstevel@tonic-gate new_seg); 7067*0Sstevel@tonic-gate 7068*0Sstevel@tonic-gate if (next && 7069*0Sstevel@tonic-gate next->s_ops == &segvn_ops && 7070*0Sstevel@tonic-gate eaddr == next->s_base) 7071*0Sstevel@tonic-gate (void) segvn_concat(new_seg, 7072*0Sstevel@tonic-gate next, 1); 7073*0Sstevel@tonic-gate } 7074*0Sstevel@tonic-gate } 7075*0Sstevel@tonic-gate 7076*0Sstevel@tonic-gate /* 7077*0Sstevel@tonic-gate * Split off end of existing segment if advice only 7078*0Sstevel@tonic-gate * applies to a portion of segment ending before 7079*0Sstevel@tonic-gate * end of the existing segment 7080*0Sstevel@tonic-gate */ 7081*0Sstevel@tonic-gate if (eaddr < oldeaddr) { 7082*0Sstevel@tonic-gate /* 7083*0Sstevel@tonic-gate * Must flush I/O page cache 7084*0Sstevel@tonic-gate * before splitting segment 7085*0Sstevel@tonic-gate */ 7086*0Sstevel@tonic-gate if (svd->softlockcnt > 0) 7087*0Sstevel@tonic-gate segvn_purge(seg); 7088*0Sstevel@tonic-gate 7089*0Sstevel@tonic-gate /* 7090*0Sstevel@tonic-gate * If beginning of old segment was already 7091*0Sstevel@tonic-gate * split off, use new segment to split end off 7092*0Sstevel@tonic-gate * from. 7093*0Sstevel@tonic-gate */ 7094*0Sstevel@tonic-gate if (new_seg != NULL && new_seg != seg) { 7095*0Sstevel@tonic-gate /* 7096*0Sstevel@tonic-gate * Split segment 7097*0Sstevel@tonic-gate */ 7098*0Sstevel@tonic-gate (void) segvn_split_seg(new_seg, eaddr); 7099*0Sstevel@tonic-gate 7100*0Sstevel@tonic-gate /* 7101*0Sstevel@tonic-gate * Set policy for new segment 7102*0Sstevel@tonic-gate */ 7103*0Sstevel@tonic-gate (void) lgrp_privm_policy_set(policy, 7104*0Sstevel@tonic-gate &new_svd->policy_info, 7105*0Sstevel@tonic-gate new_seg->s_size); 7106*0Sstevel@tonic-gate } else { 7107*0Sstevel@tonic-gate /* 7108*0Sstevel@tonic-gate * Split segment and return IE_REATTACH 7109*0Sstevel@tonic-gate * to tell as_ctl() that current 7110*0Sstevel@tonic-gate * segment changed 7111*0Sstevel@tonic-gate */ 7112*0Sstevel@tonic-gate (void) segvn_split_seg(seg, eaddr); 7113*0Sstevel@tonic-gate err = IE_REATTACH; 7114*0Sstevel@tonic-gate 7115*0Sstevel@tonic-gate (void) lgrp_privm_policy_set(policy, 7116*0Sstevel@tonic-gate &svd->policy_info, seg->s_size); 7117*0Sstevel@tonic-gate 7118*0Sstevel@tonic-gate /* 7119*0Sstevel@tonic-gate * If new segment starts where old one 7120*0Sstevel@tonic-gate * did, try to concatenate it with 7121*0Sstevel@tonic-gate * previous segment. 
                     */
                    if (addr == seg->s_base) {
                        prev = AS_SEGPREV(seg->s_as, seg);

                        /*
                         * Drop lock for private data
                         * of current segment before
                         * concatenating (deleting) it
                         */
                        if (prev &&
                            prev->s_ops == &segvn_ops &&
                            addr == prev->s_base + prev->s_size) {
                            SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
                            (void) segvn_concat(prev, seg, 1);
                            return (err);
                        }
                    }
                }
            }
            break;
        case MADV_SEQUENTIAL:
            ASSERT(seg->s_szc == 0);
            hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
            /* FALLTHROUGH */
        case MADV_NORMAL:
        case MADV_RANDOM:
            bvpp = &svd->vpage[page];
            evpp = &svd->vpage[page + (len >> PAGESHIFT)];
            for (; bvpp < evpp; bvpp++)
                VPP_SETADVICE(bvpp, behav);
            svd->advice = MADV_NORMAL;
            break;
        case MADV_WILLNEED:     /* handled in memcntl */
        case MADV_DONTNEED:     /* handled in memcntl */
        case MADV_FREE:         /* handled above */
            break;
        default:
            err = EINVAL;
        }
    }
    SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
    return (err);
}
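
/*
 * Example (illustrative, not from the original source): the MADV_ACCESS_*
 * cases above are normally driven from user level via madvise(3C), e.g.
 *
 *      (void) madvise(addr, len, MADV_ACCESS_LWP);
 *
 * which reaches segvn_advise() through as_ctl().  as_ctl() is expected to
 * retry the operation (taking the address space lock as writer) when
 * IE_RETRY is returned, and to re-look up the segment list when
 * IE_REATTACH is returned after a split or concatenation.
 */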

/*
 * Create a vpage structure for this seg.
 */
static void
segvn_vpage(struct seg *seg)
{
    struct segvn_data *svd = (struct segvn_data *)seg->s_data;
    struct vpage *vp, *evp;

    ASSERT(SEGVN_WRITE_HELD(seg->s_as, &svd->lock));

    /*
     * If no vpage structure exists, allocate one.  Copy the protections
     * and the advice from the segment itself to the individual pages.
     */
    if (svd->vpage == NULL) {
        svd->pageprot = 1;
        svd->pageadvice = 1;
        svd->vpage = kmem_zalloc(seg_pages(seg) * sizeof (struct vpage),
            KM_SLEEP);
        evp = &svd->vpage[seg_page(seg, seg->s_base + seg->s_size)];
        for (vp = svd->vpage; vp < evp; vp++) {
            VPP_SETPROT(vp, svd->prot);
            VPP_SETADVICE(vp, svd->advice);
        }
    }
}

/*
 * Dump the pages belonging to this segvn segment.
 */
static void
segvn_dump(struct seg *seg)
{
    struct segvn_data *svd;
    page_t *pp;
    struct anon_map *amp;
    ulong_t anon_index;
    struct vnode *vp;
    u_offset_t off, offset;
    pfn_t pfn;
    pgcnt_t page, npages;
    caddr_t addr;

    npages = seg_pages(seg);
    svd = (struct segvn_data *)seg->s_data;
    vp = svd->vp;
    off = offset = svd->offset;
    addr = seg->s_base;

    if ((amp = svd->amp) != NULL) {
        anon_index = svd->anon_index;
        ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
    }

    for (page = 0; page < npages; page++, offset += PAGESIZE) {
        struct anon *ap;
        int we_own_it = 0;

        if (amp && (ap = anon_get_ptr(svd->amp->ahp, anon_index++))) {
            swap_xlate_nopanic(ap, &vp, &off);
        } else {
            vp = svd->vp;
            off = offset;
        }

        /*
         * If pp == NULL, the page either does not exist
         * or is exclusively locked.  So determine if it
         * exists before searching for it.
         */
        if ((pp = page_lookup_nowait(vp, off, SE_SHARED)))
            we_own_it = 1;
        else
            pp = page_exists(vp, off);

        if (pp) {
            pfn = page_pptonum(pp);
            dump_addpage(seg->s_as, addr, pfn);
            if (we_own_it)
                page_unlock(pp);
        }
        addr += PAGESIZE;
        dump_timeleft = dump_timeout;
    }

    if (amp != NULL)
        ANON_LOCK_EXIT(&amp->a_rwlock);
}
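
/*
 * Example (illustrative, not from the original source): segvn_pagelock()
 * below is the segment driver side of as_pagelock()/as_pageunlock(), which
 * direct I/O paths use roughly as follows:
 *
 *      struct page **pplist;
 *
 *      if (as_pagelock(as, &pplist, addr, len, S_WRITE) == 0) {
 *              ... do the transfer against the locked pages ...
 *              as_pageunlock(as, pplist, addr, len, S_WRITE);
 *      }
 *
 * The "shadow list" is the pplist array of page_t pointers handed back to
 * the caller and cached in seg_pcache.
 */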

/*
 * lock/unlock anon pages over a given range.  Return shadow list.
 */
static int
segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
    enum lock_type type, enum seg_rw rw)
{
    struct segvn_data *svd = (struct segvn_data *)seg->s_data;
    size_t np, adjustpages = 0, npages = (len >> PAGESHIFT);
    ulong_t anon_index;
    uint_t protchk;
    uint_t error;
    struct anon_map *amp;
    struct page **pplist, **pl, *pp;
    caddr_t a;
    size_t page;
    caddr_t lpgaddr, lpgeaddr;

    TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_START,
        "segvn_pagelock: start seg %p addr %p", seg, addr);

    ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
    if (seg->s_szc != 0 && (type == L_PAGELOCK || type == L_PAGEUNLOCK)) {
        /*
         * We are adjusting the pagelock region to the large page size
         * boundary because the unlocked part of a large page cannot
         * be freed anyway unless all constituent pages of a large
         * page are locked. Therefore this adjustment allows us to
         * decrement availrmem by the right value (note we don't want
         * to just decrement availrmem by the large page size without
         * adjusting addr and len because then we may end up
         * decrementing availrmem by large page size for every
         * constituent page locked by a new as_pagelock call).
         * The as_pageunlock caller must always match the as_pagelock
         * call's addr and len.
         *
         * Note the segment's page size cannot change while we are
         * holding the as lock, and it cannot change while softlockcnt
         * is not 0. This allows us to correctly recalculate the large
         * page size region for the matching pageunlock/reclaim call.
         *
         * For pageunlock, *ppp points to the pointer of the page_t
         * that corresponds to the real, unadjusted start address.
         * Similarly, for pagelock *ppp must point to the pointer of
         * the page_t that corresponds to the real, unadjusted start
         * address.
         */
        size_t pgsz = page_get_pagesize(seg->s_szc);
        CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
        adjustpages = ((uintptr_t)(addr - lpgaddr)) >> PAGESHIFT;
    }
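
    /*
     * For example, with 8K base pages and a 4M segment page size, a
     * request for [addr, addr + 8K) is widened above to the enclosing
     * 4M-aligned region [lpgaddr, lpgeaddr); adjustpages records how many
     * base pages addr was moved back so *ppp can still be made to point
     * at the page_t for the original, unadjusted address.
     */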

    if (type == L_PAGEUNLOCK) {
        /*
         * update hat ref bits for /proc. We need to make sure
         * that threads tracing the ref and mod bits of the
         * address space get the right data.
         * Note: page ref and mod bits are updated at reclaim time
         */
        if (seg->s_as->a_vbits) {
            for (a = addr; a < addr + len; a += PAGESIZE) {
                if (rw == S_WRITE) {
                    hat_setstat(seg->s_as, a,
                        PAGESIZE, P_REF | P_MOD);
                } else {
                    hat_setstat(seg->s_as, a,
                        PAGESIZE, P_REF);
                }
            }
        }
        SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
        if (seg->s_szc != 0) {
            VM_STAT_ADD(segvnvmstats.pagelock[0]);
            seg_pinactive(seg, lpgaddr, lpgeaddr - lpgaddr,
                *ppp - adjustpages, rw, segvn_reclaim);
        } else {
            seg_pinactive(seg, addr, len, *ppp, rw, segvn_reclaim);
        }
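
        /*
         * seg_pinactive() hands the shadow list back to seg_pcache; the
         * pages themselves are unlocked by segvn_reclaim(), either
         * immediately, from the pcache async thread, or synchronously via
         * segvn_purge() below.
         */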

        /*
         * If someone is blocked while unmapping, we purge
         * segment page cache and thus reclaim pplist synchronously
         * without waiting for seg_pasync_thread. This speeds up
         * unmapping in cases where munmap(2) is called, while
         * raw async i/o is still in progress or where a thread
         * exits on data fault in a multithreaded application.
         */
        if (AS_ISUNMAPWAIT(seg->s_as) && (svd->softlockcnt > 0)) {
            /*
             * Even if we grab segvn WRITER's lock or segp_slock
             * here, there might be another thread which could've
             * successfully performed lookup/insert just before
             * we acquired the lock here.  So, grabbing either
             * lock here is of not much use.  Until we devise
             * a strategy at upper layers to solve the
             * synchronization issues completely, we expect
             * applications to handle this appropriately.
             */
            segvn_purge(seg);
        }
        SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
        TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_UNLOCK_END,
            "segvn_pagelock: unlock seg %p addr %p", seg, addr);
        return (0);
    } else if (type == L_PAGERECLAIM) {
        VM_STAT_COND_ADD(seg->s_szc != 0, segvnvmstats.pagelock[1]);
        SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
        (void) segvn_reclaim(seg, addr, len, *ppp, rw);
        SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
        TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_UNLOCK_END,
            "segvn_pagelock: reclaim seg %p addr %p", seg, addr);
        return (0);
    }

    if (seg->s_szc != 0) {
        VM_STAT_ADD(segvnvmstats.pagelock[2]);
        addr = lpgaddr;
        len = lpgeaddr - lpgaddr;
        npages = (len >> PAGESHIFT);
    }

    /*
     * For now we only support pagelock to anon memory. We would have to
     * check protections for vnode objects and call into the vnode driver;
     * that is too much for a fast path. Let the fault entry point handle
     * it.
     */
    if (svd->vp != NULL) {
        TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_MISS_END,
            "segvn_pagelock: mapped vnode seg %p addr %p", seg, addr);
        *ppp = NULL;
        return (ENOTSUP);
    }
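
    /*
     * ENOTSUP tells the as_pagelock() caller to fall back to its slower
     * path, which soft-locks the pages through the fault entry point
     * instead of using this fast path.
     */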

    /*
     * If the anonmap is not yet created, let the fault entry point
     * populate it with anon ptrs.
     */
    if ((amp = svd->amp) == NULL) {
        TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_MISS_END,
            "segvn_pagelock: anonmap null seg %p addr %p", seg, addr);
        *ppp = NULL;
        return (EFAULT);
    }

    SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);

    /*
     * We acquire segp_slock to prevent duplicate entries
     * in seg_pcache.
     */
    mutex_enter(&svd->segp_slock);

    /*
     * Try to find the pages in the segment page cache.
     */
    pplist = seg_plookup(seg, addr, len, rw);
    if (pplist != NULL) {
        mutex_exit(&svd->segp_slock);
        SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
        *ppp = pplist + adjustpages;
        TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_HIT_END,
            "segvn_pagelock: cache hit seg %p addr %p", seg, addr);
        return (0);
    }

    if (rw == S_READ) {
        protchk = PROT_READ;
    } else {
        protchk = PROT_WRITE;
    }

    if (svd->pageprot == 0) {
        if ((svd->prot & protchk) == 0) {
            mutex_exit(&svd->segp_slock);
            error = EFAULT;
            goto out;
        }
    } else {
        /*
         * Check page protections.
         */
        for (a = addr; a < addr + len; a += PAGESIZE) {
            struct vpage *vp;

            vp = &svd->vpage[seg_page(seg, a)];
            if ((VPP_PROT(vp) & protchk) == 0) {
                mutex_exit(&svd->segp_slock);
                error = EFAULT;
                goto out;
            }
        }
    }

    mutex_enter(&freemem_lock);
    if (availrmem < tune.t_minarmem + npages) {
        mutex_exit(&freemem_lock);
        mutex_exit(&svd->segp_slock);
        error = ENOMEM;
        goto out;
    } else {
        svd->softlockcnt += npages;
        availrmem -= npages;
        segvn_pages_locked += npages;
    }
    mutex_exit(&freemem_lock);

    pplist = kmem_alloc(sizeof (page_t *) * npages, KM_SLEEP);
    pl = pplist;
    *ppp = pplist + adjustpages;

    page = seg_page(seg, addr);
    anon_index = svd->anon_index + page;

    ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
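    /*
     * Walk the anon slots covering the range and build the shadow list;
     * bail out to the slow path if a slot is empty, the anon page may be
     * COW-shared (an_refcnt != 1), or its page cannot be share-locked
     * without sleeping.
     */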
    for (a = addr; a < addr + len; a += PAGESIZE, anon_index++) {
        struct anon *ap;
        struct vnode *vp;
        u_offset_t off;
        anon_sync_obj_t cookie;

        anon_array_enter(amp, anon_index, &cookie);
        ap = anon_get_ptr(amp->ahp, anon_index);
        if (ap == NULL) {
            anon_array_exit(&cookie);
            break;
        } else {
            /*
             * We must never use seg_pcache for COW pages
             * because we might end up with the original page still
             * lying in seg_pcache even after the private page is
             * created. This leads to data corruption as
             * aio_write refers to the page still in cache
             * while all other accesses refer to the private
             * page.
             */
            if (ap->an_refcnt != 1) {
                anon_array_exit(&cookie);
                break;
            }
        }
        swap_xlate(ap, &vp, &off);
        anon_array_exit(&cookie);

        pp = page_lookup_nowait(vp, off, SE_SHARED);
        if (pp == NULL) {
            break;
        }
        *pplist++ = pp;
    }
    ANON_LOCK_EXIT(&amp->a_rwlock);

    if (a >= addr + len) {
        (void) seg_pinsert(seg, addr, len, pl, rw, SEGP_ASYNC_FLUSH,
            segvn_reclaim);
        mutex_exit(&svd->segp_slock);
        SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
        TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_FILL_END,
            "segvn_pagelock: cache fill seg %p addr %p", seg, addr);
        return (0);
    }

    mutex_exit(&svd->segp_slock);
    error = EFAULT;
    pplist = pl;
    np = ((uintptr_t)(a - addr)) >> PAGESHIFT;
    while (np > (uint_t)0) {
        page_unlock(*pplist);
        np--;
        pplist++;
    }
    kmem_free(pl, sizeof (page_t *) * npages);
    mutex_enter(&freemem_lock);
    svd->softlockcnt -= npages;
    availrmem += npages;
    segvn_pages_locked -= npages;
    mutex_exit(&freemem_lock);
    if (svd->softlockcnt <= 0) {
        if (AS_ISUNMAPWAIT(seg->s_as)) {
            mutex_enter(&seg->s_as->a_contents);
            if (AS_ISUNMAPWAIT(seg->s_as)) {
                AS_CLRUNMAPWAIT(seg->s_as);
                cv_broadcast(&seg->s_as->a_cv);
            }
            mutex_exit(&seg->s_as->a_contents);
        }
    }

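    /*
     * Common error exit: any partially built shadow list has already been
     * torn down above; drop the per-segment lock, clear the caller's
     * shadow list pointer and return the error (EFAULT or ENOMEM).
     */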
out:
    SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
    *ppp = NULL;
    TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_MISS_END,
        "segvn_pagelock: cache miss seg %p addr %p", seg, addr);
    return (error);
}

/*
 * purge any cached pages in the I/O page cache
 */
static void
segvn_purge(struct seg *seg)
{
    seg_ppurge(seg);
}

static int
segvn_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
    enum seg_rw rw)
{
    struct segvn_data *svd = (struct segvn_data *)seg->s_data;
    pgcnt_t np, npages;
    struct page **pl;

#ifdef lint
    addr = addr;
#endif

    npages = np = (len >> PAGESHIFT);
    ASSERT(npages);
    pl = pplist;
    if (seg->s_szc != 0) {
        size_t pgsz = page_get_pagesize(seg->s_szc);
        if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) {
            panic("segvn_reclaim: unaligned addr or len");
            /*NOTREACHED*/
        }
    }

    while (np > (uint_t)0) {
        if (rw == S_WRITE) {
            hat_setrefmod(*pplist);
        } else {
            hat_setref(*pplist);
        }
        page_unlock(*pplist);
        np--;
        pplist++;
    }
    kmem_free(pl, sizeof (page_t *) * npages);

    mutex_enter(&freemem_lock);
    availrmem += npages;
    segvn_pages_locked -= npages;
    svd->softlockcnt -= npages;
    mutex_exit(&freemem_lock);
    if (svd->softlockcnt <= 0) {
        if (AS_ISUNMAPWAIT(seg->s_as)) {
            mutex_enter(&seg->s_as->a_contents);
            if (AS_ISUNMAPWAIT(seg->s_as)) {
                AS_CLRUNMAPWAIT(seg->s_as);
                cv_broadcast(&seg->s_as->a_cv);
            }
            mutex_exit(&seg->s_as->a_contents);
        }
    }
    return (0);
}

/*
 * get a memory ID for an addr in a given segment
 *
 * XXX only creates PAGESIZE pages if anon slots are not initialized.
 * At fault time they will be relocated into larger pages.
 */
static int
segvn_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
    struct segvn_data *svd = (struct segvn_data *)seg->s_data;
    struct anon *ap = NULL;
    ulong_t anon_index;
    struct anon_map *amp;
    anon_sync_obj_t cookie;

    if (svd->type == MAP_PRIVATE) {
        memidp->val[0] = (uintptr_t)seg->s_as;
        memidp->val[1] = (uintptr_t)addr;
        return (0);
    }

    if (svd->type == MAP_SHARED) {
        if (svd->vp) {
            memidp->val[0] = (uintptr_t)svd->vp;
            memidp->val[1] = (u_longlong_t)svd->offset +
                (uintptr_t)(addr - seg->s_base);
            return (0);
        } else {
            SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
            if ((amp = svd->amp) != NULL) {
                anon_index = svd->anon_index +
                    seg_page(seg, addr);
            }
            SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);

            ASSERT(amp != NULL);

            ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
            anon_array_enter(amp, anon_index, &cookie);
            ap = anon_get_ptr(amp->ahp, anon_index);
            if (ap == NULL) {
                page_t *pp;

                pp = anon_zero(seg, addr, &ap, svd->cred);
                if (pp == NULL) {
                    anon_array_exit(&cookie);
                    ANON_LOCK_EXIT(&amp->a_rwlock);
                    return (ENOMEM);
                }
                ASSERT(anon_get_ptr(amp->ahp, anon_index) == NULL);
                (void) anon_set_ptr(amp->ahp, anon_index,
                    ap, ANON_SLEEP);
                page_unlock(pp);
            }

            anon_array_exit(&cookie);
            ANON_LOCK_EXIT(&amp->a_rwlock);

            memidp->val[0] = (uintptr_t)ap;
            memidp->val[1] = (uintptr_t)addr & PAGEOFFSET;
            return (0);
        }
    }
    return (EINVAL);
}

static int
sameprot(struct seg *seg, caddr_t a, size_t len)
{
    struct segvn_data *svd = (struct segvn_data *)seg->s_data;
    struct vpage *vpage;
    spgcnt_t pages = btop(len);
    uint_t prot;

    if (svd->pageprot == 0)
        return (1);

    ASSERT(svd->vpage != NULL);

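    /*
     * Every page in the range must have the same protections as the
     * first one for the range to be treated as uniform.
     */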
    vpage = &svd->vpage[seg_page(seg, a)];
    prot = VPP_PROT(vpage);
    vpage++;
    pages--;

    while (pages-- > 0) {
        if (prot != VPP_PROT(vpage))
            return (0);
        vpage++;
    }
    return (1);
}

/*
 * Get memory allocation policy info for specified address in given segment
 */
static lgrp_mem_policy_info_t *
segvn_getpolicy(struct seg *seg, caddr_t addr)
{
    struct anon_map *amp;
    ulong_t anon_index;
    lgrp_mem_policy_info_t *policy_info;
    struct segvn_data *svn_data;
    u_offset_t vn_off;
    vnode_t *vp;

    ASSERT(seg != NULL);

    svn_data = (struct segvn_data *)seg->s_data;
    if (svn_data == NULL)
        return (NULL);

    /*
     * Get policy info for private or shared memory
     */
    if (svn_data->type != MAP_SHARED)
        policy_info = &svn_data->policy_info;
    else {
        amp = svn_data->amp;
        anon_index = svn_data->anon_index + seg_page(seg, addr);
        vp = svn_data->vp;
        vn_off = svn_data->offset + (uintptr_t)(addr - seg->s_base);
        policy_info = lgrp_shm_policy_get(amp, anon_index, vp, vn_off);
    }

    return (policy_info);
}