/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
		size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
		struct page ***ppp, enum lock_type type,
		enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
		caddr_t addr);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
		struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

static struct seg_ops segmap_ops = {
        SEGMAP_BADOP(int),      /* dup */
        SEGMAP_BADOP(int),      /* unmap */
        segmap_free,
        segmap_fault,
        segmap_faulta,
        SEGMAP_BADOP(int),      /* setprot */
        segmap_checkprot,
        segmap_kluster,
        SEGMAP_BADOP(size_t),   /* swapout */
        SEGMAP_BADOP(int),      /* sync */
        SEGMAP_BADOP(size_t),   /* incore */
        SEGMAP_BADOP(int),      /* lockop */
        segmap_getprot,
        segmap_getoffset,
        segmap_gettype,
        segmap_getvp,
        SEGMAP_BADOP(int),      /* advise */
        segmap_dump,
        segmap_pagelock,        /* pagelock */
        SEGMAP_BADOP(int),      /* setpgsz */
        segmap_getmemid,        /* getmemid */
        segmap_getpolicy,       /* getpolicy */
};
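
/*
 * Illustrative sketch (not part of the driver): ops that make no sense
 * for segkmap are wired to segmap_badop() through the SEGMAP_BADOP()
 * cast above, so an accidental call panics instead of silently
 * misbehaving.  The SEGMAP_EXAMPLE guard is hypothetical, so this is
 * never compiled; SEGOP_DUP() is the dispatch macro from <vm/seg.h>.
 */
#ifdef SEGMAP_EXAMPLE
static int
segmap_example_dup(struct seg *seg, struct seg *newseg)
{
        /* dispatches to segmap_badop(), which panics */
        return (SEGOP_DUP(seg, newseg));
}
#endif /* SEGMAP_EXAMPLE */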

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
	size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
	u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
        { "fault",              KSTAT_DATA_ULONG },
        { "faulta",             KSTAT_DATA_ULONG },
        { "getmap",             KSTAT_DATA_ULONG },
        { "get_use",            KSTAT_DATA_ULONG },
        { "get_reclaim",        KSTAT_DATA_ULONG },
        { "get_reuse",          KSTAT_DATA_ULONG },
        { "get_unused",         KSTAT_DATA_ULONG },
        { "get_nofree",         KSTAT_DATA_ULONG },
        { "rel_async",          KSTAT_DATA_ULONG },
        { "rel_write",          KSTAT_DATA_ULONG },
        { "rel_free",           KSTAT_DATA_ULONG },
        { "rel_abort",          KSTAT_DATA_ULONG },
        { "rel_dontneed",       KSTAT_DATA_ULONG },
        { "release",            KSTAT_DATA_ULONG },
        { "pagecreate",         KSTAT_DATA_ULONG },
        { "free_notfree",       KSTAT_DATA_ULONG },
        { "free_dirty",         KSTAT_DATA_ULONG },
        { "free",               KSTAT_DATA_ULONG },
        { "stolen",             KSTAT_DATA_ULONG },
        { "get_nomtx",          KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)	((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)	(((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
        struct {
                uint32_t        scpu_free_ndx[SEGMAP_MAXCOLOR];
                struct smap     *scpu_last_smap;
                ulong_t         scpu_getmap;
                ulong_t         scpu_release;
                ulong_t         scpu_get_reclaim;
                ulong_t         scpu_fault;
                ulong_t         scpu_pagecreate;
                ulong_t         scpu_get_reuse;
        } scpu;
        char    scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;
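
/*
 * Illustrative sketch (not part of the driver): SEGMAP_CACHE_PAD pads
 * each per-cpu counter block out to its own cache line(s) so that
 * counter updates from different cpus do not false-share.  A
 * compile-time check, assuming CTASSERT() from <sys/debug.h>; the
 * SEGMAP_EXAMPLE guard is hypothetical, so it is never compiled.
 */
#ifdef SEGMAP_EXAMPLE
CTASSERT((sizeof (union segmap_cpu) % SEGMAP_CACHE_PAD) == 0);
#endif /* SEGMAP_EXAMPLE */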

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done by only holding the hashchain lock, when a wanted
 * slot is found, we drop the hashchain lock then lock the slot so there
 * is no overlapping of hashchain and smap locks. After the slot is
 * locked, we verify again if the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist. This is
 * in reversed lock order so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in smap structure except for
 * the link fields for hash/free lists which are protected by
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp)	(&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
		((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}
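
/*
 * Illustrative sketch (not part of the driver): how a (vp, off) pair is
 * mapped to a hash chain.  Two shifted copies of the vnode address are
 * mixed with the MAXBSIZE-aligned offset and masked into the table;
 * SMAP_HASHFUNC is a statement macro that assigns its third argument.
 * The SEGMAP_EXAMPLE guard is hypothetical, so this is never compiled.
 */
#ifdef SEGMAP_EXAMPLE
static int
segmap_example_hash(struct vnode *vp, u_offset_t off)
{
        int hashid;

        SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
        ASSERT(hashid >= 0 && hashid <= smd_hashmsk);
        return (hashid);
}
#endif /* SEGMAP_EXAMPLE */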

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks. The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
        int i;
        ulong_t getmap, release, get_reclaim;
        ulong_t fault, pagecreate, get_reuse;

        if (rw == KSTAT_WRITE)
                return (EACCES);
        getmap = release = get_reclaim = (ulong_t)0;
        fault = pagecreate = get_reuse = (ulong_t)0;
        for (i = 0; i < max_ncpus; i++) {
                getmap += smd_cpu[i].scpu.scpu_getmap;
                release  += smd_cpu[i].scpu.scpu_release;
                get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
                fault  += smd_cpu[i].scpu.scpu_fault;
                pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
                get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
        }
        segmapcnt.smp_getmap.value.ul = getmap;
        segmapcnt.smp_release.value.ul = release;
        segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
        segmapcnt.smp_fault.value.ul = fault;
        segmapcnt.smp_pagecreate.value.ul = pagecreate;
        segmapcnt.smp_get_reuse.value.ul = get_reuse;
        return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
        struct segmap_data *smd;
        struct smap *smp;
        struct smfree *sm;
        struct segmap_crargs *a = (struct segmap_crargs *)argsp;
        struct smaphash *shashp;
        union segmap_cpu *scpu;
        long i, npages;
        size_t hashsz;
        uint_t nfreelist;
        extern void prefetch_smap_w(void *);
        extern int max_ncpus;

        ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

        if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
                panic("segkmap not MAXBSIZE aligned");
                /*NOTREACHED*/
        }

        smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

        seg->s_data = (void *)smd;
        seg->s_ops = &segmap_ops;
        smd->smd_prot = a->prot;

        /*
         * Scale the number of smap freelists to be
         * proportional to max_ncpus * number of virtual colors.
         * The caller can override this scaling by providing
         * a non-zero a->nfreelist argument.
         */
        nfreelist = a->nfreelist;
        if (nfreelist == 0)
                nfreelist = max_ncpus;
        else if (nfreelist > 4 * max_ncpus) {
                /* nfreelist is unsigned, so only the upper bound is checked */
                cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
                    "%d, using %d", nfreelist, max_ncpus);
                nfreelist = max_ncpus;
        }
        if (nfreelist & (nfreelist - 1)) {
                /* round up nfreelist to the next power of two. */
                nfreelist = 1 << (highbit(nfreelist));
        }

        /*
         * Get the number of virtual colors - must be a power of 2.
         */
        if (a->shmsize)
                smd_ncolor = a->shmsize >> MAXBSHIFT;
        else
                smd_ncolor = 1;
        ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
        ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
        smd_colormsk = smd_ncolor - 1;
        smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
        smd_freemsk = smd_nfree - 1;

        /*
         * Allocate and initialize the freelist headers.
         * Note that sm_freeq[1] starts out as the release queue. This
         * is known when the smap structures are initialized below.
         */
        smd_free = smd->smd_free =
            kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
        for (i = 0; i < smd_nfree; i++) {
                sm = &smd->smd_free[i];
                mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
                mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
                sm->sm_allocq = &sm->sm_freeq[0];
                sm->sm_releq = &sm->sm_freeq[1];
        }
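
        /*
         * Illustrative sketch (not part of the driver): slot i of the
         * smap array always belongs to freelist (i & smd_freemsk), so
         * consecutive MAXBSIZE windows are spread round-robin over the
         * freelists while the low smd_colormsk bits preserve the
         * window's virtual color.  This is why get_free_smp() below
         * steps by smd_ncolor to reach the next list of the same
         * color.  The SEGMAP_EXAMPLE guard is hypothetical.
         */
#ifdef SEGMAP_EXAMPLE
        {
                struct smap *xsmp = smd_smap;   /* any slot */

                ASSERT(SMP2SMF_NDX(xsmp) ==
                    ((xsmp - smd_smap) & smd_freemsk));
                ASSERT(((xsmp - smd_smap) & smd_colormsk) <= smd_colormsk);
        }
#endif /* SEGMAP_EXAMPLE */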

        /*
         * Allocate and initialize the smap hash chain headers.
         * Compute hash size rounding down to the next power of two.
         */
        npages = MAP_PAGES(seg);
        smd->smd_npages = npages;
        hashsz = npages / SMAP_HASHAVELEN;
        hashsz = 1 << (highbit(hashsz) - 1);
        smd_hashmsk = hashsz - 1;
        smd_hash = smd->smd_hash =
            kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
        smd_hash_len =
            kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
        for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
                shashp->sh_hash_list = NULL;
                mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
        }

        /*
         * Allocate and initialize the smap structures.
         * Link all slots onto the appropriate freelist.
         * The smap array is large enough to affect boot time
         * on large systems, so use memory prefetching and only
         * go through the array 1 time. Inline an optimized version
         * of segmap_smapadd to add structures to freelists with
         * knowledge that no locks are needed here.
         */
        smd_smap = smd->smd_sm =
            kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

        for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
            smp >= smd->smd_sm; smp--) {
                struct smap *smpfreelist;
                struct sm_freeq *releq;

                prefetch_smap_w((char *)smp);

                smp->sm_vp = NULL;
                smp->sm_hash = NULL;
                smp->sm_off = 0;
                smp->sm_bitmap = 0;
                smp->sm_refcnt = 0;
                mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
                smp->sm_free_ndx = SMP2SMF_NDX(smp);

                sm = SMP2SMF(smp);
                releq = sm->sm_releq;

                smpfreelist = releq->smq_free;
                if (smpfreelist == 0) {
                        releq->smq_free = smp->sm_next = smp->sm_prev = smp;
                } else {
                        smp->sm_next = smpfreelist;
                        smp->sm_prev = smpfreelist->sm_prev;
                        smpfreelist->sm_prev = smp;
                        smp->sm_prev->sm_next = smp;
                        releq->smq_free = smp->sm_next;
                }

                /*
                 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
                 */
                smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
                /*
                 * Due to the fragile prefetch loop no
                 * separate function is used here.
                 */
                smp->sm_kpme_next = NULL;
                smp->sm_kpme_prev = NULL;
                smp->sm_kpme_page = NULL;
#endif
        }

        /*
         * Allocate the per color indices that distribute allocation
         * requests over the free lists. Each cpu will have a private
         * rotor index to spread the allocations even across the available
         * smap freelists. Init the scpu_last_smap field to the first
         * smap element so there is no need to check for NULL.
         */
        smd_cpu =
            kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
        for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
                int j;
                for (j = 0; j < smd_ncolor; j++)
                        scpu->scpu.scpu_free_ndx[j] = j;
                scpu->scpu.scpu_last_smap = smd_smap;
        }

#ifdef DEBUG
        /*
         * Keep track of which colors are used more often.
         */
        colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

        return (0);
}

static void
segmap_free(struct seg *seg)
{
        ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}
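
/*
 * Illustrative sketch (not part of the driver): roughly how platform
 * startup code attaches the kernel's segkmap segment.  The names
 * (seg_alloc(), struct segmap_crargs, segkmap) follow <vm/seg.h> and
 * <vm/seg_map.h>, but the base/size choice and error handling here are
 * assumptions, and the SEGMAP_EXAMPLE guard is hypothetical.
 */
#ifdef SEGMAP_EXAMPLE
static void
segmap_example_attach(caddr_t base, size_t size)
{
        struct segmap_crargs a;

        /* segmap_create() asserts the as lock is held as writer */
        rw_enter(&kas.a_lock, RW_WRITER);
        segkmap = seg_alloc(&kas, base, size);  /* MAXBSIZE aligned */
        if (segkmap == NULL)
                panic("cannot allocate segkmap");
        a.prot = PROT_READ | PROT_WRITE;
        a.shmsize = 0;          /* no VAC: a single virtual color */
        a.nfreelist = 0;        /* default scaling with max_ncpus */
        if (segmap_create(segkmap, (void *)&a) != 0)
                panic("segmap_create segkmap");
        rw_exit(&kas.a_lock);
}
#endif /* SEGMAP_EXAMPLE */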

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
        struct hat *hat,
        struct seg *seg,
        caddr_t addr,
        size_t len,
        enum seg_rw rw,
        struct smap *smp)
{
        page_t *pp;
        caddr_t adr;
        u_offset_t off;
        struct vnode *vp;
        kmutex_t *smtx;

        ASSERT(smp->sm_refcnt > 0);

#ifdef lint
        seg = seg;
#endif

        if (segmap_kpm && IS_KPM_ADDR(addr)) {

                /*
                 * We're called only from segmap_fault and this was a
                 * NOP in case of a kpm based smap, so dangerous things
                 * must have happened in the meantime. Pages are prefaulted
                 * and locked in segmap_getmapflt and they will not be
                 * unlocked until segmap_release.
                 */
                panic("segmap_unlock: called with kpm addr %p", (void *)addr);
                /*NOTREACHED*/
        }

        vp = smp->sm_vp;
        off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

        hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
        for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
                ushort_t bitmask;

                /*
                 * Use page_find() instead of page_lookup() to
                 * find the page since we know that it has a
                 * "shared" lock.
                 */
                pp = page_find(vp, off);
                if (pp == NULL) {
                        panic("segmap_unlock: page not found");
                        /*NOTREACHED*/
                }

                if (rw == S_WRITE) {
                        hat_setrefmod(pp);
                } else if (rw != S_OTHER) {
                        TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
                            "segmap_fault:pp %p vp %p offset %llx",
                            pp, vp, off);
                        hat_setref(pp);
                }

                /*
                 * Clear bitmap, if the bit corresponding to "off" is set,
                 * since the page and translation are being unlocked.
                 */
                bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

                /*
                 * Large Files: The following assertion verifies that
                 * the window-relative offset used as the bit index
                 * above fits in an int.
                 */
                ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
                smtx = SMAPMTX(smp);
                mutex_enter(smtx);
                if (smp->sm_bitmap & bitmask) {
                        smp->sm_bitmap &= ~bitmask;
                }
                mutex_exit(smtx);

                page_unlock(pp);
        }
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */
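
/*
 * Illustrative sketch (not part of the driver): sm_bitmap is a 16-bit
 * map with one bit per PAGESIZE page of a MAXBSIZE window; it tracks
 * pages of the window whose translations are locked, and
 * segmap_unlock() above clears a page's bit when it unlocks it.  The
 * SEGMAP_EXAMPLE guard is hypothetical, so this is never compiled.
 */
#ifdef SEGMAP_EXAMPLE
static ushort_t
segmap_example_bit(struct smap *smp, u_offset_t off)
{
        ASSERT(off >= smp->sm_off && off < smp->sm_off + MAXBSIZE);
        return (SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT));
}
#endif /* SEGMAP_EXAMPLE */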

/*
 * This routine is called via a machine specific fault handling
 * routine. It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
        struct hat *hat,
        struct seg *seg,
        caddr_t addr,
        size_t len,
        enum fault_type type,
        enum seg_rw rw)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;
        struct smap *smp;
        page_t *pp, **ppp;
        struct vnode *vp;
        u_offset_t off;
        page_t *pl[MAXPPB + 1];
        uint_t prot;
        u_offset_t addroff;
        caddr_t adr;
        int err;
        u_offset_t sm_off;
        int hat_flag;

        if (segmap_kpm && IS_KPM_ADDR(addr)) {
                int newpage;
                kmutex_t *smtx;

                /*
                 * Pages are successfully prefaulted and locked in
                 * segmap_getmapflt and can't be unlocked until
                 * segmap_release. No hat mappings have to be locked
                 * and they also can't be unlocked as long as the
                 * caller owns an active kpm addr.
                 */
#ifndef DEBUG
                if (type != F_SOFTUNLOCK)
                        return (0);
#endif

                if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
                        panic("segmap_fault: smap not found "
                            "for addr %p", (void *)addr);
                        /*NOTREACHED*/
                }

                smtx = SMAPMTX(smp);
#ifdef	DEBUG
                newpage = smp->sm_flags & SM_KPM_NEWPAGE;
                if (newpage) {
                        cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
                            (void *)smp);
                }

                if (type != F_SOFTUNLOCK) {
                        mutex_exit(smtx);
                        return (0);
                }
#endif
smp %p", 629*0Sstevel@tonic-gate (void *)smp); 630*0Sstevel@tonic-gate } 631*0Sstevel@tonic-gate 632*0Sstevel@tonic-gate if (type != F_SOFTUNLOCK) { 633*0Sstevel@tonic-gate mutex_exit(smtx); 634*0Sstevel@tonic-gate return (0); 635*0Sstevel@tonic-gate } 636*0Sstevel@tonic-gate #endif 637*0Sstevel@tonic-gate mutex_exit(smtx); 638*0Sstevel@tonic-gate vp = smp->sm_vp; 639*0Sstevel@tonic-gate sm_off = smp->sm_off; 640*0Sstevel@tonic-gate 641*0Sstevel@tonic-gate if (vp == NULL) 642*0Sstevel@tonic-gate return (FC_MAKE_ERR(EIO)); 643*0Sstevel@tonic-gate 644*0Sstevel@tonic-gate ASSERT(smp->sm_refcnt > 0); 645*0Sstevel@tonic-gate 646*0Sstevel@tonic-gate addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 647*0Sstevel@tonic-gate if (addroff + len > MAXBSIZE) 648*0Sstevel@tonic-gate panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk", 649*0Sstevel@tonic-gate (void *)(addr + len)); 650*0Sstevel@tonic-gate 651*0Sstevel@tonic-gate off = sm_off + addroff; 652*0Sstevel@tonic-gate 653*0Sstevel@tonic-gate pp = page_find(vp, off); 654*0Sstevel@tonic-gate 655*0Sstevel@tonic-gate if (pp == NULL) 656*0Sstevel@tonic-gate panic("segmap_fault: softunlock page not found"); 657*0Sstevel@tonic-gate 658*0Sstevel@tonic-gate /* 659*0Sstevel@tonic-gate * Set ref bit also here in case of S_OTHER to avoid the 660*0Sstevel@tonic-gate * overhead of supporting other cases than F_SOFTUNLOCK 661*0Sstevel@tonic-gate * with segkpm. We can do this because the underlying 662*0Sstevel@tonic-gate * pages are locked anyway. 663*0Sstevel@tonic-gate */ 664*0Sstevel@tonic-gate if (rw == S_WRITE) { 665*0Sstevel@tonic-gate hat_setrefmod(pp); 666*0Sstevel@tonic-gate } else { 667*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 668*0Sstevel@tonic-gate "segmap_fault:pp %p vp %p offset %llx", 669*0Sstevel@tonic-gate pp, vp, off); 670*0Sstevel@tonic-gate hat_setref(pp); 671*0Sstevel@tonic-gate } 672*0Sstevel@tonic-gate 673*0Sstevel@tonic-gate return (0); 674*0Sstevel@tonic-gate } 675*0Sstevel@tonic-gate 676*0Sstevel@tonic-gate smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 677*0Sstevel@tonic-gate smp = GET_SMAP(seg, addr); 678*0Sstevel@tonic-gate vp = smp->sm_vp; 679*0Sstevel@tonic-gate sm_off = smp->sm_off; 680*0Sstevel@tonic-gate 681*0Sstevel@tonic-gate if (vp == NULL) 682*0Sstevel@tonic-gate return (FC_MAKE_ERR(EIO)); 683*0Sstevel@tonic-gate 684*0Sstevel@tonic-gate ASSERT(smp->sm_refcnt > 0); 685*0Sstevel@tonic-gate 686*0Sstevel@tonic-gate addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 687*0Sstevel@tonic-gate if (addroff + len > MAXBSIZE) { 688*0Sstevel@tonic-gate panic("segmap_fault: endaddr %p " 689*0Sstevel@tonic-gate "exceeds MAXBSIZE chunk", (void *)(addr + len)); 690*0Sstevel@tonic-gate /*NOTREACHED*/ 691*0Sstevel@tonic-gate } 692*0Sstevel@tonic-gate off = sm_off + addroff; 693*0Sstevel@tonic-gate 694*0Sstevel@tonic-gate /* 695*0Sstevel@tonic-gate * First handle the easy stuff 696*0Sstevel@tonic-gate */ 697*0Sstevel@tonic-gate if (type == F_SOFTUNLOCK) { 698*0Sstevel@tonic-gate segmap_unlock(hat, seg, addr, len, rw, smp); 699*0Sstevel@tonic-gate return (0); 700*0Sstevel@tonic-gate } 701*0Sstevel@tonic-gate 702*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE, 703*0Sstevel@tonic-gate "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp); 704*0Sstevel@tonic-gate err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE, 705*0Sstevel@tonic-gate seg, addr, rw, CRED()); 706*0Sstevel@tonic-gate 707*0Sstevel@tonic-gate if (err) 708*0Sstevel@tonic-gate return (FC_MAKE_ERR(err)); 709*0Sstevel@tonic-gate 

        prot &= smd->smd_prot;

        /*
         * Handle all pages returned in the pl[] array.
         * This loop is coded on the assumption that if
         * there was no error from the VOP_GETPAGE routine,
         * that the page list returned will contain all the
         * needed pages for the vp from [off..off + len].
         */
        ppp = pl;
        while ((pp = *ppp++) != NULL) {
                u_offset_t poff;
                ASSERT(pp->p_vnode == vp);
                hat_flag = HAT_LOAD;

                /*
                 * Verify that the pages returned are within the range
                 * of this segmap region.  Note that it is theoretically
                 * possible for pages outside this range to be returned,
                 * but it is not very likely.  If we cannot use the
                 * page here, just release it and go on to the next one.
                 */
                if (pp->p_offset < sm_off ||
                    pp->p_offset >= sm_off + MAXBSIZE) {
                        (void) page_release(pp, 1);
                        continue;
                }

                ASSERT(hat == kas.a_hat);
                poff = pp->p_offset;
                adr = addr + (poff - off);
                if (adr >= addr && adr < addr + len) {
                        hat_setref(pp);
                        TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
                            "segmap_fault:pp %p vp %p offset %llx",
                            pp, vp, poff);
                        if (type == F_SOFTLOCK)
                                hat_flag = HAT_LOAD_LOCK;
                }

                /*
                 * Deal with VMODSORT pages here. If we know this is a write
                 * do the setmod now and allow write protection.
                 * As long as it's modified or not S_OTHER, remove write
                 * protection. With S_OTHER it's up to the FS to deal with this.
                 */
                if (IS_VMODSORT(vp)) {
                        if (rw == S_WRITE)
                                hat_setmod(pp);
                        else if (rw != S_OTHER && !hat_ismod(pp))
                                prot &= ~PROT_WRITE;
                }

                hat_memload(hat, adr, pp, prot, hat_flag);
                if (hat_flag != HAT_LOAD_LOCK)
                        page_unlock(pp);
        }
        return (0);
}
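
/*
 * Illustrative sketch (not part of the driver): callers pair
 * F_SOFTLOCK with F_SOFTUNLOCK over the same page-aligned range within
 * one MAXBSIZE window (addr assumed to come from an earlier
 * segmap_getmap() on that window), keeping pages and translations
 * locked across the access.  Error decoding via FC_ERRNO() from
 * <vm/faultcode.h>; the SEGMAP_EXAMPLE guard is hypothetical.
 */
#ifdef SEGMAP_EXAMPLE
static int
segmap_example_softlock(caddr_t addr, size_t len, enum seg_rw rw)
{
        faultcode_t fc;

        fc = segmap_fault(kas.a_hat, segkmap, addr, len, F_SOFTLOCK, rw);
        if (fc != 0)
                return (FC_ERRNO(fc));

        /* ... access [addr, addr + len) with translations held ... */

        (void) segmap_fault(kas.a_hat, segkmap, addr, len, F_SOFTUNLOCK, rw);
        return (0);
}
#endif /* SEGMAP_EXAMPLE */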

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
        struct smap *smp;
        struct vnode *vp;
        u_offset_t off;
        int err;

        if (segmap_kpm && IS_KPM_ADDR(addr)) {
                int newpage;
                kmutex_t *smtx;

                /*
                 * Pages are successfully prefaulted and locked in
                 * segmap_getmapflt and can't be unlocked until
                 * segmap_release. No hat mappings have to be locked
                 * and they also can't be unlocked as long as the
                 * caller owns an active kpm addr.
                 */
#ifdef	DEBUG
                if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
                        panic("segmap_faulta: smap not found "
                            "for addr %p", (void *)addr);
                        /*NOTREACHED*/
                }

                smtx = SMAPMTX(smp);
                newpage = smp->sm_flags & SM_KPM_NEWPAGE;
                mutex_exit(smtx);
                if (newpage)
                        cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
                            (void *)smp);
#endif
                return (0);
        }

        segmapcnt.smp_faulta.value.ul++;
        smp = GET_SMAP(seg, addr);

        ASSERT(smp->sm_refcnt > 0);

        vp = smp->sm_vp;
        off = smp->sm_off;

        if (vp == NULL) {
                cmn_err(CE_WARN, "segmap_faulta - no vp");
                return (FC_MAKE_ERR(EIO));
        }

        TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
            "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

        err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
            & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
            seg, addr, S_READ, CRED());

        if (err)
                return (FC_MAKE_ERR(err));
        return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;

        ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

        /*
         * Need not acquire the segment lock since
         * "smd_prot" is a read-only field.
         */
        return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}

static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;
        size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

        if (pgno != 0) {
                do
                        protv[--pgno] = smd->smd_prot;
                while (pgno != 0);
        }
        return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;

        ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

        return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
        ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

        return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;

        ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

        /* XXX - This doesn't make any sense */
        *vpp = smd->smd_sm->sm_vp;
        return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
        return (0);
}

static void
segmap_badop()
{
        panic("segmap_badop");
        /*NOTREACHED*/
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
        struct smfree *sm;
        struct smap *smpfreelist;
        struct sm_freeq *releq;

        ASSERT(MUTEX_HELD(SMAPMTX(smp)));

        if (smp->sm_refcnt != 0) {
                panic("segmap_smapadd");
                /*NOTREACHED*/
        }

        sm = &smd_free[smp->sm_free_ndx];
        /*
         * Add to the tail of the release queue
         * Note that sm_releq and sm_allocq could toggle
         * before we get the lock. This does not affect
         * correctness as the 2 queues are only maintained
         * to reduce lock pressure.
         */
        releq = sm->sm_releq;
        if (releq == &sm->sm_freeq[0])
                smp->sm_flags |= SM_QNDX_ZERO;
        else
                smp->sm_flags &= ~SM_QNDX_ZERO;
        mutex_enter(&releq->smq_mtx);
        smpfreelist = releq->smq_free;
        if (smpfreelist == 0) {
                int want;

                releq->smq_free = smp->sm_next = smp->sm_prev = smp;
                /*
                 * Both queue mutexes are held to set sm_want;
                 * snapshot the value before dropping the releq mutex.
                 * If sm_want appears after the releq mutex is dropped,
                 * then the smap just freed is already gone.
                 */
                want = sm->sm_want;
                mutex_exit(&releq->smq_mtx);
                /*
                 * See if there was a waiter before dropping the releq
                 * mutex, then recheck after obtaining the sm_freeq[0]
                 * mutex, as another thread may have already signaled.
                 */
                if (want) {
                        mutex_enter(&sm->sm_freeq[0].smq_mtx);
                        if (sm->sm_want)
                                cv_signal(&sm->sm_free_cv);
                        mutex_exit(&sm->sm_freeq[0].smq_mtx);
                }
        } else {
                smp->sm_next = smpfreelist;
                smp->sm_prev = smpfreelist->sm_prev;
                smpfreelist->sm_prev = smp;
                smp->sm_prev->sm_next = smp;
                mutex_exit(&releq->smq_mtx);
        }
}

static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
        struct smap **hpp;
        struct smap *tmp;
        kmutex_t *hmtx;

        ASSERT(MUTEX_HELD(SMAPMTX(smp)));
        ASSERT(smp->sm_vp == NULL);
        ASSERT(smp->sm_hash == NULL);
        ASSERT(smp->sm_prev == NULL);
        ASSERT(smp->sm_next == NULL);
        ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

        hmtx = SHASHMTX(hashid);

        mutex_enter(hmtx);
        /*
         * First we need to verify that no one has created a smp
         * with (vp, off) as its tag before us.
         */
        for (tmp = smd_hash[hashid].sh_hash_list;
            tmp != NULL; tmp = tmp->sm_hash)
                if (tmp->sm_vp == vp && tmp->sm_off == off)
                        break;

        if (tmp == NULL) {
                /*
                 * No one created one yet.
                 *
                 * Funniness here - we don't increment the ref count on the
                 * vnode * even though we have another pointer to it here.
                 * The reason for this is that we don't want the fact that
                 * a seg_map entry somewhere refers to a vnode to prevent the
                 * vnode * itself from going away.  This is because this
                 * reference to the vnode is a "soft one".  In the case where
                 * a mapping is being used by a rdwr [or directory routine?]
                 * there already has to be a non-zero ref count on the vnode.
                 * In the case where the vp has been freed and the smap
                 * structure is on the free list, there are no pages in memory
                 * that can refer to the vnode.  Thus even if we reuse the same
                 * vnode/smap structure for a vnode which has the same
                 * address but represents a different object, we are ok.
                 */
                smp->sm_vp = vp;
                smp->sm_off = off;

                hpp = &smd_hash[hashid].sh_hash_list;
                smp->sm_hash = *hpp;
                *hpp = smp;
#ifdef SEGMAP_HASHSTATS
                smd_hash_len[hashid]++;
#endif
        }
        mutex_exit(hmtx);

        return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
        struct smap **hpp, *hp;
        struct vnode *vp;
        kmutex_t *mtx;
        int hashid;
        u_offset_t off;

        ASSERT(MUTEX_HELD(SMAPMTX(smp)));

        vp = smp->sm_vp;
        off = smp->sm_off;

        SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
        mtx = SHASHMTX(hashid);
        mutex_enter(mtx);

        hpp = &smd_hash[hashid].sh_hash_list;
        for (;;) {
                hp = *hpp;
                if (hp == NULL) {
                        panic("segmap_hashout");
                        /*NOTREACHED*/
                }
                if (hp == smp)
                        break;
                hpp = &hp->sm_hash;
        }

        *hpp = smp->sm_hash;
        smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
        smd_hash_len[hashid]--;
#endif
        mutex_exit(mtx);

        smp->sm_vp = NULL;
        smp->sm_off = (u_offset_t)0;
}

/*
 * Attempt to free unmodified, unmapped, and non locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
        u_offset_t pgoff;
        page_t *pp;

        for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

                if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
                        continue;

                switch (page_release(pp, 1)) {
                case PGREL_NOTREL:
                        segmapcnt.smp_free_notfree.value.ul++;
                        break;
                case PGREL_MOD:
                        segmapcnt.smp_free_dirty.value.ul++;
                        break;
                case PGREL_CLEAN:
                        segmapcnt.smp_free.value.ul++;
                        break;
                }
        }
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit : smap lock.
 */
static void
grab_smp(struct smap *smp, page_t *pp)
{
        ASSERT(MUTEX_HELD(SMAPMTX(smp)));
        ASSERT(smp->sm_refcnt == 0);

        if (smp->sm_vp != (struct vnode *)NULL) {
                struct vnode *vp = smp->sm_vp;
                u_offset_t off = smp->sm_off;
                /*
                 * Destroy old vnode association and
                 * unload any hardware translations to
                 * the old object.
                 */
                smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
                segmap_hashout(smp);

                /*
                 * This node is off freelist and hashlist,
                 * so there is no reason to drop/reacquire sm_mtx
                 * across calls to hat_unload.
                 */
                if (segmap_kpm) {
                        caddr_t vaddr;
                        int hat_unload_needed = 0;

                        /*
                         * unload kpm mapping
                         */
                        if (pp != NULL) {
                                vaddr = hat_kpm_page2va(pp, 1);
                                hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
                                page_unlock(pp);
                        }

                        /*
                         * Check if we have (also) the rare case of a
                         * non kpm mapping.
                         */
                        if (smp->sm_flags & SM_NOTKPM_RELEASED) {
                                hat_unload_needed = 1;
                                smp->sm_flags &= ~SM_NOTKPM_RELEASED;
                        }

                        if (hat_unload_needed) {
                                hat_unload(kas.a_hat, segkmap->s_base +
                                    ((smp - smd_smap) * MAXBSIZE),
                                    MAXBSIZE, HAT_UNLOAD);
                        }

                } else {
                        ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
                        smp->sm_flags &= ~SM_NOTKPM_RELEASED;
                        hat_unload(kas.a_hat, segkmap->s_base +
                            ((smp - smd_smap) * MAXBSIZE),
                            MAXBSIZE, HAT_UNLOAD);
                }
                segmap_pagefree(vp, off);
        }
}

static struct smap *
get_free_smp(int free_ndx)
{
        struct smfree *sm;
        kmutex_t *smtx;
        struct smap *smp, *first;
        struct sm_freeq *allocq, *releq;
        struct kpme *kpme;
        page_t *pp = NULL;
        int end_ndx, page_locked = 0;

        end_ndx = free_ndx;
        sm = &smd_free[free_ndx];

retry_queue:
        allocq = sm->sm_allocq;
        mutex_enter(&allocq->smq_mtx);

        if ((smp = allocq->smq_free) == NULL) {

skip_queue:
                /*
                 * The alloc list is empty or this queue is being skipped;
                 * first see if the allocq toggled.
                 */
                if (sm->sm_allocq != allocq) {
                        /* queue changed */
                        mutex_exit(&allocq->smq_mtx);
                        goto retry_queue;
                }
                releq = sm->sm_releq;
                if (!mutex_tryenter(&releq->smq_mtx)) {
                        /* cannot get releq; a free smp may be there now */
                        mutex_exit(&allocq->smq_mtx);

                        /*
                         * This loop could spin forever if this thread has
                         * higher priority than the thread that is holding
                         * releq->smq_mtx. In order to force the other thread
                         * to run, we'll lock/unlock the mutex which is safe
                         * since we just unlocked the allocq mutex.
                         */
                        mutex_enter(&releq->smq_mtx);
                        mutex_exit(&releq->smq_mtx);
                        goto retry_queue;
                }
                if (releq->smq_free == NULL) {
                        /*
                         * This freelist is empty.
                         * This should not happen unless clients
                         * are failing to release the segmap
                         * window after accessing the data.
                         * Before resorting to sleeping, try
                         * the next list of the same color.
                         */
                        free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
                        if (free_ndx != end_ndx) {
                                mutex_exit(&releq->smq_mtx);
                                mutex_exit(&allocq->smq_mtx);
                                sm = &smd_free[free_ndx];
                                goto retry_queue;
                        }
1235*0Sstevel@tonic-gate * Before resorting to sleeping, try 1236*0Sstevel@tonic-gate * the next list of the same color. 1237*0Sstevel@tonic-gate */ 1238*0Sstevel@tonic-gate free_ndx = (free_ndx + smd_ncolor) & smd_freemsk; 1239*0Sstevel@tonic-gate if (free_ndx != end_ndx) { 1240*0Sstevel@tonic-gate mutex_exit(&releq->smq_mtx); 1241*0Sstevel@tonic-gate mutex_exit(&allocq->smq_mtx); 1242*0Sstevel@tonic-gate sm = &smd_free[free_ndx]; 1243*0Sstevel@tonic-gate goto retry_queue; 1244*0Sstevel@tonic-gate } 1245*0Sstevel@tonic-gate /* 1246*0Sstevel@tonic-gate * Tried all freelists of the same color once, 1247*0Sstevel@tonic-gate * wait on this list and hope something gets freed. 1248*0Sstevel@tonic-gate */ 1249*0Sstevel@tonic-gate segmapcnt.smp_get_nofree.value.ul++; 1250*0Sstevel@tonic-gate sm->sm_want++; 1251*0Sstevel@tonic-gate mutex_exit(&sm->sm_freeq[1].smq_mtx); 1252*0Sstevel@tonic-gate cv_wait(&sm->sm_free_cv, 1253*0Sstevel@tonic-gate &sm->sm_freeq[0].smq_mtx); 1254*0Sstevel@tonic-gate sm->sm_want--; 1255*0Sstevel@tonic-gate mutex_exit(&sm->sm_freeq[0].smq_mtx); 1256*0Sstevel@tonic-gate sm = &smd_free[free_ndx]; 1257*0Sstevel@tonic-gate goto retry_queue; 1258*0Sstevel@tonic-gate } else { 1259*0Sstevel@tonic-gate /* 1260*0Sstevel@tonic-gate * Something on the rele queue; flip the alloc 1261*0Sstevel@tonic-gate * and rele queues and retry. 1262*0Sstevel@tonic-gate */ 1263*0Sstevel@tonic-gate sm->sm_allocq = releq; 1264*0Sstevel@tonic-gate sm->sm_releq = allocq; 1265*0Sstevel@tonic-gate mutex_exit(&allocq->smq_mtx); 1266*0Sstevel@tonic-gate mutex_exit(&releq->smq_mtx); 1267*0Sstevel@tonic-gate if (page_locked) { 1268*0Sstevel@tonic-gate delay(hz >> 2); 1269*0Sstevel@tonic-gate page_locked = 0; 1270*0Sstevel@tonic-gate } 1271*0Sstevel@tonic-gate goto retry_queue; 1272*0Sstevel@tonic-gate } 1273*0Sstevel@tonic-gate } else { 1274*0Sstevel@tonic-gate /* 1275*0Sstevel@tonic-gate * Fastpath the case we get the smap mutex 1276*0Sstevel@tonic-gate * on the first try. 1277*0Sstevel@tonic-gate */ 1278*0Sstevel@tonic-gate first = smp; 1279*0Sstevel@tonic-gate next_smap: 1280*0Sstevel@tonic-gate smtx = SMAPMTX(smp); 1281*0Sstevel@tonic-gate if (!mutex_tryenter(smtx)) { 1282*0Sstevel@tonic-gate /* 1283*0Sstevel@tonic-gate * Another thread is trying to reclaim this slot. 1284*0Sstevel@tonic-gate * Skip to the next queue or smap. 
1285*0Sstevel@tonic-gate */ 1286*0Sstevel@tonic-gate if ((smp = smp->sm_next) == first) { 1287*0Sstevel@tonic-gate goto skip_queue; 1288*0Sstevel@tonic-gate } else { 1289*0Sstevel@tonic-gate goto next_smap; 1290*0Sstevel@tonic-gate } 1291*0Sstevel@tonic-gate } else { 1292*0Sstevel@tonic-gate /* 1293*0Sstevel@tonic-gate * if kpme exists, get shared lock on the page 1294*0Sstevel@tonic-gate */ 1295*0Sstevel@tonic-gate if (segmap_kpm && smp->sm_vp != NULL) { 1296*0Sstevel@tonic-gate 1297*0Sstevel@tonic-gate kpme = GET_KPME(smp); 1298*0Sstevel@tonic-gate pp = kpme->kpe_page; 1299*0Sstevel@tonic-gate 1300*0Sstevel@tonic-gate if (pp != NULL) { 1301*0Sstevel@tonic-gate if (!page_trylock(pp, SE_SHARED)) { 1302*0Sstevel@tonic-gate smp = smp->sm_next; 1303*0Sstevel@tonic-gate mutex_exit(smtx); 1304*0Sstevel@tonic-gate page_locked = 1; 1305*0Sstevel@tonic-gate 1306*0Sstevel@tonic-gate pp = NULL; 1307*0Sstevel@tonic-gate 1308*0Sstevel@tonic-gate if (smp == first) { 1309*0Sstevel@tonic-gate goto skip_queue; 1310*0Sstevel@tonic-gate } else { 1311*0Sstevel@tonic-gate goto next_smap; 1312*0Sstevel@tonic-gate } 1313*0Sstevel@tonic-gate } else { 1314*0Sstevel@tonic-gate if (kpme->kpe_page == NULL) { 1315*0Sstevel@tonic-gate page_unlock(pp); 1316*0Sstevel@tonic-gate pp = NULL; 1317*0Sstevel@tonic-gate } 1318*0Sstevel@tonic-gate } 1319*0Sstevel@tonic-gate } 1320*0Sstevel@tonic-gate } 1321*0Sstevel@tonic-gate 1322*0Sstevel@tonic-gate /* 1323*0Sstevel@tonic-gate * At this point, we've selected smp. Remove smp 1324*0Sstevel@tonic-gate * from its freelist. If smp is the first one in 1325*0Sstevel@tonic-gate * the freelist, update the head of the freelist. 1326*0Sstevel@tonic-gate */ 1327*0Sstevel@tonic-gate if (first == smp) { 1328*0Sstevel@tonic-gate ASSERT(first == allocq->smq_free); 1329*0Sstevel@tonic-gate allocq->smq_free = smp->sm_next; 1330*0Sstevel@tonic-gate } 1331*0Sstevel@tonic-gate 1332*0Sstevel@tonic-gate /* 1333*0Sstevel@tonic-gate * if the head of the freelist still points to smp, 1334*0Sstevel@tonic-gate * then there are no more free smaps in that list. 1335*0Sstevel@tonic-gate */ 1336*0Sstevel@tonic-gate if (allocq->smq_free == smp) 1337*0Sstevel@tonic-gate /* 1338*0Sstevel@tonic-gate * Took the last one 1339*0Sstevel@tonic-gate */ 1340*0Sstevel@tonic-gate allocq->smq_free = NULL; 1341*0Sstevel@tonic-gate else { 1342*0Sstevel@tonic-gate smp->sm_prev->sm_next = smp->sm_next; 1343*0Sstevel@tonic-gate smp->sm_next->sm_prev = smp->sm_prev; 1344*0Sstevel@tonic-gate } 1345*0Sstevel@tonic-gate mutex_exit(&allocq->smq_mtx); 1346*0Sstevel@tonic-gate smp->sm_prev = smp->sm_next = NULL; 1347*0Sstevel@tonic-gate 1348*0Sstevel@tonic-gate /* 1349*0Sstevel@tonic-gate * if pp != NULL, pp must have been locked; 1350*0Sstevel@tonic-gate * grab_smp() unlocks pp. 1351*0Sstevel@tonic-gate */ 1352*0Sstevel@tonic-gate ASSERT((pp == NULL) || PAGE_LOCKED(pp)); 1353*0Sstevel@tonic-gate grab_smp(smp, pp); 1354*0Sstevel@tonic-gate /* return smp locked. */ 1355*0Sstevel@tonic-gate ASSERT(SMAPMTX(smp) == smtx); 1356*0Sstevel@tonic-gate ASSERT(MUTEX_HELD(smtx)); 1357*0Sstevel@tonic-gate return (smp); 1358*0Sstevel@tonic-gate } 1359*0Sstevel@tonic-gate } 1360*0Sstevel@tonic-gate } 1361*0Sstevel@tonic-gate 1362*0Sstevel@tonic-gate /* 1363*0Sstevel@tonic-gate * Special public segmap operations 1364*0Sstevel@tonic-gate */ 1365*0Sstevel@tonic-gate 1366*0Sstevel@tonic-gate /* 1367*0Sstevel@tonic-gate * Create pages (without using VOP_GETPAGE) and load up translations to them.
1368*0Sstevel@tonic-gate * If softlock is TRUE, then set things up so that it looks like a call 1369*0Sstevel@tonic-gate * to segmap_fault with F_SOFTLOCK. 1370*0Sstevel@tonic-gate * 1371*0Sstevel@tonic-gate * Returns 1 if a page is created by calling page_create_va(), or 0 otherwise. 1372*0Sstevel@tonic-gate * 1373*0Sstevel@tonic-gate * All fields in the generic segment (struct seg) are considered to be 1374*0Sstevel@tonic-gate * read-only for "segmap" even though the kernel address space (kas) may 1375*0Sstevel@tonic-gate * not be locked, hence no lock is needed to access them. 1376*0Sstevel@tonic-gate */ 1377*0Sstevel@tonic-gate int 1378*0Sstevel@tonic-gate segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock) 1379*0Sstevel@tonic-gate { 1380*0Sstevel@tonic-gate struct segmap_data *smd = (struct segmap_data *)seg->s_data; 1381*0Sstevel@tonic-gate page_t *pp; 1382*0Sstevel@tonic-gate u_offset_t off; 1383*0Sstevel@tonic-gate struct smap *smp; 1384*0Sstevel@tonic-gate struct vnode *vp; 1385*0Sstevel@tonic-gate caddr_t eaddr; 1386*0Sstevel@tonic-gate int newpage = 0; 1387*0Sstevel@tonic-gate uint_t prot; 1388*0Sstevel@tonic-gate kmutex_t *smtx; 1389*0Sstevel@tonic-gate int hat_flag; 1390*0Sstevel@tonic-gate 1391*0Sstevel@tonic-gate ASSERT(seg->s_as == &kas); 1392*0Sstevel@tonic-gate 1393*0Sstevel@tonic-gate if (segmap_kpm && IS_KPM_ADDR(addr)) { 1394*0Sstevel@tonic-gate /* 1395*0Sstevel@tonic-gate * Pages are successfully prefaulted and locked in 1396*0Sstevel@tonic-gate * segmap_getmapflt and can't be unlocked until 1397*0Sstevel@tonic-gate * segmap_release. The SM_KPM_NEWPAGE flag is set 1398*0Sstevel@tonic-gate * in segmap_pagecreate_kpm when new pages are created, 1399*0Sstevel@tonic-gate * and it is returned as "newpage" indication here. 1400*0Sstevel@tonic-gate */ 1401*0Sstevel@tonic-gate if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 1402*0Sstevel@tonic-gate panic("segmap_pagecreate: smap not found " 1403*0Sstevel@tonic-gate "for addr %p", (void *)addr); 1404*0Sstevel@tonic-gate /*NOTREACHED*/ 1405*0Sstevel@tonic-gate } 1406*0Sstevel@tonic-gate 1407*0Sstevel@tonic-gate smtx = SMAPMTX(smp); 1408*0Sstevel@tonic-gate newpage = smp->sm_flags & SM_KPM_NEWPAGE; 1409*0Sstevel@tonic-gate smp->sm_flags &= ~SM_KPM_NEWPAGE; 1410*0Sstevel@tonic-gate mutex_exit(smtx); 1411*0Sstevel@tonic-gate 1412*0Sstevel@tonic-gate return (newpage); 1413*0Sstevel@tonic-gate } 1414*0Sstevel@tonic-gate 1415*0Sstevel@tonic-gate smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++; 1416*0Sstevel@tonic-gate 1417*0Sstevel@tonic-gate eaddr = addr + len; 1418*0Sstevel@tonic-gate addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 1419*0Sstevel@tonic-gate 1420*0Sstevel@tonic-gate smp = GET_SMAP(seg, addr); 1421*0Sstevel@tonic-gate 1422*0Sstevel@tonic-gate /* 1423*0Sstevel@tonic-gate * We don't grab smp mutex here since we assume the smp 1424*0Sstevel@tonic-gate * has a refcnt set already which prevents the slot from 1425*0Sstevel@tonic-gate * changing its id.
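 * (The reference was taken in segmap_getmap/segmap_getmapflt and is
 * dropped by the matching segmap_release.)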
1426*0Sstevel@tonic-gate */ 1427*0Sstevel@tonic-gate ASSERT(smp->sm_refcnt > 0); 1428*0Sstevel@tonic-gate 1429*0Sstevel@tonic-gate vp = smp->sm_vp; 1430*0Sstevel@tonic-gate off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET)); 1431*0Sstevel@tonic-gate prot = smd->smd_prot; 1432*0Sstevel@tonic-gate 1433*0Sstevel@tonic-gate for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { 1434*0Sstevel@tonic-gate hat_flag = HAT_LOAD; 1435*0Sstevel@tonic-gate pp = page_lookup(vp, off, SE_SHARED); 1436*0Sstevel@tonic-gate if (pp == NULL) { 1437*0Sstevel@tonic-gate ushort_t bitindex; 1438*0Sstevel@tonic-gate 1439*0Sstevel@tonic-gate if ((pp = page_create_va(vp, off, 1440*0Sstevel@tonic-gate PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 1441*0Sstevel@tonic-gate panic("segmap_pagecreate: page_create failed"); 1442*0Sstevel@tonic-gate /*NOTREACHED*/ 1443*0Sstevel@tonic-gate } 1444*0Sstevel@tonic-gate newpage = 1; 1445*0Sstevel@tonic-gate page_io_unlock(pp); 1446*0Sstevel@tonic-gate 1447*0Sstevel@tonic-gate /* 1448*0Sstevel@tonic-gate * Since pages created here do not contain valid 1449*0Sstevel@tonic-gate * data until the caller writes into them, the 1450*0Sstevel@tonic-gate * "exclusive" lock will not be dropped to prevent 1451*0Sstevel@tonic-gate * other users from accessing the page. We also 1452*0Sstevel@tonic-gate * have to lock the translation to prevent a fault 1453*0Sstevel@tonic-gate * from occurring when the virtual address mapped by 1454*0Sstevel@tonic-gate * this page is written into. This is necessary to 1455*0Sstevel@tonic-gate * avoid a deadlock since we haven't dropped the 1456*0Sstevel@tonic-gate * "exclusive" lock. 1457*0Sstevel@tonic-gate */ 1458*0Sstevel@tonic-gate bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT); 1459*0Sstevel@tonic-gate 1460*0Sstevel@tonic-gate /* 1461*0Sstevel@tonic-gate * Large Files: The following assertion is to 1462*0Sstevel@tonic-gate * verify the cast above.
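 * (It holds because off - sm_off is always less than MAXBSIZE here,
 * so the shifted page index comfortably fits in a ushort_t.)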
1463*0Sstevel@tonic-gate */ 1464*0Sstevel@tonic-gate ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 1465*0Sstevel@tonic-gate smtx = SMAPMTX(smp); 1466*0Sstevel@tonic-gate mutex_enter(smtx); 1467*0Sstevel@tonic-gate smp->sm_bitmap |= SMAP_BIT_MASK(bitindex); 1468*0Sstevel@tonic-gate mutex_exit(smtx); 1469*0Sstevel@tonic-gate 1470*0Sstevel@tonic-gate hat_flag = HAT_LOAD_LOCK; 1471*0Sstevel@tonic-gate } else if (softlock) { 1472*0Sstevel@tonic-gate hat_flag = HAT_LOAD_LOCK; 1473*0Sstevel@tonic-gate } 1474*0Sstevel@tonic-gate 1475*0Sstevel@tonic-gate if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE)) 1476*0Sstevel@tonic-gate hat_setmod(pp); 1477*0Sstevel@tonic-gate 1478*0Sstevel@tonic-gate hat_memload(kas.a_hat, addr, pp, prot, hat_flag); 1479*0Sstevel@tonic-gate 1480*0Sstevel@tonic-gate if (hat_flag != HAT_LOAD_LOCK) 1481*0Sstevel@tonic-gate page_unlock(pp); 1482*0Sstevel@tonic-gate 1483*0Sstevel@tonic-gate TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE, 1484*0Sstevel@tonic-gate "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx", 1485*0Sstevel@tonic-gate seg, addr, pp, vp, off); 1486*0Sstevel@tonic-gate } 1487*0Sstevel@tonic-gate 1488*0Sstevel@tonic-gate return (newpage); 1489*0Sstevel@tonic-gate } 1490*0Sstevel@tonic-gate 1491*0Sstevel@tonic-gate void 1492*0Sstevel@tonic-gate segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) 1493*0Sstevel@tonic-gate { 1494*0Sstevel@tonic-gate struct smap *smp; 1495*0Sstevel@tonic-gate ushort_t bitmask; 1496*0Sstevel@tonic-gate page_t *pp; 1497*0Sstevel@tonic-gate struct vnode *vp; 1498*0Sstevel@tonic-gate u_offset_t off; 1499*0Sstevel@tonic-gate caddr_t eaddr; 1500*0Sstevel@tonic-gate kmutex_t *smtx; 1501*0Sstevel@tonic-gate 1502*0Sstevel@tonic-gate ASSERT(seg->s_as == &kas); 1503*0Sstevel@tonic-gate 1504*0Sstevel@tonic-gate eaddr = addr + len; 1505*0Sstevel@tonic-gate addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 1506*0Sstevel@tonic-gate 1507*0Sstevel@tonic-gate if (segmap_kpm && IS_KPM_ADDR(addr)) { 1508*0Sstevel@tonic-gate /* 1509*0Sstevel@tonic-gate * Pages are successfully prefaulted and locked in 1510*0Sstevel@tonic-gate * segmap_getmapflt and can't be unlocked until 1511*0Sstevel@tonic-gate * segmap_release, so no pages or hat mappings have 1512*0Sstevel@tonic-gate * to be unlocked at this point. 
1513*0Sstevel@tonic-gate */ 1514*0Sstevel@tonic-gate #ifdef DEBUG 1515*0Sstevel@tonic-gate if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 1516*0Sstevel@tonic-gate panic("segmap_pageunlock: smap not found " 1517*0Sstevel@tonic-gate "for addr %p", (void *)addr); 1518*0Sstevel@tonic-gate /*NOTREACHED*/ 1519*0Sstevel@tonic-gate } 1520*0Sstevel@tonic-gate 1521*0Sstevel@tonic-gate ASSERT(smp->sm_refcnt > 0); 1522*0Sstevel@tonic-gate mutex_exit(SMAPMTX(smp)); 1523*0Sstevel@tonic-gate #endif 1524*0Sstevel@tonic-gate return; 1525*0Sstevel@tonic-gate } 1526*0Sstevel@tonic-gate 1527*0Sstevel@tonic-gate smp = GET_SMAP(seg, addr); 1528*0Sstevel@tonic-gate smtx = SMAPMTX(smp); 1529*0Sstevel@tonic-gate 1530*0Sstevel@tonic-gate ASSERT(smp->sm_refcnt > 0); 1531*0Sstevel@tonic-gate 1532*0Sstevel@tonic-gate vp = smp->sm_vp; 1533*0Sstevel@tonic-gate off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET)); 1534*0Sstevel@tonic-gate 1535*0Sstevel@tonic-gate for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { 1536*0Sstevel@tonic-gate bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT); 1537*0Sstevel@tonic-gate 1538*0Sstevel@tonic-gate /* 1539*0Sstevel@tonic-gate * Large Files: Following assertion is to verify 1540*0Sstevel@tonic-gate * the correctness of the cast to (int) above. 1541*0Sstevel@tonic-gate */ 1542*0Sstevel@tonic-gate ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 1543*0Sstevel@tonic-gate 1544*0Sstevel@tonic-gate /* 1545*0Sstevel@tonic-gate * If the bit corresponding to "off" is set, 1546*0Sstevel@tonic-gate * clear this bit in the bitmap, unlock translations, 1547*0Sstevel@tonic-gate * and release the "exclusive" lock on the page. 1548*0Sstevel@tonic-gate */ 1549*0Sstevel@tonic-gate if (smp->sm_bitmap & bitmask) { 1550*0Sstevel@tonic-gate mutex_enter(smtx); 1551*0Sstevel@tonic-gate smp->sm_bitmap &= ~bitmask; 1552*0Sstevel@tonic-gate mutex_exit(smtx); 1553*0Sstevel@tonic-gate 1554*0Sstevel@tonic-gate hat_unlock(kas.a_hat, addr, PAGESIZE); 1555*0Sstevel@tonic-gate 1556*0Sstevel@tonic-gate /* 1557*0Sstevel@tonic-gate * Use page_find() instead of page_lookup() to 1558*0Sstevel@tonic-gate * find the page since we know that it has 1559*0Sstevel@tonic-gate * "exclusive" lock. 1560*0Sstevel@tonic-gate */ 1561*0Sstevel@tonic-gate pp = page_find(vp, off); 1562*0Sstevel@tonic-gate if (pp == NULL) { 1563*0Sstevel@tonic-gate panic("segmap_pageunlock: page not found"); 1564*0Sstevel@tonic-gate /*NOTREACHED*/ 1565*0Sstevel@tonic-gate } 1566*0Sstevel@tonic-gate if (rw == S_WRITE) { 1567*0Sstevel@tonic-gate hat_setrefmod(pp); 1568*0Sstevel@tonic-gate } else if (rw != S_OTHER) { 1569*0Sstevel@tonic-gate hat_setref(pp); 1570*0Sstevel@tonic-gate } 1571*0Sstevel@tonic-gate 1572*0Sstevel@tonic-gate page_unlock(pp); 1573*0Sstevel@tonic-gate } 1574*0Sstevel@tonic-gate } 1575*0Sstevel@tonic-gate } 1576*0Sstevel@tonic-gate 1577*0Sstevel@tonic-gate caddr_t 1578*0Sstevel@tonic-gate segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off) 1579*0Sstevel@tonic-gate { 1580*0Sstevel@tonic-gate return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER)); 1581*0Sstevel@tonic-gate } 1582*0Sstevel@tonic-gate 1583*0Sstevel@tonic-gate /* 1584*0Sstevel@tonic-gate * This is the magic virtual address that offset 0 of an ELF 1585*0Sstevel@tonic-gate * file gets mapped to in user space. This is used to pick 1586*0Sstevel@tonic-gate * the vac color on the freelist. 
1587*0Sstevel@tonic-gate */ 1588*0Sstevel@tonic-gate #define ELF_OFFZERO_VA (0x10000) 1589*0Sstevel@tonic-gate /* 1590*0Sstevel@tonic-gate * segmap_getmap allocates a MAXBSIZE-sized slot to map the vnode vp 1591*0Sstevel@tonic-gate * in the range [off, off + len). off doesn't need to be MAXBSIZE aligned. 1592*0Sstevel@tonic-gate * The return address is always MAXBSIZE aligned. 1593*0Sstevel@tonic-gate * 1594*0Sstevel@tonic-gate * If forcefault is nonzero and the MMU translations haven't yet been created, 1595*0Sstevel@tonic-gate * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them. 1596*0Sstevel@tonic-gate */ 1597*0Sstevel@tonic-gate caddr_t 1598*0Sstevel@tonic-gate segmap_getmapflt( 1599*0Sstevel@tonic-gate struct seg *seg, 1600*0Sstevel@tonic-gate struct vnode *vp, 1601*0Sstevel@tonic-gate u_offset_t off, 1602*0Sstevel@tonic-gate size_t len, 1603*0Sstevel@tonic-gate int forcefault, 1604*0Sstevel@tonic-gate enum seg_rw rw) 1605*0Sstevel@tonic-gate { 1606*0Sstevel@tonic-gate struct smap *smp, *nsmp; 1607*0Sstevel@tonic-gate extern struct vnode *common_specvp(); 1608*0Sstevel@tonic-gate caddr_t baseaddr; /* MAXBSIZE aligned */ 1609*0Sstevel@tonic-gate u_offset_t baseoff; 1610*0Sstevel@tonic-gate int newslot; 1611*0Sstevel@tonic-gate caddr_t vaddr; 1612*0Sstevel@tonic-gate int color, hashid; 1613*0Sstevel@tonic-gate kmutex_t *hashmtx, *smapmtx; 1614*0Sstevel@tonic-gate struct smfree *sm; 1615*0Sstevel@tonic-gate page_t *pp; 1616*0Sstevel@tonic-gate struct kpme *kpme; 1617*0Sstevel@tonic-gate uint_t prot; 1618*0Sstevel@tonic-gate caddr_t base; 1619*0Sstevel@tonic-gate page_t *pl[MAXPPB + 1]; 1620*0Sstevel@tonic-gate int error; 1621*0Sstevel@tonic-gate int is_kpm = 1; 1622*0Sstevel@tonic-gate 1623*0Sstevel@tonic-gate ASSERT(seg->s_as == &kas); 1624*0Sstevel@tonic-gate ASSERT(seg == segkmap); 1625*0Sstevel@tonic-gate 1626*0Sstevel@tonic-gate baseoff = off & (offset_t)MAXBMASK; 1627*0Sstevel@tonic-gate if (off + len > baseoff + MAXBSIZE) { 1628*0Sstevel@tonic-gate panic("segmap_getmap bad len"); 1629*0Sstevel@tonic-gate /*NOTREACHED*/ 1630*0Sstevel@tonic-gate } 1631*0Sstevel@tonic-gate 1632*0Sstevel@tonic-gate /* 1633*0Sstevel@tonic-gate * If this is a block device we have to be sure to use the 1634*0Sstevel@tonic-gate * "common" block device vnode for the mapping.
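 * (common_specvp() returns the "common" snode vnode shared by all
 * aliases of the device, so cached pages are found no matter which
 * alias a caller opened.)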
1635*0Sstevel@tonic-gate */ 1636*0Sstevel@tonic-gate if (vp->v_type == VBLK) 1637*0Sstevel@tonic-gate vp = common_specvp(vp); 1638*0Sstevel@tonic-gate 1639*0Sstevel@tonic-gate smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++; 1640*0Sstevel@tonic-gate 1641*0Sstevel@tonic-gate if (segmap_kpm == 0 || 1642*0Sstevel@tonic-gate (forcefault == SM_PAGECREATE && rw != S_WRITE)) { 1643*0Sstevel@tonic-gate is_kpm = 0; 1644*0Sstevel@tonic-gate } 1645*0Sstevel@tonic-gate 1646*0Sstevel@tonic-gate SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */ 1647*0Sstevel@tonic-gate hashmtx = SHASHMTX(hashid); 1648*0Sstevel@tonic-gate 1649*0Sstevel@tonic-gate retry_hash: 1650*0Sstevel@tonic-gate mutex_enter(hashmtx); 1651*0Sstevel@tonic-gate for (smp = smd_hash[hashid].sh_hash_list; 1652*0Sstevel@tonic-gate smp != NULL; smp = smp->sm_hash) 1653*0Sstevel@tonic-gate if (smp->sm_vp == vp && smp->sm_off == baseoff) 1654*0Sstevel@tonic-gate break; 1655*0Sstevel@tonic-gate mutex_exit(hashmtx); 1656*0Sstevel@tonic-gate 1657*0Sstevel@tonic-gate vrfy_smp: 1658*0Sstevel@tonic-gate if (smp != NULL) { 1659*0Sstevel@tonic-gate 1660*0Sstevel@tonic-gate ASSERT(vp->v_count != 0); 1661*0Sstevel@tonic-gate 1662*0Sstevel@tonic-gate /* 1663*0Sstevel@tonic-gate * Get smap lock and recheck its tag. The hash lock 1664*0Sstevel@tonic-gate * is dropped since the hash is based on (vp, off) 1665*0Sstevel@tonic-gate * and (vp, off) won't change when we have smap mtx. 1666*0Sstevel@tonic-gate */ 1667*0Sstevel@tonic-gate smapmtx = SMAPMTX(smp); 1668*0Sstevel@tonic-gate mutex_enter(smapmtx); 1669*0Sstevel@tonic-gate if (smp->sm_vp != vp || smp->sm_off != baseoff) { 1670*0Sstevel@tonic-gate mutex_exit(smapmtx); 1671*0Sstevel@tonic-gate goto retry_hash; 1672*0Sstevel@tonic-gate } 1673*0Sstevel@tonic-gate 1674*0Sstevel@tonic-gate if (smp->sm_refcnt == 0) { 1675*0Sstevel@tonic-gate 1676*0Sstevel@tonic-gate smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++; 1677*0Sstevel@tonic-gate 1678*0Sstevel@tonic-gate /* 1679*0Sstevel@tonic-gate * Could still be on the free list. However, this 1680*0Sstevel@tonic-gate * could also be an smp that is transitioning from 1681*0Sstevel@tonic-gate * the free list when we have too much contention 1682*0Sstevel@tonic-gate * for the smapmtx's. In this case, we have an 1683*0Sstevel@tonic-gate * unlocked smp that is not on the free list any 1684*0Sstevel@tonic-gate * longer, but still has a 0 refcnt. The only way 1685*0Sstevel@tonic-gate * to be sure is to check the freelist pointers. 1686*0Sstevel@tonic-gate * Since we now have the smapmtx, we are guaranteed 1687*0Sstevel@tonic-gate * that the (vp, off) won't change, so we are safe 1688*0Sstevel@tonic-gate * to reclaim it. get_free_smp() knows that this 1689*0Sstevel@tonic-gate * can happen, and it will check the refcnt. 
1690*0Sstevel@tonic-gate */ 1691*0Sstevel@tonic-gate 1692*0Sstevel@tonic-gate if ((smp->sm_next != NULL)) { 1693*0Sstevel@tonic-gate struct sm_freeq *freeq; 1694*0Sstevel@tonic-gate 1695*0Sstevel@tonic-gate ASSERT(smp->sm_prev != NULL); 1696*0Sstevel@tonic-gate sm = &smd_free[smp->sm_free_ndx]; 1697*0Sstevel@tonic-gate 1698*0Sstevel@tonic-gate if (smp->sm_flags & SM_QNDX_ZERO) 1699*0Sstevel@tonic-gate freeq = &sm->sm_freeq[0]; 1700*0Sstevel@tonic-gate else 1701*0Sstevel@tonic-gate freeq = &sm->sm_freeq[1]; 1702*0Sstevel@tonic-gate 1703*0Sstevel@tonic-gate mutex_enter(&freeq->smq_mtx); 1704*0Sstevel@tonic-gate if (freeq->smq_free != smp) { 1705*0Sstevel@tonic-gate /* 1706*0Sstevel@tonic-gate * fastpath normal case 1707*0Sstevel@tonic-gate */ 1708*0Sstevel@tonic-gate smp->sm_prev->sm_next = smp->sm_next; 1709*0Sstevel@tonic-gate smp->sm_next->sm_prev = smp->sm_prev; 1710*0Sstevel@tonic-gate } else if (smp == smp->sm_next) { 1711*0Sstevel@tonic-gate /* 1712*0Sstevel@tonic-gate * Taking the last smap on freelist 1713*0Sstevel@tonic-gate */ 1714*0Sstevel@tonic-gate freeq->smq_free = NULL; 1715*0Sstevel@tonic-gate } else { 1716*0Sstevel@tonic-gate /* 1717*0Sstevel@tonic-gate * Reclaiming 1st smap on list 1718*0Sstevel@tonic-gate */ 1719*0Sstevel@tonic-gate freeq->smq_free = smp->sm_next; 1720*0Sstevel@tonic-gate smp->sm_prev->sm_next = smp->sm_next; 1721*0Sstevel@tonic-gate smp->sm_next->sm_prev = smp->sm_prev; 1722*0Sstevel@tonic-gate } 1723*0Sstevel@tonic-gate mutex_exit(&freeq->smq_mtx); 1724*0Sstevel@tonic-gate smp->sm_prev = smp->sm_next = NULL; 1725*0Sstevel@tonic-gate } else { 1726*0Sstevel@tonic-gate ASSERT(smp->sm_prev == NULL); 1727*0Sstevel@tonic-gate segmapcnt.smp_stolen.value.ul++; 1728*0Sstevel@tonic-gate } 1729*0Sstevel@tonic-gate 1730*0Sstevel@tonic-gate } else { 1731*0Sstevel@tonic-gate segmapcnt.smp_get_use.value.ul++; 1732*0Sstevel@tonic-gate } 1733*0Sstevel@tonic-gate smp->sm_refcnt++; /* another user */ 1734*0Sstevel@tonic-gate 1735*0Sstevel@tonic-gate /* 1736*0Sstevel@tonic-gate * We don't invoke segmap_fault via TLB miss, so we set ref 1737*0Sstevel@tonic-gate * and mod bits in advance. For S_OTHER we set them in 1738*0Sstevel@tonic-gate * segmap_fault F_SOFTUNLOCK. 1739*0Sstevel@tonic-gate */ 1740*0Sstevel@tonic-gate if (is_kpm) { 1741*0Sstevel@tonic-gate if (rw == S_WRITE) { 1742*0Sstevel@tonic-gate smp->sm_flags |= SM_WRITE_DATA; 1743*0Sstevel@tonic-gate } else if (rw == S_READ) { 1744*0Sstevel@tonic-gate smp->sm_flags |= SM_READ_DATA; 1745*0Sstevel@tonic-gate } 1746*0Sstevel@tonic-gate } 1747*0Sstevel@tonic-gate mutex_exit(smapmtx); 1748*0Sstevel@tonic-gate 1749*0Sstevel@tonic-gate newslot = 0; 1750*0Sstevel@tonic-gate } else { 1751*0Sstevel@tonic-gate 1752*0Sstevel@tonic-gate uint32_t free_ndx, *free_ndxp; 1753*0Sstevel@tonic-gate union segmap_cpu *scpu; 1754*0Sstevel@tonic-gate 1755*0Sstevel@tonic-gate /* 1756*0Sstevel@tonic-gate * On a PAC machine or a machine with anti-alias 1757*0Sstevel@tonic-gate * hardware, smd_colormsk will be zero. 1758*0Sstevel@tonic-gate * 1759*0Sstevel@tonic-gate * On a VAC machine- pick color by offset in the file 1760*0Sstevel@tonic-gate * so we won't get VAC conflicts on elf files. 1761*0Sstevel@tonic-gate * On data files, color does not matter but we 1762*0Sstevel@tonic-gate * don't know what kind of file it is so we always 1763*0Sstevel@tonic-gate * pick color by offset. This causes color 1764*0Sstevel@tonic-gate * corresponding to file offset zero to be used more 1765*0Sstevel@tonic-gate * heavily. 
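 * (Example, assuming the usual 8K MAXBSIZE and hence MAXBSHIFT of 13:
 * a file offset of 0x22000 yields color (0x22000 >> 13) & smd_colormsk,
 * i.e. 17 & smd_colormsk.)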
1766*0Sstevel@tonic-gate */ 1767*0Sstevel@tonic-gate color = (baseoff >> MAXBSHIFT) & smd_colormsk; 1768*0Sstevel@tonic-gate scpu = smd_cpu+CPU->cpu_seqid; 1769*0Sstevel@tonic-gate free_ndxp = &scpu->scpu.scpu_free_ndx[color]; 1770*0Sstevel@tonic-gate free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk; 1771*0Sstevel@tonic-gate #ifdef DEBUG 1772*0Sstevel@tonic-gate colors_used[free_ndx]++; 1773*0Sstevel@tonic-gate #endif /* DEBUG */ 1774*0Sstevel@tonic-gate 1775*0Sstevel@tonic-gate /* 1776*0Sstevel@tonic-gate * Get a locked smp slot from the free list. 1777*0Sstevel@tonic-gate */ 1778*0Sstevel@tonic-gate smp = get_free_smp(free_ndx); 1779*0Sstevel@tonic-gate smapmtx = SMAPMTX(smp); 1780*0Sstevel@tonic-gate 1781*0Sstevel@tonic-gate ASSERT(smp->sm_vp == NULL); 1782*0Sstevel@tonic-gate 1783*0Sstevel@tonic-gate if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) { 1784*0Sstevel@tonic-gate /* 1785*0Sstevel@tonic-gate * Failed to hashin, there exists one now. 1786*0Sstevel@tonic-gate * Return the smp we just allocated. 1787*0Sstevel@tonic-gate */ 1788*0Sstevel@tonic-gate segmap_smapadd(smp); 1789*0Sstevel@tonic-gate mutex_exit(smapmtx); 1790*0Sstevel@tonic-gate 1791*0Sstevel@tonic-gate smp = nsmp; 1792*0Sstevel@tonic-gate goto vrfy_smp; 1793*0Sstevel@tonic-gate } 1794*0Sstevel@tonic-gate smp->sm_refcnt++; /* another user */ 1795*0Sstevel@tonic-gate 1796*0Sstevel@tonic-gate /* 1797*0Sstevel@tonic-gate * We don't invoke segmap_fault via TLB miss, so we set ref 1798*0Sstevel@tonic-gate * and mod bits in advance. For S_OTHER we set them in 1799*0Sstevel@tonic-gate * segmap_fault F_SOFTUNLOCK. 1800*0Sstevel@tonic-gate */ 1801*0Sstevel@tonic-gate if (is_kpm) { 1802*0Sstevel@tonic-gate if (rw == S_WRITE) { 1803*0Sstevel@tonic-gate smp->sm_flags |= SM_WRITE_DATA; 1804*0Sstevel@tonic-gate } else if (rw == S_READ) { 1805*0Sstevel@tonic-gate smp->sm_flags |= SM_READ_DATA; 1806*0Sstevel@tonic-gate } 1807*0Sstevel@tonic-gate } 1808*0Sstevel@tonic-gate mutex_exit(smapmtx); 1809*0Sstevel@tonic-gate 1810*0Sstevel@tonic-gate newslot = 1; 1811*0Sstevel@tonic-gate } 1812*0Sstevel@tonic-gate 1813*0Sstevel@tonic-gate if (!is_kpm) 1814*0Sstevel@tonic-gate goto use_segmap_range; 1815*0Sstevel@tonic-gate 1816*0Sstevel@tonic-gate /* 1817*0Sstevel@tonic-gate * Use segkpm 1818*0Sstevel@tonic-gate */ 1819*0Sstevel@tonic-gate ASSERT(PAGESIZE == MAXBSIZE); 1820*0Sstevel@tonic-gate 1821*0Sstevel@tonic-gate /* 1822*0Sstevel@tonic-gate * remember the last smp faulted on this cpu. 
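 * (get_smap_kpm() consults this per-cpu hint first and falls back
 * to the hash chain only on a miss.)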
1823*0Sstevel@tonic-gate */ 1824*0Sstevel@tonic-gate (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp; 1825*0Sstevel@tonic-gate 1826*0Sstevel@tonic-gate if (forcefault == SM_PAGECREATE) { 1827*0Sstevel@tonic-gate baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw); 1828*0Sstevel@tonic-gate return (baseaddr); 1829*0Sstevel@tonic-gate } 1830*0Sstevel@tonic-gate 1831*0Sstevel@tonic-gate if (newslot == 0 && 1832*0Sstevel@tonic-gate (pp = GET_KPME(smp)->kpe_page) != NULL) { 1833*0Sstevel@tonic-gate 1834*0Sstevel@tonic-gate /* fastpath */ 1835*0Sstevel@tonic-gate switch (rw) { 1836*0Sstevel@tonic-gate case S_READ: 1837*0Sstevel@tonic-gate case S_WRITE: 1838*0Sstevel@tonic-gate if (page_trylock(pp, SE_SHARED)) { 1839*0Sstevel@tonic-gate if (PP_ISFREE(pp) || 1840*0Sstevel@tonic-gate !(pp->p_vnode == vp && 1841*0Sstevel@tonic-gate pp->p_offset == baseoff)) { 1842*0Sstevel@tonic-gate page_unlock(pp); 1843*0Sstevel@tonic-gate pp = page_lookup(vp, baseoff, 1844*0Sstevel@tonic-gate SE_SHARED); 1845*0Sstevel@tonic-gate } 1846*0Sstevel@tonic-gate } else { 1847*0Sstevel@tonic-gate pp = page_lookup(vp, baseoff, SE_SHARED); 1848*0Sstevel@tonic-gate } 1849*0Sstevel@tonic-gate 1850*0Sstevel@tonic-gate if (pp == NULL) { 1851*0Sstevel@tonic-gate ASSERT(GET_KPME(smp)->kpe_page == NULL); 1852*0Sstevel@tonic-gate break; 1853*0Sstevel@tonic-gate } 1854*0Sstevel@tonic-gate 1855*0Sstevel@tonic-gate if (rw == S_WRITE && 1856*0Sstevel@tonic-gate hat_page_getattr(pp, P_MOD | P_REF) != 1857*0Sstevel@tonic-gate (P_MOD | P_REF)) { 1858*0Sstevel@tonic-gate page_unlock(pp); 1859*0Sstevel@tonic-gate break; 1860*0Sstevel@tonic-gate } 1861*0Sstevel@tonic-gate 1862*0Sstevel@tonic-gate /* 1863*0Sstevel@tonic-gate * We have the p_selock as reader, grab_smp 1864*0Sstevel@tonic-gate * can't hit us, we have bumped the smap 1865*0Sstevel@tonic-gate * refcnt and hat_pageunload needs the 1866*0Sstevel@tonic-gate * p_selock exclusive. 1867*0Sstevel@tonic-gate */ 1868*0Sstevel@tonic-gate kpme = GET_KPME(smp); 1869*0Sstevel@tonic-gate if (kpme->kpe_page == pp) { 1870*0Sstevel@tonic-gate baseaddr = hat_kpm_page2va(pp, 0); 1871*0Sstevel@tonic-gate } else if (kpme->kpe_page == NULL) { 1872*0Sstevel@tonic-gate baseaddr = hat_kpm_mapin(pp, kpme); 1873*0Sstevel@tonic-gate } else { 1874*0Sstevel@tonic-gate panic("segmap_getmapflt: stale " 1875*0Sstevel@tonic-gate "kpme page, kpme %p", (void *)kpme); 1876*0Sstevel@tonic-gate /*NOTREACHED*/ 1877*0Sstevel@tonic-gate } 1878*0Sstevel@tonic-gate 1879*0Sstevel@tonic-gate /* 1880*0Sstevel@tonic-gate * We don't invoke segmap_fault via TLB miss, 1881*0Sstevel@tonic-gate * so we set ref and mod bits in advance. 1882*0Sstevel@tonic-gate * For S_OTHER we set them in segmap_fault 1883*0Sstevel@tonic-gate * F_SOFTUNLOCK.
1884*0Sstevel@tonic-gate */ 1885*0Sstevel@tonic-gate if (rw == S_READ && !hat_isref(pp)) 1886*0Sstevel@tonic-gate hat_setref(pp); 1887*0Sstevel@tonic-gate 1888*0Sstevel@tonic-gate return (baseaddr); 1889*0Sstevel@tonic-gate default: 1890*0Sstevel@tonic-gate break; 1891*0Sstevel@tonic-gate } 1892*0Sstevel@tonic-gate } 1893*0Sstevel@tonic-gate 1894*0Sstevel@tonic-gate base = segkpm_create_va(baseoff); 1895*0Sstevel@tonic-gate error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE, 1896*0Sstevel@tonic-gate seg, base, rw, CRED()); 1897*0Sstevel@tonic-gate 1898*0Sstevel@tonic-gate pp = pl[0]; 1899*0Sstevel@tonic-gate if (error || pp == NULL) { 1900*0Sstevel@tonic-gate /* 1901*0Sstevel@tonic-gate * Use segmap address slot and let segmap_fault deal 1902*0Sstevel@tonic-gate * with the error cases. There is no error return 1903*0Sstevel@tonic-gate * possible here. 1904*0Sstevel@tonic-gate */ 1905*0Sstevel@tonic-gate goto use_segmap_range; 1906*0Sstevel@tonic-gate } 1907*0Sstevel@tonic-gate 1908*0Sstevel@tonic-gate ASSERT(pl[1] == NULL); 1909*0Sstevel@tonic-gate 1910*0Sstevel@tonic-gate /* 1911*0Sstevel@tonic-gate * When prot is not returned w/ PROT_ALL the returned pages 1912*0Sstevel@tonic-gate * are not backed by fs blocks. For most of the segmap users 1913*0Sstevel@tonic-gate * this is no problem, since they don't write to the pages in the 1914*0Sstevel@tonic-gate * same request and therefore don't rely on a following 1915*0Sstevel@tonic-gate * trap driven segmap_fault. With SM_LOCKPROTO users it 1916*0Sstevel@tonic-gate * is safer to use segkmap addresses to allow 1917*0Sstevel@tonic-gate * protection segmap_faults. 1918*0Sstevel@tonic-gate */ 1919*0Sstevel@tonic-gate if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) { 1920*0Sstevel@tonic-gate /* 1921*0Sstevel@tonic-gate * Use segmap address slot and let segmap_fault 1922*0Sstevel@tonic-gate * do the error return. 1923*0Sstevel@tonic-gate */ 1924*0Sstevel@tonic-gate ASSERT(rw != S_WRITE); 1925*0Sstevel@tonic-gate ASSERT(PAGE_LOCKED(pp)); 1926*0Sstevel@tonic-gate page_unlock(pp); 1927*0Sstevel@tonic-gate forcefault = 0; 1928*0Sstevel@tonic-gate goto use_segmap_range; 1929*0Sstevel@tonic-gate } 1930*0Sstevel@tonic-gate 1931*0Sstevel@tonic-gate /* 1932*0Sstevel@tonic-gate * We have the p_selock as reader, grab_smp can't hit us, we 1933*0Sstevel@tonic-gate * have bumped the smap refcnt and hat_pageunload needs the 1934*0Sstevel@tonic-gate * p_selock exclusive.
1935*0Sstevel@tonic-gate */ 1936*0Sstevel@tonic-gate kpme = GET_KPME(smp); 1937*0Sstevel@tonic-gate if (kpme->kpe_page == pp) { 1938*0Sstevel@tonic-gate baseaddr = hat_kpm_page2va(pp, 0); 1939*0Sstevel@tonic-gate } else if (kpme->kpe_page == NULL) { 1940*0Sstevel@tonic-gate baseaddr = hat_kpm_mapin(pp, kpme); 1941*0Sstevel@tonic-gate } else { 1942*0Sstevel@tonic-gate panic("segmap_getmapflt: stale kpme page after " 1943*0Sstevel@tonic-gate "VOP_GETPAGE, kpme %p", (void *)kpme); 1944*0Sstevel@tonic-gate /*NOTREACHED*/ 1945*0Sstevel@tonic-gate } 1946*0Sstevel@tonic-gate 1947*0Sstevel@tonic-gate smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 1948*0Sstevel@tonic-gate 1949*0Sstevel@tonic-gate return (baseaddr); 1950*0Sstevel@tonic-gate 1951*0Sstevel@tonic-gate 1952*0Sstevel@tonic-gate use_segmap_range: 1953*0Sstevel@tonic-gate baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE); 1954*0Sstevel@tonic-gate TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP, 1955*0Sstevel@tonic-gate "segmap_getmap:seg %p addr %p vp %p offset %llx", 1956*0Sstevel@tonic-gate seg, baseaddr, vp, baseoff); 1957*0Sstevel@tonic-gate 1958*0Sstevel@tonic-gate /* 1959*0Sstevel@tonic-gate * Prefault the translations 1960*0Sstevel@tonic-gate */ 1961*0Sstevel@tonic-gate vaddr = baseaddr + (off - baseoff); 1962*0Sstevel@tonic-gate if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) { 1963*0Sstevel@tonic-gate 1964*0Sstevel@tonic-gate caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr & 1965*0Sstevel@tonic-gate (uintptr_t)PAGEMASK); 1966*0Sstevel@tonic-gate 1967*0Sstevel@tonic-gate (void) segmap_fault(kas.a_hat, seg, pgaddr, 1968*0Sstevel@tonic-gate (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK, 1969*0Sstevel@tonic-gate F_INVAL, rw); 1970*0Sstevel@tonic-gate } 1971*0Sstevel@tonic-gate 1972*0Sstevel@tonic-gate return (baseaddr); 1973*0Sstevel@tonic-gate } 1974*0Sstevel@tonic-gate 1975*0Sstevel@tonic-gate int 1976*0Sstevel@tonic-gate segmap_release(struct seg *seg, caddr_t addr, uint_t flags) 1977*0Sstevel@tonic-gate { 1978*0Sstevel@tonic-gate struct smap *smp; 1979*0Sstevel@tonic-gate int error; 1980*0Sstevel@tonic-gate int bflags = 0; 1981*0Sstevel@tonic-gate struct vnode *vp; 1982*0Sstevel@tonic-gate u_offset_t offset; 1983*0Sstevel@tonic-gate kmutex_t *smtx; 1984*0Sstevel@tonic-gate int is_kpm = 0; 1985*0Sstevel@tonic-gate page_t *pp; 1986*0Sstevel@tonic-gate 1987*0Sstevel@tonic-gate if (segmap_kpm && IS_KPM_ADDR(addr)) { 1988*0Sstevel@tonic-gate 1989*0Sstevel@tonic-gate if (((uintptr_t)addr & MAXBOFFSET) != 0) { 1990*0Sstevel@tonic-gate panic("segmap_release: addr %p not " 1991*0Sstevel@tonic-gate "MAXBSIZE aligned", (void *)addr); 1992*0Sstevel@tonic-gate /*NOTREACHED*/ 1993*0Sstevel@tonic-gate } 1994*0Sstevel@tonic-gate 1995*0Sstevel@tonic-gate if ((smp = get_smap_kpm(addr, &pp)) == NULL) { 1996*0Sstevel@tonic-gate panic("segmap_release: smap not found " 1997*0Sstevel@tonic-gate "for addr %p", (void *)addr); 1998*0Sstevel@tonic-gate /*NOTREACHED*/ 1999*0Sstevel@tonic-gate } 2000*0Sstevel@tonic-gate 2001*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2002*0Sstevel@tonic-gate "segmap_relmap:seg %p addr %p smp %p", 2003*0Sstevel@tonic-gate seg, addr, smp); 2004*0Sstevel@tonic-gate 2005*0Sstevel@tonic-gate smtx = SMAPMTX(smp); 2006*0Sstevel@tonic-gate 2007*0Sstevel@tonic-gate /* 2008*0Sstevel@tonic-gate * For compatibility reasons segmap_pagecreate_kpm sets this 2009*0Sstevel@tonic-gate * flag to allow a following segmap_pagecreate to return 2010*0Sstevel@tonic-gate * this as "newpage" flag.
When segmap_pagecreate is not 2011*0Sstevel@tonic-gate * called at all we clear it now. 2012*0Sstevel@tonic-gate */ 2013*0Sstevel@tonic-gate smp->sm_flags &= ~SM_KPM_NEWPAGE; 2014*0Sstevel@tonic-gate is_kpm = 1; 2015*0Sstevel@tonic-gate if (smp->sm_flags & SM_WRITE_DATA) { 2016*0Sstevel@tonic-gate hat_setrefmod(pp); 2017*0Sstevel@tonic-gate } else if (smp->sm_flags & SM_READ_DATA) { 2018*0Sstevel@tonic-gate hat_setref(pp); 2019*0Sstevel@tonic-gate } 2020*0Sstevel@tonic-gate } else { 2021*0Sstevel@tonic-gate if (addr < seg->s_base || addr >= seg->s_base + seg->s_size || 2022*0Sstevel@tonic-gate ((uintptr_t)addr & MAXBOFFSET) != 0) { 2023*0Sstevel@tonic-gate panic("segmap_release: bad addr %p", (void *)addr); 2024*0Sstevel@tonic-gate /*NOTREACHED*/ 2025*0Sstevel@tonic-gate } 2026*0Sstevel@tonic-gate smp = GET_SMAP(seg, addr); 2027*0Sstevel@tonic-gate 2028*0Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2029*0Sstevel@tonic-gate "segmap_relmap:seg %p addr %p smp %p", 2030*0Sstevel@tonic-gate seg, addr, smp); 2031*0Sstevel@tonic-gate 2032*0Sstevel@tonic-gate smtx = SMAPMTX(smp); 2033*0Sstevel@tonic-gate mutex_enter(smtx); 2034*0Sstevel@tonic-gate smp->sm_flags |= SM_NOTKPM_RELEASED; 2035*0Sstevel@tonic-gate } 2036*0Sstevel@tonic-gate 2037*0Sstevel@tonic-gate ASSERT(smp->sm_refcnt > 0); 2038*0Sstevel@tonic-gate 2039*0Sstevel@tonic-gate /* 2040*0Sstevel@tonic-gate * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED) 2041*0Sstevel@tonic-gate * are set. 2042*0Sstevel@tonic-gate */ 2043*0Sstevel@tonic-gate if ((flags & ~SM_DONTNEED) != 0) { 2044*0Sstevel@tonic-gate if (flags & SM_WRITE) 2045*0Sstevel@tonic-gate segmapcnt.smp_rel_write.value.ul++; 2046*0Sstevel@tonic-gate if (flags & SM_ASYNC) { 2047*0Sstevel@tonic-gate bflags |= B_ASYNC; 2048*0Sstevel@tonic-gate segmapcnt.smp_rel_async.value.ul++; 2049*0Sstevel@tonic-gate } 2050*0Sstevel@tonic-gate if (flags & SM_INVAL) { 2051*0Sstevel@tonic-gate bflags |= B_INVAL; 2052*0Sstevel@tonic-gate segmapcnt.smp_rel_abort.value.ul++; 2053*0Sstevel@tonic-gate } 2054*0Sstevel@tonic-gate if (flags & SM_DESTROY) { 2055*0Sstevel@tonic-gate bflags |= (B_INVAL|B_TRUNC); 2056*0Sstevel@tonic-gate segmapcnt.smp_rel_abort.value.ul++; 2057*0Sstevel@tonic-gate } 2058*0Sstevel@tonic-gate if (smp->sm_refcnt == 1) { 2059*0Sstevel@tonic-gate /* 2060*0Sstevel@tonic-gate * We only bother doing the FREE and DONTNEED flags 2061*0Sstevel@tonic-gate * if no one else is still referencing this mapping. 
2062*0Sstevel@tonic-gate */ 2063*0Sstevel@tonic-gate if (flags & SM_FREE) { 2064*0Sstevel@tonic-gate bflags |= B_FREE; 2065*0Sstevel@tonic-gate segmapcnt.smp_rel_free.value.ul++; 2066*0Sstevel@tonic-gate } 2067*0Sstevel@tonic-gate if (flags & SM_DONTNEED) { 2068*0Sstevel@tonic-gate bflags |= B_DONTNEED; 2069*0Sstevel@tonic-gate segmapcnt.smp_rel_dontneed.value.ul++; 2070*0Sstevel@tonic-gate } 2071*0Sstevel@tonic-gate } 2072*0Sstevel@tonic-gate } else { 2073*0Sstevel@tonic-gate smd_cpu[CPU->cpu_seqid].scpu.scpu_release++; 2074*0Sstevel@tonic-gate } 2075*0Sstevel@tonic-gate 2076*0Sstevel@tonic-gate vp = smp->sm_vp; 2077*0Sstevel@tonic-gate offset = smp->sm_off; 2078*0Sstevel@tonic-gate 2079*0Sstevel@tonic-gate if (--smp->sm_refcnt == 0) { 2080*0Sstevel@tonic-gate 2081*0Sstevel@tonic-gate if (is_kpm) { 2082*0Sstevel@tonic-gate smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA); 2083*0Sstevel@tonic-gate } 2084*0Sstevel@tonic-gate if (flags & (SM_INVAL|SM_DESTROY)) { 2085*0Sstevel@tonic-gate segmap_hashout(smp); /* remove map info */ 2086*0Sstevel@tonic-gate if (is_kpm) { 2087*0Sstevel@tonic-gate hat_kpm_mapout(pp, GET_KPME(smp), addr); 2088*0Sstevel@tonic-gate if (smp->sm_flags & SM_NOTKPM_RELEASED) { 2089*0Sstevel@tonic-gate smp->sm_flags &= ~SM_NOTKPM_RELEASED; 2090*0Sstevel@tonic-gate hat_unload(kas.a_hat, addr, MAXBSIZE, 2091*0Sstevel@tonic-gate HAT_UNLOAD); 2092*0Sstevel@tonic-gate } 2093*0Sstevel@tonic-gate 2094*0Sstevel@tonic-gate } else { 2095*0Sstevel@tonic-gate if (segmap_kpm) 2096*0Sstevel@tonic-gate segkpm_mapout_validkpme(GET_KPME(smp)); 2097*0Sstevel@tonic-gate 2098*0Sstevel@tonic-gate smp->sm_flags &= ~SM_NOTKPM_RELEASED; 2099*0Sstevel@tonic-gate hat_unload(kas.a_hat, addr, MAXBSIZE, 2100*0Sstevel@tonic-gate HAT_UNLOAD); 2101*0Sstevel@tonic-gate } 2102*0Sstevel@tonic-gate } 2103*0Sstevel@tonic-gate segmap_smapadd(smp); /* add to free list */ 2104*0Sstevel@tonic-gate } 2105*0Sstevel@tonic-gate 2106*0Sstevel@tonic-gate mutex_exit(smtx); 2107*0Sstevel@tonic-gate 2108*0Sstevel@tonic-gate if (is_kpm) 2109*0Sstevel@tonic-gate page_unlock(pp); 2110*0Sstevel@tonic-gate /* 2111*0Sstevel@tonic-gate * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED) 2112*0Sstevel@tonic-gate * are set. 2113*0Sstevel@tonic-gate */ 2114*0Sstevel@tonic-gate if ((flags & ~SM_DONTNEED) != 0) { 2115*0Sstevel@tonic-gate error = VOP_PUTPAGE(vp, offset, MAXBSIZE, 2116*0Sstevel@tonic-gate bflags, CRED()); 2117*0Sstevel@tonic-gate } else { 2118*0Sstevel@tonic-gate error = 0; 2119*0Sstevel@tonic-gate } 2120*0Sstevel@tonic-gate 2121*0Sstevel@tonic-gate return (error); 2122*0Sstevel@tonic-gate } 2123*0Sstevel@tonic-gate 2124*0Sstevel@tonic-gate /* 2125*0Sstevel@tonic-gate * Dump the pages belonging to this segmap segment. 
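 * (Called on the crash dump path: dump_addpage() records the pfn of
 * each resident page so it is written to the dump device, and
 * dump_timeleft is reset to keep the dump watchdog from expiring
 * during the walk.)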
2126*0Sstevel@tonic-gate */ 2127*0Sstevel@tonic-gate static void 2128*0Sstevel@tonic-gate segmap_dump(struct seg *seg) 2129*0Sstevel@tonic-gate { 2130*0Sstevel@tonic-gate struct segmap_data *smd; 2131*0Sstevel@tonic-gate struct smap *smp, *smp_end; 2132*0Sstevel@tonic-gate page_t *pp; 2133*0Sstevel@tonic-gate pfn_t pfn; 2134*0Sstevel@tonic-gate u_offset_t off; 2135*0Sstevel@tonic-gate caddr_t addr; 2136*0Sstevel@tonic-gate 2137*0Sstevel@tonic-gate smd = (struct segmap_data *)seg->s_data; 2138*0Sstevel@tonic-gate addr = seg->s_base; 2139*0Sstevel@tonic-gate for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages; 2140*0Sstevel@tonic-gate smp < smp_end; smp++) { 2141*0Sstevel@tonic-gate 2142*0Sstevel@tonic-gate if (smp->sm_refcnt) { 2143*0Sstevel@tonic-gate for (off = 0; off < MAXBSIZE; off += PAGESIZE) { 2144*0Sstevel@tonic-gate int we_own_it = 0; 2145*0Sstevel@tonic-gate 2146*0Sstevel@tonic-gate /* 2147*0Sstevel@tonic-gate * If pp == NULL, the page either does 2148*0Sstevel@tonic-gate * not exist or is exclusively locked. 2149*0Sstevel@tonic-gate * So determine if it exists before 2150*0Sstevel@tonic-gate * searching for it. 2151*0Sstevel@tonic-gate */ 2152*0Sstevel@tonic-gate if ((pp = page_lookup_nowait(smp->sm_vp, 2153*0Sstevel@tonic-gate smp->sm_off + off, SE_SHARED))) 2154*0Sstevel@tonic-gate we_own_it = 1; 2155*0Sstevel@tonic-gate else 2156*0Sstevel@tonic-gate pp = page_exists(smp->sm_vp, 2157*0Sstevel@tonic-gate smp->sm_off + off); 2158*0Sstevel@tonic-gate 2159*0Sstevel@tonic-gate if (pp) { 2160*0Sstevel@tonic-gate pfn = page_pptonum(pp); 2161*0Sstevel@tonic-gate dump_addpage(seg->s_as, 2162*0Sstevel@tonic-gate addr + off, pfn); 2163*0Sstevel@tonic-gate if (we_own_it) 2164*0Sstevel@tonic-gate page_unlock(pp); 2165*0Sstevel@tonic-gate } 2166*0Sstevel@tonic-gate dump_timeleft = dump_timeout; 2167*0Sstevel@tonic-gate } 2168*0Sstevel@tonic-gate } 2169*0Sstevel@tonic-gate addr += MAXBSIZE; 2170*0Sstevel@tonic-gate } 2171*0Sstevel@tonic-gate } 2172*0Sstevel@tonic-gate 2173*0Sstevel@tonic-gate /*ARGSUSED*/ 2174*0Sstevel@tonic-gate static int 2175*0Sstevel@tonic-gate segmap_pagelock(struct seg *seg, caddr_t addr, size_t len, 2176*0Sstevel@tonic-gate struct page ***ppp, enum lock_type type, enum seg_rw rw) 2177*0Sstevel@tonic-gate { 2178*0Sstevel@tonic-gate return (ENOTSUP); 2179*0Sstevel@tonic-gate } 2180*0Sstevel@tonic-gate 2181*0Sstevel@tonic-gate static int 2182*0Sstevel@tonic-gate segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 2183*0Sstevel@tonic-gate { 2184*0Sstevel@tonic-gate struct segmap_data *smd = (struct segmap_data *)seg->s_data; 2185*0Sstevel@tonic-gate 2186*0Sstevel@tonic-gate memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp; 2187*0Sstevel@tonic-gate memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base); 2188*0Sstevel@tonic-gate return (0); 2189*0Sstevel@tonic-gate } 2190*0Sstevel@tonic-gate 2191*0Sstevel@tonic-gate /*ARGSUSED*/ 2192*0Sstevel@tonic-gate static lgrp_mem_policy_info_t * 2193*0Sstevel@tonic-gate segmap_getpolicy(struct seg *seg, caddr_t addr) 2194*0Sstevel@tonic-gate { 2195*0Sstevel@tonic-gate return (NULL); 2196*0Sstevel@tonic-gate } 2197*0Sstevel@tonic-gate 2198*0Sstevel@tonic-gate 2199*0Sstevel@tonic-gate #ifdef SEGKPM_SUPPORT 2200*0Sstevel@tonic-gate 2201*0Sstevel@tonic-gate /* 2202*0Sstevel@tonic-gate * segkpm support routines 2203*0Sstevel@tonic-gate */ 2204*0Sstevel@tonic-gate 2205*0Sstevel@tonic-gate static caddr_t 2206*0Sstevel@tonic-gate segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off, 
2207*0Sstevel@tonic-gate struct smap *smp, enum seg_rw rw) 2208*0Sstevel@tonic-gate { 2209*0Sstevel@tonic-gate caddr_t base; 2210*0Sstevel@tonic-gate page_t *pp; 2211*0Sstevel@tonic-gate int newpage = 0; 2212*0Sstevel@tonic-gate struct kpme *kpme; 2213*0Sstevel@tonic-gate 2214*0Sstevel@tonic-gate ASSERT(smp->sm_refcnt > 0); 2215*0Sstevel@tonic-gate 2216*0Sstevel@tonic-gate if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) { 2217*0Sstevel@tonic-gate kmutex_t *smtx; 2218*0Sstevel@tonic-gate 2219*0Sstevel@tonic-gate base = segkpm_create_va(off); 2220*0Sstevel@tonic-gate 2221*0Sstevel@tonic-gate if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT, 2222*0Sstevel@tonic-gate seg, base)) == NULL) { 2223*0Sstevel@tonic-gate panic("segmap_pagecreate_kpm: " 2224*0Sstevel@tonic-gate "page_create failed"); 2225*0Sstevel@tonic-gate /*NOTREACHED*/ 2226*0Sstevel@tonic-gate } 2227*0Sstevel@tonic-gate 2228*0Sstevel@tonic-gate newpage = 1; 2229*0Sstevel@tonic-gate page_io_unlock(pp); 2230*0Sstevel@tonic-gate ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 2231*0Sstevel@tonic-gate 2232*0Sstevel@tonic-gate /* 2233*0Sstevel@tonic-gate * Mark this here until the following segmap_pagecreate 2234*0Sstevel@tonic-gate * or segmap_release. 2235*0Sstevel@tonic-gate */ 2236*0Sstevel@tonic-gate smtx = SMAPMTX(smp); 2237*0Sstevel@tonic-gate mutex_enter(smtx); 2238*0Sstevel@tonic-gate smp->sm_flags |= SM_KPM_NEWPAGE; 2239*0Sstevel@tonic-gate mutex_exit(smtx); 2240*0Sstevel@tonic-gate } 2241*0Sstevel@tonic-gate 2242*0Sstevel@tonic-gate kpme = GET_KPME(smp); 2243*0Sstevel@tonic-gate if (!newpage && kpme->kpe_page == pp) 2244*0Sstevel@tonic-gate base = hat_kpm_page2va(pp, 0); 2245*0Sstevel@tonic-gate else 2246*0Sstevel@tonic-gate base = hat_kpm_mapin(pp, kpme); 2247*0Sstevel@tonic-gate 2248*0Sstevel@tonic-gate /* 2249*0Sstevel@tonic-gate * FS code may decide not to call segmap_pagecreate and we 2250*0Sstevel@tonic-gate * don't invoke segmap_fault via TLB miss, so we have to set 2251*0Sstevel@tonic-gate * ref and mod bits in advance. 2252*0Sstevel@tonic-gate */ 2253*0Sstevel@tonic-gate if (rw == S_WRITE) { 2254*0Sstevel@tonic-gate hat_setrefmod(pp); 2255*0Sstevel@tonic-gate } else { 2256*0Sstevel@tonic-gate ASSERT(rw == S_READ); 2257*0Sstevel@tonic-gate hat_setref(pp); 2258*0Sstevel@tonic-gate } 2259*0Sstevel@tonic-gate 2260*0Sstevel@tonic-gate smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++; 2261*0Sstevel@tonic-gate 2262*0Sstevel@tonic-gate return (base); 2263*0Sstevel@tonic-gate } 2264*0Sstevel@tonic-gate 2265*0Sstevel@tonic-gate /* 2266*0Sstevel@tonic-gate * Find the smap structure corresponding to the 2267*0Sstevel@tonic-gate * KPM addr and return it locked. 
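 * (Returns NULL if no smap is currently hashed for the page; when
 * ppp is non-NULL it is set to the page on success, or to NULL when
 * the lookup fails.)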
2268*0Sstevel@tonic-gate */ 2269*0Sstevel@tonic-gate struct smap * 2270*0Sstevel@tonic-gate get_smap_kpm(caddr_t addr, page_t **ppp) 2271*0Sstevel@tonic-gate { 2272*0Sstevel@tonic-gate struct smap *smp; 2273*0Sstevel@tonic-gate struct vnode *vp; 2274*0Sstevel@tonic-gate u_offset_t offset; 2275*0Sstevel@tonic-gate caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK); 2276*0Sstevel@tonic-gate int hashid; 2277*0Sstevel@tonic-gate kmutex_t *hashmtx; 2278*0Sstevel@tonic-gate page_t *pp; 2279*0Sstevel@tonic-gate union segmap_cpu *scpu; 2280*0Sstevel@tonic-gate 2281*0Sstevel@tonic-gate pp = hat_kpm_vaddr2page(baseaddr); 2282*0Sstevel@tonic-gate 2283*0Sstevel@tonic-gate ASSERT(pp && !PP_ISFREE(pp)); 2284*0Sstevel@tonic-gate ASSERT(PAGE_LOCKED(pp)); 2285*0Sstevel@tonic-gate ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0); 2286*0Sstevel@tonic-gate 2287*0Sstevel@tonic-gate vp = pp->p_vnode; 2288*0Sstevel@tonic-gate offset = pp->p_offset; 2289*0Sstevel@tonic-gate ASSERT(vp != NULL); 2290*0Sstevel@tonic-gate 2291*0Sstevel@tonic-gate /* 2292*0Sstevel@tonic-gate * Assume the last smap used on this cpu is the one needed. 2293*0Sstevel@tonic-gate */ 2294*0Sstevel@tonic-gate scpu = smd_cpu+CPU->cpu_seqid; 2295*0Sstevel@tonic-gate smp = scpu->scpu.scpu_last_smap; 2296*0Sstevel@tonic-gate mutex_enter(&smp->sm_mtx); 2297*0Sstevel@tonic-gate if (smp->sm_vp == vp && smp->sm_off == offset) { 2298*0Sstevel@tonic-gate ASSERT(smp->sm_refcnt > 0); 2299*0Sstevel@tonic-gate } else { 2300*0Sstevel@tonic-gate /* 2301*0Sstevel@tonic-gate * Assumption wrong, find the smap on the hash chain. 2302*0Sstevel@tonic-gate */ 2303*0Sstevel@tonic-gate mutex_exit(&smp->sm_mtx); 2304*0Sstevel@tonic-gate SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */ 2305*0Sstevel@tonic-gate hashmtx = SHASHMTX(hashid); 2306*0Sstevel@tonic-gate 2307*0Sstevel@tonic-gate mutex_enter(hashmtx); 2308*0Sstevel@tonic-gate smp = smd_hash[hashid].sh_hash_list; 2309*0Sstevel@tonic-gate for (; smp != NULL; smp = smp->sm_hash) { 2310*0Sstevel@tonic-gate if (smp->sm_vp == vp && smp->sm_off == offset) 2311*0Sstevel@tonic-gate break; 2312*0Sstevel@tonic-gate } 2313*0Sstevel@tonic-gate mutex_exit(hashmtx); 2314*0Sstevel@tonic-gate if (smp) { 2315*0Sstevel@tonic-gate mutex_enter(&smp->sm_mtx); 2316*0Sstevel@tonic-gate ASSERT(smp->sm_vp == vp && smp->sm_off == offset); 2317*0Sstevel@tonic-gate } 2318*0Sstevel@tonic-gate } 2319*0Sstevel@tonic-gate 2320*0Sstevel@tonic-gate if (ppp) 2321*0Sstevel@tonic-gate *ppp = smp ? pp : NULL; 2322*0Sstevel@tonic-gate 2323*0Sstevel@tonic-gate return (smp); 2324*0Sstevel@tonic-gate } 2325*0Sstevel@tonic-gate 2326*0Sstevel@tonic-gate #else /* SEGKPM_SUPPORT */ 2327*0Sstevel@tonic-gate 2328*0Sstevel@tonic-gate /* segkpm stubs */ 2329*0Sstevel@tonic-gate 2330*0Sstevel@tonic-gate /*ARGSUSED*/ 2331*0Sstevel@tonic-gate static caddr_t 2332*0Sstevel@tonic-gate segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off, 2333*0Sstevel@tonic-gate struct smap *smp, enum seg_rw rw) 2334*0Sstevel@tonic-gate { 2335*0Sstevel@tonic-gate return (NULL); 2336*0Sstevel@tonic-gate } 2337*0Sstevel@tonic-gate 2338*0Sstevel@tonic-gate /*ARGSUSED*/ 2339*0Sstevel@tonic-gate struct smap * 2340*0Sstevel@tonic-gate get_smap_kpm(caddr_t addr, page_t **ppp) 2341*0Sstevel@tonic-gate { 2342*0Sstevel@tonic-gate return (NULL); 2343*0Sstevel@tonic-gate } 2344*0Sstevel@tonic-gate 2345*0Sstevel@tonic-gate #endif /* SEGKPM_SUPPORT */ 2346
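/*
 * Usage sketches: how a hypothetical uio-based file system caller
 * might drive the public segmap operations above.  These are only
 * illustrative -- the uio handling, flag choices and error policy are
 * assumptions modeled on typical fs read/write paths, not code from
 * this file.
 *
 * Read path -- map a window over (vp, off), copy out, release:
 *
 *	u_offset_t delta = off & (u_offset_t)MAXBOFFSET;
 *	size_t n = MIN(MAXBSIZE - delta, uiop->uio_resid);
 *	caddr_t base;
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);
 *	error = uiomove(base + delta, n, UIO_READ, uiop);
 *	(void) segmap_release(segkmap, base, 0);
 *
 * Write path -- create the pages without reading them in, copy in,
 * unlock any newly created pages, then push the data out:
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, 0, S_WRITE);
 *	newpage = segmap_pagecreate(segkmap, base + delta, n, 0);
 *	error = uiomove(base + delta, n, UIO_WRITE, uiop);
 *	if (newpage)
 *		segmap_pageunlock(segkmap, base + delta, n, S_WRITE);
 *	error = segmap_release(segkmap, base, error ? SM_INVAL : SM_WRITE);
 */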