/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * UNIX machine dependent virtual memory support.
 */

#include <sys/vm.h>
#include <sys/exec.h>

#include <sys/exechdr.h>
#include <vm/seg_kmem.h>
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kdi.h>
#include <sys/cpu_module.h>

#include <vm/hat_sfmmu.h>

#include <sys/memnode.h>

#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <vm/vm_dep.h>
#include <sys/platform_module.h>

/*
 * These variables are set by module specific config routines.
 * They are only set by modules which will use physical cache page coloring
 * and/or virtual cache page coloring.
 */
int do_pg_coloring = 0;
int do_virtual_coloring = 0;

/*
 * These variables can be conveniently patched at kernel load time to
 * prevent do_pg_coloring or do_virtual_coloring from being enabled by
 * module specific config routines.
 */

int use_page_coloring = 1;
int use_virtual_coloring = 1;

/*
 * initialized by page_coloring_init()
 */
extern uint_t page_colors;
extern uint_t page_colors_mask;
extern uint_t page_coloring_shift;
int cpu_page_colors;
uint_t vac_colors = 0;
uint_t vac_colors_mask = 0;

/*
 * get the ecache setsize for the current cpu.
 */
#define	CPUSETSIZE()	(cpunodes[CPU->cpu_id].ecache_setsize)

#ifdef DEBUG
plcnt_t	plcnt;		/* page list count */
#endif

/*
 * This variable is set by the cpu module to contain the lowest
 * address not affected by the SF_ERRATA_57 workaround.  It should
 * remain 0 if the workaround is not needed.
 */
#if defined(SF_ERRATA_57)
caddr_t errata57_limit;
#endif

extern int disable_auto_large_pages;	/* used by map_pgsz*() routines */

extern void page_relocate_hash(page_t *, page_t *);

/*
 * these must be defined in platform specific areas
 */
extern void map_addr_proc(caddr_t *, size_t, offset_t, int, caddr_t,
    struct proc *, uint_t);
extern page_t *page_get_freelist(struct vnode *, u_offset_t, struct seg *,
    caddr_t, size_t, uint_t, struct lgrp *);

/*
 * Convert page frame number to an OBMEM page frame number
 * (i.e. put in the type bits -- zero for this implementation)
 */
pfn_t
impl_obmem_pfnum(pfn_t pf)
{
        return (pf);
}

/*
 * Use physmax to determine the highest physical page of DRAM memory.
 * It is assumed that any physical address above physmax is in IO space.
 * We don't bother checking the low end because we assume that memory space
 * begins at physical page frame 0.
 *
 * Return 1 if the page frame is onboard DRAM memory, else 0.
 * Returns 0 for nvram so it won't be cached.
 */
int
pf_is_memory(pfn_t pf)
{
        /* We must be IO space */
        if (pf > physmax)
                return (0);

        /* We must be memory space */
        return (1);
}

/*
 * Handle a pagefault.
 */
faultcode_t
pagefault(caddr_t addr, enum fault_type type, enum seg_rw rw, int iskernel)
{
        struct as *as;
        struct proc *p;
        faultcode_t res;
        caddr_t base;
        size_t len;
        int err;

        if (INVALID_VADDR(addr))
                return (FC_NOMAP);

        if (iskernel) {
                as = &kas;
        } else {
                p = curproc;
                as = p->p_as;
#if defined(SF_ERRATA_57)
                /*
                 * Prevent infinite loops due to a segment driver
                 * setting the execute permissions and the sfmmu hat
                 * silently ignoring them.
                 */
                if (rw == S_EXEC && AS_TYPE_64BIT(as) &&
                    addr < errata57_limit) {
                        res = FC_NOMAP;
                        goto out;
                }
#endif
        }

        /*
         * Dispatch pagefault.
         */
        res = as_fault(as->a_hat, as, addr, 1, type, rw);

        /*
         * If this isn't a potential unmapped hole in the user's
         * UNIX data or stack segments, just return status info.
         */
        if (!(res == FC_NOMAP && iskernel == 0))
                goto out;

        /*
         * Check to see if we happened to fault on a currently unmapped
         * part of the UNIX data or stack segments.  If so, create a zfod
         * mapping there and then try calling the fault routine again.
         */
        base = p->p_brkbase;
        len = p->p_brksize;

        if (addr < base || addr >= base + len) {        /* data seg? */
                base = (caddr_t)(p->p_usrstack - p->p_stksize);
                len = p->p_stksize;
                if (addr < base || addr >= p->p_usrstack) {     /* stack seg? */
                        /* not in either UNIX data or stack segments */
                        res = FC_NOMAP;
                        goto out;
                }
        }

        /*
         * The rest of this function implements 3.X/4.X/5.X compatibility.
         * This code is probably not needed anymore.
         */

        /* expand the gap to the page boundaries on each side */
        len = (((uintptr_t)base + len + PAGEOFFSET) & PAGEMASK) -
            ((uintptr_t)base & PAGEMASK);
        base = (caddr_t)((uintptr_t)base & PAGEMASK);

        as_rangelock(as);
        as_purge(as);
        if (as_gap(as, PAGESIZE, &base, &len, AH_CONTAIN, addr) == 0) {
                err = as_map(as, base, len, segvn_create, zfod_argsp);
                as_rangeunlock(as);
                if (err) {
                        res = FC_MAKE_ERR(err);
                        goto out;
                }
        } else {
                /*
                 * This page is already mapped by another thread after we
                 * returned from as_fault() above.  We just fall through
                 * to as_fault() below.
                 */
                as_rangeunlock(as);
        }

        res = as_fault(as->a_hat, as, addr, 1, F_INVAL, rw);

out:

        return (res);
}

/*
 * This is the routine which defines the address limit implied
 * by the flag '_MAP_LOW32'.  USERLIMIT32 matches the highest
 * mappable address in a 32-bit process on this platform (though
 * perhaps we should make it be UINT32_MAX here?)
 */
void
map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
{
        struct proc *p = curproc;
        caddr_t userlimit = flags & _MAP_LOW32 ?
            (caddr_t)USERLIMIT32 : p->p_as->a_userlimit;
        map_addr_proc(addrp, len, off, vacalign, userlimit, p, flags);
}

/*
 * Some V9 CPUs have holes in the middle of the 64-bit virtual address range.
 */
caddr_t hole_start, hole_end;

/*
 * kpm mapping window
 */
caddr_t kpm_vbase;
size_t  kpm_size;
uchar_t kpm_size_shift;

/*
 * Determine whether [base, base+len] contains a mappable range of
 * addresses at least minlen long.  base and len are adjusted if
 * required to provide a mappable range.
 */
/* ARGSUSED */
int
valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
{
        caddr_t hi, lo;

        lo = *basep;
        hi = lo + *lenp;

        /*
         * If hi rolled over the top, try cutting back.
         */
        if (hi < lo) {
                size_t newlen = 0 - (uintptr_t)lo - 1l;

                if (newlen + (uintptr_t)hi < minlen)
                        return (0);
                if (newlen < minlen)
                        return (0);
                *lenp = newlen;
        } else if (hi - lo < minlen)
                return (0);

        /*
         * Deal with a possible hole in the address range between
         * hole_start and hole_end that should never be mapped by the MMU.
         */
        hi = lo + *lenp;

        if (lo < hole_start) {
                if (hi > hole_start)
                        if (hi < hole_end)
                                hi = hole_start;
                        else
                                /* lo < hole_start && hi >= hole_end */
                                if (dir == AH_LO) {
                                        /*
                                         * prefer lowest range
                                         */
                                        if (hole_start - lo >= minlen)
                                                hi = hole_start;
                                        else if (hi - hole_end >= minlen)
                                                lo = hole_end;
                                        else
                                                return (0);
                                } else {
                                        /*
                                         * prefer highest range
                                         */
                                        if (hi - hole_end >= minlen)
                                                lo = hole_end;
                                        else if (hole_start - lo >= minlen)
                                                hi = hole_start;
                                        else
                                                return (0);
                                }
        } else {
                /* lo >= hole_start */
                if (hi < hole_end)
                        return (0);
                if (lo < hole_end)
                        lo = hole_end;
        }

        if (hi - lo < minlen)
                return (0);

        *basep = lo;
        *lenp = hi - lo;

        return (1);
}

/*
 * Determine whether [addr, addr+len] with protections `prot' are valid
 * for a user address space.
 */
/*ARGSUSED*/
int
valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
    caddr_t userlimit)
{
        caddr_t eaddr = addr + len;

        if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
                return (RANGE_BADADDR);

        /*
         * Determine if the address range falls within an illegal
         * range of the MMU.
         */
        if (eaddr > hole_start && addr < hole_end)
                return (RANGE_BADADDR);

#if defined(SF_ERRATA_57)
        /*
         * Make sure USERLIMIT isn't raised too high
         */
        ASSERT64(addr <= (caddr_t)0xffffffff80000000ul ||
            errata57_limit == 0);

        if (AS_TYPE_64BIT(as) &&
            (addr < errata57_limit) &&
            (prot & PROT_EXEC))
                return (RANGE_BADPROT);
#endif /* SF_ERRATA_57 */
        return (RANGE_OKAY);
}

/*
 * Routine used to check to see if an a.out can be executed
 * by the current machine/architecture.
 */
int
chkaout(struct exdata *exp)
{
        if (exp->ux_mach == M_SPARC)
                return (0);
        else
                return (ENOEXEC);
}

/*
 * The following functions return information about an a.out
 * which is used when a program is executed.
 */

/*
 * Return the load memory address for the data segment.
 */
caddr_t
getdmem(struct exec *exp)
{
        /*
         * XXX - Sparc Reference Hack approaching
         * Remember that we are loading
         * 8k executables into a 4k machine
         * DATA_ALIGN == 2 * PAGESIZE
         */
        if (exp->a_text)
                return ((caddr_t)(roundup(USRTEXT + exp->a_text, DATA_ALIGN)));
        else
                return ((caddr_t)USRTEXT);
}

/*
 * Return the starting disk address for the data segment.
 */
ulong_t
getdfile(struct exec *exp)
{
        if (exp->a_magic == ZMAGIC)
                return (exp->a_text);
        else
                return (sizeof (struct exec) + exp->a_text);
}

/*
 * Return the load memory address for the text segment.
 */

/*ARGSUSED*/
caddr_t
gettmem(struct exec *exp)
{
        return ((caddr_t)USRTEXT);
}

/*
 * Return the file byte offset for the text segment.
 */
uint_t
gettfile(struct exec *exp)
{
        if (exp->a_magic == ZMAGIC)
                return (0);
        else
                return (sizeof (struct exec));
}

void
getexinfo(
        struct exdata *edp_in,
        struct exdata *edp_out,
        int *pagetext,
        int *pagedata)
{
        *edp_out = *edp_in;     /* structure copy */

        if ((edp_in->ux_mag == ZMAGIC) &&
            ((edp_in->vp->v_flag & VNOMAP) == 0)) {
                *pagetext = 1;
                *pagedata = 1;
        } else {
                *pagetext = 0;
                *pagedata = 0;
        }
}

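/*
 * Helper used by the map_pgsz*() routines below: scan page size codes
 * from (upper) down to (lower + 1), skip any size disabled in
 * disable_auto_large_pages, and set (pgsz) to the largest remaining
 * page size that does not exceed (len).  If no size qualifies, (pgsz)
 * is left unchanged.
 */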
#define	MAP_PGSZ_COMMON(pgsz, n, upper, lower, len)		\
	for ((n) = (upper); (n) > (lower); (n)--) {		\
		if (disable_auto_large_pages & (1 << (n)))	\
			continue;				\
		if (hw_page_array[(n)].hp_size <= (len)) {	\
			(pgsz) = hw_page_array[(n)].hp_size;	\
			break;					\
		}						\
	}


/*ARGSUSED*/
size_t
map_pgszva(struct proc *p, caddr_t addr, size_t len)
{
        size_t pgsz = MMU_PAGESIZE;
        int n, upper;

        /*
         * Select the best fit page size within the constraints of
         * auto_lpg_{min,max}szc.
         *
         * Note that we also take the heap size into account when
         * deciding if we've crossed the threshold at which we should
         * increase the page size.  This isn't perfect since the heap
         * may not have reached its full size yet, but it's better than
         * not considering it at all.
         */
        len += p->p_brksize;
        if (ptob(auto_lpg_tlb_threshold) <= len) {

                upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc);

                /*
                 * Use auto_lpg_minszc - 1 as the limit so we never drop
                 * below auto_lpg_minszc.  We don't have a size code to refer
                 * to like we have for bss and stack, so we assume 0.
                 * auto_lpg_minszc should always be >= 0.  Using
                 * auto_lpg_minszc cuts off the loop.
                 */
                MAP_PGSZ_COMMON(pgsz, n, upper, auto_lpg_minszc - 1, len);
        }

        return (pgsz);
}

size_t
map_pgszheap(struct proc *p, caddr_t addr, size_t len)
{
        size_t pgsz;
        int n, upper, lower;

        /*
         * If len is zero, retrieve from proc and don't demote the page size.
         */
        if (len == 0) {
                len = p->p_brksize;
        }

        /*
         * Still zero?  Then we don't have a heap yet, so pick the default
         * heap size.
         */
        if (len == 0) {
                pgsz = auto_lpg_heap_default;
        } else {
                pgsz = hw_page_array[p->p_brkpageszc].hp_size;
        }

        if ((pgsz * auto_lpg_tlb_threshold) <= len) {
                /*
                 * We're past the threshold, so select the best fit
                 * page size within the constraints of
                 * auto_lpg_{min,max}szc and the minimum required
                 * alignment.
                 */
                upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc);
                lower = MAX(auto_lpg_minszc - 1, p->p_brkpageszc);
                MAP_PGSZ_COMMON(pgsz, n, upper, lower, len);
        }

        /*
         * If addr == 0 we were called by memcntl() or exec_args() when the
         * size code is 0.  Don't set pgsz less than current size.
         */
        if (addr == 0 && (pgsz < hw_page_array[p->p_brkpageszc].hp_size)) {
                pgsz = hw_page_array[p->p_brkpageszc].hp_size;
        }

        return (pgsz);
}

size_t
map_pgszstk(struct proc *p, caddr_t addr, size_t len)
{
        size_t pgsz;
        int n, upper, lower;

        /*
         * If len is zero, retrieve from proc and don't demote the page size.
         */
        if (len == 0) {
                len = p->p_stksize;
        }

        /*
         * Still zero?  Then we don't have a stack yet, so pick the default
         * stack size.
         */
        if (len == 0) {
                pgsz = auto_lpg_stack_default;
        } else {
                pgsz = hw_page_array[p->p_stkpageszc].hp_size;
        }

        if ((pgsz * auto_lpg_tlb_threshold) <= len) {
                /*
                 * We're past the threshold, so select the best fit
                 * page size within the constraints of
                 * auto_lpg_{min,max}szc and the minimum required
                 * alignment.
                 */
                upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc);
                lower = MAX(auto_lpg_minszc - 1, p->p_brkpageszc);
                MAP_PGSZ_COMMON(pgsz, n, upper, lower, len);
        }

        /*
         * If addr == 0 we were called by memcntl() or exec_args() when the
         * size code is 0.  Don't set pgsz less than current size.
         */
        if (addr == 0 && (pgsz < hw_page_array[p->p_stkpageszc].hp_size)) {
                pgsz = hw_page_array[p->p_stkpageszc].hp_size;
        }

        return (pgsz);
}


/*
 * Return a non-zero value if the address may cause a VAC alias with KPM
 * mappings.  KPM selects an address such that its offset modulo
 * shm_alignment is preserved and assumes it can't be in VAC conflict with
 * any larger than PAGESIZE mapping.
 */
int
map_addr_vacalign_check(caddr_t addr, u_offset_t off)
{
        if (vac) {
                return (((uintptr_t)addr ^ off) & shm_alignment - 1);
        } else {
                return (0);
        }
}

/*
 * use_text_pgsz64k, use_initdata_pgsz64k and use_text_pgsz4m
 * can be set in platform or CPU specific code but user can change the
 * default values via /etc/system.
 *
 * Initial values are defined in architecture specific mach_vm_dep.c file.
 */
extern int use_text_pgsz64k;
extern int use_text_pgsz4m;
extern int use_initdata_pgsz64k;

/*
 * disable_text_largepages and disable_initdata_largepages bitmasks are set
 * in platform or CPU specific code to disable page sizes that should not
 * be used.  These variables normally shouldn't be changed via /etc/system.
 * A particular page size for text or initialized data will be used by
 * default if the corresponding use_* variable is set to 1 AND this page
 * size is not disabled in the corresponding disable_* bitmask variable.
 *
 * Initial values are defined in architecture specific mach_vm_dep.c file.
 */
extern int disable_text_largepages;
extern int disable_initdata_largepages;

/*
 * Minimum segment size tunables before 64K or 4M large pages
 * should be used to map it.
 *
 * Initial values are defined in architecture specific mach_vm_dep.c file.
 */
extern size_t text_pgsz64k_minsize;
extern size_t text_pgsz4m_minsize;
extern size_t initdata_pgsz64k_minsize;

/*
 * Sanity control.  Don't use large pages regardless of user
 * settings if there's less than execseg_lpg_min_physmem memory installed.
 * This variable is in units of 8K pages.
 */
pgcnt_t execseg_lpg_min_physmem = 131072;	/* 1GB */


/* assumes TTE8K...TTE4M == szc */

static uint_t
map_text_pgsz4m(caddr_t addr, size_t len)
{
        caddr_t a;

        if (len < text_pgsz4m_minsize) {
                return (0);
        }

        a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE4M, uintptr_t);
        if (a < addr || a >= addr + len) {
                return (0);
        }
        len -= (a - addr);
        if (len < MMU_PAGESIZE4M) {
                return (0);
        }

        return (1 << TTE4M);
}

static uint_t
map_text_pgsz64k(caddr_t addr, size_t len)
{
        caddr_t a;
        size_t svlen = len;

        if (len < text_pgsz64k_minsize) {
                return (0);
        }

        a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE64K, uintptr_t);
        if (a < addr || a >= addr + len) {
                return (0);
        }
        len -= (a - addr);
        if (len < MMU_PAGESIZE64K) {
                return (0);
        }
        if (!use_text_pgsz4m ||
            disable_text_largepages & (1 << TTE4M)) {
                return (1 << TTE64K);
        }
        if (svlen < text_pgsz4m_minsize) {
                return (1 << TTE64K);
        }
        addr = a;
        a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE4M, uintptr_t);
        if (a < addr || a >= addr + len) {
                return (1 << TTE64K);
        }
        len -= (a - addr);
        if (len < MMU_PAGESIZE4M) {
                return (1 << TTE64K);
        }
        return ((1 << TTE4M) | (1 << TTE64K));
}

static uint_t
map_initdata_pgsz64k(caddr_t addr, size_t len)
{
        caddr_t a;

        if (len < initdata_pgsz64k_minsize) {
                return (0);
        }

        a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE64K, uintptr_t);
        if (a < addr || a >= addr + len) {
                return (0);
        }
        len -= (a - addr);
        if (len < MMU_PAGESIZE64K) {
                return (0);
        }
        return (1 << TTE64K);
}

/*
 * Return a bit vector of large page size codes that
 * can be used to map [addr, addr + len) region.
 */
uint_t
map_execseg_pgszcvec(int text, caddr_t addr, size_t len)
{
        uint_t ret = 0;

        if (physmem < execseg_lpg_min_physmem) {
                return (0);
        }

        if (text) {
                if (use_text_pgsz64k &&
                    !(disable_text_largepages & (1 << TTE64K))) {
                        ret = map_text_pgsz64k(addr, len);
                } else if (use_text_pgsz4m &&
                    !(disable_text_largepages & (1 << TTE4M))) {
                        ret = map_text_pgsz4m(addr, len);
                }
        } else if (use_initdata_pgsz64k &&
            !(disable_initdata_largepages & (1 << TTE64K))) {
                ret = map_initdata_pgsz64k(addr, len);
        }

        return (ret);
}

#define	PNUM_SIZE(size_code)						\
	(hw_page_array[size_code].hp_size >> hw_page_array[0].hp_shift)

/*
 * Anchored in the table below are counters used to keep track
 * of free contiguous physical memory.  Each element of the table contains
 * the array of counters, the size of the array, which is allocated during
 * startup based on physmax, and a shift value used to convert a pagenum
 * into a counter array index or vice versa.  The table has page size
 * for rows and region size for columns:
 *
 *	page_counters[page_size][region_size]
 *
 *	page_size:	TTE size code of pages on page_size freelist.
 *
 *	region_size:	TTE size code of a candidate larger page made up
 *			of contiguous free page_size pages.
 *
 * As you go across a page_size row increasing region_size each
 * element keeps track of how many (region_size - 1) size groups
 * made up of page_size free pages can be coalesced into a
 * region_size page.  Yuck!  Let's try an example:
 *
 *	page_counters[1][3] is the table element used for identifying
 *	candidate 4M pages from contiguous pages off the 64K free list.
 *	Each index in the page_counters[1][3].array spans 4M.  It's the
 *	number of free 512K size (region_size - 1) groups of contiguous
 *	64K free pages.  So when page_counters[1][3].counters[n] == 8
 *	we know we have a candidate 4M page made up of 512K size groups
 *	of 64K free pages.
 */

/*
 * Per page size free lists.  3rd (max_mem_nodes) and 4th (page coloring bins)
 * dimensions are allocated dynamically.
 */
page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];

/*
 * For now there is only a single size cache list.
 * Allocated dynamically.
 */
page_t ***page_cachelists[MAX_MEM_TYPES];

kmutex_t *fpc_mutex[NPC_MUTEX];
kmutex_t *cpc_mutex[NPC_MUTEX];

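/*
 * Lay out the page_cachelists and page_freelists bin headers for memory
 * node 'mnode' in the region starting at 'alloc_base' (rounded up to
 * 'alloc_align'), and return the first aligned address past the space
 * consumed.
 */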
caddr_t
alloc_page_freelists(int mnode, caddr_t alloc_base, int alloc_align)
{
        int     mtype;
        uint_t  szc;

        alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, alloc_align);

        /*
         * We only support small pages in the cachelist.
         */
        for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
                page_cachelists[mtype][mnode] = (page_t **)alloc_base;
                alloc_base += (sizeof (page_t *) * page_colors);
                /*
                 * Allocate freelists bins for all
                 * supported page sizes.
                 */
                for (szc = 0; szc < mmu_page_sizes; szc++) {
                        page_freelists[szc][mtype][mnode] =
                            (page_t **)alloc_base;
                        alloc_base += ((sizeof (page_t *) *
                            page_get_pagecolors(szc)));
                }
        }

        alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, alloc_align);

        return (alloc_base);
}

/*
 * Allocate page_freelists bin headers for a memnode from the
 * nucleus data area.
 * This is the first time that mmu_page_sizes is used during sun4u bootup,
 * so check mmu_page_sizes initialization.
 */
int
ndata_alloc_page_freelists(struct memlist *ndata, int mnode)
{
        size_t alloc_sz;
        caddr_t alloc_base;
        caddr_t end;
        int     mtype;
        uint_t  szc;
        int32_t allp = 0;

        if (&mmu_init_mmu_page_sizes) {
                if (!mmu_init_mmu_page_sizes(allp)) {
                        cmn_err(CE_PANIC, "mmu_page_sizes %d not initialized",
                            mmu_page_sizes);
                }
        }
        ASSERT(mmu_page_sizes >= DEFAULT_MMU_PAGE_SIZES);

        /* first time called - allocate max_mem_nodes dimension */
        if (mnode == 0) {
                int     i;

                /* page_cachelists */
                alloc_sz = MAX_MEM_TYPES * max_mem_nodes *
                    sizeof (page_t **);

                /* page_freelists */
                alloc_sz += MAX_MEM_TYPES * mmu_page_sizes * max_mem_nodes *
                    sizeof (page_t **);

                /* fpc_mutex and cpc_mutex */
                alloc_sz += 2 * NPC_MUTEX * max_mem_nodes * sizeof (kmutex_t);

                alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
                if (alloc_base == NULL)
                        return (-1);

                ASSERT(((uintptr_t)alloc_base & (ecache_alignsize - 1)) == 0);

                for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
                        page_cachelists[mtype] = (page_t ***)alloc_base;
                        alloc_base += (max_mem_nodes * sizeof (page_t **));
                        for (szc = 0; szc < mmu_page_sizes; szc++) {
                                page_freelists[szc][mtype] =
                                    (page_t ***)alloc_base;
                                alloc_base += (max_mem_nodes *
                                    sizeof (page_t **));
                        }
                }
                for (i = 0; i < NPC_MUTEX; i++) {
                        fpc_mutex[i] = (kmutex_t *)alloc_base;
                        alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
                        cpc_mutex[i] = (kmutex_t *)alloc_base;
                        alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
                }
                alloc_sz = 0;
        }

        /*
         * Calculate the size needed by alloc_page_freelists().
         */
        for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
                alloc_sz += sizeof (page_t *) * page_colors;

                for (szc = 0; szc < mmu_page_sizes; szc++)
                        alloc_sz += sizeof (page_t *) *
                            page_get_pagecolors(szc);
        }

        alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
        if (alloc_base == NULL)
                return (-1);

        end = alloc_page_freelists(mnode, alloc_base, ecache_alignsize);
        ASSERT((uintptr_t)end == roundup((uintptr_t)alloc_base + alloc_sz,
            ecache_alignsize));

        return (0);
}

/*
 * To select our starting bin, we stride through the bins with a stride
 * of 337.  Why 337?  It's prime, it's largeish, and it performs well both
 * in simulation and practice for different workloads on varying cache sizes.
 */
uint32_t color_start_current = 0;
uint32_t color_start_stride = 337;
int color_start_random = 0;

/* ARGSUSED */
uint_t
get_color_start(struct as *as)
{
        uint32_t old, new;

        if (consistent_coloring == 2 || color_start_random) {
                return ((uint_t)(((gettick()) << (vac_shift - MMU_PAGESHIFT)) &
                    page_colors_mask));
        }

        do {
                old = color_start_current;
                new = old +
                    (color_start_stride << (vac_shift - MMU_PAGESHIFT));
        } while (cas32(&color_start_current, old, new) != old);

        return ((uint_t)(new));
}

/*
 * Called once at startup from kphysm_init() -- before memialloc()
 * is invoked to do the 1st page_free()/page_freelist_add().
 *
 * Initializes page_colors and page_colors_mask based on ecache_setsize.
 *
 * Also initializes the counter locks.
 */
void
page_coloring_init()
{
        int a;

        if (do_pg_coloring == 0) {
                page_colors = 1;
                return;
        }

        /*
         * Calculate page_colors from ecache_setsize.  ecache_setsize contains
         * the max ecache setsize of all cpus configured in the system or, for
         * cheetah+ systems, the max possible ecache setsize for all possible
         * cheetah+ cpus.
         */
        page_colors = ecache_setsize / MMU_PAGESIZE;
        page_colors_mask = page_colors - 1;

        /*
         * Initialize cpu_page_colors if ecache setsizes are homogeneous.
         * cpu_page_colors is set to -1 during DR operation or during startup
         * if setsizes are heterogeneous.
         *
         * The value of cpu_page_colors determines if additional color bins
         * need to be checked for a particular color in the page_get routines.
         */
        if ((cpu_page_colors == 0) && (cpu_setsize < ecache_setsize))
                cpu_page_colors = cpu_setsize / MMU_PAGESIZE;

        vac_colors = vac_size / MMU_PAGESIZE;
        vac_colors_mask = vac_colors - 1;

        page_coloring_shift = 0;
        a = ecache_setsize;
        while (a >>= 1) {
                page_coloring_shift++;
        }
}

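/*
 * Return the VAC color of the buffer, expressed as a byte offset
 * (ptob(color)), derived from the buffer's pages for B_PAGEIO buffers or
 * from its kernel address otherwise.  Returns 0 when there is no virtual
 * cache or no color can be determined.
 */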
int
bp_color(struct buf *bp)
{
        int color = -1;

        if (vac) {
                if ((bp->b_flags & B_PAGEIO) != 0) {
                        color = sfmmu_get_ppvcolor(bp->b_pages);
                } else if (bp->b_un.b_addr != NULL) {
                        color = sfmmu_get_addrvcolor(bp->b_un.b_addr);
                }
        }
        return (color < 0 ? 0 : ptob(color));
}

/*
 * Create and initialize the pageout scanner thread.  The thread starts
 * at the given procedure, belonging to process pp, with priority pri.
 */
void
pageout_init(void (*procedure)(), proc_t *pp, pri_t pri)
{
        (void) thread_create(NULL, 0, procedure, NULL, 0, pp, TS_RUN, pri);
}

/*
 * Function for flushing D-cache when performing module relocations
 * to an alternate mapping.  Stubbed out on all platforms except sun4u,
 * at least for now.
 */
void
dcache_flushall()
{
        sfmmu_cache_flushall();
}

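/*
 * Return 1 if the ranges [va1, va1 + sz1) and [va2, va2 + sz2) overlap,
 * and 0 if they are disjoint.
 */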
static int
kdi_range_overlap(uintptr_t va1, size_t sz1, uintptr_t va2, size_t sz2)
{
        if (va1 < va2 && va1 + sz1 <= va2)
                return (0);

        if (va2 < va1 && va2 + sz2 <= va1)
                return (0);

        return (1);
}

/*
 * Return the number of bytes, relative to the beginning of a given range, that
 * are non-toxic (can be read from and written to with relative impunity).
 */
size_t
kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
{
        /* OBP reads are harmless, but we don't want people writing there */
        if (write && kdi_range_overlap(va, sz, OFW_START_ADDR, OFW_END_ADDR -
            OFW_START_ADDR + 1))
                return (va < OFW_START_ADDR ? OFW_START_ADDR - va : 0);

        if (kdi_range_overlap(va, sz, PIOMAPBASE, PIOMAPSIZE))
                return (va < PIOMAPBASE ? PIOMAPBASE - va : 0);

        return (sz); /* no overlap */
}

/*
 * Minimum physmem required for enabling large pages for the kernel heap.
 * Currently we do not enable large pages for kmem on systems with less
 * than 1GB of memory.  This value can be changed via /etc/system.
 */
size_t segkmem_lpminphysmem = 0x40000000;	/* 1GB */

/*
 * This function chooses the large page size for the kernel heap.
 */
size_t
get_segkmem_lpsize(size_t lpsize)
{
        size_t memtotal = physmem * PAGESIZE;

        if (memtotal < segkmem_lpminphysmem)
                return (PAGESIZE);

        if (plat_lpkmem_is_supported != NULL &&
            plat_lpkmem_is_supported() == 0)
                return (PAGESIZE);

        return (mmu_get_kernel_lpsize(lpsize));
}