10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51443Skchow * Common Development and Distribution License (the "License"). 61443Skchow * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 221373Skchow * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 270Sstevel@tonic-gate /* All Rights Reserved */ 280Sstevel@tonic-gate 290Sstevel@tonic-gate /* 300Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 4.3 BSD 310Sstevel@tonic-gate * under license from the Regents of the University of California. 320Sstevel@tonic-gate */ 330Sstevel@tonic-gate 340Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 350Sstevel@tonic-gate 360Sstevel@tonic-gate /* 370Sstevel@tonic-gate * UNIX machine dependent virtual memory support. 
380Sstevel@tonic-gate */ 390Sstevel@tonic-gate 400Sstevel@tonic-gate #include <sys/types.h> 410Sstevel@tonic-gate #include <sys/param.h> 420Sstevel@tonic-gate #include <sys/systm.h> 430Sstevel@tonic-gate #include <sys/user.h> 440Sstevel@tonic-gate #include <sys/proc.h> 450Sstevel@tonic-gate #include <sys/kmem.h> 460Sstevel@tonic-gate #include <sys/vmem.h> 470Sstevel@tonic-gate #include <sys/buf.h> 480Sstevel@tonic-gate #include <sys/cpuvar.h> 490Sstevel@tonic-gate #include <sys/lgrp.h> 500Sstevel@tonic-gate #include <sys/disp.h> 510Sstevel@tonic-gate #include <sys/vm.h> 520Sstevel@tonic-gate #include <sys/mman.h> 530Sstevel@tonic-gate #include <sys/vnode.h> 540Sstevel@tonic-gate #include <sys/cred.h> 550Sstevel@tonic-gate #include <sys/exec.h> 560Sstevel@tonic-gate #include <sys/exechdr.h> 570Sstevel@tonic-gate #include <sys/debug.h> 582991Ssusans #include <sys/vmsystm.h> 590Sstevel@tonic-gate 600Sstevel@tonic-gate #include <vm/hat.h> 610Sstevel@tonic-gate #include <vm/as.h> 620Sstevel@tonic-gate #include <vm/seg.h> 630Sstevel@tonic-gate #include <vm/seg_kp.h> 640Sstevel@tonic-gate #include <vm/seg_vn.h> 650Sstevel@tonic-gate #include <vm/page.h> 660Sstevel@tonic-gate #include <vm/seg_kmem.h> 670Sstevel@tonic-gate #include <vm/seg_kpm.h> 680Sstevel@tonic-gate #include <vm/vm_dep.h> 690Sstevel@tonic-gate 700Sstevel@tonic-gate #include <sys/cpu.h> 710Sstevel@tonic-gate #include <sys/vm_machparam.h> 720Sstevel@tonic-gate #include <sys/memlist.h> 730Sstevel@tonic-gate #include <sys/bootconf.h> /* XXX the memlist stuff belongs in memlist_plat.h */ 740Sstevel@tonic-gate #include <vm/hat_i86.h> 750Sstevel@tonic-gate #include <sys/x86_archext.h> 760Sstevel@tonic-gate #include <sys/elf_386.h> 770Sstevel@tonic-gate #include <sys/cmn_err.h> 780Sstevel@tonic-gate #include <sys/archsystm.h> 790Sstevel@tonic-gate #include <sys/machsystm.h> 800Sstevel@tonic-gate 810Sstevel@tonic-gate #include <sys/vtrace.h> 820Sstevel@tonic-gate #include <sys/ddidmareq.h> 830Sstevel@tonic-gate #include <sys/promif.h> 840Sstevel@tonic-gate #include <sys/memnode.h> 850Sstevel@tonic-gate #include <sys/stack.h> 860Sstevel@tonic-gate 872961Sdp78419 uint_t vac_colors = 1; 880Sstevel@tonic-gate 890Sstevel@tonic-gate int largepagesupport = 0; 900Sstevel@tonic-gate extern uint_t page_create_new; 910Sstevel@tonic-gate extern uint_t page_create_exists; 920Sstevel@tonic-gate extern uint_t page_create_putbacks; 930Sstevel@tonic-gate extern uint_t page_create_putbacks; 940Sstevel@tonic-gate extern uintptr_t eprom_kernelbase; 950Sstevel@tonic-gate extern int use_sse_pagecopy, use_sse_pagezero; /* in ml/float.s */ 960Sstevel@tonic-gate 970Sstevel@tonic-gate /* 4g memory management */ 980Sstevel@tonic-gate pgcnt_t maxmem4g; 990Sstevel@tonic-gate pgcnt_t freemem4g; 1000Sstevel@tonic-gate int physmax4g; 1010Sstevel@tonic-gate int desfree4gshift = 4; /* maxmem4g shift to derive DESFREE4G */ 1020Sstevel@tonic-gate int lotsfree4gshift = 3; 1030Sstevel@tonic-gate 1041385Skchow /* 16m memory management: desired number of free pages below 16m. 
*/ 1051385Skchow pgcnt_t desfree16m = 0x380; 1061385Skchow 1070Sstevel@tonic-gate #ifdef VM_STATS 1080Sstevel@tonic-gate struct { 1090Sstevel@tonic-gate ulong_t pga_alloc; 1100Sstevel@tonic-gate ulong_t pga_notfullrange; 1110Sstevel@tonic-gate ulong_t pga_nulldmaattr; 1120Sstevel@tonic-gate ulong_t pga_allocok; 1130Sstevel@tonic-gate ulong_t pga_allocfailed; 1140Sstevel@tonic-gate ulong_t pgma_alloc; 1150Sstevel@tonic-gate ulong_t pgma_allocok; 1160Sstevel@tonic-gate ulong_t pgma_allocfailed; 1170Sstevel@tonic-gate ulong_t pgma_allocempty; 1180Sstevel@tonic-gate } pga_vmstats; 1190Sstevel@tonic-gate #endif 1200Sstevel@tonic-gate 1210Sstevel@tonic-gate uint_t mmu_page_sizes; 1220Sstevel@tonic-gate 1230Sstevel@tonic-gate /* How many page sizes the users can see */ 1240Sstevel@tonic-gate uint_t mmu_exported_page_sizes; 1250Sstevel@tonic-gate 126423Sdavemq /* 127423Sdavemq * Number of pages in 1 GB. Don't enable automatic large pages if we have 128423Sdavemq * fewer than this many pages. 129423Sdavemq */ 1302991Ssusans pgcnt_t shm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); 1312991Ssusans pgcnt_t privm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); 1322991Ssusans 1332991Ssusans /* 1342991Ssusans * Maximum and default segment size tunables for user private 1352991Ssusans * and shared anon memory, and user text and initialized data. 1362991Ssusans * These can be patched via /etc/system to allow large pages 1372991Ssusans * to be used for mapping application private and shared anon memory. 1382991Ssusans */ 1392991Ssusans size_t mcntl0_lpsize = MMU_PAGESIZE; 1402991Ssusans size_t max_uheap_lpsize = MMU_PAGESIZE; 1412991Ssusans size_t default_uheap_lpsize = MMU_PAGESIZE; 1422991Ssusans size_t max_ustack_lpsize = MMU_PAGESIZE; 1432991Ssusans size_t default_ustack_lpsize = MMU_PAGESIZE; 1442991Ssusans size_t max_privmap_lpsize = MMU_PAGESIZE; 1452991Ssusans size_t max_uidata_lpsize = MMU_PAGESIZE; 1462991Ssusans size_t max_utext_lpsize = MMU_PAGESIZE; 1472991Ssusans size_t max_shm_lpsize = MMU_PAGESIZE; 1480Sstevel@tonic-gate 1490Sstevel@tonic-gate /* 1500Sstevel@tonic-gate * Return the optimum page size for a given mapping 1510Sstevel@tonic-gate */ 1520Sstevel@tonic-gate /*ARGSUSED*/ 1530Sstevel@tonic-gate size_t 1542991Ssusans map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl) 1550Sstevel@tonic-gate { 1562991Ssusans level_t l = 0; 1572991Ssusans size_t pgsz = MMU_PAGESIZE; 1582991Ssusans size_t max_lpsize; 1592991Ssusans uint_t mszc; 1600Sstevel@tonic-gate 1612991Ssusans ASSERT(maptype != MAPPGSZ_VA); 1622991Ssusans 1632991Ssusans if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) { 1642991Ssusans return (MMU_PAGESIZE); 1652991Ssusans } 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate switch (maptype) { 1682991Ssusans case MAPPGSZ_HEAP: 1690Sstevel@tonic-gate case MAPPGSZ_STK: 1702991Ssusans max_lpsize = memcntl ? mcntl0_lpsize : (maptype == 1712991Ssusans MAPPGSZ_HEAP ? max_uheap_lpsize : max_ustack_lpsize); 1722991Ssusans if (max_lpsize == MMU_PAGESIZE) { 1732991Ssusans return (MMU_PAGESIZE); 1742991Ssusans } 1752991Ssusans if (len == 0) { 1762991Ssusans len = (maptype == MAPPGSZ_HEAP) ? p->p_brkbase + 1772991Ssusans p->p_brksize - p->p_bssbase : p->p_stksize; 1782991Ssusans } 1792991Ssusans len = (maptype == MAPPGSZ_HEAP) ? 
MAX(len, 1802991Ssusans default_uheap_lpsize) : MAX(len, default_ustack_lpsize); 1812991Ssusans 1820Sstevel@tonic-gate /* 1830Sstevel@tonic-gate * use the pages size that best fits len 1840Sstevel@tonic-gate */ 1850Sstevel@tonic-gate for (l = mmu.max_page_level; l > 0; --l) { 1862991Ssusans if (LEVEL_SIZE(l) > max_lpsize || len < LEVEL_SIZE(l)) { 1870Sstevel@tonic-gate continue; 1882991Ssusans } else { 1892991Ssusans pgsz = LEVEL_SIZE(l); 1902991Ssusans } 1910Sstevel@tonic-gate break; 1920Sstevel@tonic-gate } 1932991Ssusans 1942991Ssusans mszc = (maptype == MAPPGSZ_HEAP ? p->p_brkpageszc : 1952991Ssusans p->p_stkpageszc); 1962991Ssusans if (addr == 0 && (pgsz < hw_page_array[mszc].hp_size)) { 1972991Ssusans pgsz = hw_page_array[mszc].hp_size; 1982991Ssusans } 1992991Ssusans return (pgsz); 2000Sstevel@tonic-gate 2010Sstevel@tonic-gate /* 2020Sstevel@tonic-gate * for ISM use the 1st large page size. 2030Sstevel@tonic-gate */ 2040Sstevel@tonic-gate case MAPPGSZ_ISM: 2050Sstevel@tonic-gate if (mmu.max_page_level == 0) 2060Sstevel@tonic-gate return (MMU_PAGESIZE); 2070Sstevel@tonic-gate return (LEVEL_SIZE(1)); 2080Sstevel@tonic-gate } 2092991Ssusans return (pgsz); 2100Sstevel@tonic-gate } 2110Sstevel@tonic-gate 2122991Ssusans static uint_t 2132991Ssusans map_szcvec(caddr_t addr, size_t size, uintptr_t off, size_t max_lpsize, 2142991Ssusans size_t min_physmem) 2152991Ssusans { 2162991Ssusans caddr_t eaddr = addr + size; 2172991Ssusans uint_t szcvec = 0; 2182991Ssusans caddr_t raddr; 2192991Ssusans caddr_t readdr; 2202991Ssusans size_t pgsz; 2212991Ssusans int i; 2222991Ssusans 2232991Ssusans if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) { 2242991Ssusans return (0); 2252991Ssusans } 2262991Ssusans 2272991Ssusans for (i = mmu_page_sizes - 1; i > 0; i--) { 2282991Ssusans pgsz = page_get_pagesize(i); 2292991Ssusans if (pgsz > max_lpsize) { 2302991Ssusans continue; 2312991Ssusans } 2322991Ssusans raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); 2332991Ssusans readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); 2342991Ssusans if (raddr < addr || raddr >= readdr) { 2352991Ssusans continue; 2362991Ssusans } 2372991Ssusans if (P2PHASE((uintptr_t)addr ^ off, pgsz)) { 2382991Ssusans continue; 2392991Ssusans } 2402991Ssusans /* 2412991Ssusans * Set szcvec to the remaining page sizes. 2422991Ssusans */ 2432991Ssusans szcvec = ((1 << (i + 1)) - 1) & ~1; 2442991Ssusans break; 2452991Ssusans } 2462991Ssusans return (szcvec); 2472991Ssusans } 2480Sstevel@tonic-gate 2490Sstevel@tonic-gate /* 2500Sstevel@tonic-gate * Return a bit vector of large page size codes that 2510Sstevel@tonic-gate * can be used to map [addr, addr + len) region. 
2520Sstevel@tonic-gate */ 2530Sstevel@tonic-gate /*ARGSUSED*/ 2540Sstevel@tonic-gate uint_t 2552991Ssusans map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type, 2562991Ssusans int memcntl) 2570Sstevel@tonic-gate { 2582991Ssusans size_t max_lpsize = mcntl0_lpsize; 2590Sstevel@tonic-gate 2602991Ssusans if (mmu.max_page_level == 0) 2610Sstevel@tonic-gate return (0); 2620Sstevel@tonic-gate 2632991Ssusans if (flags & MAP_TEXT) { 2642991Ssusans if (!memcntl) 2652991Ssusans max_lpsize = max_utext_lpsize; 2662991Ssusans return (map_szcvec(addr, size, off, max_lpsize, 2672991Ssusans shm_lpg_min_physmem)); 2682991Ssusans 2692991Ssusans } else if (flags & MAP_INITDATA) { 2702991Ssusans if (!memcntl) 2712991Ssusans max_lpsize = max_uidata_lpsize; 2722991Ssusans return (map_szcvec(addr, size, off, max_lpsize, 2732991Ssusans privm_lpg_min_physmem)); 2742991Ssusans 2752991Ssusans } else if (type == MAPPGSZC_SHM) { 2762991Ssusans if (!memcntl) 2772991Ssusans max_lpsize = max_shm_lpsize; 2782991Ssusans return (map_szcvec(addr, size, off, max_lpsize, 2792991Ssusans shm_lpg_min_physmem)); 2800Sstevel@tonic-gate 2812991Ssusans } else if (type == MAPPGSZC_HEAP) { 2822991Ssusans if (!memcntl) 2832991Ssusans max_lpsize = max_uheap_lpsize; 2842991Ssusans return (map_szcvec(addr, size, off, max_lpsize, 2852991Ssusans privm_lpg_min_physmem)); 2862414Saguzovsk 2872991Ssusans } else if (type == MAPPGSZC_STACK) { 2882991Ssusans if (!memcntl) 2892991Ssusans max_lpsize = max_ustack_lpsize; 2902991Ssusans return (map_szcvec(addr, size, off, max_lpsize, 2912991Ssusans privm_lpg_min_physmem)); 2922991Ssusans 2932991Ssusans } else { 2942991Ssusans if (!memcntl) 2952991Ssusans max_lpsize = max_privmap_lpsize; 2962991Ssusans return (map_szcvec(addr, size, off, max_lpsize, 2972991Ssusans privm_lpg_min_physmem)); 2982414Saguzovsk } 2992414Saguzovsk } 3002414Saguzovsk 3010Sstevel@tonic-gate /* 3020Sstevel@tonic-gate * Handle a pagefault. 3030Sstevel@tonic-gate */ 3040Sstevel@tonic-gate faultcode_t 3050Sstevel@tonic-gate pagefault( 3060Sstevel@tonic-gate caddr_t addr, 3070Sstevel@tonic-gate enum fault_type type, 3080Sstevel@tonic-gate enum seg_rw rw, 3090Sstevel@tonic-gate int iskernel) 3100Sstevel@tonic-gate { 3110Sstevel@tonic-gate struct as *as; 3120Sstevel@tonic-gate struct hat *hat; 3130Sstevel@tonic-gate struct proc *p; 3140Sstevel@tonic-gate kthread_t *t; 3150Sstevel@tonic-gate faultcode_t res; 3160Sstevel@tonic-gate caddr_t base; 3170Sstevel@tonic-gate size_t len; 3180Sstevel@tonic-gate int err; 3190Sstevel@tonic-gate int mapped_red; 3200Sstevel@tonic-gate uintptr_t ea; 3210Sstevel@tonic-gate 3220Sstevel@tonic-gate ASSERT_STACK_ALIGNED(); 3230Sstevel@tonic-gate 3240Sstevel@tonic-gate if (INVALID_VADDR(addr)) 3250Sstevel@tonic-gate return (FC_NOMAP); 3260Sstevel@tonic-gate 3270Sstevel@tonic-gate mapped_red = segkp_map_red(); 3280Sstevel@tonic-gate 3290Sstevel@tonic-gate if (iskernel) { 3300Sstevel@tonic-gate as = &kas; 3310Sstevel@tonic-gate hat = as->a_hat; 3320Sstevel@tonic-gate } else { 3330Sstevel@tonic-gate t = curthread; 3340Sstevel@tonic-gate p = ttoproc(t); 3350Sstevel@tonic-gate as = p->p_as; 3360Sstevel@tonic-gate hat = as->a_hat; 3370Sstevel@tonic-gate } 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate /* 3400Sstevel@tonic-gate * Dispatch pagefault. 
3410Sstevel@tonic-gate */ 3420Sstevel@tonic-gate res = as_fault(hat, as, addr, 1, type, rw); 3430Sstevel@tonic-gate 3440Sstevel@tonic-gate /* 3450Sstevel@tonic-gate * If this isn't a potential unmapped hole in the user's 3460Sstevel@tonic-gate * UNIX data or stack segments, just return status info. 3470Sstevel@tonic-gate */ 3480Sstevel@tonic-gate if (res != FC_NOMAP || iskernel) 3490Sstevel@tonic-gate goto out; 3500Sstevel@tonic-gate 3510Sstevel@tonic-gate /* 3520Sstevel@tonic-gate * Check to see if we happened to faulted on a currently unmapped 3530Sstevel@tonic-gate * part of the UNIX data or stack segments. If so, create a zfod 3540Sstevel@tonic-gate * mapping there and then try calling the fault routine again. 3550Sstevel@tonic-gate */ 3560Sstevel@tonic-gate base = p->p_brkbase; 3570Sstevel@tonic-gate len = p->p_brksize; 3580Sstevel@tonic-gate 3590Sstevel@tonic-gate if (addr < base || addr >= base + len) { /* data seg? */ 3600Sstevel@tonic-gate base = (caddr_t)p->p_usrstack - p->p_stksize; 3610Sstevel@tonic-gate len = p->p_stksize; 3620Sstevel@tonic-gate if (addr < base || addr >= p->p_usrstack) { /* stack seg? */ 3630Sstevel@tonic-gate /* not in either UNIX data or stack segments */ 3640Sstevel@tonic-gate res = FC_NOMAP; 3650Sstevel@tonic-gate goto out; 3660Sstevel@tonic-gate } 3670Sstevel@tonic-gate } 3680Sstevel@tonic-gate 3690Sstevel@tonic-gate /* 3700Sstevel@tonic-gate * the rest of this function implements a 3.X 4.X 5.X compatibility 3710Sstevel@tonic-gate * This code is probably not needed anymore 3720Sstevel@tonic-gate */ 3730Sstevel@tonic-gate if (p->p_model == DATAMODEL_ILP32) { 3740Sstevel@tonic-gate 3750Sstevel@tonic-gate /* expand the gap to the page boundaries on each side */ 3760Sstevel@tonic-gate ea = P2ROUNDUP((uintptr_t)base + len, MMU_PAGESIZE); 3770Sstevel@tonic-gate base = (caddr_t)P2ALIGN((uintptr_t)base, MMU_PAGESIZE); 3780Sstevel@tonic-gate len = ea - (uintptr_t)base; 3790Sstevel@tonic-gate 3800Sstevel@tonic-gate as_rangelock(as); 3810Sstevel@tonic-gate if (as_gap(as, MMU_PAGESIZE, &base, &len, AH_CONTAIN, addr) == 3820Sstevel@tonic-gate 0) { 3830Sstevel@tonic-gate err = as_map(as, base, len, segvn_create, zfod_argsp); 3840Sstevel@tonic-gate as_rangeunlock(as); 3850Sstevel@tonic-gate if (err) { 3860Sstevel@tonic-gate res = FC_MAKE_ERR(err); 3870Sstevel@tonic-gate goto out; 3880Sstevel@tonic-gate } 3890Sstevel@tonic-gate } else { 3900Sstevel@tonic-gate /* 3910Sstevel@tonic-gate * This page is already mapped by another thread after 3920Sstevel@tonic-gate * we returned from as_fault() above. We just fall 3930Sstevel@tonic-gate * through as_fault() below. 3940Sstevel@tonic-gate */ 3950Sstevel@tonic-gate as_rangeunlock(as); 3960Sstevel@tonic-gate } 3970Sstevel@tonic-gate 3980Sstevel@tonic-gate res = as_fault(hat, as, addr, 1, F_INVAL, rw); 3990Sstevel@tonic-gate } 4000Sstevel@tonic-gate 4010Sstevel@tonic-gate out: 4020Sstevel@tonic-gate if (mapped_red) 4030Sstevel@tonic-gate segkp_unmap_red(); 4040Sstevel@tonic-gate 4050Sstevel@tonic-gate return (res); 4060Sstevel@tonic-gate } 4070Sstevel@tonic-gate 4080Sstevel@tonic-gate void 4090Sstevel@tonic-gate map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags) 4100Sstevel@tonic-gate { 4110Sstevel@tonic-gate struct proc *p = curproc; 4120Sstevel@tonic-gate caddr_t userlimit = (flags & _MAP_LOW32) ? 
4130Sstevel@tonic-gate (caddr_t)_userlimit32 : p->p_as->a_userlimit; 4140Sstevel@tonic-gate 4150Sstevel@tonic-gate map_addr_proc(addrp, len, off, vacalign, userlimit, curproc, flags); 4160Sstevel@tonic-gate } 4170Sstevel@tonic-gate 4180Sstevel@tonic-gate /*ARGSUSED*/ 4190Sstevel@tonic-gate int 4200Sstevel@tonic-gate map_addr_vacalign_check(caddr_t addr, u_offset_t off) 4210Sstevel@tonic-gate { 4220Sstevel@tonic-gate return (0); 4230Sstevel@tonic-gate } 4240Sstevel@tonic-gate 4250Sstevel@tonic-gate /* 4260Sstevel@tonic-gate * map_addr_proc() is the routine called when the system is to 4270Sstevel@tonic-gate * choose an address for the user. We will pick an address 4280Sstevel@tonic-gate * range which is the highest available below kernelbase. 4290Sstevel@tonic-gate * 4300Sstevel@tonic-gate * addrp is a value/result parameter. 4310Sstevel@tonic-gate * On input it is a hint from the user to be used in a completely 4320Sstevel@tonic-gate * machine dependent fashion. We decide to completely ignore this hint. 4330Sstevel@tonic-gate * 4340Sstevel@tonic-gate * On output it is NULL if no address can be found in the current 4350Sstevel@tonic-gate * processes address space or else an address that is currently 4360Sstevel@tonic-gate * not mapped for len bytes with a page of red zone on either side. 4370Sstevel@tonic-gate * 4380Sstevel@tonic-gate * align is not needed on x86 (it's for viturally addressed caches) 4390Sstevel@tonic-gate */ 4400Sstevel@tonic-gate /*ARGSUSED*/ 4410Sstevel@tonic-gate void 4420Sstevel@tonic-gate map_addr_proc( 4430Sstevel@tonic-gate caddr_t *addrp, 4440Sstevel@tonic-gate size_t len, 4450Sstevel@tonic-gate offset_t off, 4460Sstevel@tonic-gate int vacalign, 4470Sstevel@tonic-gate caddr_t userlimit, 4480Sstevel@tonic-gate struct proc *p, 4490Sstevel@tonic-gate uint_t flags) 4500Sstevel@tonic-gate { 4510Sstevel@tonic-gate struct as *as = p->p_as; 4520Sstevel@tonic-gate caddr_t addr; 4530Sstevel@tonic-gate caddr_t base; 4540Sstevel@tonic-gate size_t slen; 4550Sstevel@tonic-gate size_t align_amount; 4560Sstevel@tonic-gate 4570Sstevel@tonic-gate ASSERT32(userlimit == as->a_userlimit); 4580Sstevel@tonic-gate 4590Sstevel@tonic-gate base = p->p_brkbase; 4600Sstevel@tonic-gate #if defined(__amd64) 4610Sstevel@tonic-gate /* 4620Sstevel@tonic-gate * XX64 Yes, this needs more work. 4630Sstevel@tonic-gate */ 4640Sstevel@tonic-gate if (p->p_model == DATAMODEL_NATIVE) { 4650Sstevel@tonic-gate if (userlimit < as->a_userlimit) { 4660Sstevel@tonic-gate /* 4670Sstevel@tonic-gate * This happens when a program wants to map 4680Sstevel@tonic-gate * something in a range that's accessible to a 4690Sstevel@tonic-gate * program in a smaller address space. For example, 4700Sstevel@tonic-gate * a 64-bit program calling mmap32(2) to guarantee 4710Sstevel@tonic-gate * that the returned address is below 4Gbytes. 4720Sstevel@tonic-gate */ 4730Sstevel@tonic-gate ASSERT((uintptr_t)userlimit < ADDRESS_C(0xffffffff)); 4740Sstevel@tonic-gate 4750Sstevel@tonic-gate if (userlimit > base) 4760Sstevel@tonic-gate slen = userlimit - base; 4770Sstevel@tonic-gate else { 4780Sstevel@tonic-gate *addrp = NULL; 4790Sstevel@tonic-gate return; 4800Sstevel@tonic-gate } 4810Sstevel@tonic-gate } else { 4820Sstevel@tonic-gate /* 4830Sstevel@tonic-gate * XX64 This layout is probably wrong .. but in 4840Sstevel@tonic-gate * the event we make the amd64 address space look 4850Sstevel@tonic-gate * like sparcv9 i.e. with the stack -above- the 4860Sstevel@tonic-gate * heap, this bit of code might even be correct. 
4870Sstevel@tonic-gate */ 4880Sstevel@tonic-gate slen = p->p_usrstack - base - 4890Sstevel@tonic-gate (((size_t)rctl_enforced_value( 4900Sstevel@tonic-gate rctlproc_legacy[RLIMIT_STACK], 4910Sstevel@tonic-gate p->p_rctls, p) + PAGEOFFSET) & PAGEMASK); 4920Sstevel@tonic-gate } 4930Sstevel@tonic-gate } else 4940Sstevel@tonic-gate #endif 4950Sstevel@tonic-gate slen = userlimit - base; 4960Sstevel@tonic-gate 4970Sstevel@tonic-gate len = (len + PAGEOFFSET) & PAGEMASK; 4980Sstevel@tonic-gate 4990Sstevel@tonic-gate /* 5000Sstevel@tonic-gate * Redzone for each side of the request. This is done to leave 5010Sstevel@tonic-gate * one page unmapped between segments. This is not required, but 5020Sstevel@tonic-gate * it's useful for the user because if their program strays across 5030Sstevel@tonic-gate * a segment boundary, it will catch a fault immediately making 5040Sstevel@tonic-gate * debugging a little easier. 5050Sstevel@tonic-gate */ 5060Sstevel@tonic-gate len += 2 * MMU_PAGESIZE; 5070Sstevel@tonic-gate 5080Sstevel@tonic-gate /* 5090Sstevel@tonic-gate * figure out what the alignment should be 5100Sstevel@tonic-gate * 5110Sstevel@tonic-gate * XX64 -- is there an ELF_AMD64_MAXPGSZ or is it the same???? 5120Sstevel@tonic-gate */ 5130Sstevel@tonic-gate if (len <= ELF_386_MAXPGSZ) { 5140Sstevel@tonic-gate /* 5150Sstevel@tonic-gate * Align virtual addresses to ensure that ELF shared libraries 5160Sstevel@tonic-gate * are mapped with the appropriate alignment constraints by 5170Sstevel@tonic-gate * the run-time linker. 5180Sstevel@tonic-gate */ 5190Sstevel@tonic-gate align_amount = ELF_386_MAXPGSZ; 5200Sstevel@tonic-gate } else { 5210Sstevel@tonic-gate int l = mmu.max_page_level; 5220Sstevel@tonic-gate 5230Sstevel@tonic-gate while (l && len < LEVEL_SIZE(l)) 5240Sstevel@tonic-gate --l; 5250Sstevel@tonic-gate 5260Sstevel@tonic-gate align_amount = LEVEL_SIZE(l); 5270Sstevel@tonic-gate } 5280Sstevel@tonic-gate 5290Sstevel@tonic-gate if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount)) 5300Sstevel@tonic-gate align_amount = (uintptr_t)*addrp; 5310Sstevel@tonic-gate 5320Sstevel@tonic-gate len += align_amount; 5330Sstevel@tonic-gate 5340Sstevel@tonic-gate /* 5350Sstevel@tonic-gate * Look for a large enough hole starting below userlimit. 5360Sstevel@tonic-gate * After finding it, use the upper part. Addition of PAGESIZE 5370Sstevel@tonic-gate * is for the redzone as described above. 5380Sstevel@tonic-gate */ 5390Sstevel@tonic-gate if (as_gap(as, len, &base, &slen, AH_HI, NULL) == 0) { 5400Sstevel@tonic-gate caddr_t as_addr; 5410Sstevel@tonic-gate 5420Sstevel@tonic-gate addr = base + slen - len + MMU_PAGESIZE; 5430Sstevel@tonic-gate as_addr = addr; 5440Sstevel@tonic-gate /* 5450Sstevel@tonic-gate * Round address DOWN to the alignment amount, 5460Sstevel@tonic-gate * add the offset, and if this address is less 5470Sstevel@tonic-gate * than the original address, add alignment amount. 
5480Sstevel@tonic-gate */ 5490Sstevel@tonic-gate addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1))); 5500Sstevel@tonic-gate addr += (uintptr_t)(off & (align_amount - 1)); 5510Sstevel@tonic-gate if (addr < as_addr) 5520Sstevel@tonic-gate addr += align_amount; 5530Sstevel@tonic-gate 5540Sstevel@tonic-gate ASSERT(addr <= (as_addr + align_amount)); 5550Sstevel@tonic-gate ASSERT(((uintptr_t)addr & (align_amount - 1)) == 5560Sstevel@tonic-gate ((uintptr_t)(off & (align_amount - 1)))); 5570Sstevel@tonic-gate *addrp = addr; 5580Sstevel@tonic-gate } else { 5590Sstevel@tonic-gate *addrp = NULL; /* no more virtual space */ 5600Sstevel@tonic-gate } 5610Sstevel@tonic-gate } 5620Sstevel@tonic-gate 5630Sstevel@tonic-gate /* 5640Sstevel@tonic-gate * Determine whether [base, base+len] contains a valid range of 5650Sstevel@tonic-gate * addresses at least minlen long. base and len are adjusted if 5660Sstevel@tonic-gate * required to provide a valid range. 5670Sstevel@tonic-gate */ 5680Sstevel@tonic-gate /*ARGSUSED3*/ 5690Sstevel@tonic-gate int 5700Sstevel@tonic-gate valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir) 5710Sstevel@tonic-gate { 5720Sstevel@tonic-gate uintptr_t hi, lo; 5730Sstevel@tonic-gate 5740Sstevel@tonic-gate lo = (uintptr_t)*basep; 5750Sstevel@tonic-gate hi = lo + *lenp; 5760Sstevel@tonic-gate 5770Sstevel@tonic-gate /* 5780Sstevel@tonic-gate * If hi rolled over the top, try cutting back. 5790Sstevel@tonic-gate */ 5800Sstevel@tonic-gate if (hi < lo) { 5810Sstevel@tonic-gate if (0 - lo + hi < minlen) 5820Sstevel@tonic-gate return (0); 5830Sstevel@tonic-gate if (0 - lo < minlen) 5840Sstevel@tonic-gate return (0); 5850Sstevel@tonic-gate *lenp = 0 - lo; 5860Sstevel@tonic-gate } else if (hi - lo < minlen) { 5870Sstevel@tonic-gate return (0); 5880Sstevel@tonic-gate } 5890Sstevel@tonic-gate #if defined(__amd64) 5900Sstevel@tonic-gate /* 5910Sstevel@tonic-gate * Deal with a possible hole in the address range between 5920Sstevel@tonic-gate * hole_start and hole_end that should never be mapped. 
5930Sstevel@tonic-gate */ 5940Sstevel@tonic-gate if (lo < hole_start) { 5950Sstevel@tonic-gate if (hi > hole_start) { 5960Sstevel@tonic-gate if (hi < hole_end) { 5970Sstevel@tonic-gate hi = hole_start; 5980Sstevel@tonic-gate } else { 5990Sstevel@tonic-gate /* lo < hole_start && hi >= hole_end */ 6000Sstevel@tonic-gate if (dir == AH_LO) { 6010Sstevel@tonic-gate /* 6020Sstevel@tonic-gate * prefer lowest range 6030Sstevel@tonic-gate */ 6040Sstevel@tonic-gate if (hole_start - lo >= minlen) 6050Sstevel@tonic-gate hi = hole_start; 6060Sstevel@tonic-gate else if (hi - hole_end >= minlen) 6070Sstevel@tonic-gate lo = hole_end; 6080Sstevel@tonic-gate else 6090Sstevel@tonic-gate return (0); 6100Sstevel@tonic-gate } else { 6110Sstevel@tonic-gate /* 6120Sstevel@tonic-gate * prefer highest range 6130Sstevel@tonic-gate */ 6140Sstevel@tonic-gate if (hi - hole_end >= minlen) 6150Sstevel@tonic-gate lo = hole_end; 6160Sstevel@tonic-gate else if (hole_start - lo >= minlen) 6170Sstevel@tonic-gate hi = hole_start; 6180Sstevel@tonic-gate else 6190Sstevel@tonic-gate return (0); 6200Sstevel@tonic-gate } 6210Sstevel@tonic-gate } 6220Sstevel@tonic-gate } 6230Sstevel@tonic-gate } else { 6240Sstevel@tonic-gate /* lo >= hole_start */ 6250Sstevel@tonic-gate if (hi < hole_end) 6260Sstevel@tonic-gate return (0); 6270Sstevel@tonic-gate if (lo < hole_end) 6280Sstevel@tonic-gate lo = hole_end; 6290Sstevel@tonic-gate } 6300Sstevel@tonic-gate 6310Sstevel@tonic-gate if (hi - lo < minlen) 6320Sstevel@tonic-gate return (0); 6330Sstevel@tonic-gate 6340Sstevel@tonic-gate *basep = (caddr_t)lo; 6350Sstevel@tonic-gate *lenp = hi - lo; 6360Sstevel@tonic-gate #endif 6370Sstevel@tonic-gate return (1); 6380Sstevel@tonic-gate } 6390Sstevel@tonic-gate 6400Sstevel@tonic-gate /* 6410Sstevel@tonic-gate * Determine whether [addr, addr+len] are valid user addresses. 6420Sstevel@tonic-gate */ 6430Sstevel@tonic-gate /*ARGSUSED*/ 6440Sstevel@tonic-gate int 6450Sstevel@tonic-gate valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as, 6460Sstevel@tonic-gate caddr_t userlimit) 6470Sstevel@tonic-gate { 6480Sstevel@tonic-gate caddr_t eaddr = addr + len; 6490Sstevel@tonic-gate 6500Sstevel@tonic-gate if (eaddr <= addr || addr >= userlimit || eaddr > userlimit) 6510Sstevel@tonic-gate return (RANGE_BADADDR); 6520Sstevel@tonic-gate 6530Sstevel@tonic-gate #if defined(__amd64) 6540Sstevel@tonic-gate /* 6550Sstevel@tonic-gate * Check for the VA hole 6560Sstevel@tonic-gate */ 6570Sstevel@tonic-gate if (eaddr > (caddr_t)hole_start && addr < (caddr_t)hole_end) 6580Sstevel@tonic-gate return (RANGE_BADADDR); 6590Sstevel@tonic-gate #endif 6600Sstevel@tonic-gate 6610Sstevel@tonic-gate return (RANGE_OKAY); 6620Sstevel@tonic-gate } 6630Sstevel@tonic-gate 6640Sstevel@tonic-gate /* 6650Sstevel@tonic-gate * Return 1 if the page frame is onboard memory, else 0. 6660Sstevel@tonic-gate */ 6670Sstevel@tonic-gate int 6680Sstevel@tonic-gate pf_is_memory(pfn_t pf) 6690Sstevel@tonic-gate { 6700Sstevel@tonic-gate return (address_in_memlist(phys_install, mmu_ptob((uint64_t)pf), 1)); 6710Sstevel@tonic-gate } 6720Sstevel@tonic-gate 6730Sstevel@tonic-gate 6740Sstevel@tonic-gate /* 6750Sstevel@tonic-gate * initialized by page_coloring_init(). 
6760Sstevel@tonic-gate */ 6770Sstevel@tonic-gate uint_t page_colors; 6780Sstevel@tonic-gate uint_t page_colors_mask; 6790Sstevel@tonic-gate uint_t page_coloring_shift; 6800Sstevel@tonic-gate int cpu_page_colors; 6810Sstevel@tonic-gate static uint_t l2_colors; 6820Sstevel@tonic-gate 6830Sstevel@tonic-gate /* 6840Sstevel@tonic-gate * Page freelists and cachelists are dynamically allocated once mnoderangecnt 6850Sstevel@tonic-gate * and page_colors are calculated from the l2 cache n-way set size. Within a 6860Sstevel@tonic-gate * mnode range, the page freelist and cachelist are hashed into bins based on 6870Sstevel@tonic-gate * color. This makes it easier to search for a page within a specific memory 6880Sstevel@tonic-gate * range. 6890Sstevel@tonic-gate */ 6900Sstevel@tonic-gate #define PAGE_COLORS_MIN 16 6910Sstevel@tonic-gate 6920Sstevel@tonic-gate page_t ****page_freelists; 6930Sstevel@tonic-gate page_t ***page_cachelists; 6940Sstevel@tonic-gate 6950Sstevel@tonic-gate /* 6960Sstevel@tonic-gate * As the PC architecture evolved memory up was clumped into several 6970Sstevel@tonic-gate * ranges for various historical I/O devices to do DMA. 6980Sstevel@tonic-gate * < 16Meg - ISA bus 6990Sstevel@tonic-gate * < 2Gig - ??? 7000Sstevel@tonic-gate * < 4Gig - PCI bus or drivers that don't understand PAE mode 7010Sstevel@tonic-gate */ 7020Sstevel@tonic-gate static pfn_t arch_memranges[NUM_MEM_RANGES] = { 7030Sstevel@tonic-gate 0x100000, /* pfn range for 4G and above */ 7040Sstevel@tonic-gate 0x80000, /* pfn range for 2G-4G */ 7050Sstevel@tonic-gate 0x01000, /* pfn range for 16M-2G */ 7060Sstevel@tonic-gate 0x00000, /* pfn range for 0-16M */ 7070Sstevel@tonic-gate }; 7080Sstevel@tonic-gate 7090Sstevel@tonic-gate /* 7100Sstevel@tonic-gate * These are changed during startup if the machine has limited memory. 7110Sstevel@tonic-gate */ 7120Sstevel@tonic-gate pfn_t *memranges = &arch_memranges[0]; 7130Sstevel@tonic-gate int nranges = NUM_MEM_RANGES; 7140Sstevel@tonic-gate 7150Sstevel@tonic-gate /* 7160Sstevel@tonic-gate * Used by page layer to know about page sizes 7170Sstevel@tonic-gate */ 7180Sstevel@tonic-gate hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1]; 7190Sstevel@tonic-gate 7200Sstevel@tonic-gate /* 7210Sstevel@tonic-gate * This can be patched via /etc/system to allow old non-PAE aware device 7220Sstevel@tonic-gate * drivers to use kmem_alloc'd memory on 32 bit systems with > 4Gig RAM. 
7230Sstevel@tonic-gate */ 7240Sstevel@tonic-gate #if defined(__i386) 7251443Skchow int restricted_kmemalloc = 0; 7260Sstevel@tonic-gate #elif defined(__amd64) 7270Sstevel@tonic-gate int restricted_kmemalloc = 0; 7280Sstevel@tonic-gate #endif 7290Sstevel@tonic-gate 7300Sstevel@tonic-gate kmutex_t *fpc_mutex[NPC_MUTEX]; 7310Sstevel@tonic-gate kmutex_t *cpc_mutex[NPC_MUTEX]; 7320Sstevel@tonic-gate 7330Sstevel@tonic-gate 7340Sstevel@tonic-gate /* 7350Sstevel@tonic-gate * return the memrange containing pfn 7360Sstevel@tonic-gate */ 7370Sstevel@tonic-gate int 7380Sstevel@tonic-gate memrange_num(pfn_t pfn) 7390Sstevel@tonic-gate { 7400Sstevel@tonic-gate int n; 7410Sstevel@tonic-gate 7420Sstevel@tonic-gate for (n = 0; n < nranges - 1; ++n) { 7430Sstevel@tonic-gate if (pfn >= memranges[n]) 7440Sstevel@tonic-gate break; 7450Sstevel@tonic-gate } 7460Sstevel@tonic-gate return (n); 7470Sstevel@tonic-gate } 7480Sstevel@tonic-gate 7490Sstevel@tonic-gate /* 7500Sstevel@tonic-gate * return the mnoderange containing pfn 7510Sstevel@tonic-gate */ 7520Sstevel@tonic-gate int 7530Sstevel@tonic-gate pfn_2_mtype(pfn_t pfn) 7540Sstevel@tonic-gate { 7550Sstevel@tonic-gate int n; 7560Sstevel@tonic-gate 7570Sstevel@tonic-gate for (n = mnoderangecnt - 1; n >= 0; n--) { 7580Sstevel@tonic-gate if (pfn >= mnoderanges[n].mnr_pfnlo) { 7590Sstevel@tonic-gate break; 7600Sstevel@tonic-gate } 7610Sstevel@tonic-gate } 7620Sstevel@tonic-gate return (n); 7630Sstevel@tonic-gate } 7640Sstevel@tonic-gate 7650Sstevel@tonic-gate /* 7660Sstevel@tonic-gate * is_contigpage_free: 7670Sstevel@tonic-gate * returns a page list of contiguous pages. It minimally has to return 7680Sstevel@tonic-gate * minctg pages. Caller determines minctg based on the scatter-gather 7690Sstevel@tonic-gate * list length. 7700Sstevel@tonic-gate * 7710Sstevel@tonic-gate * pfnp is set to the next page frame to search on return. 7720Sstevel@tonic-gate */ 7730Sstevel@tonic-gate static page_t * 7740Sstevel@tonic-gate is_contigpage_free( 7750Sstevel@tonic-gate pfn_t *pfnp, 7760Sstevel@tonic-gate pgcnt_t *pgcnt, 7770Sstevel@tonic-gate pgcnt_t minctg, 7780Sstevel@tonic-gate uint64_t pfnseg, 7790Sstevel@tonic-gate int iolock) 7800Sstevel@tonic-gate { 7810Sstevel@tonic-gate int i = 0; 7820Sstevel@tonic-gate pfn_t pfn = *pfnp; 7830Sstevel@tonic-gate page_t *pp; 7840Sstevel@tonic-gate page_t *plist = NULL; 7850Sstevel@tonic-gate 7860Sstevel@tonic-gate /* 7870Sstevel@tonic-gate * fail if pfn + minctg crosses a segment boundary. 7880Sstevel@tonic-gate * Adjust for next starting pfn to begin at segment boundary. 
7890Sstevel@tonic-gate */ 7900Sstevel@tonic-gate 7910Sstevel@tonic-gate if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg)) { 7920Sstevel@tonic-gate *pfnp = roundup(*pfnp, pfnseg + 1); 7930Sstevel@tonic-gate return (NULL); 7940Sstevel@tonic-gate } 7950Sstevel@tonic-gate 7960Sstevel@tonic-gate do { 7970Sstevel@tonic-gate retry: 7980Sstevel@tonic-gate pp = page_numtopp_nolock(pfn + i); 7990Sstevel@tonic-gate if ((pp == NULL) || 8000Sstevel@tonic-gate (page_trylock(pp, SE_EXCL) == 0)) { 8010Sstevel@tonic-gate (*pfnp)++; 8020Sstevel@tonic-gate break; 8030Sstevel@tonic-gate } 8040Sstevel@tonic-gate if (page_pptonum(pp) != pfn + i) { 8050Sstevel@tonic-gate page_unlock(pp); 8060Sstevel@tonic-gate goto retry; 8070Sstevel@tonic-gate } 8080Sstevel@tonic-gate 8090Sstevel@tonic-gate if (!(PP_ISFREE(pp))) { 8100Sstevel@tonic-gate page_unlock(pp); 8110Sstevel@tonic-gate (*pfnp)++; 8120Sstevel@tonic-gate break; 8130Sstevel@tonic-gate } 8140Sstevel@tonic-gate 8150Sstevel@tonic-gate if (!PP_ISAGED(pp)) { 8160Sstevel@tonic-gate page_list_sub(pp, PG_CACHE_LIST); 8170Sstevel@tonic-gate page_hashout(pp, (kmutex_t *)NULL); 8180Sstevel@tonic-gate } else { 8190Sstevel@tonic-gate page_list_sub(pp, PG_FREE_LIST); 8200Sstevel@tonic-gate } 8210Sstevel@tonic-gate 8220Sstevel@tonic-gate if (iolock) 8230Sstevel@tonic-gate page_io_lock(pp); 8240Sstevel@tonic-gate page_list_concat(&plist, &pp); 8250Sstevel@tonic-gate 8260Sstevel@tonic-gate /* 8270Sstevel@tonic-gate * exit loop when pgcnt satisfied or segment boundary reached. 8280Sstevel@tonic-gate */ 8290Sstevel@tonic-gate 8300Sstevel@tonic-gate } while ((++i < *pgcnt) && ((pfn + i) & pfnseg)); 8310Sstevel@tonic-gate 8320Sstevel@tonic-gate *pfnp += i; /* set to next pfn to search */ 8330Sstevel@tonic-gate 8340Sstevel@tonic-gate if (i >= minctg) { 8350Sstevel@tonic-gate *pgcnt -= i; 8360Sstevel@tonic-gate return (plist); 8370Sstevel@tonic-gate } 8380Sstevel@tonic-gate 8390Sstevel@tonic-gate /* 8400Sstevel@tonic-gate * failure: minctg not satisfied. 8410Sstevel@tonic-gate * 8420Sstevel@tonic-gate * if next request crosses segment boundary, set next pfn 8430Sstevel@tonic-gate * to search from the segment boundary. 
8440Sstevel@tonic-gate */ 8450Sstevel@tonic-gate if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg)) 8460Sstevel@tonic-gate *pfnp = roundup(*pfnp, pfnseg + 1); 8470Sstevel@tonic-gate 8480Sstevel@tonic-gate /* clean up any pages already allocated */ 8490Sstevel@tonic-gate 8500Sstevel@tonic-gate while (plist) { 8510Sstevel@tonic-gate pp = plist; 8520Sstevel@tonic-gate page_sub(&plist, pp); 8530Sstevel@tonic-gate page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL); 8540Sstevel@tonic-gate if (iolock) 8550Sstevel@tonic-gate page_io_unlock(pp); 8560Sstevel@tonic-gate page_unlock(pp); 8570Sstevel@tonic-gate } 8580Sstevel@tonic-gate 8590Sstevel@tonic-gate return (NULL); 8600Sstevel@tonic-gate } 8610Sstevel@tonic-gate 8620Sstevel@tonic-gate /* 8630Sstevel@tonic-gate * verify that pages being returned from allocator have correct DMA attribute 8640Sstevel@tonic-gate */ 8650Sstevel@tonic-gate #ifndef DEBUG 8660Sstevel@tonic-gate #define check_dma(a, b, c) (0) 8670Sstevel@tonic-gate #else 8680Sstevel@tonic-gate static void 8690Sstevel@tonic-gate check_dma(ddi_dma_attr_t *dma_attr, page_t *pp, int cnt) 8700Sstevel@tonic-gate { 8710Sstevel@tonic-gate if (dma_attr == NULL) 8720Sstevel@tonic-gate return; 8730Sstevel@tonic-gate 8740Sstevel@tonic-gate while (cnt-- > 0) { 8750Sstevel@tonic-gate if (mmu_ptob((uint64_t)pp->p_pagenum) < 8760Sstevel@tonic-gate dma_attr->dma_attr_addr_lo) 8770Sstevel@tonic-gate panic("PFN (pp=%p) below dma_attr_addr_lo", pp); 8780Sstevel@tonic-gate if (mmu_ptob((uint64_t)pp->p_pagenum) >= 8790Sstevel@tonic-gate dma_attr->dma_attr_addr_hi) 8800Sstevel@tonic-gate panic("PFN (pp=%p) above dma_attr_addr_hi", pp); 8810Sstevel@tonic-gate pp = pp->p_next; 8820Sstevel@tonic-gate } 8830Sstevel@tonic-gate } 8840Sstevel@tonic-gate #endif 8850Sstevel@tonic-gate 8860Sstevel@tonic-gate static kmutex_t contig_lock; 8870Sstevel@tonic-gate 8880Sstevel@tonic-gate #define CONTIG_LOCK() mutex_enter(&contig_lock); 8890Sstevel@tonic-gate #define CONTIG_UNLOCK() mutex_exit(&contig_lock); 8900Sstevel@tonic-gate 8910Sstevel@tonic-gate #define PFN_16M (mmu_btop((uint64_t)0x1000000)) 8920Sstevel@tonic-gate 8930Sstevel@tonic-gate static page_t * 8940Sstevel@tonic-gate page_get_contigpage(pgcnt_t *pgcnt, ddi_dma_attr_t *mattr, int iolock) 8950Sstevel@tonic-gate { 8960Sstevel@tonic-gate pfn_t pfn; 8970Sstevel@tonic-gate int sgllen; 8980Sstevel@tonic-gate uint64_t pfnseg; 8990Sstevel@tonic-gate pgcnt_t minctg; 9000Sstevel@tonic-gate page_t *pplist = NULL, *plist; 9010Sstevel@tonic-gate uint64_t lo, hi; 9020Sstevel@tonic-gate pgcnt_t pfnalign = 0; 9030Sstevel@tonic-gate static pfn_t startpfn; 9040Sstevel@tonic-gate static pgcnt_t lastctgcnt; 9050Sstevel@tonic-gate uintptr_t align; 9060Sstevel@tonic-gate 9070Sstevel@tonic-gate CONTIG_LOCK(); 9080Sstevel@tonic-gate 9090Sstevel@tonic-gate if (mattr) { 9100Sstevel@tonic-gate lo = mmu_btop((mattr->dma_attr_addr_lo + MMU_PAGEOFFSET)); 9110Sstevel@tonic-gate hi = mmu_btop(mattr->dma_attr_addr_hi); 9120Sstevel@tonic-gate if (hi >= physmax) 9130Sstevel@tonic-gate hi = physmax - 1; 9140Sstevel@tonic-gate sgllen = mattr->dma_attr_sgllen; 9150Sstevel@tonic-gate pfnseg = mmu_btop(mattr->dma_attr_seg); 9160Sstevel@tonic-gate 9170Sstevel@tonic-gate align = maxbit(mattr->dma_attr_align, mattr->dma_attr_minxfer); 9180Sstevel@tonic-gate if (align > MMU_PAGESIZE) 9190Sstevel@tonic-gate pfnalign = mmu_btop(align); 9200Sstevel@tonic-gate 9210Sstevel@tonic-gate /* 9220Sstevel@tonic-gate * in order to satisfy the request, must minimally 9230Sstevel@tonic-gate * acquire minctg 
contiguous pages 9240Sstevel@tonic-gate */ 9250Sstevel@tonic-gate minctg = howmany(*pgcnt, sgllen); 9260Sstevel@tonic-gate 9270Sstevel@tonic-gate ASSERT(hi >= lo); 9280Sstevel@tonic-gate 9290Sstevel@tonic-gate /* 9300Sstevel@tonic-gate * start from where last searched if the minctg >= lastctgcnt 9310Sstevel@tonic-gate */ 9320Sstevel@tonic-gate if (minctg < lastctgcnt || startpfn < lo || startpfn > hi) 9330Sstevel@tonic-gate startpfn = lo; 9340Sstevel@tonic-gate } else { 9350Sstevel@tonic-gate hi = physmax - 1; 9360Sstevel@tonic-gate lo = 0; 9370Sstevel@tonic-gate sgllen = 1; 9380Sstevel@tonic-gate pfnseg = mmu.highest_pfn; 9390Sstevel@tonic-gate minctg = *pgcnt; 9400Sstevel@tonic-gate 9410Sstevel@tonic-gate if (minctg < lastctgcnt) 9420Sstevel@tonic-gate startpfn = lo; 9430Sstevel@tonic-gate } 9440Sstevel@tonic-gate lastctgcnt = minctg; 9450Sstevel@tonic-gate 9460Sstevel@tonic-gate ASSERT(pfnseg + 1 >= (uint64_t)minctg); 9470Sstevel@tonic-gate 9480Sstevel@tonic-gate /* conserve 16m memory - start search above 16m when possible */ 9490Sstevel@tonic-gate if (hi > PFN_16M && startpfn < PFN_16M) 9500Sstevel@tonic-gate startpfn = PFN_16M; 9510Sstevel@tonic-gate 9520Sstevel@tonic-gate pfn = startpfn; 9530Sstevel@tonic-gate if (pfnalign) 9540Sstevel@tonic-gate pfn = P2ROUNDUP(pfn, pfnalign); 9550Sstevel@tonic-gate 9560Sstevel@tonic-gate while (pfn + minctg - 1 <= hi) { 9570Sstevel@tonic-gate 9580Sstevel@tonic-gate plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock); 9590Sstevel@tonic-gate if (plist) { 9600Sstevel@tonic-gate page_list_concat(&pplist, &plist); 9610Sstevel@tonic-gate sgllen--; 9620Sstevel@tonic-gate /* 9630Sstevel@tonic-gate * return when contig pages no longer needed 9640Sstevel@tonic-gate */ 9650Sstevel@tonic-gate if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) { 9660Sstevel@tonic-gate startpfn = pfn; 9670Sstevel@tonic-gate CONTIG_UNLOCK(); 9680Sstevel@tonic-gate check_dma(mattr, pplist, *pgcnt); 9690Sstevel@tonic-gate return (pplist); 9700Sstevel@tonic-gate } 9710Sstevel@tonic-gate minctg = howmany(*pgcnt, sgllen); 9720Sstevel@tonic-gate } 9730Sstevel@tonic-gate if (pfnalign) 9740Sstevel@tonic-gate pfn = P2ROUNDUP(pfn, pfnalign); 9750Sstevel@tonic-gate } 9760Sstevel@tonic-gate 9770Sstevel@tonic-gate /* cannot find contig pages in specified range */ 9780Sstevel@tonic-gate if (startpfn == lo) { 9790Sstevel@tonic-gate CONTIG_UNLOCK(); 9800Sstevel@tonic-gate return (NULL); 9810Sstevel@tonic-gate } 9820Sstevel@tonic-gate 9830Sstevel@tonic-gate /* did not start with lo previously */ 9840Sstevel@tonic-gate pfn = lo; 9850Sstevel@tonic-gate if (pfnalign) 9860Sstevel@tonic-gate pfn = P2ROUNDUP(pfn, pfnalign); 9870Sstevel@tonic-gate 9880Sstevel@tonic-gate /* allow search to go above startpfn */ 9890Sstevel@tonic-gate while (pfn < startpfn) { 9900Sstevel@tonic-gate 9910Sstevel@tonic-gate plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock); 9920Sstevel@tonic-gate if (plist != NULL) { 9930Sstevel@tonic-gate 9940Sstevel@tonic-gate page_list_concat(&pplist, &plist); 9950Sstevel@tonic-gate sgllen--; 9960Sstevel@tonic-gate 9970Sstevel@tonic-gate /* 9980Sstevel@tonic-gate * return when contig pages no longer needed 9990Sstevel@tonic-gate */ 10000Sstevel@tonic-gate if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) { 10010Sstevel@tonic-gate startpfn = pfn; 10020Sstevel@tonic-gate CONTIG_UNLOCK(); 10030Sstevel@tonic-gate check_dma(mattr, pplist, *pgcnt); 10040Sstevel@tonic-gate return (pplist); 10050Sstevel@tonic-gate } 10060Sstevel@tonic-gate minctg = howmany(*pgcnt, 
sgllen); 10070Sstevel@tonic-gate } 10080Sstevel@tonic-gate if (pfnalign) 10090Sstevel@tonic-gate pfn = P2ROUNDUP(pfn, pfnalign); 10100Sstevel@tonic-gate } 10110Sstevel@tonic-gate CONTIG_UNLOCK(); 10120Sstevel@tonic-gate return (NULL); 10130Sstevel@tonic-gate } 10140Sstevel@tonic-gate 10150Sstevel@tonic-gate /* 10160Sstevel@tonic-gate * combine mem_node_config and memrange memory ranges into one data 10170Sstevel@tonic-gate * structure to be used for page list management. 10180Sstevel@tonic-gate * 10190Sstevel@tonic-gate * mnode_range_cnt() calculates the number of memory ranges for mnode and 10200Sstevel@tonic-gate * memranges[]. Used to determine the size of page lists and mnoderanges. 10210Sstevel@tonic-gate * 10220Sstevel@tonic-gate * mnode_range_setup() initializes mnoderanges. 10230Sstevel@tonic-gate */ 10240Sstevel@tonic-gate mnoderange_t *mnoderanges; 10250Sstevel@tonic-gate int mnoderangecnt; 10260Sstevel@tonic-gate int mtype4g; 10270Sstevel@tonic-gate 10280Sstevel@tonic-gate int 10292961Sdp78419 mnode_range_cnt(int mnode) 10300Sstevel@tonic-gate { 10310Sstevel@tonic-gate int mri; 10320Sstevel@tonic-gate int mnrcnt = 0; 10330Sstevel@tonic-gate 10342961Sdp78419 if (mem_node_config[mnode].exists != 0) { 10350Sstevel@tonic-gate mri = nranges - 1; 10360Sstevel@tonic-gate 10370Sstevel@tonic-gate /* find the memranges index below contained in mnode range */ 10380Sstevel@tonic-gate 10390Sstevel@tonic-gate while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase) 10400Sstevel@tonic-gate mri--; 10410Sstevel@tonic-gate 10420Sstevel@tonic-gate /* 10430Sstevel@tonic-gate * increment mnode range counter when memranges or mnode 10440Sstevel@tonic-gate * boundary is reached. 10450Sstevel@tonic-gate */ 10460Sstevel@tonic-gate while (mri >= 0 && 10470Sstevel@tonic-gate mem_node_config[mnode].physmax >= MEMRANGELO(mri)) { 10480Sstevel@tonic-gate mnrcnt++; 10490Sstevel@tonic-gate if (mem_node_config[mnode].physmax > MEMRANGEHI(mri)) 10500Sstevel@tonic-gate mri--; 10510Sstevel@tonic-gate else 10520Sstevel@tonic-gate break; 10530Sstevel@tonic-gate } 10540Sstevel@tonic-gate } 10552961Sdp78419 ASSERT(mnrcnt <= MAX_MNODE_MRANGES); 10560Sstevel@tonic-gate return (mnrcnt); 10570Sstevel@tonic-gate } 10580Sstevel@tonic-gate 10590Sstevel@tonic-gate void 10600Sstevel@tonic-gate mnode_range_setup(mnoderange_t *mnoderanges) 10610Sstevel@tonic-gate { 10620Sstevel@tonic-gate int mnode, mri; 10630Sstevel@tonic-gate 10640Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++) { 10650Sstevel@tonic-gate if (mem_node_config[mnode].exists == 0) 10660Sstevel@tonic-gate continue; 10670Sstevel@tonic-gate 10680Sstevel@tonic-gate mri = nranges - 1; 10690Sstevel@tonic-gate 10700Sstevel@tonic-gate while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase) 10710Sstevel@tonic-gate mri--; 10720Sstevel@tonic-gate 10730Sstevel@tonic-gate while (mri >= 0 && mem_node_config[mnode].physmax >= 10740Sstevel@tonic-gate MEMRANGELO(mri)) { 10750Sstevel@tonic-gate mnoderanges->mnr_pfnlo = 10760Sstevel@tonic-gate MAX(MEMRANGELO(mri), 10770Sstevel@tonic-gate mem_node_config[mnode].physbase); 10780Sstevel@tonic-gate mnoderanges->mnr_pfnhi = 10790Sstevel@tonic-gate MIN(MEMRANGEHI(mri), 10800Sstevel@tonic-gate mem_node_config[mnode].physmax); 10810Sstevel@tonic-gate mnoderanges->mnr_mnode = mnode; 10820Sstevel@tonic-gate mnoderanges->mnr_memrange = mri; 10830Sstevel@tonic-gate mnoderanges++; 10840Sstevel@tonic-gate if (mem_node_config[mnode].physmax > MEMRANGEHI(mri)) 10850Sstevel@tonic-gate mri--; 10860Sstevel@tonic-gate else 
10870Sstevel@tonic-gate break; 10880Sstevel@tonic-gate } 10890Sstevel@tonic-gate } 10900Sstevel@tonic-gate } 10910Sstevel@tonic-gate 10920Sstevel@tonic-gate /* 10930Sstevel@tonic-gate * Determine if the mnode range specified in mtype contains memory belonging 10940Sstevel@tonic-gate * to memory node mnode. If flags & PGI_MT_RANGE is set then mtype contains 10951385Skchow * the range of indices from high pfn to 0, 16m or 4g. 10960Sstevel@tonic-gate * 10970Sstevel@tonic-gate * Return first mnode range type index found otherwise return -1 if none found. 10980Sstevel@tonic-gate */ 10990Sstevel@tonic-gate int 11000Sstevel@tonic-gate mtype_func(int mnode, int mtype, uint_t flags) 11010Sstevel@tonic-gate { 11020Sstevel@tonic-gate if (flags & PGI_MT_RANGE) { 11031385Skchow int mtlim; 11040Sstevel@tonic-gate 11050Sstevel@tonic-gate if (flags & PGI_MT_NEXT) 11060Sstevel@tonic-gate mtype--; 11071385Skchow if (flags & PGI_MT_RANGE0) 11081385Skchow mtlim = 0; 11091385Skchow else if (flags & PGI_MT_RANGE4G) 11101385Skchow mtlim = mtype4g + 1; /* exclude 0-4g range */ 11111385Skchow else if (flags & PGI_MT_RANGE16M) 11121385Skchow mtlim = 1; /* exclude 0-16m range */ 11130Sstevel@tonic-gate while (mtype >= mtlim) { 11140Sstevel@tonic-gate if (mnoderanges[mtype].mnr_mnode == mnode) 11150Sstevel@tonic-gate return (mtype); 11160Sstevel@tonic-gate mtype--; 11170Sstevel@tonic-gate } 11180Sstevel@tonic-gate } else { 11190Sstevel@tonic-gate if (mnoderanges[mtype].mnr_mnode == mnode) 11200Sstevel@tonic-gate return (mtype); 11210Sstevel@tonic-gate } 11220Sstevel@tonic-gate return (-1); 11230Sstevel@tonic-gate } 11240Sstevel@tonic-gate 11250Sstevel@tonic-gate /* 11261373Skchow * Update the page list max counts with the pfn range specified by the 11271373Skchow * input parameters. Called from add_physmem() when physical memory with 11281373Skchow * page_t's are initially added to the page lists. 11291373Skchow */ 11301373Skchow void 11311373Skchow mtype_modify_max(pfn_t startpfn, long cnt) 11321373Skchow { 11331373Skchow int mtype = 0; 11341373Skchow pfn_t endpfn = startpfn + cnt, pfn; 11351373Skchow pgcnt_t inc; 11361373Skchow 11371373Skchow ASSERT(cnt > 0); 11381373Skchow 11391373Skchow for (pfn = startpfn; pfn < endpfn; ) { 11401373Skchow if (pfn <= mnoderanges[mtype].mnr_pfnhi) { 11411373Skchow if (endpfn < mnoderanges[mtype].mnr_pfnhi) { 11421373Skchow inc = endpfn - pfn; 11431373Skchow } else { 11441373Skchow inc = mnoderanges[mtype].mnr_pfnhi - pfn + 1; 11451373Skchow } 11461373Skchow mnoderanges[mtype].mnr_mt_pgmax += inc; 11471373Skchow if (physmax4g && mtype <= mtype4g) 11481373Skchow maxmem4g += inc; 11491373Skchow pfn += inc; 11501373Skchow } 11511373Skchow mtype++; 11521373Skchow ASSERT(mtype < mnoderangecnt || pfn >= endpfn); 11531373Skchow } 11541373Skchow } 11551373Skchow 11561373Skchow /* 1157414Skchow * Returns the free page count for mnode 1158414Skchow */ 1159414Skchow int 1160414Skchow mnode_pgcnt(int mnode) 1161414Skchow { 1162414Skchow int mtype = mnoderangecnt - 1; 1163414Skchow int flags = PGI_MT_RANGE0; 1164414Skchow pgcnt_t pgcnt = 0; 1165414Skchow 1166414Skchow mtype = mtype_func(mnode, mtype, flags); 1167414Skchow 1168414Skchow while (mtype != -1) { 11691385Skchow pgcnt += MTYPE_FREEMEM(mtype); 1170414Skchow mtype = mtype_func(mnode, mtype, flags | PGI_MT_NEXT); 1171414Skchow } 1172414Skchow return (pgcnt); 1173414Skchow } 1174414Skchow 1175414Skchow /* 11760Sstevel@tonic-gate * Initialize page coloring variables based on the l2 cache parameters. 
11770Sstevel@tonic-gate * Calculate and return memory needed for page coloring data structures. 11780Sstevel@tonic-gate */ 11790Sstevel@tonic-gate size_t 11800Sstevel@tonic-gate page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc) 11810Sstevel@tonic-gate { 11820Sstevel@tonic-gate size_t colorsz = 0; 11830Sstevel@tonic-gate int i; 11840Sstevel@tonic-gate int colors; 11850Sstevel@tonic-gate 11860Sstevel@tonic-gate /* 11870Sstevel@tonic-gate * Reduce the memory ranges lists if we don't have large amounts 11880Sstevel@tonic-gate * of memory. This avoids searching known empty free lists. 11890Sstevel@tonic-gate */ 11900Sstevel@tonic-gate i = memrange_num(physmax); 11910Sstevel@tonic-gate memranges += i; 11920Sstevel@tonic-gate nranges -= i; 11930Sstevel@tonic-gate #if defined(__i386) 11940Sstevel@tonic-gate if (i > 0) 11950Sstevel@tonic-gate restricted_kmemalloc = 0; 11960Sstevel@tonic-gate #endif 11970Sstevel@tonic-gate /* physmax greater than 4g */ 11980Sstevel@tonic-gate if (i == 0) 11990Sstevel@tonic-gate physmax4g = 1; 12000Sstevel@tonic-gate 12010Sstevel@tonic-gate ASSERT(ISP2(l2_sz)); 12020Sstevel@tonic-gate ASSERT(ISP2(l2_linesz)); 12030Sstevel@tonic-gate ASSERT(l2_sz > MMU_PAGESIZE); 12040Sstevel@tonic-gate 12050Sstevel@tonic-gate /* l2_assoc is 0 for fully associative l2 cache */ 12060Sstevel@tonic-gate if (l2_assoc) 12070Sstevel@tonic-gate l2_colors = MAX(1, l2_sz / (l2_assoc * MMU_PAGESIZE)); 12080Sstevel@tonic-gate else 12090Sstevel@tonic-gate l2_colors = 1; 12100Sstevel@tonic-gate 12110Sstevel@tonic-gate /* for scalability, configure at least PAGE_COLORS_MIN color bins */ 12120Sstevel@tonic-gate page_colors = MAX(l2_colors, PAGE_COLORS_MIN); 12130Sstevel@tonic-gate 12140Sstevel@tonic-gate /* 12150Sstevel@tonic-gate * cpu_page_colors is non-zero when a page color may be spread across 12160Sstevel@tonic-gate * multiple bins. 12170Sstevel@tonic-gate */ 12180Sstevel@tonic-gate if (l2_colors < page_colors) 12190Sstevel@tonic-gate cpu_page_colors = l2_colors; 12200Sstevel@tonic-gate 12210Sstevel@tonic-gate ASSERT(ISP2(page_colors)); 12220Sstevel@tonic-gate 12230Sstevel@tonic-gate page_colors_mask = page_colors - 1; 12240Sstevel@tonic-gate 12250Sstevel@tonic-gate ASSERT(ISP2(CPUSETSIZE())); 12260Sstevel@tonic-gate page_coloring_shift = lowbit(CPUSETSIZE()); 12270Sstevel@tonic-gate 12282961Sdp78419 /* initialize number of colors per page size */ 12292961Sdp78419 for (i = 0; i <= mmu.max_page_level; i++) { 12302961Sdp78419 hw_page_array[i].hp_size = LEVEL_SIZE(i); 12312961Sdp78419 hw_page_array[i].hp_shift = LEVEL_SHIFT(i); 12322961Sdp78419 hw_page_array[i].hp_pgcnt = LEVEL_SIZE(i) >> LEVEL_SHIFT(0); 12332961Sdp78419 hw_page_array[i].hp_colors = (page_colors_mask >> 12342961Sdp78419 (hw_page_array[i].hp_shift - hw_page_array[0].hp_shift)) 12352961Sdp78419 + 1; 12362961Sdp78419 } 12372961Sdp78419 12382961Sdp78419 /* 12392961Sdp78419 * The value of cpu_page_colors determines if additional color bins 12402961Sdp78419 * need to be checked for a particular color in the page_get routines. 
12412961Sdp78419 */ 12422961Sdp78419 if (cpu_page_colors != 0) { 12432961Sdp78419 12442961Sdp78419 int a = lowbit(page_colors) - lowbit(cpu_page_colors); 12452961Sdp78419 ASSERT(a > 0); 12462961Sdp78419 ASSERT(a < 16); 12472961Sdp78419 12482961Sdp78419 for (i = 0; i <= mmu.max_page_level; i++) { 12492961Sdp78419 if ((colors = hw_page_array[i].hp_colors) <= 1) { 12502961Sdp78419 colorequivszc[i] = 0; 12512961Sdp78419 continue; 12522961Sdp78419 } 12532961Sdp78419 while ((colors >> a) == 0) 12542961Sdp78419 a--; 12552961Sdp78419 ASSERT(a >= 0); 12562961Sdp78419 12572961Sdp78419 /* higher 4 bits encodes color equiv mask */ 12582961Sdp78419 colorequivszc[i] = (a << 4); 12592961Sdp78419 } 12602961Sdp78419 } 12612961Sdp78419 12622961Sdp78419 /* factor in colorequiv to check additional 'equivalent' bins. */ 12632961Sdp78419 if (colorequiv > 1) { 12642961Sdp78419 12652961Sdp78419 int a = lowbit(colorequiv) - 1; 12662961Sdp78419 if (a > 15) 12672961Sdp78419 a = 15; 12682961Sdp78419 12692961Sdp78419 for (i = 0; i <= mmu.max_page_level; i++) { 12702961Sdp78419 if ((colors = hw_page_array[i].hp_colors) <= 1) { 12712961Sdp78419 continue; 12722961Sdp78419 } 12732961Sdp78419 while ((colors >> a) == 0) 12742961Sdp78419 a--; 12752961Sdp78419 if ((a << 4) > colorequivszc[i]) { 12762961Sdp78419 colorequivszc[i] = (a << 4); 12772961Sdp78419 } 12782961Sdp78419 } 12792961Sdp78419 } 12802961Sdp78419 12810Sstevel@tonic-gate /* size for mnoderanges */ 12822961Sdp78419 for (mnoderangecnt = 0, i = 0; i < max_mem_nodes; i++) 12832961Sdp78419 mnoderangecnt += mnode_range_cnt(i); 12840Sstevel@tonic-gate colorsz = mnoderangecnt * sizeof (mnoderange_t); 12850Sstevel@tonic-gate 12860Sstevel@tonic-gate /* size for fpc_mutex and cpc_mutex */ 12870Sstevel@tonic-gate colorsz += (2 * max_mem_nodes * sizeof (kmutex_t) * NPC_MUTEX); 12880Sstevel@tonic-gate 12890Sstevel@tonic-gate /* size of page_freelists */ 12900Sstevel@tonic-gate colorsz += mnoderangecnt * sizeof (page_t ***); 12910Sstevel@tonic-gate colorsz += mnoderangecnt * mmu_page_sizes * sizeof (page_t **); 12920Sstevel@tonic-gate 12930Sstevel@tonic-gate for (i = 0; i < mmu_page_sizes; i++) { 12940Sstevel@tonic-gate colors = page_get_pagecolors(i); 12950Sstevel@tonic-gate colorsz += mnoderangecnt * colors * sizeof (page_t *); 12960Sstevel@tonic-gate } 12970Sstevel@tonic-gate 12980Sstevel@tonic-gate /* size of page_cachelists */ 12990Sstevel@tonic-gate colorsz += mnoderangecnt * sizeof (page_t **); 13000Sstevel@tonic-gate colorsz += mnoderangecnt * page_colors * sizeof (page_t *); 13010Sstevel@tonic-gate 13020Sstevel@tonic-gate return (colorsz); 13030Sstevel@tonic-gate } 13040Sstevel@tonic-gate 13050Sstevel@tonic-gate /* 13060Sstevel@tonic-gate * Called once at startup to configure page_coloring data structures and 13070Sstevel@tonic-gate * does the 1st page_free()/page_freelist_add(). 
13080Sstevel@tonic-gate */ 13090Sstevel@tonic-gate void 13100Sstevel@tonic-gate page_coloring_setup(caddr_t pcmemaddr) 13110Sstevel@tonic-gate { 13120Sstevel@tonic-gate int i; 13130Sstevel@tonic-gate int j; 13140Sstevel@tonic-gate int k; 13150Sstevel@tonic-gate caddr_t addr; 13160Sstevel@tonic-gate int colors; 13170Sstevel@tonic-gate 13180Sstevel@tonic-gate /* 13190Sstevel@tonic-gate * do page coloring setup 13200Sstevel@tonic-gate */ 13210Sstevel@tonic-gate addr = pcmemaddr; 13220Sstevel@tonic-gate 13230Sstevel@tonic-gate mnoderanges = (mnoderange_t *)addr; 13240Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (mnoderange_t)); 13250Sstevel@tonic-gate 13260Sstevel@tonic-gate mnode_range_setup(mnoderanges); 13270Sstevel@tonic-gate 13280Sstevel@tonic-gate if (physmax4g) 13290Sstevel@tonic-gate mtype4g = pfn_2_mtype(0xfffff); 13300Sstevel@tonic-gate 13310Sstevel@tonic-gate for (k = 0; k < NPC_MUTEX; k++) { 13320Sstevel@tonic-gate fpc_mutex[k] = (kmutex_t *)addr; 13330Sstevel@tonic-gate addr += (max_mem_nodes * sizeof (kmutex_t)); 13340Sstevel@tonic-gate } 13350Sstevel@tonic-gate for (k = 0; k < NPC_MUTEX; k++) { 13360Sstevel@tonic-gate cpc_mutex[k] = (kmutex_t *)addr; 13370Sstevel@tonic-gate addr += (max_mem_nodes * sizeof (kmutex_t)); 13380Sstevel@tonic-gate } 13390Sstevel@tonic-gate page_freelists = (page_t ****)addr; 13400Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (page_t ***)); 13410Sstevel@tonic-gate 13420Sstevel@tonic-gate page_cachelists = (page_t ***)addr; 13430Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (page_t **)); 13440Sstevel@tonic-gate 13450Sstevel@tonic-gate for (i = 0; i < mnoderangecnt; i++) { 13460Sstevel@tonic-gate page_freelists[i] = (page_t ***)addr; 13470Sstevel@tonic-gate addr += (mmu_page_sizes * sizeof (page_t **)); 13480Sstevel@tonic-gate 13490Sstevel@tonic-gate for (j = 0; j < mmu_page_sizes; j++) { 13500Sstevel@tonic-gate colors = page_get_pagecolors(j); 13510Sstevel@tonic-gate page_freelists[i][j] = (page_t **)addr; 13520Sstevel@tonic-gate addr += (colors * sizeof (page_t *)); 13530Sstevel@tonic-gate } 13540Sstevel@tonic-gate page_cachelists[i] = (page_t **)addr; 13550Sstevel@tonic-gate addr += (page_colors * sizeof (page_t *)); 13560Sstevel@tonic-gate } 13570Sstevel@tonic-gate } 13580Sstevel@tonic-gate 13590Sstevel@tonic-gate /*ARGSUSED*/ 13600Sstevel@tonic-gate int 13610Sstevel@tonic-gate bp_color(struct buf *bp) 13620Sstevel@tonic-gate { 13630Sstevel@tonic-gate return (0); 13640Sstevel@tonic-gate } 13650Sstevel@tonic-gate 13660Sstevel@tonic-gate /* 13670Sstevel@tonic-gate * get a page from any list with the given mnode 13680Sstevel@tonic-gate */ 13690Sstevel@tonic-gate page_t * 13700Sstevel@tonic-gate page_get_mnode_anylist(ulong_t origbin, uchar_t szc, uint_t flags, 13710Sstevel@tonic-gate int mnode, int mtype, ddi_dma_attr_t *dma_attr) 13720Sstevel@tonic-gate { 13732961Sdp78419 kmutex_t *pcm; 13742961Sdp78419 int i; 13752961Sdp78419 page_t *pp; 13762961Sdp78419 page_t *first_pp; 13772961Sdp78419 uint64_t pgaddr; 13782961Sdp78419 ulong_t bin; 13792961Sdp78419 int mtypestart; 13802961Sdp78419 int plw_initialized; 13812961Sdp78419 page_list_walker_t plw; 13820Sstevel@tonic-gate 13830Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_alloc); 13840Sstevel@tonic-gate 13850Sstevel@tonic-gate ASSERT((flags & PG_MATCH_COLOR) == 0); 13860Sstevel@tonic-gate ASSERT(szc == 0); 13870Sstevel@tonic-gate ASSERT(dma_attr != NULL); 13880Sstevel@tonic-gate 13890Sstevel@tonic-gate MTYPE_START(mnode, mtype, flags); 13900Sstevel@tonic-gate if (mtype < 0) { 
13910Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocempty); 13920Sstevel@tonic-gate return (NULL); 13930Sstevel@tonic-gate } 13940Sstevel@tonic-gate 13950Sstevel@tonic-gate mtypestart = mtype; 13960Sstevel@tonic-gate 13970Sstevel@tonic-gate bin = origbin; 13980Sstevel@tonic-gate 13990Sstevel@tonic-gate /* 14000Sstevel@tonic-gate * check up to page_colors + 1 bins - origbin may be checked twice 14010Sstevel@tonic-gate * because of BIN_STEP skip 14020Sstevel@tonic-gate */ 14030Sstevel@tonic-gate do { 14042961Sdp78419 plw_initialized = 0; 14052961Sdp78419 14062961Sdp78419 for (plw.plw_count = 0; 14072961Sdp78419 plw.plw_count < page_colors; plw.plw_count++) { 14082961Sdp78419 14090Sstevel@tonic-gate if (PAGE_FREELISTS(mnode, szc, bin, mtype) == NULL) 14100Sstevel@tonic-gate goto nextfreebin; 14110Sstevel@tonic-gate 14120Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); 14130Sstevel@tonic-gate mutex_enter(pcm); 14140Sstevel@tonic-gate pp = PAGE_FREELISTS(mnode, szc, bin, mtype); 14150Sstevel@tonic-gate first_pp = pp; 14160Sstevel@tonic-gate while (pp != NULL) { 14170Sstevel@tonic-gate if (page_trylock(pp, SE_EXCL) == 0) { 14180Sstevel@tonic-gate pp = pp->p_next; 14190Sstevel@tonic-gate if (pp == first_pp) { 14200Sstevel@tonic-gate pp = NULL; 14210Sstevel@tonic-gate } 14220Sstevel@tonic-gate continue; 14230Sstevel@tonic-gate } 14240Sstevel@tonic-gate 14250Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 14260Sstevel@tonic-gate ASSERT(PP_ISAGED(pp)); 14270Sstevel@tonic-gate ASSERT(pp->p_vnode == NULL); 14280Sstevel@tonic-gate ASSERT(pp->p_hash == NULL); 14290Sstevel@tonic-gate ASSERT(pp->p_offset == (u_offset_t)-1); 14300Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 14310Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode); 14320Sstevel@tonic-gate /* check if page within DMA attributes */ 14330Sstevel@tonic-gate pgaddr = mmu_ptob((uint64_t)(pp->p_pagenum)); 14340Sstevel@tonic-gate 14350Sstevel@tonic-gate if ((pgaddr >= dma_attr->dma_attr_addr_lo) && 14360Sstevel@tonic-gate (pgaddr + MMU_PAGESIZE - 1 <= 14370Sstevel@tonic-gate dma_attr->dma_attr_addr_hi)) { 14380Sstevel@tonic-gate break; 14390Sstevel@tonic-gate } 14400Sstevel@tonic-gate 14410Sstevel@tonic-gate /* continue looking */ 14420Sstevel@tonic-gate page_unlock(pp); 14430Sstevel@tonic-gate pp = pp->p_next; 14440Sstevel@tonic-gate if (pp == first_pp) 14450Sstevel@tonic-gate pp = NULL; 14460Sstevel@tonic-gate 14470Sstevel@tonic-gate } 14480Sstevel@tonic-gate if (pp != NULL) { 14490Sstevel@tonic-gate ASSERT(mtype == PP_2_MTYPE(pp)); 14500Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 14510Sstevel@tonic-gate 14520Sstevel@tonic-gate /* found a page with specified DMA attributes */ 14530Sstevel@tonic-gate page_sub(&PAGE_FREELISTS(mnode, szc, bin, 14540Sstevel@tonic-gate mtype), pp); 1455414Skchow page_ctr_sub(mnode, mtype, pp, PG_FREE_LIST); 14560Sstevel@tonic-gate 14570Sstevel@tonic-gate if ((PP_ISFREE(pp) == 0) || 14580Sstevel@tonic-gate (PP_ISAGED(pp) == 0)) { 14590Sstevel@tonic-gate cmn_err(CE_PANIC, "page %p is not free", 14600Sstevel@tonic-gate (void *)pp); 14610Sstevel@tonic-gate } 14620Sstevel@tonic-gate 14630Sstevel@tonic-gate mutex_exit(pcm); 14640Sstevel@tonic-gate check_dma(dma_attr, pp, 1); 14650Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocok); 14660Sstevel@tonic-gate return (pp); 14670Sstevel@tonic-gate } 14680Sstevel@tonic-gate mutex_exit(pcm); 14690Sstevel@tonic-gate nextfreebin: 14702961Sdp78419 if (plw_initialized == 0) { 14712961Sdp78419 page_list_walk_init(szc, 0, bin, 1, 0, &plw); 
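				/*
				 * The walker initialized above dictates the
				 * order in which the remaining color bins are
				 * visited and whether a larger page may be
				 * split (plw_do_split) to satisfy the request.
				 */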
14722961Sdp78419 ASSERT(plw.plw_ceq_dif == page_colors); 14732961Sdp78419 plw_initialized = 1; 14742961Sdp78419 } 14750Sstevel@tonic-gate 14762961Sdp78419 if (plw.plw_do_split) { 14772961Sdp78419 pp = page_freelist_split(szc, bin, mnode, 14782961Sdp78419 mtype, 14792961Sdp78419 mmu_btop(dma_attr->dma_attr_addr_hi + 1), 14802961Sdp78419 &plw); 14812961Sdp78419 if (pp != NULL) 14822961Sdp78419 return (pp); 14832961Sdp78419 } 14842961Sdp78419 14852961Sdp78419 bin = page_list_walk_next_bin(szc, bin, &plw); 14860Sstevel@tonic-gate } 14872961Sdp78419 1488414Skchow MTYPE_NEXT(mnode, mtype, flags); 1489414Skchow } while (mtype >= 0); 14900Sstevel@tonic-gate 14910Sstevel@tonic-gate /* failed to find a page in the freelist; try it in the cachelist */ 14920Sstevel@tonic-gate 14930Sstevel@tonic-gate /* reset mtype start for cachelist search */ 14940Sstevel@tonic-gate mtype = mtypestart; 14950Sstevel@tonic-gate ASSERT(mtype >= 0); 14960Sstevel@tonic-gate 14970Sstevel@tonic-gate /* start with the bin of matching color */ 14980Sstevel@tonic-gate bin = origbin; 14990Sstevel@tonic-gate 15000Sstevel@tonic-gate do { 15010Sstevel@tonic-gate for (i = 0; i <= page_colors; i++) { 15020Sstevel@tonic-gate if (PAGE_CACHELISTS(mnode, bin, mtype) == NULL) 15030Sstevel@tonic-gate goto nextcachebin; 15040Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST); 15050Sstevel@tonic-gate mutex_enter(pcm); 15060Sstevel@tonic-gate pp = PAGE_CACHELISTS(mnode, bin, mtype); 15070Sstevel@tonic-gate first_pp = pp; 15080Sstevel@tonic-gate while (pp != NULL) { 15090Sstevel@tonic-gate if (page_trylock(pp, SE_EXCL) == 0) { 15100Sstevel@tonic-gate pp = pp->p_next; 15110Sstevel@tonic-gate if (pp == first_pp) 15120Sstevel@tonic-gate break; 15130Sstevel@tonic-gate continue; 15140Sstevel@tonic-gate } 15150Sstevel@tonic-gate ASSERT(pp->p_vnode); 15160Sstevel@tonic-gate ASSERT(PP_ISAGED(pp) == 0); 15170Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 15180Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode); 15190Sstevel@tonic-gate 15200Sstevel@tonic-gate /* check if page within DMA attributes */ 15210Sstevel@tonic-gate 15220Sstevel@tonic-gate pgaddr = ptob((uint64_t)(pp->p_pagenum)); 15230Sstevel@tonic-gate 15240Sstevel@tonic-gate if ((pgaddr >= dma_attr->dma_attr_addr_lo) && 15250Sstevel@tonic-gate (pgaddr + MMU_PAGESIZE - 1 <= 15260Sstevel@tonic-gate dma_attr->dma_attr_addr_hi)) { 15270Sstevel@tonic-gate break; 15280Sstevel@tonic-gate } 15290Sstevel@tonic-gate 15300Sstevel@tonic-gate /* continue looking */ 15310Sstevel@tonic-gate page_unlock(pp); 15320Sstevel@tonic-gate pp = pp->p_next; 15330Sstevel@tonic-gate if (pp == first_pp) 15340Sstevel@tonic-gate pp = NULL; 15350Sstevel@tonic-gate } 15360Sstevel@tonic-gate 15370Sstevel@tonic-gate if (pp != NULL) { 15380Sstevel@tonic-gate ASSERT(mtype == PP_2_MTYPE(pp)); 15390Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 15400Sstevel@tonic-gate 15410Sstevel@tonic-gate /* found a page with specified DMA attributes */ 15420Sstevel@tonic-gate page_sub(&PAGE_CACHELISTS(mnode, bin, 15430Sstevel@tonic-gate mtype), pp); 1544414Skchow page_ctr_sub(mnode, mtype, pp, PG_CACHE_LIST); 15450Sstevel@tonic-gate 15460Sstevel@tonic-gate mutex_exit(pcm); 15470Sstevel@tonic-gate ASSERT(pp->p_vnode); 15480Sstevel@tonic-gate ASSERT(PP_ISAGED(pp) == 0); 15490Sstevel@tonic-gate check_dma(dma_attr, pp, 1); 15500Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocok); 15510Sstevel@tonic-gate return (pp); 15520Sstevel@tonic-gate } 15530Sstevel@tonic-gate mutex_exit(pcm); 15540Sstevel@tonic-gate nextcachebin: 
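			/*
			 * After the original color bin misses, jump BIN_STEP
			 * bins away, then step through the remaining bins one
			 * at a time, wrapping around via page_colors_mask.
			 */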
15550Sstevel@tonic-gate bin += (i == 0) ? BIN_STEP : 1; 15560Sstevel@tonic-gate bin &= page_colors_mask; 15570Sstevel@tonic-gate } 1558414Skchow MTYPE_NEXT(mnode, mtype, flags); 1559414Skchow } while (mtype >= 0); 15600Sstevel@tonic-gate 15610Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocfailed); 15620Sstevel@tonic-gate return (NULL); 15630Sstevel@tonic-gate } 15640Sstevel@tonic-gate 15650Sstevel@tonic-gate /* 15660Sstevel@tonic-gate * This function is similar to page_get_freelist()/page_get_cachelist() 15670Sstevel@tonic-gate * but it searches both the lists to find a page with the specified 15680Sstevel@tonic-gate * color (or no color) and DMA attributes. The search is done in the 15690Sstevel@tonic-gate * freelist first and then in the cache list within the highest memory 15700Sstevel@tonic-gate * range (based on DMA attributes) before searching in the lower 15710Sstevel@tonic-gate * memory ranges. 15720Sstevel@tonic-gate * 15730Sstevel@tonic-gate * Note: This function is called only by page_create_io(). 15740Sstevel@tonic-gate */ 15750Sstevel@tonic-gate /*ARGSUSED*/ 15760Sstevel@tonic-gate page_t * 15770Sstevel@tonic-gate page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr, 15780Sstevel@tonic-gate size_t size, uint_t flags, ddi_dma_attr_t *dma_attr, lgrp_t *lgrp) 15790Sstevel@tonic-gate { 15800Sstevel@tonic-gate uint_t bin; 15810Sstevel@tonic-gate int mtype; 15820Sstevel@tonic-gate page_t *pp; 15830Sstevel@tonic-gate int n; 15840Sstevel@tonic-gate int m; 15850Sstevel@tonic-gate int szc; 15860Sstevel@tonic-gate int fullrange; 15870Sstevel@tonic-gate int mnode; 15880Sstevel@tonic-gate int local_failed_stat = 0; 15890Sstevel@tonic-gate lgrp_mnode_cookie_t lgrp_cookie; 15900Sstevel@tonic-gate 15910Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pga_alloc); 15920Sstevel@tonic-gate 15930Sstevel@tonic-gate /* only base pagesize currently supported */ 15940Sstevel@tonic-gate if (size != MMU_PAGESIZE) 15950Sstevel@tonic-gate return (NULL); 15960Sstevel@tonic-gate 15970Sstevel@tonic-gate /* 15980Sstevel@tonic-gate * If we're passed a specific lgroup, we use it. Otherwise, 15990Sstevel@tonic-gate * assume first-touch placement is desired. 16000Sstevel@tonic-gate */ 16010Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp)) 16020Sstevel@tonic-gate lgrp = lgrp_home_lgrp(); 16030Sstevel@tonic-gate 16040Sstevel@tonic-gate /* LINTED */ 16052961Sdp78419 AS_2_BIN(as, seg, vp, vaddr, bin, 0); 16060Sstevel@tonic-gate 16070Sstevel@tonic-gate /* 16080Sstevel@tonic-gate * Only hold one freelist or cachelist lock at a time, that way we 16090Sstevel@tonic-gate * can start anywhere and not have to worry about lock 16100Sstevel@tonic-gate * ordering. 16110Sstevel@tonic-gate */ 16120Sstevel@tonic-gate if (dma_attr == NULL) { 16130Sstevel@tonic-gate n = 0; 16140Sstevel@tonic-gate m = mnoderangecnt - 1; 16150Sstevel@tonic-gate fullrange = 1; 16160Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pga_nulldmaattr); 16170Sstevel@tonic-gate } else { 16180Sstevel@tonic-gate pfn_t pfnlo = mmu_btop(dma_attr->dma_attr_addr_lo); 16190Sstevel@tonic-gate pfn_t pfnhi = mmu_btop(dma_attr->dma_attr_addr_hi); 16200Sstevel@tonic-gate 16210Sstevel@tonic-gate /* 16220Sstevel@tonic-gate * We can guarantee alignment only for page boundary. 
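		 * Requests with dma_attr_align greater than MMU_PAGESIZE are
		 * therefore rejected just below.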
16230Sstevel@tonic-gate 		 */
16240Sstevel@tonic-gate 		if (dma_attr->dma_attr_align > MMU_PAGESIZE)
16250Sstevel@tonic-gate 			return (NULL);
16260Sstevel@tonic-gate 
16270Sstevel@tonic-gate 		n = pfn_2_mtype(pfnlo);
16280Sstevel@tonic-gate 		m = pfn_2_mtype(pfnhi);
16290Sstevel@tonic-gate 
16300Sstevel@tonic-gate 		fullrange = ((pfnlo == mnoderanges[n].mnr_pfnlo) &&
16310Sstevel@tonic-gate 		    (pfnhi >= mnoderanges[m].mnr_pfnhi));
16320Sstevel@tonic-gate 	}
16330Sstevel@tonic-gate 	VM_STAT_COND_ADD(fullrange == 0, pga_vmstats.pga_notfullrange);
16340Sstevel@tonic-gate 
16350Sstevel@tonic-gate 	if (n > m)
16360Sstevel@tonic-gate 		return (NULL);
16370Sstevel@tonic-gate 
16380Sstevel@tonic-gate 	szc = 0;
16390Sstevel@tonic-gate 
16400Sstevel@tonic-gate 	/* cycling through mtype handled by RANGE0 if n == 0 */
16410Sstevel@tonic-gate 	if (n == 0) {
16420Sstevel@tonic-gate 		flags |= PGI_MT_RANGE0;
16430Sstevel@tonic-gate 		n = m;
16440Sstevel@tonic-gate 	}
16450Sstevel@tonic-gate 
16460Sstevel@tonic-gate 	/*
16470Sstevel@tonic-gate 	 * Try local memory node first, but try remote if we can't
16480Sstevel@tonic-gate 	 * get a page of the right color.
16490Sstevel@tonic-gate 	 */
16500Sstevel@tonic-gate 	LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_HIER);
16510Sstevel@tonic-gate 	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
16520Sstevel@tonic-gate 		/*
16530Sstevel@tonic-gate 		 * allocate pages from high pfn to low.
16540Sstevel@tonic-gate 		 */
16550Sstevel@tonic-gate 		for (mtype = m; mtype >= n; mtype--) {
16560Sstevel@tonic-gate 			if (fullrange != 0) {
16570Sstevel@tonic-gate 				pp = page_get_mnode_freelist(mnode,
16580Sstevel@tonic-gate 				    bin, mtype, szc, flags);
16590Sstevel@tonic-gate 				if (pp == NULL) {
16600Sstevel@tonic-gate 					pp = page_get_mnode_cachelist(
16610Sstevel@tonic-gate 					    bin, flags, mnode, mtype);
16620Sstevel@tonic-gate 				}
16630Sstevel@tonic-gate 			} else {
16640Sstevel@tonic-gate 				pp = page_get_mnode_anylist(bin, szc,
16650Sstevel@tonic-gate 				    flags, mnode, mtype, dma_attr);
16660Sstevel@tonic-gate 			}
16670Sstevel@tonic-gate 			if (pp != NULL) {
16680Sstevel@tonic-gate 				VM_STAT_ADD(pga_vmstats.pga_allocok);
16690Sstevel@tonic-gate 				check_dma(dma_attr, pp, 1);
16700Sstevel@tonic-gate 				return (pp);
16710Sstevel@tonic-gate 			}
16720Sstevel@tonic-gate 		}
16730Sstevel@tonic-gate 		if (!local_failed_stat) {
16740Sstevel@tonic-gate 			lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);
16750Sstevel@tonic-gate 			local_failed_stat = 1;
16760Sstevel@tonic-gate 		}
16770Sstevel@tonic-gate 	}
16780Sstevel@tonic-gate 	VM_STAT_ADD(pga_vmstats.pga_allocfailed);
16790Sstevel@tonic-gate 
16800Sstevel@tonic-gate 	return (NULL);
16810Sstevel@tonic-gate }
16820Sstevel@tonic-gate 
16830Sstevel@tonic-gate /*
16840Sstevel@tonic-gate  * page_create_io()
16850Sstevel@tonic-gate  *
16860Sstevel@tonic-gate  * This function is a copy of page_create_va() with an additional
16870Sstevel@tonic-gate  * argument 'mattr' that specifies DMA memory requirements to
16880Sstevel@tonic-gate  * the page list functions. This function is used by the segkmem
16890Sstevel@tonic-gate  * allocator, so it is only used to create new pages (i.e., PG_EXCL is
16900Sstevel@tonic-gate  * set).
16910Sstevel@tonic-gate  *
16920Sstevel@tonic-gate  * Note: This interface is currently used only by the x86 PSM and is
16930Sstevel@tonic-gate  * not fully specified, so its commitment level is that of a private
16940Sstevel@tonic-gate  * interface specific to x86. It relies on the PSM-specific
16950Sstevel@tonic-gate  * page_get_anylist() interface.
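 *
 * Purely as a hypothetical illustration (this is not a committed calling
 * convention and the values below are made up), a caller needing pages
 * restricted to the low 16MB might fill in the DMA attributes roughly as:
 *
 *	ddi_dma_attr_t attr = { 0 };
 *
 *	attr.dma_attr_addr_lo = 0;
 *	attr.dma_attr_addr_hi = 0xFFFFFF;	(last byte below 16MB)
 *	attr.dma_attr_align = MMU_PAGESIZE;	(strictest alignment honored)
 *	pp = page_create_io(vp, off, MMU_PAGESIZE, PG_EXCL | PG_WAIT,
 *	    &kas, vaddr, &attr);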
16960Sstevel@tonic-gate */ 16970Sstevel@tonic-gate 16980Sstevel@tonic-gate #define PAGE_HASH_SEARCH(index, pp, vp, off) { \ 16990Sstevel@tonic-gate for ((pp) = page_hash[(index)]; (pp); (pp) = (pp)->p_hash) { \ 17000Sstevel@tonic-gate if ((pp)->p_vnode == (vp) && (pp)->p_offset == (off)) \ 17010Sstevel@tonic-gate break; \ 17020Sstevel@tonic-gate } \ 17030Sstevel@tonic-gate } 17040Sstevel@tonic-gate 17050Sstevel@tonic-gate 17060Sstevel@tonic-gate page_t * 17070Sstevel@tonic-gate page_create_io( 17080Sstevel@tonic-gate struct vnode *vp, 17090Sstevel@tonic-gate u_offset_t off, 17100Sstevel@tonic-gate uint_t bytes, 17110Sstevel@tonic-gate uint_t flags, 17120Sstevel@tonic-gate struct as *as, 17130Sstevel@tonic-gate caddr_t vaddr, 17140Sstevel@tonic-gate ddi_dma_attr_t *mattr) /* DMA memory attributes if any */ 17150Sstevel@tonic-gate { 17160Sstevel@tonic-gate page_t *plist = NULL; 17170Sstevel@tonic-gate uint_t plist_len = 0; 17180Sstevel@tonic-gate pgcnt_t npages; 17190Sstevel@tonic-gate page_t *npp = NULL; 17200Sstevel@tonic-gate uint_t pages_req; 17210Sstevel@tonic-gate page_t *pp; 17220Sstevel@tonic-gate kmutex_t *phm = NULL; 17230Sstevel@tonic-gate uint_t index; 17240Sstevel@tonic-gate 17250Sstevel@tonic-gate TRACE_4(TR_FAC_VM, TR_PAGE_CREATE_START, 17260Sstevel@tonic-gate "page_create_start:vp %p off %llx bytes %u flags %x", 17270Sstevel@tonic-gate vp, off, bytes, flags); 17280Sstevel@tonic-gate 17290Sstevel@tonic-gate ASSERT((flags & ~(PG_EXCL | PG_WAIT | PG_PHYSCONTIG)) == 0); 17300Sstevel@tonic-gate 17310Sstevel@tonic-gate pages_req = npages = mmu_btopr(bytes); 17320Sstevel@tonic-gate 17330Sstevel@tonic-gate /* 17340Sstevel@tonic-gate * Do the freemem and pcf accounting. 17350Sstevel@tonic-gate */ 17360Sstevel@tonic-gate if (!page_create_wait(npages, flags)) { 17370Sstevel@tonic-gate return (NULL); 17380Sstevel@tonic-gate } 17390Sstevel@tonic-gate 17400Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_PAGE_CREATE_SUCCESS, 17410Sstevel@tonic-gate "page_create_success:vp %p off %llx", 17420Sstevel@tonic-gate vp, off); 17430Sstevel@tonic-gate 17440Sstevel@tonic-gate /* 17450Sstevel@tonic-gate * If satisfying this request has left us with too little 17460Sstevel@tonic-gate * memory, start the wheels turning to get some back. The 17470Sstevel@tonic-gate * first clause of the test prevents waking up the pageout 17480Sstevel@tonic-gate * daemon in situations where it would decide that there's 17490Sstevel@tonic-gate * nothing to do. 
17500Sstevel@tonic-gate */ 17510Sstevel@tonic-gate if (nscan < desscan && freemem < minfree) { 17520Sstevel@tonic-gate TRACE_1(TR_FAC_VM, TR_PAGEOUT_CV_SIGNAL, 17530Sstevel@tonic-gate "pageout_cv_signal:freemem %ld", freemem); 17540Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 17550Sstevel@tonic-gate } 17560Sstevel@tonic-gate 17570Sstevel@tonic-gate if (flags & PG_PHYSCONTIG) { 17580Sstevel@tonic-gate 17590Sstevel@tonic-gate plist = page_get_contigpage(&npages, mattr, 1); 17600Sstevel@tonic-gate if (plist == NULL) { 17610Sstevel@tonic-gate page_create_putback(npages); 17620Sstevel@tonic-gate return (NULL); 17630Sstevel@tonic-gate } 17640Sstevel@tonic-gate 17650Sstevel@tonic-gate pp = plist; 17660Sstevel@tonic-gate 17670Sstevel@tonic-gate do { 17680Sstevel@tonic-gate if (!page_hashin(pp, vp, off, NULL)) { 17690Sstevel@tonic-gate panic("pg_creat_io: hashin failed %p %p %llx", 17700Sstevel@tonic-gate (void *)pp, (void *)vp, off); 17710Sstevel@tonic-gate } 17720Sstevel@tonic-gate VM_STAT_ADD(page_create_new); 17730Sstevel@tonic-gate off += MMU_PAGESIZE; 17740Sstevel@tonic-gate PP_CLRFREE(pp); 17750Sstevel@tonic-gate PP_CLRAGED(pp); 17760Sstevel@tonic-gate page_set_props(pp, P_REF); 17770Sstevel@tonic-gate pp = pp->p_next; 17780Sstevel@tonic-gate } while (pp != plist); 17790Sstevel@tonic-gate 17800Sstevel@tonic-gate if (!npages) { 17810Sstevel@tonic-gate check_dma(mattr, plist, pages_req); 17820Sstevel@tonic-gate return (plist); 17830Sstevel@tonic-gate } else { 17840Sstevel@tonic-gate vaddr += (pages_req - npages) << MMU_PAGESHIFT; 17850Sstevel@tonic-gate } 17860Sstevel@tonic-gate 17870Sstevel@tonic-gate /* 17880Sstevel@tonic-gate * fall-thru: 17890Sstevel@tonic-gate * 17900Sstevel@tonic-gate * page_get_contigpage returns when npages <= sgllen. 17910Sstevel@tonic-gate * Grab the rest of the non-contig pages below from anylist. 17920Sstevel@tonic-gate */ 17930Sstevel@tonic-gate } 17940Sstevel@tonic-gate 17950Sstevel@tonic-gate /* 17960Sstevel@tonic-gate * Loop around collecting the requested number of pages. 17970Sstevel@tonic-gate * Most of the time, we have to `create' a new page. With 17980Sstevel@tonic-gate * this in mind, pull the page off the free list before 17990Sstevel@tonic-gate * getting the hash lock. This will minimize the hash 18000Sstevel@tonic-gate * lock hold time, nesting, and the like. If it turns 18010Sstevel@tonic-gate * out we don't need the page, we put it back at the end. 18020Sstevel@tonic-gate */ 18030Sstevel@tonic-gate while (npages--) { 18040Sstevel@tonic-gate phm = NULL; 18050Sstevel@tonic-gate 18060Sstevel@tonic-gate index = PAGE_HASH_FUNC(vp, off); 18070Sstevel@tonic-gate top: 18080Sstevel@tonic-gate ASSERT(phm == NULL); 18090Sstevel@tonic-gate ASSERT(index == PAGE_HASH_FUNC(vp, off)); 18100Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 18110Sstevel@tonic-gate 18120Sstevel@tonic-gate if (npp == NULL) { 18130Sstevel@tonic-gate /* 18140Sstevel@tonic-gate * Try to get the page of any color either from 18150Sstevel@tonic-gate * the freelist or from the cache list. 18160Sstevel@tonic-gate */ 18170Sstevel@tonic-gate npp = page_get_anylist(vp, off, as, vaddr, MMU_PAGESIZE, 18180Sstevel@tonic-gate flags & ~PG_MATCH_COLOR, mattr, NULL); 18190Sstevel@tonic-gate if (npp == NULL) { 18200Sstevel@tonic-gate if (mattr == NULL) { 18210Sstevel@tonic-gate /* 18220Sstevel@tonic-gate * Not looking for a special page; 18230Sstevel@tonic-gate * panic! 
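					 * (A NULL mattr means there is no DMA
					 * restriction, so once
					 * page_create_wait() has succeeded a
					 * page should always be available;
					 * getting here indicates a bug rather
					 * than an unsatisfiable range.)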
18240Sstevel@tonic-gate 					 */
18250Sstevel@tonic-gate 					panic("no page found %d", (int)npages);
18260Sstevel@tonic-gate 				}
18270Sstevel@tonic-gate 				/*
18280Sstevel@tonic-gate 				 * No page found! This can happen
18290Sstevel@tonic-gate 				 * if we are looking for a page
18300Sstevel@tonic-gate 				 * within a specific memory range
18310Sstevel@tonic-gate 				 * for DMA purposes. If PG_WAIT is
18320Sstevel@tonic-gate 				 * specified then we wait for a
18330Sstevel@tonic-gate 				 * while and then try again. The
18340Sstevel@tonic-gate 				 * wait could be forever if we
18350Sstevel@tonic-gate 				 * don't get the page(s) we need.
18360Sstevel@tonic-gate 				 *
18370Sstevel@tonic-gate 				 * Note: XXX We really need a mechanism
18380Sstevel@tonic-gate 				 * to wait for pages in the desired
18390Sstevel@tonic-gate 				 * range. For now, we wait for any
18400Sstevel@tonic-gate 				 * pages and see if we can use them.
18410Sstevel@tonic-gate 				 */
18420Sstevel@tonic-gate 
18430Sstevel@tonic-gate 				if ((mattr != NULL) && (flags & PG_WAIT)) {
18440Sstevel@tonic-gate 					delay(10);
18450Sstevel@tonic-gate 					goto top;
18460Sstevel@tonic-gate 				}
18470Sstevel@tonic-gate 
18480Sstevel@tonic-gate 				goto fail; /* undo accounting stuff */
18490Sstevel@tonic-gate 			}
18500Sstevel@tonic-gate 
18510Sstevel@tonic-gate 			if (PP_ISAGED(npp) == 0) {
18520Sstevel@tonic-gate 				/*
18530Sstevel@tonic-gate 				 * Since this page came from the
18540Sstevel@tonic-gate 				 * cachelist, we must destroy the
18550Sstevel@tonic-gate 				 * old vnode association.
18560Sstevel@tonic-gate 				 */
18570Sstevel@tonic-gate 				page_hashout(npp, (kmutex_t *)NULL);
18580Sstevel@tonic-gate 			}
18590Sstevel@tonic-gate 		}
18600Sstevel@tonic-gate 
18610Sstevel@tonic-gate 		/*
18620Sstevel@tonic-gate 		 * We own this page!
18630Sstevel@tonic-gate 		 */
18640Sstevel@tonic-gate 		ASSERT(PAGE_EXCL(npp));
18650Sstevel@tonic-gate 		ASSERT(npp->p_vnode == NULL);
18660Sstevel@tonic-gate 		ASSERT(!hat_page_is_mapped(npp));
18670Sstevel@tonic-gate 		PP_CLRFREE(npp);
18680Sstevel@tonic-gate 		PP_CLRAGED(npp);
18690Sstevel@tonic-gate 
18700Sstevel@tonic-gate 		/*
18710Sstevel@tonic-gate 		 * Here we have a page in our hot little mitts and are
18720Sstevel@tonic-gate 		 * just waiting to stuff it on the appropriate lists.
18730Sstevel@tonic-gate 		 * Get the mutex and check to see if it really does
18740Sstevel@tonic-gate 		 * not exist.
18750Sstevel@tonic-gate 		 */
18760Sstevel@tonic-gate 		phm = PAGE_HASH_MUTEX(index);
18770Sstevel@tonic-gate 		mutex_enter(phm);
18780Sstevel@tonic-gate 		PAGE_HASH_SEARCH(index, pp, vp, off);
18790Sstevel@tonic-gate 		if (pp == NULL) {
18800Sstevel@tonic-gate 			VM_STAT_ADD(page_create_new);
18810Sstevel@tonic-gate 			pp = npp;
18820Sstevel@tonic-gate 			npp = NULL;
18830Sstevel@tonic-gate 			if (!page_hashin(pp, vp, off, phm)) {
18840Sstevel@tonic-gate 				/*
18850Sstevel@tonic-gate 				 * Since we hold the page hash mutex and
18860Sstevel@tonic-gate 				 * just searched for this page, page_hashin
18870Sstevel@tonic-gate 				 * had better not fail. If it does, that
18880Sstevel@tonic-gate 				 * means some thread did not follow the
18890Sstevel@tonic-gate 				 * page hash mutex rules. Panic now and
18900Sstevel@tonic-gate 				 * get it over with. As usual, go down
18910Sstevel@tonic-gate 				 * holding all the locks.
18920Sstevel@tonic-gate */ 18930Sstevel@tonic-gate ASSERT(MUTEX_HELD(phm)); 18940Sstevel@tonic-gate panic("page_create: hashin fail %p %p %llx %p", 18950Sstevel@tonic-gate (void *)pp, (void *)vp, off, (void *)phm); 18960Sstevel@tonic-gate 18970Sstevel@tonic-gate } 18980Sstevel@tonic-gate ASSERT(MUTEX_HELD(phm)); 18990Sstevel@tonic-gate mutex_exit(phm); 19000Sstevel@tonic-gate phm = NULL; 19010Sstevel@tonic-gate 19020Sstevel@tonic-gate /* 19030Sstevel@tonic-gate * Hat layer locking need not be done to set 19040Sstevel@tonic-gate * the following bits since the page is not hashed 19050Sstevel@tonic-gate * and was on the free list (i.e., had no mappings). 19060Sstevel@tonic-gate * 19070Sstevel@tonic-gate * Set the reference bit to protect 19080Sstevel@tonic-gate * against immediate pageout 19090Sstevel@tonic-gate * 19100Sstevel@tonic-gate * XXXmh modify freelist code to set reference 19110Sstevel@tonic-gate * bit so we don't have to do it here. 19120Sstevel@tonic-gate */ 19130Sstevel@tonic-gate page_set_props(pp, P_REF); 19140Sstevel@tonic-gate } else { 19150Sstevel@tonic-gate ASSERT(MUTEX_HELD(phm)); 19160Sstevel@tonic-gate mutex_exit(phm); 19170Sstevel@tonic-gate phm = NULL; 19180Sstevel@tonic-gate /* 19190Sstevel@tonic-gate * NOTE: This should not happen for pages associated 19200Sstevel@tonic-gate * with kernel vnode 'kvp'. 19210Sstevel@tonic-gate */ 19220Sstevel@tonic-gate /* XX64 - to debug why this happens! */ 1923*3290Sjohansen ASSERT(!VN_ISKAS(vp)); 1924*3290Sjohansen if (VN_ISKAS(vp)) 19250Sstevel@tonic-gate cmn_err(CE_NOTE, 19260Sstevel@tonic-gate "page_create: page not expected " 19270Sstevel@tonic-gate "in hash list for kernel vnode - pp 0x%p", 19280Sstevel@tonic-gate (void *)pp); 19290Sstevel@tonic-gate VM_STAT_ADD(page_create_exists); 19300Sstevel@tonic-gate goto fail; 19310Sstevel@tonic-gate } 19320Sstevel@tonic-gate 19330Sstevel@tonic-gate /* 19340Sstevel@tonic-gate * Got a page! It is locked. Acquire the i/o 19350Sstevel@tonic-gate * lock since we are going to use the p_next and 19360Sstevel@tonic-gate * p_prev fields to link the requested pages together. 19370Sstevel@tonic-gate */ 19380Sstevel@tonic-gate page_io_lock(pp); 19390Sstevel@tonic-gate page_add(&plist, pp); 19400Sstevel@tonic-gate plist = plist->p_next; 19410Sstevel@tonic-gate off += MMU_PAGESIZE; 19420Sstevel@tonic-gate vaddr += MMU_PAGESIZE; 19430Sstevel@tonic-gate } 19440Sstevel@tonic-gate 19450Sstevel@tonic-gate check_dma(mattr, plist, pages_req); 19460Sstevel@tonic-gate return (plist); 19470Sstevel@tonic-gate 19480Sstevel@tonic-gate fail: 19490Sstevel@tonic-gate if (npp != NULL) { 19500Sstevel@tonic-gate /* 19510Sstevel@tonic-gate * Did not need this page after all. 19520Sstevel@tonic-gate * Put it back on the free list. 19530Sstevel@tonic-gate */ 19540Sstevel@tonic-gate VM_STAT_ADD(page_create_putbacks); 19550Sstevel@tonic-gate PP_SETFREE(npp); 19560Sstevel@tonic-gate PP_SETAGED(npp); 19570Sstevel@tonic-gate npp->p_offset = (u_offset_t)-1; 19580Sstevel@tonic-gate page_list_add(npp, PG_FREE_LIST | PG_LIST_TAIL); 19590Sstevel@tonic-gate page_unlock(npp); 19600Sstevel@tonic-gate } 19610Sstevel@tonic-gate 19620Sstevel@tonic-gate /* 19630Sstevel@tonic-gate * Give up the pages we already got. 
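	 * Each page still on plist holds its exclusive page lock and its
	 * i/o lock; the loop below drops the i/o lock and lets VN_DISPOSE()
	 * invalidate and free the page.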
19640Sstevel@tonic-gate */ 19650Sstevel@tonic-gate while (plist != NULL) { 19660Sstevel@tonic-gate pp = plist; 19670Sstevel@tonic-gate page_sub(&plist, pp); 19680Sstevel@tonic-gate page_io_unlock(pp); 19690Sstevel@tonic-gate plist_len++; 19700Sstevel@tonic-gate /*LINTED: constant in conditional ctx*/ 19710Sstevel@tonic-gate VN_DISPOSE(pp, B_INVAL, 0, kcred); 19720Sstevel@tonic-gate } 19730Sstevel@tonic-gate 19740Sstevel@tonic-gate /* 19750Sstevel@tonic-gate * VN_DISPOSE does freemem accounting for the pages in plist 19760Sstevel@tonic-gate * by calling page_free. So, we need to undo the pcf accounting 19770Sstevel@tonic-gate * for only the remaining pages. 19780Sstevel@tonic-gate */ 19790Sstevel@tonic-gate VM_STAT_ADD(page_create_putbacks); 19800Sstevel@tonic-gate page_create_putback(pages_req - plist_len); 19810Sstevel@tonic-gate 19820Sstevel@tonic-gate return (NULL); 19830Sstevel@tonic-gate } 19840Sstevel@tonic-gate 19850Sstevel@tonic-gate 19860Sstevel@tonic-gate /* 19870Sstevel@tonic-gate * Copy the data from the physical page represented by "frompp" to 19880Sstevel@tonic-gate * that represented by "topp". ppcopy uses CPU->cpu_caddr1 and 19890Sstevel@tonic-gate * CPU->cpu_caddr2. It assumes that no one uses either map at interrupt 19900Sstevel@tonic-gate * level and no one sleeps with an active mapping there. 19910Sstevel@tonic-gate * 19920Sstevel@tonic-gate * Note that the ref/mod bits in the page_t's are not affected by 19930Sstevel@tonic-gate * this operation, hence it is up to the caller to update them appropriately. 19940Sstevel@tonic-gate */ 19953253Smec int 19960Sstevel@tonic-gate ppcopy(page_t *frompp, page_t *topp) 19970Sstevel@tonic-gate { 19980Sstevel@tonic-gate caddr_t pp_addr1; 19990Sstevel@tonic-gate caddr_t pp_addr2; 20000Sstevel@tonic-gate void *pte1; 20010Sstevel@tonic-gate void *pte2; 20020Sstevel@tonic-gate kmutex_t *ppaddr_mutex; 20033253Smec label_t ljb; 20043253Smec int ret = 1; 20050Sstevel@tonic-gate 20060Sstevel@tonic-gate ASSERT_STACK_ALIGNED(); 20070Sstevel@tonic-gate ASSERT(PAGE_LOCKED(frompp)); 20080Sstevel@tonic-gate ASSERT(PAGE_LOCKED(topp)); 20090Sstevel@tonic-gate 20100Sstevel@tonic-gate if (kpm_enable) { 20110Sstevel@tonic-gate pp_addr1 = hat_kpm_page2va(frompp, 0); 20120Sstevel@tonic-gate pp_addr2 = hat_kpm_page2va(topp, 0); 20130Sstevel@tonic-gate kpreempt_disable(); 20140Sstevel@tonic-gate } else { 20150Sstevel@tonic-gate /* 20160Sstevel@tonic-gate * disable pre-emption so that CPU can't change 20170Sstevel@tonic-gate */ 20180Sstevel@tonic-gate kpreempt_disable(); 20190Sstevel@tonic-gate 20200Sstevel@tonic-gate pp_addr1 = CPU->cpu_caddr1; 20210Sstevel@tonic-gate pp_addr2 = CPU->cpu_caddr2; 20220Sstevel@tonic-gate pte1 = (void *)CPU->cpu_caddr1pte; 20230Sstevel@tonic-gate pte2 = (void *)CPU->cpu_caddr2pte; 20240Sstevel@tonic-gate 20250Sstevel@tonic-gate ppaddr_mutex = &CPU->cpu_ppaddr_mutex; 20260Sstevel@tonic-gate mutex_enter(ppaddr_mutex); 20270Sstevel@tonic-gate 20280Sstevel@tonic-gate hat_mempte_remap(page_pptonum(frompp), pp_addr1, pte1, 20290Sstevel@tonic-gate PROT_READ | HAT_STORECACHING_OK, HAT_LOAD_NOCONSIST); 20300Sstevel@tonic-gate hat_mempte_remap(page_pptonum(topp), pp_addr2, pte2, 20310Sstevel@tonic-gate PROT_READ | PROT_WRITE | HAT_STORECACHING_OK, 20320Sstevel@tonic-gate HAT_LOAD_NOCONSIST); 20330Sstevel@tonic-gate } 20340Sstevel@tonic-gate 20353253Smec if (on_fault(&ljb)) { 20363253Smec ret = 0; 20373253Smec goto faulted; 20383253Smec } 20390Sstevel@tonic-gate if (use_sse_pagecopy) 20400Sstevel@tonic-gate hwblkpagecopy(pp_addr1, 
pp_addr2);
20410Sstevel@tonic-gate 	else
20420Sstevel@tonic-gate 		bcopy(pp_addr1, pp_addr2, PAGESIZE);
20430Sstevel@tonic-gate 
20443253Smec 	no_fault();
20453253Smec faulted:
20460Sstevel@tonic-gate 	if (!kpm_enable)
20470Sstevel@tonic-gate 		mutex_exit(ppaddr_mutex);
20480Sstevel@tonic-gate 	kpreempt_enable();
20493253Smec 	return (ret);
20500Sstevel@tonic-gate }
20510Sstevel@tonic-gate 
20520Sstevel@tonic-gate /*
20530Sstevel@tonic-gate  * Zero the physical page from off to off + len given by `pp'
20540Sstevel@tonic-gate  * without changing the reference and modified bits of the page.
20550Sstevel@tonic-gate  *
20560Sstevel@tonic-gate  * We do this using CPU private page address #2; see ppcopy() for more info.
20570Sstevel@tonic-gate  * pagezero() must not be called at interrupt level.
20580Sstevel@tonic-gate  */
20590Sstevel@tonic-gate void
20600Sstevel@tonic-gate pagezero(page_t *pp, uint_t off, uint_t len)
20610Sstevel@tonic-gate {
20620Sstevel@tonic-gate 	caddr_t pp_addr2;
20630Sstevel@tonic-gate 	void *pte2;
20640Sstevel@tonic-gate 	kmutex_t *ppaddr_mutex;
20650Sstevel@tonic-gate 
20660Sstevel@tonic-gate 	ASSERT_STACK_ALIGNED();
20670Sstevel@tonic-gate 	ASSERT(len <= MMU_PAGESIZE);
20680Sstevel@tonic-gate 	ASSERT(off <= MMU_PAGESIZE);
20690Sstevel@tonic-gate 	ASSERT(off + len <= MMU_PAGESIZE);
20700Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(pp));
20710Sstevel@tonic-gate 
20720Sstevel@tonic-gate 	if (kpm_enable) {
20730Sstevel@tonic-gate 		pp_addr2 = hat_kpm_page2va(pp, 0);
20740Sstevel@tonic-gate 		kpreempt_disable();
20750Sstevel@tonic-gate 	} else {
20760Sstevel@tonic-gate 		kpreempt_disable();
20770Sstevel@tonic-gate 
20780Sstevel@tonic-gate 		pp_addr2 = CPU->cpu_caddr2;
20790Sstevel@tonic-gate 		pte2 = (void *)CPU->cpu_caddr2pte;
20800Sstevel@tonic-gate 
20810Sstevel@tonic-gate 		ppaddr_mutex = &CPU->cpu_ppaddr_mutex;
20820Sstevel@tonic-gate 		mutex_enter(ppaddr_mutex);
20830Sstevel@tonic-gate 
20840Sstevel@tonic-gate 		hat_mempte_remap(page_pptonum(pp), pp_addr2, pte2,
20850Sstevel@tonic-gate 		    PROT_READ | PROT_WRITE | HAT_STORECACHING_OK,
20860Sstevel@tonic-gate 		    HAT_LOAD_NOCONSIST);
20870Sstevel@tonic-gate 	}
20880Sstevel@tonic-gate 
20890Sstevel@tonic-gate 	if (use_sse_pagezero)
20900Sstevel@tonic-gate 		hwblkclr(pp_addr2 + off, len);
20910Sstevel@tonic-gate 	else
20920Sstevel@tonic-gate 		bzero(pp_addr2 + off, len);
20930Sstevel@tonic-gate 
20940Sstevel@tonic-gate 	if (!kpm_enable)
20950Sstevel@tonic-gate 		mutex_exit(ppaddr_mutex);
20960Sstevel@tonic-gate 	kpreempt_enable();
20970Sstevel@tonic-gate }
20980Sstevel@tonic-gate 
20990Sstevel@tonic-gate /*
21000Sstevel@tonic-gate  * Platform-dependent page scrub call.
21010Sstevel@tonic-gate  */
21020Sstevel@tonic-gate void
21030Sstevel@tonic-gate pagescrub(page_t *pp, uint_t off, uint_t len)
21040Sstevel@tonic-gate {
21050Sstevel@tonic-gate 	/*
21060Sstevel@tonic-gate 	 * For now, we rely on the fact that pagezero() will
21070Sstevel@tonic-gate 	 * always clear UEs.
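	 * (A UE is an uncorrectable memory error.)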
21080Sstevel@tonic-gate 	 */
21090Sstevel@tonic-gate 	pagezero(pp, off, len);
21100Sstevel@tonic-gate }
21110Sstevel@tonic-gate 
21120Sstevel@tonic-gate /*
21130Sstevel@tonic-gate  * set up two private addresses on a given CPU for use in ppcopy()
21140Sstevel@tonic-gate  */
21150Sstevel@tonic-gate void
21160Sstevel@tonic-gate setup_vaddr_for_ppcopy(struct cpu *cpup)
21170Sstevel@tonic-gate {
21180Sstevel@tonic-gate 	void *addr;
21190Sstevel@tonic-gate 	void *pte;
21200Sstevel@tonic-gate 
21210Sstevel@tonic-gate 	addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP);
21220Sstevel@tonic-gate 	pte = hat_mempte_setup(addr);
21230Sstevel@tonic-gate 	cpup->cpu_caddr1 = addr;
21240Sstevel@tonic-gate 	cpup->cpu_caddr1pte = (pteptr_t)pte;
21250Sstevel@tonic-gate 
21260Sstevel@tonic-gate 	addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP);
21270Sstevel@tonic-gate 	pte = hat_mempte_setup(addr);
21280Sstevel@tonic-gate 	cpup->cpu_caddr2 = addr;
21290Sstevel@tonic-gate 	cpup->cpu_caddr2pte = (pteptr_t)pte;
21300Sstevel@tonic-gate 
21310Sstevel@tonic-gate 	mutex_init(&cpup->cpu_ppaddr_mutex, NULL, MUTEX_DEFAULT, NULL);
21320Sstevel@tonic-gate }
21330Sstevel@tonic-gate 
21340Sstevel@tonic-gate 
21350Sstevel@tonic-gate /*
21360Sstevel@tonic-gate  * Create the pageout scanner thread. The thread starts executing
21370Sstevel@tonic-gate  * `procedure' in process pp at priority pri.
21380Sstevel@tonic-gate  */
21390Sstevel@tonic-gate void
21400Sstevel@tonic-gate pageout_init(void (*procedure)(), proc_t *pp, pri_t pri)
21410Sstevel@tonic-gate {
21420Sstevel@tonic-gate 	(void) thread_create(NULL, 0, procedure, NULL, 0, pp, TS_RUN, pri);
21430Sstevel@tonic-gate }
21440Sstevel@tonic-gate 
21450Sstevel@tonic-gate /*
21460Sstevel@tonic-gate  * Function for flushing D-cache when performing module relocations
21470Sstevel@tonic-gate  * to an alternate mapping. Unnecessary on Intel / AMD platforms.
21480Sstevel@tonic-gate  */
21490Sstevel@tonic-gate void
21500Sstevel@tonic-gate dcache_flushall()
21510Sstevel@tonic-gate {}
21523177Sdp78419 
21533177Sdp78419 size_t
21543177Sdp78419 exec_get_spslew(void)
21553177Sdp78419 {
21563177Sdp78419 	return (0);
21573177Sdp78419 }
2158