/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 280Sstevel@tonic-gate #include <sys/types.h> 290Sstevel@tonic-gate #include <sys/param.h> 300Sstevel@tonic-gate #include <sys/systm.h> 310Sstevel@tonic-gate #include <sys/buf.h> 320Sstevel@tonic-gate #include <sys/cred.h> 330Sstevel@tonic-gate #include <sys/errno.h> 340Sstevel@tonic-gate #include <sys/vnode.h> 350Sstevel@tonic-gate #include <sys/cmn_err.h> 360Sstevel@tonic-gate #include <sys/swap.h> 370Sstevel@tonic-gate #include <sys/mman.h> 380Sstevel@tonic-gate #include <sys/vmsystm.h> 390Sstevel@tonic-gate #include <sys/vtrace.h> 400Sstevel@tonic-gate #include <sys/debug.h> 410Sstevel@tonic-gate #include <sys/sysmacros.h> 420Sstevel@tonic-gate #include <sys/vm.h> 430Sstevel@tonic-gate 440Sstevel@tonic-gate #include <sys/fs/swapnode.h> 450Sstevel@tonic-gate 460Sstevel@tonic-gate #include <vm/seg.h> 470Sstevel@tonic-gate #include <vm/page.h> 480Sstevel@tonic-gate #include <vm/pvn.h> 490Sstevel@tonic-gate #include <fs/fs_subr.h> 500Sstevel@tonic-gate 510Sstevel@tonic-gate #include <vm/seg_kp.h> 520Sstevel@tonic-gate 530Sstevel@tonic-gate /* 540Sstevel@tonic-gate * Define the routines within this file. 
 */

/*
 * Forward declarations for the swapfs VOP implementations below.
 */
static int swap_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, struct page **plarr, size_t plsz,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
static int swap_putpage(struct vnode *vp, offset_t off, size_t len,
    int flags, struct cred *cr);
static void swap_inactive(struct vnode *vp, struct cred *cr);
static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
    cred_t *cr);

static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);

int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
    uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
    size_t *lenp, int flags, struct cred *cr);

/*
 * Template of (VOP name, handler) pairs used to construct swap_vnodeops.
 * Operations not listed fall back to the fs defaults; the ones mapped to
 * fs_error are explicitly unsupported on swapfs vnodes.
 */
const fs_operation_def_t swap_vnodeops_template[] = {
	VOPNAME_INACTIVE, (fs_generic_func_p) swap_inactive,
	VOPNAME_GETPAGE, swap_getpage,
	VOPNAME_PUTPAGE, swap_putpage,
	VOPNAME_DISPOSE, (fs_generic_func_p) swap_dispose,
	VOPNAME_SETFL, fs_error,
	VOPNAME_POLL, fs_error,
	VOPNAME_PATHCONF, fs_error,
	VOPNAME_GETSECATTR, fs_error,
	VOPNAME_SHRLOCK, fs_error,
	NULL, NULL
};

/* Operations vector built from the template above at fs init time. */
vnodeops_t *swap_vnodeops;

/*
 * VOP_INACTIVE for swapfs: nothing to tear down here, only a debug trace.
 */
/* ARGSUSED */
static void
swap_inactive(
	struct vnode *vp,
	struct cred *cr)
{
	SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
}

/*
 * Return all the pages from [off..off+len] in given file
 *
 * Single-page requests go straight to swap_getapage(); larger requests
 * are fanned out one page at a time via pvn_getpages(), which calls
 * swap_getapage() for each page in the range.
 */
static int
swap_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	int err;

	SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
	    (void *)vp, off, len, 0, 0);

	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
	    "swapfs getpage:vp %p off %llx len %ld",
	    (void *)vp, off, len);

	if (len <= PAGESIZE) {
		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
		    seg, addr, rw, cr);
	} else {
		err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len,
		    protp, pl, plsz, seg, addr, rw, cr);
	}

	return (err);
}
1360Sstevel@tonic-gate 1370Sstevel@tonic-gate /* 1380Sstevel@tonic-gate * Called from pvn_getpages or swap_getpage to get a particular page. 1390Sstevel@tonic-gate */ 1400Sstevel@tonic-gate /*ARGSUSED*/ 1410Sstevel@tonic-gate static int 1420Sstevel@tonic-gate swap_getapage( 1430Sstevel@tonic-gate struct vnode *vp, 1440Sstevel@tonic-gate u_offset_t off, 1450Sstevel@tonic-gate size_t len, 1460Sstevel@tonic-gate uint_t *protp, 1470Sstevel@tonic-gate page_t *pl[], 1480Sstevel@tonic-gate size_t plsz, 1490Sstevel@tonic-gate struct seg *seg, 1500Sstevel@tonic-gate caddr_t addr, 1510Sstevel@tonic-gate enum seg_rw rw, 1520Sstevel@tonic-gate struct cred *cr) 1530Sstevel@tonic-gate { 1540Sstevel@tonic-gate struct page *pp, *rpp; 1550Sstevel@tonic-gate int flags; 1560Sstevel@tonic-gate int err = 0; 1570Sstevel@tonic-gate struct vnode *pvp = NULL; 1580Sstevel@tonic-gate u_offset_t poff; 1590Sstevel@tonic-gate int flag_noreloc; 1600Sstevel@tonic-gate se_t lock; 1610Sstevel@tonic-gate extern int kcage_on; 1620Sstevel@tonic-gate int upgrade = 0; 1630Sstevel@tonic-gate 1640Sstevel@tonic-gate SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n", 1650Sstevel@tonic-gate vp, off, len, 0, 0); 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate /* 1680Sstevel@tonic-gate * Until there is a call-back mechanism to cause SEGKP 1690Sstevel@tonic-gate * pages to be unlocked, make them non-relocatable. 1700Sstevel@tonic-gate */ 1710Sstevel@tonic-gate if (SEG_IS_SEGKP(seg)) 1720Sstevel@tonic-gate flag_noreloc = PG_NORELOC; 1730Sstevel@tonic-gate else 1740Sstevel@tonic-gate flag_noreloc = 0; 1750Sstevel@tonic-gate 1760Sstevel@tonic-gate if (protp != NULL) 1770Sstevel@tonic-gate *protp = PROT_ALL; 1780Sstevel@tonic-gate 1790Sstevel@tonic-gate lock = (rw == S_CREATE ? 
SE_EXCL : SE_SHARED); 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate again: 1820Sstevel@tonic-gate if (pp = page_lookup(vp, off, lock)) { 1830Sstevel@tonic-gate /* 1840Sstevel@tonic-gate * In very rare instances, a segkp page may have been 1850Sstevel@tonic-gate * relocated outside of the kernel by the kernel cage 1860Sstevel@tonic-gate * due to the window between page_unlock() and 1870Sstevel@tonic-gate * VOP_PUTPAGE() in segkp_unlock(). Due to the 1880Sstevel@tonic-gate * rareness of these occurances, the solution is to 1890Sstevel@tonic-gate * relocate the page to a P_NORELOC page. 1900Sstevel@tonic-gate */ 1910Sstevel@tonic-gate if (flag_noreloc != 0) { 1920Sstevel@tonic-gate if (!PP_ISNORELOC(pp) && kcage_on) { 1930Sstevel@tonic-gate if (lock != SE_EXCL) { 1940Sstevel@tonic-gate upgrade = 1; 1950Sstevel@tonic-gate if (!page_tryupgrade(pp)) { 1960Sstevel@tonic-gate page_unlock(pp); 1970Sstevel@tonic-gate lock = SE_EXCL; 1980Sstevel@tonic-gate goto again; 1990Sstevel@tonic-gate } 2000Sstevel@tonic-gate } 2010Sstevel@tonic-gate 2020Sstevel@tonic-gate if (page_relocate_cage(&pp, &rpp) != 0) 2030Sstevel@tonic-gate panic("swap_getapage: " 2040Sstevel@tonic-gate "page_relocate_cage failed"); 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate pp = rpp; 2070Sstevel@tonic-gate } 2080Sstevel@tonic-gate } 2090Sstevel@tonic-gate 2100Sstevel@tonic-gate if (pl) { 2110Sstevel@tonic-gate if (upgrade) 2120Sstevel@tonic-gate page_downgrade(pp); 2130Sstevel@tonic-gate 2140Sstevel@tonic-gate pl[0] = pp; 2150Sstevel@tonic-gate pl[1] = NULL; 2160Sstevel@tonic-gate } else { 2170Sstevel@tonic-gate page_unlock(pp); 2180Sstevel@tonic-gate } 2190Sstevel@tonic-gate } else { 2200Sstevel@tonic-gate pp = page_create_va(vp, off, PAGESIZE, 2210Sstevel@tonic-gate PG_WAIT | PG_EXCL | flag_noreloc, 2220Sstevel@tonic-gate seg, addr); 2230Sstevel@tonic-gate /* 2240Sstevel@tonic-gate * Someone raced in and created the page after we did the 2250Sstevel@tonic-gate * lookup but before we did the create, so 
go back and 2260Sstevel@tonic-gate * try to look it up again. 2270Sstevel@tonic-gate */ 2280Sstevel@tonic-gate if (pp == NULL) 2290Sstevel@tonic-gate goto again; 2300Sstevel@tonic-gate if (rw != S_CREATE) { 2310Sstevel@tonic-gate err = swap_getphysname(vp, off, &pvp, &poff); 2320Sstevel@tonic-gate if (pvp) { 2330Sstevel@tonic-gate struct anon *ap; 2340Sstevel@tonic-gate kmutex_t *ahm; 2350Sstevel@tonic-gate 2360Sstevel@tonic-gate flags = (pl == NULL ? B_ASYNC|B_READ : B_READ); 2370Sstevel@tonic-gate err = VOP_PAGEIO(pvp, pp, poff, 2380Sstevel@tonic-gate PAGESIZE, flags, cr); 2390Sstevel@tonic-gate 2400Sstevel@tonic-gate if (!err) { 2410Sstevel@tonic-gate ahm = &anonhash_lock[AH_LOCK(vp, off)]; 2420Sstevel@tonic-gate mutex_enter(ahm); 2430Sstevel@tonic-gate 2440Sstevel@tonic-gate ap = swap_anon(vp, off); 2450Sstevel@tonic-gate if (ap == NULL) 2460Sstevel@tonic-gate panic("swap_getapage: null anon"); 2470Sstevel@tonic-gate 2480Sstevel@tonic-gate if (ap->an_pvp == pvp && 2490Sstevel@tonic-gate ap->an_poff == poff) { 2500Sstevel@tonic-gate swap_phys_free(pvp, poff, 2510Sstevel@tonic-gate PAGESIZE); 2520Sstevel@tonic-gate ap->an_pvp = NULL; 2530Sstevel@tonic-gate ap->an_poff = NULL; 2540Sstevel@tonic-gate hat_setmod(pp); 2550Sstevel@tonic-gate } 2560Sstevel@tonic-gate 2570Sstevel@tonic-gate mutex_exit(ahm); 2580Sstevel@tonic-gate } 2590Sstevel@tonic-gate } else { 2600Sstevel@tonic-gate if (!err) 2610Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE); 2620Sstevel@tonic-gate 2630Sstevel@tonic-gate /* 2640Sstevel@tonic-gate * If it's a fault ahead, release page_io_lock 2650Sstevel@tonic-gate * and SE_EXCL we grabbed in page_create_va 2660Sstevel@tonic-gate * 2670Sstevel@tonic-gate * If we are here, we haven't called VOP_PAGEIO 2680Sstevel@tonic-gate * and thus calling pvn_read_done(pp, B_READ) 2690Sstevel@tonic-gate * below may mislead that we tried i/o. 
Besides, 2700Sstevel@tonic-gate * in case of async, pvn_read_done() should 2710Sstevel@tonic-gate * not be called by *getpage() 2720Sstevel@tonic-gate */ 2730Sstevel@tonic-gate if (pl == NULL) { 2740Sstevel@tonic-gate /* 2750Sstevel@tonic-gate * swap_getphysname can return error 2760Sstevel@tonic-gate * only when we are getting called from 2770Sstevel@tonic-gate * swapslot_free which passes non-NULL 2780Sstevel@tonic-gate * pl to VOP_GETPAGE. 2790Sstevel@tonic-gate */ 2800Sstevel@tonic-gate ASSERT(err == 0); 2810Sstevel@tonic-gate page_io_unlock(pp); 2820Sstevel@tonic-gate page_unlock(pp); 2830Sstevel@tonic-gate } 2840Sstevel@tonic-gate } 2850Sstevel@tonic-gate } 2860Sstevel@tonic-gate 2870Sstevel@tonic-gate ASSERT(pp != NULL); 2880Sstevel@tonic-gate 2890Sstevel@tonic-gate if (err && pl) 2900Sstevel@tonic-gate pvn_read_done(pp, B_ERROR); 2910Sstevel@tonic-gate 2920Sstevel@tonic-gate if (!err && pl) 2930Sstevel@tonic-gate pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw); 2940Sstevel@tonic-gate } 2950Sstevel@tonic-gate TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE, 2960Sstevel@tonic-gate "swapfs getapage:pp %p vp %p off %llx", pp, vp, off); 2970Sstevel@tonic-gate return (err); 2980Sstevel@tonic-gate } 2990Sstevel@tonic-gate 3000Sstevel@tonic-gate /* 3010Sstevel@tonic-gate * Called from large page anon routines only! This is an ugly hack where 3020Sstevel@tonic-gate * the anon layer directly calls into swapfs with a preallocated large page. 3030Sstevel@tonic-gate * Another method would have been to change to VOP and add an extra arg for 3040Sstevel@tonic-gate * the preallocated large page. This all could be cleaned up later when we 3050Sstevel@tonic-gate * solve the anonymous naming problem and no longer need to loop across of 3060Sstevel@tonic-gate * the VOP in PAGESIZE increments to fill in or initialize a large page as 3070Sstevel@tonic-gate * is done today. 
I think the latter is better since it avoid a change to 3080Sstevel@tonic-gate * the VOP interface that could later be avoided. 3090Sstevel@tonic-gate */ 3100Sstevel@tonic-gate int 3110Sstevel@tonic-gate swap_getconpage( 3120Sstevel@tonic-gate struct vnode *vp, 3130Sstevel@tonic-gate u_offset_t off, 3140Sstevel@tonic-gate size_t len, 3150Sstevel@tonic-gate uint_t *protp, 3160Sstevel@tonic-gate page_t *pl[], 3170Sstevel@tonic-gate size_t plsz, 3180Sstevel@tonic-gate page_t *conpp, 319*2414Saguzovsk uint_t *pszc, 3200Sstevel@tonic-gate spgcnt_t *nreloc, 3210Sstevel@tonic-gate struct seg *seg, 3220Sstevel@tonic-gate caddr_t addr, 3230Sstevel@tonic-gate enum seg_rw rw, 3240Sstevel@tonic-gate struct cred *cr) 3250Sstevel@tonic-gate { 3260Sstevel@tonic-gate struct page *pp; 3270Sstevel@tonic-gate int err = 0; 3280Sstevel@tonic-gate struct vnode *pvp = NULL; 3290Sstevel@tonic-gate u_offset_t poff; 3300Sstevel@tonic-gate 3310Sstevel@tonic-gate ASSERT(len == PAGESIZE); 3320Sstevel@tonic-gate ASSERT(pl != NULL); 3330Sstevel@tonic-gate ASSERT(plsz == PAGESIZE); 3340Sstevel@tonic-gate ASSERT(protp == NULL); 3350Sstevel@tonic-gate ASSERT(nreloc != NULL); 3360Sstevel@tonic-gate ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */ 3370Sstevel@tonic-gate SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n", 3380Sstevel@tonic-gate vp, off, len, 0, 0); 3390Sstevel@tonic-gate 3400Sstevel@tonic-gate /* 3410Sstevel@tonic-gate * If we are not using a preallocated page then we know one already 3420Sstevel@tonic-gate * exists. So just let the old code handle it. 
3430Sstevel@tonic-gate */ 3440Sstevel@tonic-gate if (conpp == NULL) { 3450Sstevel@tonic-gate err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz, 3460Sstevel@tonic-gate seg, addr, rw, cr); 3470Sstevel@tonic-gate return (err); 3480Sstevel@tonic-gate } 3490Sstevel@tonic-gate ASSERT(conpp->p_szc != 0); 3500Sstevel@tonic-gate ASSERT(PAGE_EXCL(conpp)); 3510Sstevel@tonic-gate 3520Sstevel@tonic-gate 3530Sstevel@tonic-gate ASSERT(conpp->p_next == conpp); 3540Sstevel@tonic-gate ASSERT(conpp->p_prev == conpp); 3550Sstevel@tonic-gate ASSERT(!PP_ISAGED(conpp)); 3560Sstevel@tonic-gate ASSERT(!PP_ISFREE(conpp)); 3570Sstevel@tonic-gate 3580Sstevel@tonic-gate *nreloc = 0; 3590Sstevel@tonic-gate pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0); 3600Sstevel@tonic-gate 3610Sstevel@tonic-gate /* 3620Sstevel@tonic-gate * If existing page is found we may need to relocate. 3630Sstevel@tonic-gate */ 3640Sstevel@tonic-gate if (pp != conpp) { 3650Sstevel@tonic-gate ASSERT(rw != S_CREATE); 366*2414Saguzovsk ASSERT(pszc != NULL); 3670Sstevel@tonic-gate ASSERT(PAGE_SHARED(pp)); 3680Sstevel@tonic-gate if (pp->p_szc < conpp->p_szc) { 369*2414Saguzovsk *pszc = pp->p_szc; 3700Sstevel@tonic-gate page_unlock(pp); 3710Sstevel@tonic-gate err = -1; 372*2414Saguzovsk } else if (pp->p_szc > conpp->p_szc && 373*2414Saguzovsk seg->s_szc > conpp->p_szc) { 374*2414Saguzovsk *pszc = MIN(pp->p_szc, seg->s_szc); 3750Sstevel@tonic-gate page_unlock(pp); 3760Sstevel@tonic-gate err = -2; 3770Sstevel@tonic-gate } else { 3780Sstevel@tonic-gate pl[0] = pp; 3790Sstevel@tonic-gate pl[1] = NULL; 3800Sstevel@tonic-gate if (page_pptonum(pp) & 381*2414Saguzovsk (page_get_pagecnt(conpp->p_szc) - 1)) 3820Sstevel@tonic-gate cmn_err(CE_PANIC, "swap_getconpage: no root"); 3830Sstevel@tonic-gate } 3840Sstevel@tonic-gate return (err); 3850Sstevel@tonic-gate } 3860Sstevel@tonic-gate 3870Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 3880Sstevel@tonic-gate 3890Sstevel@tonic-gate if (*nreloc != 0) { 
3900Sstevel@tonic-gate ASSERT(rw != S_CREATE); 3910Sstevel@tonic-gate pl[0] = pp; 3920Sstevel@tonic-gate pl[1] = NULL; 3930Sstevel@tonic-gate return (0); 3940Sstevel@tonic-gate } 3950Sstevel@tonic-gate 3960Sstevel@tonic-gate *nreloc = 1; 3970Sstevel@tonic-gate 3980Sstevel@tonic-gate /* 3990Sstevel@tonic-gate * If necessary do the page io. 4000Sstevel@tonic-gate */ 4010Sstevel@tonic-gate if (rw != S_CREATE) { 4020Sstevel@tonic-gate /* 4030Sstevel@tonic-gate * Since we are only called now on behalf of an 4040Sstevel@tonic-gate * address space operation it's impossible for 4050Sstevel@tonic-gate * us to fail unlike swap_getapge() which 4060Sstevel@tonic-gate * also gets called from swapslot_free(). 4070Sstevel@tonic-gate */ 4080Sstevel@tonic-gate if (swap_getphysname(vp, off, &pvp, &poff)) { 4090Sstevel@tonic-gate cmn_err(CE_PANIC, 4100Sstevel@tonic-gate "swap_getconpage: swap_getphysname failed!"); 4110Sstevel@tonic-gate } 4120Sstevel@tonic-gate 4130Sstevel@tonic-gate if (pvp) { 4140Sstevel@tonic-gate err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ, cr); 4150Sstevel@tonic-gate } else { 4160Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE); 4170Sstevel@tonic-gate } 4180Sstevel@tonic-gate } 4190Sstevel@tonic-gate 4200Sstevel@tonic-gate /* 4210Sstevel@tonic-gate * Normally we would let pvn_read_done() destroy 4220Sstevel@tonic-gate * the page on IO error. But since this is a preallocated 4230Sstevel@tonic-gate * page we'll let the anon layer handle it. 
4240Sstevel@tonic-gate */ 4250Sstevel@tonic-gate page_io_unlock(pp); 4260Sstevel@tonic-gate if (err != 0) 4270Sstevel@tonic-gate page_hashout(pp, NULL); 4280Sstevel@tonic-gate ASSERT(pp->p_next == pp); 4290Sstevel@tonic-gate ASSERT(pp->p_prev == pp); 4300Sstevel@tonic-gate 4310Sstevel@tonic-gate TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE, 4320Sstevel@tonic-gate "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off); 4330Sstevel@tonic-gate 4340Sstevel@tonic-gate pl[0] = pp; 4350Sstevel@tonic-gate pl[1] = NULL; 4360Sstevel@tonic-gate return (err); 4370Sstevel@tonic-gate } 4380Sstevel@tonic-gate 4390Sstevel@tonic-gate /* Async putpage klustering stuff */ 4400Sstevel@tonic-gate int sw_pending_size; 4410Sstevel@tonic-gate extern int klustsize; 4420Sstevel@tonic-gate extern struct async_reqs *sw_getreq(); 4430Sstevel@tonic-gate extern void sw_putreq(struct async_reqs *); 4440Sstevel@tonic-gate extern void sw_putbackreq(struct async_reqs *); 4450Sstevel@tonic-gate extern struct async_reqs *sw_getfree(); 4460Sstevel@tonic-gate extern void sw_putfree(struct async_reqs *); 4470Sstevel@tonic-gate 4480Sstevel@tonic-gate static size_t swap_putpagecnt, swap_pagespushed; 4490Sstevel@tonic-gate static size_t swap_otherfail, swap_otherpages; 4500Sstevel@tonic-gate static size_t swap_klustfail, swap_klustpages; 4510Sstevel@tonic-gate static size_t swap_getiofail, swap_getiopages; 4520Sstevel@tonic-gate 4530Sstevel@tonic-gate /* 4540Sstevel@tonic-gate * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}. 4550Sstevel@tonic-gate * If len == 0, do from off to EOF. 
 */
static int swap_nopage = 0;	/* Don't do swap_putpage's if set */

/*
 * VOP_PUTPAGE for swapfs: push dirty pages of vp in [off, off+len) out to
 * physical swap.  In the pure async-free case a request is queued to the
 * pageout scanner's async list instead of being pushed synchronously,
 * so it can later be klustered with neighbors (see swap_putapage()).
 */
/* ARGSUSED */
static int
swap_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr)
{
	page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;
	int err = 0;
	struct async_reqs *arg;

	if (swap_nopage)
		return (0);

	ASSERT(vp->v_count != 0);

	SWAPFS_PRINT(SWAP_VOPS,
	    "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
	    (void *)vp, off, len, flags, 0);
	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
	    "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (!vn_has_cached_data(vp))
		return (0);

	if (len == 0) {
		/* len == 0 means the whole vnode: this path can block. */
		if (curproc == proc_pageout)
			cmn_err(CE_PANIC, "swapfs: pageout can't block");

		/* Search the entire vp list for pages >= off. */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
		    flags, cr);
	} else {
		u_offset_t eoff;

		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.
		 */
		eoff = off + len;
		for (io_off = (u_offset_t)off; io_off < eoff;
		    io_off += io_len) {
			/*
			 * If we run out of the async req slot, put the page
			 * now instead of queuing.
			 */
			if (flags == (B_ASYNC | B_FREE) &&
			    sw_pending_size < klustsize &&
			    (arg = sw_getfree())) {
				/*
				 * If we are clustering, we should allow
				 * pageout to feed us more pages because # of
				 * pushes is limited by # of I/Os, and one
				 * cluster is considered to be one I/O.
				 */
				if (pushes)
					pushes--;

				arg->a_vp = vp;
				arg->a_off = io_off;
				arg->a_len = PAGESIZE;
				arg->a_flags = B_ASYNC | B_FREE;
				arg->a_cred = kcred;
				sw_putreq(arg);
				io_len = PAGESIZE;
				continue;
			}
			/*
			 * If we are not invalidating pages, use the
			 * routine page_lookup_nowait() to prevent
			 * reclaiming them from the free list.
			 */
			if ((flags & B_INVAL) ||
			    (flags & (B_ASYNC | B_FREE)) == B_FREE)
				pp = page_lookup(vp, io_off, SE_EXCL);
			else
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);

			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = swap_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
			}
		}
	}
	/* If invalidating, verify all pages on vnode list are gone. */
	if (err == 0 && off == 0 && len == 0 &&
	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
		cmn_err(CE_WARN,
		    "swap_putpage: B_INVAL, pages not gone");
	}
	return (err);
}

/*
 * Write out a single page.
 * For swapfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO.
 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
 * swapfs pages, a bunch of contiguous swap slots and then write them
 * all out in one clustered i/o.
 */
/*ARGSUSED*/
static int
swap_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	int err;
	struct vnode *pvp;
	u_offset_t poff, off;
	u_offset_t doff;
	size_t dlen;
	size_t klsz = 0;
	u_offset_t klstart = 0;
	struct vnode *klvp = NULL;
	page_t *pplist;
	se_t se;
	struct async_reqs *arg;
	size_t swap_klustsize;

	/*
	 * This check is added for callers who access swap_putpage with len = 0.
	 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
	 * And it's necessary to do the same queuing if users have the same
	 * B_ASYNC|B_FREE flags on.
	 */
	if (flags == (B_ASYNC | B_FREE) &&
	    sw_pending_size < klustsize && (arg = sw_getfree())) {

		/* Keep the page dirty; the queued request pushes it later. */
		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);

		arg->a_vp = vp;
		arg->a_off = pp->p_offset;
		arg->a_len = PAGESIZE;
		arg->a_flags = B_ASYNC | B_FREE;
		arg->a_cred = kcred;
		sw_putreq(arg);

		return (0);
	}

	SWAPFS_PRINT(SWAP_PUTP,
	    "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
	    pp, vp, pp->p_offset, flags, 0);

	ASSERT(PAGE_LOCKED(pp));

	off = pp->p_offset;

	doff = off;
	dlen = PAGESIZE;

	/* Allocate (or reuse) a physical swap slot for this page. */
	if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
		err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);
		goto out;
	}

	klvp = pvp;
	klstart = poff;
	pplist = pp;
	/*
	 * If this is ASYNC | FREE and we've accumulated a bunch of such
	 * pending requests, kluster.
	 */
	if (flags == (B_ASYNC | B_FREE))
		swap_klustsize = klustsize;
	else
		swap_klustsize = PAGESIZE;
	se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
	klsz = PAGESIZE;
	/*
	 * Pull queued async requests and grow the kluster while their swap
	 * slots are physically contiguous (on either side) with ours.
	 * Any page that cannot join is re-queued or released un-pushed.
	 */
	while (klsz < swap_klustsize) {
		if ((arg = sw_getreq()) == NULL) {
			swap_getiofail++;
			swap_getiopages += btop(klsz);
			break;
		}
		ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
		vp = arg->a_vp;
		off = arg->a_off;

		if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			sw_putfree(arg);
			break;
		}
		if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
			/* No longer dirty; nothing to push for this one. */
			sw_putfree(arg);
			continue;
		}
		/* Get new physical backing store for the page */
		doff = off;
		dlen = PAGESIZE;
		if (err = swap_newphysname(vp, off, &doff, &dlen,
		    &pvp, &poff)) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
		/* Try to cluster new physical name with previous ones */
		if (klvp == pvp && poff == klstart + klsz) {
			/* Contiguous above: append to the page list. */
			klsz += PAGESIZE;
			page_add(&pplist, pp);
			pplist = pplist->p_next;
			sw_putfree(arg);
		} else if (klvp == pvp && poff == klstart - PAGESIZE) {
			/* Contiguous below: prepend and slide the start. */
			klsz += PAGESIZE;
			klstart -= PAGESIZE;
			page_add(&pplist, pp);
			sw_putfree(arg);
		} else {
			swap_klustfail++;
			swap_klustpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
	}

	err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
	    B_WRITE | flags, cr);

	/*
	 * Synchronous pushes are always a single page (swap_klustsize ==
	 * PAGESIZE above), so pp is the whole list here; async completion
	 * is handled by the i/o done routine.
	 */
	if ((flags & B_ASYNC) == 0)
		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);

	/* Statistics */
	if (!err) {
		swap_putpagecnt++;
		swap_pagespushed += btop(klsz);
	}
out:
	TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
	    "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
	    vp, klvp, klstart, klsz);
	if (err && err != ENOMEM)
		cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
	if (lenp)
		*lenp = PAGESIZE;
	return (err);
}

/*
 * VOP_DISPOSE for swapfs: forward the dispose to the physical swap vnode
 * when the page has a physical backing slot, otherwise use the generic
 * fs_dispose().  Large pages are skipped; the caller disposes of them in
 * one shot rather than one PAGESIZE piece at a time.
 */
static void
swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr)
{
	int err;
	u_offset_t off = pp->p_offset;
	vnode_t *pvp;
	u_offset_t poff;

	ASSERT(PAGE_EXCL(pp));

	/*
	 * The caller will free/invalidate large page in one shot instead of
	 * one small page at a time.
	 */
	if (pp->p_szc != 0) {
		page_unlock(pp);
		return;
	}

	err = swap_getphysname(vp, off, &pvp, &poff);
	if (!err && pvp != NULL)
		VOP_DISPOSE(pvp, pp, fl, dn, cr);
	else
		fs_dispose(vp, pp, fl, dn, cr);
}