10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52414Saguzovsk * Common Development and Distribution License (the "License"). 62414Saguzovsk * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*3898Srsb * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 
240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 280Sstevel@tonic-gate #include <sys/types.h> 290Sstevel@tonic-gate #include <sys/param.h> 300Sstevel@tonic-gate #include <sys/systm.h> 310Sstevel@tonic-gate #include <sys/buf.h> 320Sstevel@tonic-gate #include <sys/cred.h> 330Sstevel@tonic-gate #include <sys/errno.h> 340Sstevel@tonic-gate #include <sys/vnode.h> 35*3898Srsb #include <sys/vfs_opreg.h> 360Sstevel@tonic-gate #include <sys/cmn_err.h> 370Sstevel@tonic-gate #include <sys/swap.h> 380Sstevel@tonic-gate #include <sys/mman.h> 390Sstevel@tonic-gate #include <sys/vmsystm.h> 400Sstevel@tonic-gate #include <sys/vtrace.h> 410Sstevel@tonic-gate #include <sys/debug.h> 420Sstevel@tonic-gate #include <sys/sysmacros.h> 430Sstevel@tonic-gate #include <sys/vm.h> 440Sstevel@tonic-gate 450Sstevel@tonic-gate #include <sys/fs/swapnode.h> 460Sstevel@tonic-gate 470Sstevel@tonic-gate #include <vm/seg.h> 480Sstevel@tonic-gate #include <vm/page.h> 490Sstevel@tonic-gate #include <vm/pvn.h> 500Sstevel@tonic-gate #include <fs/fs_subr.h> 510Sstevel@tonic-gate 520Sstevel@tonic-gate #include <vm/seg_kp.h> 530Sstevel@tonic-gate 540Sstevel@tonic-gate /* 550Sstevel@tonic-gate * Define the routines within this file. 
560Sstevel@tonic-gate */ 570Sstevel@tonic-gate static int swap_getpage(struct vnode *vp, offset_t off, size_t len, 580Sstevel@tonic-gate uint_t *protp, struct page **plarr, size_t plsz, 590Sstevel@tonic-gate struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr); 600Sstevel@tonic-gate static int swap_putpage(struct vnode *vp, offset_t off, size_t len, 610Sstevel@tonic-gate int flags, struct cred *cr); 620Sstevel@tonic-gate static void swap_inactive(struct vnode *vp, struct cred *cr); 630Sstevel@tonic-gate static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn, 640Sstevel@tonic-gate cred_t *cr); 650Sstevel@tonic-gate 660Sstevel@tonic-gate static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len, 670Sstevel@tonic-gate uint_t *protp, page_t **plarr, size_t plsz, 680Sstevel@tonic-gate struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr); 690Sstevel@tonic-gate 700Sstevel@tonic-gate int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len, 712414Saguzovsk uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp, 722414Saguzovsk uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr, 732414Saguzovsk enum seg_rw rw, struct cred *cr); 740Sstevel@tonic-gate 750Sstevel@tonic-gate static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off, 760Sstevel@tonic-gate size_t *lenp, int flags, struct cred *cr); 770Sstevel@tonic-gate 780Sstevel@tonic-gate const fs_operation_def_t swap_vnodeops_template[] = { 79*3898Srsb VOPNAME_INACTIVE, { .vop_inactive = swap_inactive }, 80*3898Srsb VOPNAME_GETPAGE, { .vop_getpage = swap_getpage }, 81*3898Srsb VOPNAME_PUTPAGE, { .vop_putpage = swap_putpage }, 82*3898Srsb VOPNAME_DISPOSE, { .vop_dispose = swap_dispose }, 83*3898Srsb VOPNAME_SETFL, { .error = fs_error }, 84*3898Srsb VOPNAME_POLL, { .error = fs_error }, 85*3898Srsb VOPNAME_PATHCONF, { .error = fs_error }, 86*3898Srsb VOPNAME_GETSECATTR, { .error = fs_error }, 87*3898Srsb VOPNAME_SHRLOCK, { .error = fs_error }, 
88*3898Srsb NULL, NULL 890Sstevel@tonic-gate }; 900Sstevel@tonic-gate 910Sstevel@tonic-gate vnodeops_t *swap_vnodeops; 920Sstevel@tonic-gate 930Sstevel@tonic-gate /* ARGSUSED */ 940Sstevel@tonic-gate static void 950Sstevel@tonic-gate swap_inactive( 960Sstevel@tonic-gate struct vnode *vp, 970Sstevel@tonic-gate struct cred *cr) 980Sstevel@tonic-gate { 990Sstevel@tonic-gate SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0); 1000Sstevel@tonic-gate } 1010Sstevel@tonic-gate 1020Sstevel@tonic-gate /* 1030Sstevel@tonic-gate * Return all the pages from [off..off+len] in given file 1040Sstevel@tonic-gate */ 1050Sstevel@tonic-gate static int 1060Sstevel@tonic-gate swap_getpage( 1070Sstevel@tonic-gate struct vnode *vp, 1080Sstevel@tonic-gate offset_t off, 1090Sstevel@tonic-gate size_t len, 1100Sstevel@tonic-gate uint_t *protp, 1110Sstevel@tonic-gate page_t *pl[], 1120Sstevel@tonic-gate size_t plsz, 1130Sstevel@tonic-gate struct seg *seg, 1140Sstevel@tonic-gate caddr_t addr, 1150Sstevel@tonic-gate enum seg_rw rw, 1160Sstevel@tonic-gate struct cred *cr) 1170Sstevel@tonic-gate { 1180Sstevel@tonic-gate int err; 1190Sstevel@tonic-gate 1200Sstevel@tonic-gate SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n", 1210Sstevel@tonic-gate (void *)vp, off, len, 0, 0); 1220Sstevel@tonic-gate 1230Sstevel@tonic-gate TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE, 1240Sstevel@tonic-gate "swapfs getpage:vp %p off %llx len %ld", 1250Sstevel@tonic-gate (void *)vp, off, len); 1260Sstevel@tonic-gate 1270Sstevel@tonic-gate if (len <= PAGESIZE) { 1280Sstevel@tonic-gate err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz, 1290Sstevel@tonic-gate seg, addr, rw, cr); 1300Sstevel@tonic-gate } else { 1310Sstevel@tonic-gate err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, 1320Sstevel@tonic-gate protp, pl, plsz, seg, addr, rw, cr); 1330Sstevel@tonic-gate } 1340Sstevel@tonic-gate 1350Sstevel@tonic-gate return (err); 1360Sstevel@tonic-gate } 1370Sstevel@tonic-gate 
/*
 * Called from pvn_getpages or swap_getpage to get a particular page.
 *
 * If pl == NULL the caller wants asynchronous read-ahead: backing-store
 * i/o (if any) is issued B_ASYNC and no locked page is handed back.
 * Otherwise the page is returned locked in pl[0], with pl[1] = NULL
 * terminating the list.  rw == S_CREATE means the caller only needs a
 * page created, so no backing-store read is attempted.
 */
/*ARGSUSED*/
static int
swap_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page *pp, *rpp;
	int flags;
	int err = 0;
	struct vnode *pvp = NULL;	/* physical swap vnode, if slot assigned */
	u_offset_t poff;		/* offset within the physical vnode */
	int flag_noreloc;
	se_t lock;
	extern int kcage_on;
	int upgrade = 0;		/* set if we escalated SHARED -> EXCL */

	SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
	    vp, off, len, 0, 0);

	/*
	 * Until there is a call-back mechanism to cause SEGKP
	 * pages to be unlocked, make them non-relocatable.
	 */
	if (SEG_IS_SEGKP(seg))
		flag_noreloc = PG_NORELOC;
	else
		flag_noreloc = 0;

	if (protp != NULL)
		*protp = PROT_ALL;

	/* S_CREATE implies we may initialize the page, so lock it SE_EXCL. */
	lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);

again:
	if (pp = page_lookup(vp, off, lock)) {
		/*
		 * In very rare instances, a segkp page may have been
		 * relocated outside of the kernel by the kernel cage
		 * due to the window between page_unlock() and
		 * VOP_PUTPAGE() in segkp_unlock().  Due to the
		 * rareness of these occurances, the solution is to
		 * relocate the page to a P_NORELOC page.
		 */
		if (flag_noreloc != 0) {
			if (!PP_ISNORELOC(pp) && kcage_on) {
				/*
				 * Relocation requires the exclusive lock;
				 * if the non-blocking upgrade fails, drop
				 * the page and redo the lookup SE_EXCL.
				 */
				if (lock != SE_EXCL) {
					upgrade = 1;
					if (!page_tryupgrade(pp)) {
						page_unlock(pp);
						lock = SE_EXCL;
						goto again;
					}
				}

				if (page_relocate_cage(&pp, &rpp) != 0)
					panic("swap_getapage: "
					    "page_relocate_cage failed");

				pp = rpp;
			}
		}

		if (pl) {
			/* Hand the page back at the lock level requested. */
			if (upgrade)
				page_downgrade(pp);

			pl[0] = pp;
			pl[1] = NULL;
		} else {
			page_unlock(pp);
		}
	} else {
		pp = page_create_va(vp, off, PAGESIZE,
		    PG_WAIT | PG_EXCL | flag_noreloc,
		    seg, addr);
		/*
		 * Someone raced in and created the page after we did the
		 * lookup but before we did the create, so go back and
		 * try to look it up again.
		 */
		if (pp == NULL)
			goto again;
		if (rw != S_CREATE) {
			err = swap_getphysname(vp, off, &pvp, &poff);
			if (pvp) {
				struct anon *ap;
				kmutex_t *ahm;

				flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
				err = VOP_PAGEIO(pvp, pp, poff,
				    PAGESIZE, flags, cr);

				if (!err) {
					/*
					 * The page is now in core.  If the
					 * anon slot still names the same
					 * physical location, release the
					 * physical swap slot and mark the
					 * page modified, since the in-core
					 * copy is now the only valid one.
					 */
					ahm = &anonhash_lock[AH_LOCK(vp, off)];
					mutex_enter(ahm);

					ap = swap_anon(vp, off);
					if (ap == NULL)
						panic("swap_getapage: null anon");

					if (ap->an_pvp == pvp &&
					    ap->an_poff == poff) {
						swap_phys_free(pvp, poff,
						    PAGESIZE);
						ap->an_pvp = NULL;
						ap->an_poff = NULL;
						hat_setmod(pp);
					}

					mutex_exit(ahm);
				}
			} else {
				/* No backing store yet: page reads as zeros. */
				if (!err)
					pagezero(pp, 0, PAGESIZE);

				/*
				 * If it's a fault ahead, release page_io_lock
				 * and SE_EXCL we grabbed in page_create_va
				 *
				 * If we are here, we haven't called VOP_PAGEIO
				 * and thus calling pvn_read_done(pp, B_READ)
				 * below may mislead that we tried i/o.  Besides,
				 * in case of async, pvn_read_done() should
				 * not be called by *getpage()
				 */
				if (pl == NULL) {
					/*
					 * swap_getphysname can return error
					 * only when we are getting called from
					 * swapslot_free which passes non-NULL
					 * pl to VOP_GETPAGE.
					 */
					ASSERT(err == 0);
					page_io_unlock(pp);
					page_unlock(pp);
				}
			}
		}

		ASSERT(pp != NULL);

		if (err && pl)
			pvn_read_done(pp, B_ERROR);

		if (!err && pl)
			pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
	}
	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
	    "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
	return (err);
}

/*
 * Called from large page anon routines only! This is an ugly hack where
 * the anon layer directly calls into swapfs with a preallocated large page.
 * Another method would have been to change to VOP and add an extra arg for
 * the preallocated large page. This all could be cleaned up later when we
 * solve the anonymous naming problem and no longer need to loop across of
 * the VOP in PAGESIZE increments to fill in or initialize a large page as
 * is done today. I think the latter is better since it avoid a change to
 * the VOP interface that could later be avoided.
 *
 * NOTE(review): the negative return codes (-1, -2) appear to signal
 * page-size-code conflicts back to the anon-layer caller rather than
 * errnos — confirm against the anon_map large-page callers.
 */
int
swap_getconpage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	page_t *conpp,		/* preallocated constituent page, or NULL */
	uint_t *pszc,		/* out: conflicting size code, on -1/-2 */
	spgcnt_t *nreloc,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page *pp;
	int err = 0;
	struct vnode *pvp = NULL;
	u_offset_t poff;

	ASSERT(len == PAGESIZE);
	ASSERT(pl != NULL);
	ASSERT(plsz == PAGESIZE);
	ASSERT(protp == NULL);
	ASSERT(nreloc != NULL);
	ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
	SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
		vp, off, len, 0, 0);

	/*
	 * If we are not using a preallocated page then we know one already
	 * exists. So just let the old code handle it.
	 */
	if (conpp == NULL) {
		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
		    seg, addr, rw, cr);
		return (err);
	}
	/* The preallocated page must be a lone, locked, non-free large page. */
	ASSERT(conpp->p_szc != 0);
	ASSERT(PAGE_EXCL(conpp));


	ASSERT(conpp->p_next == conpp);
	ASSERT(conpp->p_prev == conpp);
	ASSERT(!PP_ISAGED(conpp));
	ASSERT(!PP_ISFREE(conpp));

	*nreloc = 0;
	/*
	 * Either find an existing page at (vp, off) or insert conpp there;
	 * pp == conpp means our preallocated page was inserted.
	 */
	pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);

	/*
	 * If existing page is found we may need to relocate.
	 */
	if (pp != conpp) {
		ASSERT(rw != S_CREATE);
		ASSERT(pszc != NULL);
		ASSERT(PAGE_SHARED(pp));
		if (pp->p_szc < conpp->p_szc) {
			*pszc = pp->p_szc;
			page_unlock(pp);
			err = -1;
		} else if (pp->p_szc > conpp->p_szc &&
		    seg->s_szc > conpp->p_szc) {
			*pszc = MIN(pp->p_szc, seg->s_szc);
			page_unlock(pp);
			err = -2;
		} else {
			pl[0] = pp;
			pl[1] = NULL;
			/* The existing page must be its large page's root. */
			if (page_pptonum(pp) &
			    (page_get_pagecnt(conpp->p_szc) - 1))
				cmn_err(CE_PANIC, "swap_getconpage: no root");
		}
		return (err);
	}

	ASSERT(PAGE_EXCL(pp));

	/*
	 * Nonzero *nreloc means page_lookup_create() already relocated
	 * existing page contents into conpp; nothing more to read.
	 */
	if (*nreloc != 0) {
		ASSERT(rw != S_CREATE);
		pl[0] = pp;
		pl[1] = NULL;
		return (0);
	}

	*nreloc = 1;

	/*
	 * If necessary do the page io.
	 */
	if (rw != S_CREATE) {
		/*
		 * Since we are only called now on behalf of an
		 * address space operation it's impossible for
		 * us to fail unlike swap_getapge() which
		 * also gets called from swapslot_free().
		 */
		if (swap_getphysname(vp, off, &pvp, &poff)) {
			cmn_err(CE_PANIC,
			    "swap_getconpage: swap_getphysname failed!");
		}

		if (pvp) {
			err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ, cr);
		} else {
			pagezero(pp, 0, PAGESIZE);
		}
	}

	/*
	 * Normally we would let pvn_read_done() destroy
	 * the page on IO error. But since this is a preallocated
	 * page we'll let the anon layer handle it.
	 */
	page_io_unlock(pp);
	if (err != 0)
		page_hashout(pp, NULL);
	ASSERT(pp->p_next == pp);
	ASSERT(pp->p_prev == pp);

	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
	    "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);

	pl[0] = pp;
	pl[1] = NULL;
	return (err);
}

/* Async putpage klustering stuff */
int sw_pending_size;		/* bytes of putpage requests queued */
extern int klustsize;
extern struct async_reqs *sw_getreq();
extern void sw_putreq(struct async_reqs *);
extern void sw_putbackreq(struct async_reqs *);
extern struct async_reqs *sw_getfree();
extern void sw_putfree(struct async_reqs *);

/* Putpage statistics, readable with a debugger. */
static size_t swap_putpagecnt, swap_pagespushed;
static size_t swap_otherfail, swap_otherpages;
static size_t swap_klustfail, swap_klustpages;
static size_t swap_getiofail, swap_getiopages;

/*
 * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
static int swap_nopage = 0;	/* Don't do swap_putpage's if set */

/*
 * VOP_PUTPAGE for swapfs.  For the common pageout case
 * (flags == B_ASYNC | B_FREE) requests are queued to the swapfs async
 * thread (sw_putreq) so they can later be klustered into one i/o;
 * otherwise pages are looked up and pushed here via swap_putapage().
 */
/* ARGSUSED */
static int
swap_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr)
{
	page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;
	int err = 0;
	struct async_reqs *arg;

	if (swap_nopage)
		return (0);

	ASSERT(vp->v_count != 0);

	/*
	 * Clear force flag so that p_lckcnt pages are not invalidated.
	 */
	flags &= ~B_FORCE;

	SWAPFS_PRINT(SWAP_VOPS,
	    "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
	    (void *)vp, off, len, flags, 0);
	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
	    "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (!vn_has_cached_data(vp))
		return (0);

	if (len == 0) {
		/* len == 0 means "from off to EOF", which can block. */
		if (curproc == proc_pageout)
			cmn_err(CE_PANIC, "swapfs: pageout can't block");

		/* Search the entire vp list for pages >= off. */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
		    flags, cr);
	} else {
		u_offset_t eoff;

		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.
		 */
		eoff = off + len;
		for (io_off = (u_offset_t)off; io_off < eoff;
		    io_off += io_len) {
			/*
			 * If we run out of the async req slot, put the page
			 * now instead of queuing.
			 */
			if (flags == (B_ASYNC | B_FREE) &&
			    sw_pending_size < klustsize &&
			    (arg = sw_getfree())) {
				/*
				 * If we are clustering, we should allow
				 * pageout to feed us more pages because # of
				 * pushes is limited by # of I/Os, and one
				 * cluster is considered to be one I/O.
				 */
				if (pushes)
					pushes--;

				arg->a_vp = vp;
				arg->a_off = io_off;
				arg->a_len = PAGESIZE;
				arg->a_flags = B_ASYNC | B_FREE;
				arg->a_cred = kcred;
				sw_putreq(arg);
				io_len = PAGESIZE;
				continue;
			}
			/*
			 * If we are not invalidating pages, use the
			 * routine page_lookup_nowait() to prevent
			 * reclaiming them from the free list.
			 */
			if ((flags & B_INVAL) ||
			    (flags & (B_ASYNC | B_FREE)) == B_FREE)
				pp = page_lookup(vp, io_off, SE_EXCL);
			else
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);

			/*
			 * No page, or page is clean: just advance one
			 * page.  Otherwise push it out.
			 */
			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = swap_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
			}
		}
	}
	/* If invalidating, verify all pages on vnode list are gone. */
	if (err == 0 && off == 0 && len == 0 &&
	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
		cmn_err(CE_WARN,
		    "swap_putpage: B_INVAL, pages not gone");
	}
	return (err);
}

/*
 * Write out a single page.
 * For swapfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO.
 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
 * swapfs pages, a bunch of contiguous swap slots and then write them
 * all out in one clustered i/o.
 */
/*ARGSUSED*/
static int
swap_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	int err;
	struct vnode *pvp;		/* physical swap vnode of current page */
	u_offset_t poff, off;
	u_offset_t doff;
	size_t dlen;
	size_t klsz = 0;		/* bytes accumulated in the kluster */
	u_offset_t klstart = 0;		/* physical offset of kluster start */
	struct vnode *klvp = NULL;	/* physical vnode of the kluster */
	page_t *pplist;			/* circular list of pages to push */
	se_t se;
	struct async_reqs *arg;
	size_t swap_klustsize;

	/*
	 * This check is added for callers who access swap_putpage with len = 0.
	 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
	 * And it's necessary to do the same queuing if users have the same
	 * B_ASYNC|B_FREE flags on.
	 */
	if (flags == (B_ASYNC | B_FREE) &&
	    sw_pending_size < klustsize && (arg = sw_getfree())) {

		/* Re-dirty and release; the async thread will push it. */
		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);

		arg->a_vp = vp;
		arg->a_off = pp->p_offset;
		arg->a_len = PAGESIZE;
		arg->a_flags = B_ASYNC | B_FREE;
		arg->a_cred = kcred;
		sw_putreq(arg);

		return (0);
	}

	SWAPFS_PRINT(SWAP_PUTP,
	    "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
	    pp, vp, pp->p_offset, flags, 0);

	ASSERT(PAGE_LOCKED(pp));

	off = pp->p_offset;

	doff = off;
	dlen = PAGESIZE;

	if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
		/*
		 * No physical slot available.  Re-dirty the page and drop
		 * it; only the async pageout path reports ENOMEM so the
		 * caller can back off.
		 */
		err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);
		goto out;
	}

	klvp = pvp;
	klstart = poff;
	pplist = pp;
	/*
	 * If this is ASYNC | FREE and we've accumulated a bunch of such
	 * pending requests, kluster.
	 */
	if (flags == (B_ASYNC | B_FREE))
		swap_klustsize = klustsize;
	else
		swap_klustsize = PAGESIZE;
	se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
	klsz = PAGESIZE;
	/*
	 * Pull queued requests and grow the kluster while each new page's
	 * physical slot is contiguous (either side) with what we have.
	 * Any page that can't join is re-dirtied and its request put back.
	 */
	while (klsz < swap_klustsize) {
		if ((arg = sw_getreq()) == NULL) {
			swap_getiofail++;
			swap_getiopages += btop(klsz);
			break;
		}
		ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
		vp = arg->a_vp;
		off = arg->a_off;

		if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			sw_putfree(arg);
			break;
		}
		if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
			sw_putfree(arg);
			continue;
		}
		/* Get new physical backing store for the page */
		doff = off;
		dlen = PAGESIZE;
		if (err = swap_newphysname(vp, off, &doff, &dlen,
		    &pvp, &poff)) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
		/* Try to cluster new physical name with previous ones */
		if (klvp == pvp && poff == klstart + klsz) {
			/* Contiguous above: append to the list tail. */
			klsz += PAGESIZE;
			page_add(&pplist, pp);
			pplist = pplist->p_next;
			sw_putfree(arg);
		} else if (klvp == pvp && poff == klstart - PAGESIZE) {
			/* Contiguous below: extend the kluster downward. */
			klsz += PAGESIZE;
			klstart -= PAGESIZE;
			page_add(&pplist, pp);
			sw_putfree(arg);
		} else {
			swap_klustfail++;
			swap_klustpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
	}

	err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
	    B_WRITE | flags, cr);

	if ((flags & B_ASYNC) == 0)
		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);

	/* Statistics */
	if (!err) {
		swap_putpagecnt++;
		swap_pagespushed += btop(klsz);
	}
out:
	TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
	    "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
	    vp, klvp, klstart, klsz);
	if (err && err != ENOMEM)
		cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
	if (lenp)
		*lenp = PAGESIZE;
	return (err);
}

/*
 * VOP_DISPOSE for swapfs.  If a physical swap slot backs this page,
 * let the physical vnode dispose of it; otherwise fall back to the
 * generic fs_dispose().  Large-page constituents are simply unlocked,
 * since the caller disposes of the whole large page at once.
 */
static void
swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr)
{
	int err;
	u_offset_t off = pp->p_offset;
	vnode_t *pvp;
	u_offset_t poff;

	ASSERT(PAGE_EXCL(pp));

	/*
	 * The caller will free/invalidate large page in one shot instead of
	 * one small page at a time.
	 */
	if (pp->p_szc != 0) {
		page_unlock(pp);
		return;
	}

	err = swap_getphysname(vp, off, &pvp, &poff);
	if (!err && pvp != NULL)
		VOP_DISPOSE(pvp, pp, fl, dn, cr);
	else
		fs_dispose(vp, pp, fl, dn, cr);
}