/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/vfs_opreg.h>
#include <sys/cmn_err.h>
#include <sys/swap.h>
#include <sys/mman.h>
#include <sys/vmsystm.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>

#include <sys/fs/swapnode.h>

#include <vm/seg.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <fs/fs_subr.h>

#include <vm/seg_kp.h>

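/*
 * swapfs provides the vnode operations that back anonymous memory:
 * getpage brings an anon page back in from its physical swap slot (or
 * zero-fills a newly created page), putpage pushes dirty anon pages out
 * to physical swap via VOP_PAGEIO(), and dispose hands the page to the
 * underlying physical swap vnode when one exists.
 */
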
/*
 * Define the routines within this file.
 */
static int swap_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
    caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
static int swap_putpage(struct vnode *vp, offset_t off, size_t len,
    int flags, struct cred *cr, caller_context_t *ct);
static void swap_inactive(struct vnode *vp, struct cred *cr,
    caller_context_t *ct);
static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
    cred_t *cr, caller_context_t *ct);

static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);

int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
    uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
    size_t *lenp, int flags, struct cred *cr);

const fs_operation_def_t swap_vnodeops_template[] = {
	VOPNAME_INACTIVE,	{ .vop_inactive = swap_inactive },
	VOPNAME_GETPAGE,	{ .vop_getpage = swap_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = swap_putpage },
	VOPNAME_DISPOSE,	{ .vop_dispose = swap_dispose },
	VOPNAME_SETFL,		{ .error = fs_error },
	VOPNAME_POLL,		{ .error = fs_error },
	VOPNAME_PATHCONF,	{ .error = fs_error },
	VOPNAME_GETSECATTR,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	NULL,			NULL
};

vnodeops_t *swap_vnodeops;

/* ARGSUSED */
static void
swap_inactive(
	struct vnode *vp,
	struct cred *cr,
	caller_context_t *ct)
{
	SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
}

/*
 * Return all the pages from [off..off+len] in given file
 */
/*ARGSUSED*/
static int
swap_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr,
	caller_context_t *ct)
{
	int err;

	SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
	    (void *)vp, off, len, 0, 0);

	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
	    "swapfs getpage:vp %p off %llx len %ld",
	    (void *)vp, off, len);
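
	/*
	 * A single-page request is handled directly by swap_getapage();
	 * larger requests go through pvn_getpages(), which calls back
	 * into swap_getapage() one page at a time.
	 */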
	if (len <= PAGESIZE) {
		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
		    seg, addr, rw, cr);
	} else {
		err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len,
		    protp, pl, plsz, seg, addr, rw, cr);
	}

	return (err);
}

/*
 * Called from pvn_getpages or swap_getpage to get a particular page.
 */
/*ARGSUSED*/
static int
swap_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page *pp, *rpp;
	int flags;
	int err = 0;
	struct vnode *pvp = NULL;
	u_offset_t poff;
	int flag_noreloc;
	se_t lock;
	extern int kcage_on;
	int upgrade = 0;

	SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
	    vp, off, len, 0, 0);

	/*
	 * Until there is a call-back mechanism to cause SEGKP
	 * pages to be unlocked, make them non-relocatable.
	 */
	if (SEG_IS_SEGKP(seg))
		flag_noreloc = PG_NORELOC;
	else
		flag_noreloc = 0;

	if (protp != NULL)
		*protp = PROT_ALL;

	lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);

again:
	if (pp = page_lookup(vp, off, lock)) {
		/*
		 * In very rare instances, a segkp page may have been
		 * relocated outside of the kernel by the kernel cage
		 * due to the window between page_unlock() and
		 * VOP_PUTPAGE() in segkp_unlock().  Due to the
		 * rareness of these occurrences, the solution is to
		 * relocate the page to a P_NORELOC page.
		 */
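		/*
		 * page_relocate_cage() below is called with the page held
		 * SE_EXCL: if we only hold a shared lock, try to upgrade
		 * in place, and if that fails drop the page and redo the
		 * lookup asking for an exclusive lock.
		 */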
		if (flag_noreloc != 0) {
			if (!PP_ISNORELOC(pp) && kcage_on) {
				if (lock != SE_EXCL) {
					upgrade = 1;
					if (!page_tryupgrade(pp)) {
						page_unlock(pp);
						lock = SE_EXCL;
						goto again;
					}
				}

				if (page_relocate_cage(&pp, &rpp) != 0)
					panic("swap_getapage: "
					    "page_relocate_cage failed");

				pp = rpp;
			}
		}

		if (pl) {
			if (upgrade)
				page_downgrade(pp);

			pl[0] = pp;
			pl[1] = NULL;
		} else {
			page_unlock(pp);
		}
	} else {
		pp = page_create_va(vp, off, PAGESIZE,
		    PG_WAIT | PG_EXCL | flag_noreloc,
		    seg, addr);
		/*
		 * Someone raced in and created the page after we did the
		 * lookup but before we did the create, so go back and
		 * try to look it up again.
		 */
		if (pp == NULL)
			goto again;
		if (rw != S_CREATE) {
			err = swap_getphysname(vp, off, &pvp, &poff);
			if (pvp) {
				struct anon *ap;
				kmutex_t *ahm;

				flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
				err = VOP_PAGEIO(pvp, pp, poff,
				    PAGESIZE, flags, cr, NULL);

				if (!err) {
					ahm = &anonhash_lock[AH_LOCK(vp, off)];
					mutex_enter(ahm);

					ap = swap_anon(vp, off);
					if (ap == NULL) {
						panic("swap_getapage:"
						    " null anon");
					}

					if (ap->an_pvp == pvp &&
					    ap->an_poff == poff) {
						swap_phys_free(pvp, poff,
						    PAGESIZE);
						ap->an_pvp = NULL;
						ap->an_poff = NULL;
						hat_setmod(pp);
					}

					mutex_exit(ahm);
				}
			} else {
				if (!err)
					pagezero(pp, 0, PAGESIZE);

				/*
				 * If it's a fault ahead, release page_io_lock
				 * and SE_EXCL we grabbed in page_create_va
				 *
				 * If we are here, we haven't called VOP_PAGEIO
				 * and thus calling pvn_read_done(pp, B_READ)
				 * below could wrongly suggest that we tried
				 * i/o.  Besides,
				 * in case of async, pvn_read_done() should
				 * not be called by *getpage()
				 */
				if (pl == NULL) {
					/*
					 * swap_getphysname can return error
					 * only when we are getting called from
					 * swapslot_free which passes non-NULL
					 * pl to VOP_GETPAGE.
					 */
					ASSERT(err == 0);
					page_io_unlock(pp);
					page_unlock(pp);
				}
			}
		}

		ASSERT(pp != NULL);

		if (err && pl)
			pvn_read_done(pp, B_ERROR);

		if (!err && pl)
			pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
	}
	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
	    "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
	return (err);
}

/*
 * Called from large page anon routines only! This is an ugly hack where
 * the anon layer directly calls into swapfs with a preallocated large page.
 * Another method would have been to change the VOP and add an extra arg for
 * the preallocated large page. This all could be cleaned up later when we
 * solve the anonymous naming problem and no longer need to loop across
 * the VOP in PAGESIZE increments to fill in or initialize a large page as
 * is done today. I think the latter is better since it avoids a change to
 * the VOP interface that could later be avoided.
 */
int
swap_getconpage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	page_t *conpp,
	uint_t *pszc,
	spgcnt_t *nreloc,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page *pp;
	int err = 0;
	struct vnode *pvp = NULL;
	u_offset_t poff;

	ASSERT(len == PAGESIZE);
	ASSERT(pl != NULL);
	ASSERT(plsz == PAGESIZE);
	ASSERT(protp == NULL);
	ASSERT(nreloc != NULL);
	ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
	SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
	    vp, off, len, 0, 0);

	/*
	 * If we are not using a preallocated page then we know one already
	 * exists. So just let the old code handle it.
	 */
	if (conpp == NULL) {
		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
		    seg, addr, rw, cr);
		return (err);
	}
	ASSERT(conpp->p_szc != 0);
	ASSERT(PAGE_EXCL(conpp));

	ASSERT(conpp->p_next == conpp);
	ASSERT(conpp->p_prev == conpp);
	ASSERT(!PP_ISAGED(conpp));
	ASSERT(!PP_ISFREE(conpp));

	*nreloc = 0;
	pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);

	/*
	 * If existing page is found we may need to relocate.
	 */
	if (pp != conpp) {
		ASSERT(rw != S_CREATE);
		ASSERT(pszc != NULL);
		ASSERT(PAGE_SHARED(pp));
		if (pp->p_szc < conpp->p_szc) {
			*pszc = pp->p_szc;
			page_unlock(pp);
			err = -1;
		} else if (pp->p_szc > conpp->p_szc &&
		    seg->s_szc > conpp->p_szc) {
			*pszc = MIN(pp->p_szc, seg->s_szc);
			page_unlock(pp);
			err = -2;
		} else {
			pl[0] = pp;
			pl[1] = NULL;
			if (page_pptonum(pp) &
			    (page_get_pagecnt(conpp->p_szc) - 1))
				cmn_err(CE_PANIC, "swap_getconpage: no root");
		}
		return (err);
	}

	ASSERT(PAGE_EXCL(pp));

	if (*nreloc != 0) {
		ASSERT(rw != S_CREATE);
		pl[0] = pp;
		pl[1] = NULL;
		return (0);
	}

	*nreloc = 1;

	/*
	 * If necessary do the page io.
	 */
	if (rw != S_CREATE) {
		/*
		 * Since we are only called now on behalf of an
		 * address space operation it's impossible for
		 * us to fail, unlike swap_getapage(), which
		 * also gets called from swapslot_free().
		 */
		if (swap_getphysname(vp, off, &pvp, &poff)) {
			cmn_err(CE_PANIC,
			    "swap_getconpage: swap_getphysname failed!");
		}

		if (pvp != NULL) {
			err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
			    cr, NULL);
			if (err == 0) {
				struct anon *ap;
				kmutex_t *ahm;

				ahm = &anonhash_lock[AH_LOCK(vp, off)];
				mutex_enter(ahm);
				ap = swap_anon(vp, off);
				if (ap == NULL)
					panic("swap_getconpage: null anon");
				if (ap->an_pvp != pvp || ap->an_poff != poff)
					panic("swap_getconpage: bad anon");

				swap_phys_free(pvp, poff, PAGESIZE);
				ap->an_pvp = NULL;
				ap->an_poff = NULL;
				hat_setmod(pp);
				mutex_exit(ahm);
			}
		} else {
			pagezero(pp, 0, PAGESIZE);
		}
	}

	/*
	 * Normally we would let pvn_read_done() destroy
	 * the page on IO error. But since this is a preallocated
	 * page we'll let the anon layer handle it.
	 */
	page_io_unlock(pp);
	if (err != 0)
		page_hashout(pp, NULL);
	ASSERT(pp->p_next == pp);
	ASSERT(pp->p_prev == pp);

	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
	    "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);

	pl[0] = pp;
	pl[1] = NULL;
	return (err);
}

/* Async putpage klustering stuff */
int sw_pending_size;
extern int klustsize;
extern struct async_reqs *sw_getreq();
extern void sw_putreq(struct async_reqs *);
extern void sw_putbackreq(struct async_reqs *);
extern struct async_reqs *sw_getfree();
extern void sw_putfree(struct async_reqs *);

static size_t swap_putpagecnt, swap_pagespushed;
static size_t swap_otherfail, swap_otherpages;
static size_t swap_klustfail, swap_klustpages;
static size_t swap_getiofail, swap_getiopages;

/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
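/*
 * swap_putpage() below either walks the vnode's entire cached-page list
 * (len == 0, via pvn_vplist_dirty()) or steps through [off, off + len)
 * one page at a time; (B_ASYNC | B_FREE) requests may simply be queued
 * on the async request list rather than being pushed immediately.
 */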
static int swap_nopage = 0;	/* Don't do swap_putpage's if set */

/* ARGSUSED */
static int
swap_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr,
	caller_context_t *ct)
{
	page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;
	int err = 0;
	struct async_reqs *arg;

	if (swap_nopage)
		return (0);

	ASSERT(vp->v_count != 0);

	/*
	 * Clear force flag so that p_lckcnt pages are not invalidated.
	 */
	flags &= ~B_FORCE;

	SWAPFS_PRINT(SWAP_VOPS,
	    "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
	    (void *)vp, off, len, flags, 0);
	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
	    "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (!vn_has_cached_data(vp))
		return (0);

	if (len == 0) {
		if (curproc == proc_pageout)
			cmn_err(CE_PANIC, "swapfs: pageout can't block");

		/* Search the entire vp list for pages >= off. */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
		    flags, cr);
	} else {
		u_offset_t eoff;

		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.
		 */
		eoff = off + len;
		for (io_off = (u_offset_t)off; io_off < eoff;
		    io_off += io_len) {
			/*
			 * If we run out of async req slots, put the page
			 * now instead of queuing.
			 */
			if (flags == (B_ASYNC | B_FREE) &&
			    sw_pending_size < klustsize &&
			    (arg = sw_getfree())) {
				/*
				 * If we are clustering, we should allow
				 * pageout to feed us more pages because # of
				 * pushes is limited by # of I/Os, and one
				 * cluster is considered to be one I/O.
				 */
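				/*
				 * Fill in an async request for this page
				 * and queue it; the request is drained
				 * later (e.g. by the klustering loop in
				 * swap_putapage() below) rather than the
				 * page being pushed synchronously here.
				 */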
				if (pushes)
					pushes--;

				arg->a_vp = vp;
				arg->a_off = io_off;
				arg->a_len = PAGESIZE;
				arg->a_flags = B_ASYNC | B_FREE;
				arg->a_cred = kcred;
				sw_putreq(arg);
				io_len = PAGESIZE;
				continue;
			}
			/*
			 * If we are not invalidating pages, use the
			 * routine page_lookup_nowait() to prevent
			 * reclaiming them from the free list.
			 */
			if ((flags & B_INVAL) ||
			    (flags & (B_ASYNC | B_FREE)) == B_FREE)
				pp = page_lookup(vp, io_off, SE_EXCL);
			else
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);

			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = swap_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
			}
		}
	}
	/* If invalidating, verify all pages on vnode list are gone. */
	if (err == 0 && off == 0 && len == 0 &&
	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
		cmn_err(CE_WARN,
		    "swap_putpage: B_INVAL, pages not gone");
	}
	return (err);
}

/*
 * Write out a single page.
 * For swapfs this means choosing a physical swap slot and writing the page
 * out using VOP_PAGEIO.
 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
 * swapfs pages and a bunch of contiguous swap slots, and then write them
 * all out in one clustered i/o.
 */
/*ARGSUSED*/
static int
swap_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	int err;
	struct vnode *pvp;
	u_offset_t poff, off;
	u_offset_t doff;
	size_t dlen;
	size_t klsz = 0;
	u_offset_t klstart = 0;
	struct vnode *klvp = NULL;
	page_t *pplist;
	se_t se;
	struct async_reqs *arg;
	size_t swap_klustsize;

	/*
	 * This check is added for callers who access swap_putpage with
	 * len = 0. swap_putpage calls swap_putapage page-by-page via
	 * pvn_vplist_dirty.
	 * And it's necessary to do the same queuing if users have the same
	 * B_ASYNC|B_FREE flags on.
	 */
	if (flags == (B_ASYNC | B_FREE) &&
	    sw_pending_size < klustsize && (arg = sw_getfree())) {

		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);

		arg->a_vp = vp;
		arg->a_off = pp->p_offset;
		arg->a_len = PAGESIZE;
		arg->a_flags = B_ASYNC | B_FREE;
		arg->a_cred = kcred;
		sw_putreq(arg);

		return (0);
	}

	SWAPFS_PRINT(SWAP_PUTP,
	    "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
	    pp, vp, pp->p_offset, flags, 0);

	ASSERT(PAGE_LOCKED(pp));

	off = pp->p_offset;

	doff = off;
	dlen = PAGESIZE;

	if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
		err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);
		goto out;
	}

	klvp = pvp;
	klstart = poff;
	pplist = pp;
	/*
	 * If this is ASYNC | FREE and we've accumulated a bunch of such
	 * pending requests, kluster.
	 */
	if (flags == (B_ASYNC | B_FREE))
		swap_klustsize = klustsize;
	else
		swap_klustsize = PAGESIZE;
	se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
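	/*
	 * Kluster loop: drain pending async requests, pick up each
	 * request's dirty page, assign it a physical swap slot, and keep
	 * growing the cluster while the new slot is adjacent (at either
	 * end) to the run gathered so far, up to swap_klustsize bytes.
	 */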
	klsz = PAGESIZE;
	while (klsz < swap_klustsize) {
		if ((arg = sw_getreq()) == NULL) {
			swap_getiofail++;
			swap_getiopages += btop(klsz);
			break;
		}
		ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
		vp = arg->a_vp;
		off = arg->a_off;

		if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			sw_putfree(arg);
			break;
		}
		if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
			sw_putfree(arg);
			continue;
		}
		/* Get new physical backing store for the page */
		doff = off;
		dlen = PAGESIZE;
		if (err = swap_newphysname(vp, off, &doff, &dlen,
		    &pvp, &poff)) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
		/* Try to cluster new physical name with previous ones */
		if (klvp == pvp && poff == klstart + klsz) {
			klsz += PAGESIZE;
			page_add(&pplist, pp);
			pplist = pplist->p_next;
			sw_putfree(arg);
		} else if (klvp == pvp && poff == klstart - PAGESIZE) {
			klsz += PAGESIZE;
			klstart -= PAGESIZE;
			page_add(&pplist, pp);
			sw_putfree(arg);
		} else {
			swap_klustfail++;
			swap_klustpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
	}

	err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
	    B_WRITE | flags, cr, NULL);

	if ((flags & B_ASYNC) == 0)
		pvn_write_done(pp, ((err) ?
		    B_ERROR : 0) | B_WRITE | flags);

	/* Statistics */
	if (!err) {
		swap_putpagecnt++;
		swap_pagespushed += btop(klsz);
	}
out:
	TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
	    "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
	    vp, klvp, klstart, klsz);
	if (err && err != ENOMEM)
		cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
	if (lenp)
		*lenp = PAGESIZE;
	return (err);
}

static void
swap_dispose(
	vnode_t *vp,
	page_t *pp,
	int fl,
	int dn,
	cred_t *cr,
	caller_context_t *ct)
{
	int err;
	u_offset_t off = pp->p_offset;
	vnode_t *pvp;
	u_offset_t poff;

	ASSERT(PAGE_EXCL(pp));

	/*
	 * The caller will free/invalidate a large page in one shot instead
	 * of one small page at a time.
	 */
	if (pp->p_szc != 0) {
		page_unlock(pp);
		return;
	}

	err = swap_getphysname(vp, off, &pvp, &poff);
	if (!err && pvp != NULL)
		VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
	else
		fs_dispose(vp, pp, fl, dn, cr, ct);
}