/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

/*
 * VM - paged vnode.
 *
 * This file supplies vm support for the vnode operations that deal with pages.
 */
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/vmsystm.h>
#include <sys/mman.h>
#include <sys/vfs.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/cpuvar.h>
#include <sys/vtrace.h>
#include <sys/tnf_probe.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/rm.h>
#include <vm/pvn.h>
#include <vm/page.h>
#include <vm/seg_map.h>
#include <vm/seg_kmem.h>
#include <sys/fs/swapnode.h>

int pvn_nofodklust = 0;
int pvn_write_noklust = 0;

uint_t pvn_vmodsort_supported = 0;	/* set if HAT supports VMODSORT */
uint_t pvn_vmodsort_disable = 0;	/* set in /etc/system to disable HAT */
					/* support for vmodsort for testing */

static struct kmem_cache *marker_cache = NULL;

/*
 * Find the largest contiguous block which contains `addr' for file offset
 * `off', while staying within the file system limits (`vp_off' and
 * `vp_len') and the address space limits, for which no pages currently
 * exist and which map to consecutive file offsets.
 */
page_t *
pvn_read_kluster(
	struct vnode *vp,
	u_offset_t off,
	struct seg *seg,
	caddr_t addr,
	u_offset_t *offp,			/* return values */
	size_t *lenp,				/* return values */
	u_offset_t vp_off,
	size_t vp_len,
	int isra)
{
	ssize_t deltaf, deltab;
	page_t *pp;
	page_t *plist = NULL;
	spgcnt_t pagesavail;
	u_offset_t vp_end;

	ASSERT(off >= vp_off && off < vp_off + vp_len);

	/*
	 * We only want to do klustering/read ahead if there
	 * is more than minfree pages currently available.
	 */
	pagesavail = freemem - minfree;

	if (pagesavail <= 0)
		if (isra)
			return ((page_t *)NULL);    /* ra case - give up */
		else
			pagesavail = 1;		    /* must return a page */

	/* We calculate in pages instead of bytes due to 32-bit overflows */
	if (pagesavail < (spgcnt_t)btopr(vp_len)) {
		/*
		 * Don't have enough free memory for the
		 * max request, try sizing down vp request.
		 */
		deltab = (ssize_t)(off - vp_off);
		vp_len -= deltab;
		vp_off += deltab;
		if (pagesavail < btopr(vp_len)) {
			/*
			 * Still not enough memory, just settle for
			 * pagesavail which is at least 1.
			 */
			vp_len = ptob(pagesavail);
		}
	}

	vp_end = vp_off + vp_len;
	ASSERT(off >= vp_off && off < vp_end);

	if (isra && SEGOP_KLUSTER(seg, addr, 0))
		return ((page_t *)NULL);	/* segment driver says no */

	if ((plist = page_create_va(vp, off,
	    PAGESIZE, PG_EXCL | PG_WAIT, seg, addr)) == NULL)
		return ((page_t *)NULL);

	if (vp_len <= PAGESIZE || pvn_nofodklust) {
		*offp = off;
		*lenp = MIN(vp_len, PAGESIZE);
	} else {
		/*
		 * Scan back from front by incrementing "deltab" and
		 * comparing "off" with "vp_off + deltab" to avoid
		 * "signed" versus "unsigned" conversion problems.
		 */
		for (deltab = PAGESIZE; off >= vp_off + deltab;
		    deltab += PAGESIZE) {
			/*
			 * Call back to the segment driver to verify that
			 * the klustering/read ahead operation makes sense.
			 */
			if (SEGOP_KLUSTER(seg, addr, -deltab))
				break;		/* page not eligible */
			if ((pp = page_create_va(vp, off - deltab,
			    PAGESIZE, PG_EXCL, seg, addr - deltab))
			    == NULL)
				break;		/* already have the page */
			/*
			 * Add page to front of page list.
			 */
			page_add(&plist, pp);
		}
		deltab -= PAGESIZE;

		/* scan forward from front */
		for (deltaf = PAGESIZE; off + deltaf < vp_end;
		    deltaf += PAGESIZE) {
			/*
			 * Call back to the segment driver to verify that
			 * the klustering/read ahead operation makes sense.
			 */
			if (SEGOP_KLUSTER(seg, addr, deltaf))
				break;		/* page not file extension */
			if ((pp = page_create_va(vp, off + deltaf,
			    PAGESIZE, PG_EXCL, seg, addr + deltaf))
			    == NULL)
				break;		/* already have page */

			/*
			 * Add page to end of page list.
			 */
			page_add(&plist, pp);
			plist = plist->p_next;
		}
		*offp = off = off - deltab;
		*lenp = deltab + deltaf;
		ASSERT(off >= vp_off);

		/*
		 * If we ended up getting more than was actually
		 * requested, retract the returned length to only
		 * reflect what was requested.  This might happen
		 * if we were allowed to kluster pages across a
		 * span of (say) 5 frags, and frag size is less
		 * than PAGESIZE.  We need a whole number of
		 * pages to contain those frags, but the returned
		 * size should only allow the returned range to
		 * extend as far as the end of the frags.
		 */
		if ((vp_off + vp_len) < (off + *lenp)) {
			ASSERT(vp_end > off);
			*lenp = vp_end - off;
		}
	}
	TRACE_3(TR_FAC_VM, TR_PVN_READ_KLUSTER,
	    "pvn_read_kluster:seg %p addr %x isra %x",
	    seg, addr, isra);
	return (plist);
}
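
/*
 * Illustrative sketch (not part of the original source): a file system
 * getpage routine typically calls pvn_read_kluster() after failing to find
 * the faulting page in the cache, then issues one read for the returned
 * list.  The block bounds blkoff/blksize and the i/o step are hypothetical
 * caller-side names.
 *
 *	u_offset_t io_off;
 *	size_t io_len;
 *	page_t *pl;
 *
 *	pl = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
 *	    blkoff, blksize, isra);
 *	if (pl == NULL)
 *		return (0);	read-ahead refused; caller tries again later
 *	... read [io_off, io_off + io_len) into the "pl" list, then call
 *	    pvn_read_done(pl, B_ERROR) on failure or
 *	    pvn_plist_init(pl, plarr, plsz, off, io_len, rw) on success ...
 */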

/*
 * Handle pages for this vnode on either side of the page "pp"
 * which has been locked by the caller.  This routine will also
 * do klustering in the range [vp_off, vp_off + vp_len] up
 * until a page which is not found.  The offset and length
 * of pages included is returned in "*offp" and "*lenp".
 *
 * Returns a list of dirty locked pages all ready to be
 * written back.
 */
page_t *
pvn_write_kluster(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,		/* return values */
	size_t *lenp,			/* return values */
	u_offset_t vp_off,
	size_t vp_len,
	int flags)
{
	u_offset_t off;
	page_t *dirty;
	size_t deltab, deltaf;
	se_t se;
	u_offset_t vp_end;

	off = pp->p_offset;

	/*
	 * Klustering should not be done if we are invalidating
	 * pages since we could destroy pages that belong to
	 * some other process if this is a swap vnode.
	 */
	if (pvn_write_noklust || ((flags & B_INVAL) && IS_SWAPVP(vp))) {
		*offp = off;
		*lenp = PAGESIZE;
		return (pp);
	}

	if (flags & (B_FREE | B_INVAL))
		se = SE_EXCL;
	else
		se = SE_SHARED;

	dirty = pp;
	/*
	 * Scan backwards looking for pages to kluster by incrementing
	 * "deltab" and comparing "off" with "vp_off + deltab" to
	 * avoid "signed" versus "unsigned" conversion problems.
	 */
	for (deltab = PAGESIZE; off >= vp_off + deltab; deltab += PAGESIZE) {
		pp = page_lookup_nowait(vp, off - deltab, se);
		if (pp == NULL)
			break;		/* page not found */
		if (pvn_getdirty(pp, flags | B_DELWRI) == 0)
			break;
		page_add(&dirty, pp);
	}
	deltab -= PAGESIZE;

	vp_end = vp_off + vp_len;
	/* now scan forwards looking for pages to kluster */
	for (deltaf = PAGESIZE; off + deltaf < vp_end; deltaf += PAGESIZE) {
		pp = page_lookup_nowait(vp, off + deltaf, se);
		if (pp == NULL)
			break;		/* page not found */
		if (pvn_getdirty(pp, flags | B_DELWRI) == 0)
			break;
		page_add(&dirty, pp);
		dirty = dirty->p_next;
	}

	*offp = off - deltab;
	*lenp = deltab + deltaf;
	return (dirty);
}
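
/*
 * Illustrative sketch (not part of the original source): a file system
 * putapage routine is handed one dirty page (already passed through
 * pvn_getdirty(), e.g. by pvn_vplist_dirty()) and uses pvn_write_kluster()
 * to gather neighbouring dirty pages before issuing a single write.  The
 * block bounds blkoff/blklen are hypothetical caller-side names.
 *
 *	u_offset_t io_off;
 *	size_t io_len;
 *	page_t *dirty;
 *
 *	dirty = pvn_write_kluster(vp, pp, &io_off, &io_len,
 *	    blkoff, blklen, flags);
 *	... write [io_off, io_off + io_len) from the "dirty" list, then
 *	    call pvn_write_done(dirty, B_WRITE | flags) when the i/o
 *	    completes ...
 */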

/*
 * Generic entry point used to release the "shared/exclusive" lock
 * and the "p_iolock" on pages after i/o is complete.
 */
void
pvn_io_done(page_t *plist)
{
	page_t *pp;

	while (plist != NULL) {
		pp = plist;
		page_sub(&plist, pp);
		page_io_unlock(pp);
		page_unlock(pp);
	}
}

/*
 * Entry point to be used by file system getpage subr's and
 * other such routines which either want to unlock pages (B_ASYNC
 * request) or destroy a list of pages if an error occurred.
 */
void
pvn_read_done(page_t *plist, int flags)
{
	page_t *pp;

	while (plist != NULL) {
		pp = plist;
		page_sub(&plist, pp);
		page_io_unlock(pp);
		if (flags & B_ERROR) {
			/*LINTED: constant in conditional context*/
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		} else {
			(void) page_release(pp, 0);
		}
	}
}
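
/*
 * Illustrative sketch (not part of the original source): after waiting for
 * a synchronous read to finish, a getpage routine might dispose of the list
 * on error and otherwise hand the pages back to its caller.  The buf "bp"
 * and the pl/plarr/plsz/rw names are assumed caller-side state.
 *
 *	err = biowait(bp);
 *	pageio_done(bp);
 *	if (err) {
 *		pvn_read_done(pl, B_ERROR);
 *		return (err);
 *	}
 *	pvn_plist_init(pl, plarr, plsz, off, io_len, rw);
 */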

/*
 * Automagic pageout.
 * When memory gets tight, start freeing pages popping out of the
 * write queue.
 */
int write_free = 1;
pgcnt_t pages_before_pager = 200;	/* LMXXX */

/*
 * Routine to be called when page-out's complete.
 * The caller, typically VOP_PUTPAGE, has to explicitly call this routine
 * after waiting for i/o to complete (biowait) to free the list of
 * pages associated with the buffer.  These pages must be locked
 * before i/o is initiated.
 *
 * If a write error occurs, the pages are marked as modified
 * so the write will be re-tried later.
 */

void
pvn_write_done(page_t *plist, int flags)
{
	int dfree = 0;
	int pgrec = 0;
	int pgout = 0;
	int pgpgout = 0;
	int anonpgout = 0;
	int anonfree = 0;
	int fspgout = 0;
	int fsfree = 0;
	int execpgout = 0;
	int execfree = 0;
	page_t *pp;
	struct cpu *cpup;
	struct vnode *vp = NULL;	/* for probe */
	uint_t ppattr;
	kmutex_t *vphm = NULL;

	ASSERT((flags & B_READ) == 0);

	/*
	 * If we are about to start paging anyway, start freeing pages.
	 */
	if (write_free && freemem < lotsfree + pages_before_pager &&
	    (flags & B_ERROR) == 0) {
		flags |= B_FREE;
	}

	/*
	 * Handle each page involved in the i/o operation.
	 */
	while (plist != NULL) {
		pp = plist;
		ASSERT(PAGE_LOCKED(pp) && page_iolock_assert(pp));
		page_sub(&plist, pp);

		/* Kernel probe support */
		if (vp == NULL)
			vp = pp->p_vnode;

		if (((flags & B_ERROR) == 0) && IS_VMODSORT(vp)) {
			/*
			 * Move page to the top of the v_page list.
			 * Skip pages modified during IO.
			 */
			vphm = page_vnode_mutex(vp);
			mutex_enter(vphm);
			if ((pp->p_vpnext != pp) && !hat_ismod(pp)) {
				page_vpsub(&vp->v_pages, pp);
				page_vpadd(&vp->v_pages, pp);
			}
			mutex_exit(vphm);
		}

		if (flags & B_ERROR) {
			/*
			 * Write operation failed.  We don't want
			 * to destroy (or free) the page unless B_FORCE
			 * is set.  We set the mod bit again and release
			 * all locks on the page so that it will get written
			 * back again later when things are hopefully
			 * better again.
			 * If B_INVAL and B_FORCE is set we really have
			 * to destroy the page.
			 */
			if ((flags & (B_INVAL|B_FORCE)) == (B_INVAL|B_FORCE)) {
				page_io_unlock(pp);
				/*LINTED: constant in conditional context*/
				VN_DISPOSE(pp, B_INVAL, 0, kcred);
			} else {
				hat_setmod_only(pp);
				page_io_unlock(pp);
				page_unlock(pp);
			}
		} else if (flags & B_INVAL) {
			/*
			 * XXX - Failed writes with B_INVAL set are
			 * not handled appropriately.
			 */
			page_io_unlock(pp);
			/*LINTED: constant in conditional context*/
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		} else if (flags & B_FREE || !hat_page_is_mapped(pp)) {
			/*
			 * Update statistics for pages being paged out
			 */
			if (pp->p_vnode) {
				if (IS_SWAPFSVP(pp->p_vnode)) {
					anonpgout++;
				} else {
					if (pp->p_vnode->v_flag & VVMEXEC) {
						execpgout++;
					} else {
						fspgout++;
					}
				}
			}
			page_io_unlock(pp);
			pgout = 1;
			pgpgout++;
			TRACE_1(TR_FAC_VM, TR_PAGE_WS_OUT,
			    "page_ws_out:pp %p", pp);

			/*
			 * The page_struct_lock need not be acquired to
			 * examine "p_lckcnt" and "p_cowcnt" since we'll
			 * have an "exclusive" lock if the upgrade succeeds.
			 */
			if (page_tryupgrade(pp) &&
			    pp->p_lckcnt == 0 && pp->p_cowcnt == 0) {
				/*
				 * Check if someone has reclaimed the
				 * page.  If ref and mod are not set, no
				 * one is using it so we can free it.
				 * The rest of the system is careful
				 * to use the NOSYNC flag to unload
				 * translations set up for i/o w/o
				 * affecting ref and mod bits.
				 *
				 * Obtain a copy of the real hardware
				 * mod bit using hat_pagesync(pp, HAT_DONTZERO)
				 * to avoid having to flush the cache.
				 */
				ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
				    HAT_SYNC_STOPON_MOD);
			ck_refmod:
				if (!(ppattr & (P_REF | P_MOD))) {
					if (hat_page_is_mapped(pp)) {
						/*
						 * Doesn't look like the page
						 * was modified so now we
						 * really have to unload the
						 * translations.  Meanwhile
						 * another CPU could've
						 * modified it so we have to
						 * check again.  We don't loop
						 * forever here because now
						 * the translations are gone
						 * and no one can get a new one
						 * since we have the "exclusive"
						 * lock on the page.
						 */
						(void) hat_pageunload(pp,
						    HAT_FORCE_PGUNLOAD);
						ppattr = hat_page_getattr(pp,
						    P_REF | P_MOD);
						goto ck_refmod;
					}
					/*
					 * Update statistics for pages being
					 * freed
					 */
					if (pp->p_vnode) {
						if (IS_SWAPFSVP(pp->p_vnode)) {
							anonfree++;
						} else {
							if (pp->p_vnode->v_flag
							    & VVMEXEC) {
								execfree++;
							} else {
								fsfree++;
							}
						}
					}
					/*LINTED: constant in conditional ctx*/
					VN_DISPOSE(pp, B_FREE,
					    (flags & B_DONTNEED), kcred);
					dfree++;
				} else {
					page_unlock(pp);
					pgrec++;
					TRACE_1(TR_FAC_VM, TR_PAGE_WS_FREE,
					    "page_ws_free:pp %p", pp);
				}
			} else {
				/*
				 * Page is either `locked' in memory
				 * or was reclaimed and now has a
				 * "shared" lock, so release it.
				 */
				page_unlock(pp);
			}
		} else {
			/*
			 * Neither B_FREE nor B_INVAL nor B_ERROR.
			 * Just release locks.
			 */
			page_io_unlock(pp);
			page_unlock(pp);
		}
	}

	CPU_STATS_ENTER_K();
	cpup = CPU;		/* get cpup now that CPU cannot change */
	CPU_STATS_ADDQ(cpup, vm, dfree, dfree);
	CPU_STATS_ADDQ(cpup, vm, pgrec, pgrec);
	CPU_STATS_ADDQ(cpup, vm, pgout, pgout);
	CPU_STATS_ADDQ(cpup, vm, pgpgout, pgpgout);
	CPU_STATS_ADDQ(cpup, vm, anonpgout, anonpgout);
	CPU_STATS_ADDQ(cpup, vm, anonfree, anonfree);
	CPU_STATS_ADDQ(cpup, vm, fspgout, fspgout);
	CPU_STATS_ADDQ(cpup, vm, fsfree, fsfree);
	CPU_STATS_ADDQ(cpup, vm, execpgout, execpgout);
	CPU_STATS_ADDQ(cpup, vm, execfree, execfree);
	CPU_STATS_EXIT_K();

	/* Kernel probe */
	TNF_PROBE_4(pageout, "vm pageio io", /* CSTYLED */,
	    tnf_opaque, vnode, vp,
	    tnf_ulong, pages_pageout, pgpgout,
	    tnf_ulong, pages_freed, dfree,
	    tnf_ulong, pages_reclaimed, pgrec);
}
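
/*
 * Illustrative sketch (not part of the original source): for a synchronous
 * putpage the caller waits for the i/o and then lets pvn_write_done()
 * release (and possibly free) the page list; for an async write the file
 * system's i/o-done handler makes the same call.  The buf "bp" and the
 * "dirty" list are assumed caller-side state.
 *
 *	if ((flags & B_ASYNC) == 0) {
 *		err = biowait(bp);
 *		pageio_done(bp);
 *		pvn_write_done(dirty,
 *		    (err ? B_ERROR : 0) | B_WRITE | flags);
 *	}
 */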

/*
 * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI,
 * B_TRUNC, B_FORCE}.  B_DELWRI indicates that this page is part of a kluster
 * operation and is only to be considered if it doesn't involve any
 * waiting here.  B_TRUNC indicates that the file is being truncated
 * and so no i/o needs to be done.  B_FORCE indicates that the page
 * must be destroyed so don't try writing it out.
 *
 * The caller must ensure that the page is locked.  Returns 1, if
 * the page should be written back (the "iolock" is held in this
 * case), or 0 if the page has been dealt with or has been
 * unlocked.
 */
int
pvn_getdirty(page_t *pp, int flags)
{
	ASSERT((flags & (B_INVAL | B_FREE)) ?
	    PAGE_EXCL(pp) : PAGE_SHARED(pp));
	ASSERT(PP_ISFREE(pp) == 0);

	/*
	 * If trying to invalidate or free a logically `locked' page,
	 * forget it.  Don't need page_struct_lock to check p_lckcnt and
	 * p_cowcnt as the page is exclusively locked.
	 */
	if ((flags & (B_INVAL | B_FREE)) && !(flags & (B_TRUNC|B_FORCE)) &&
	    (pp->p_lckcnt != 0 || pp->p_cowcnt != 0)) {
		page_unlock(pp);
		return (0);
	}

	/*
	 * Now acquire the i/o lock so we can add it to the dirty
	 * list (if necessary).  We avoid blocking on the i/o lock
	 * in the following cases:
	 *
	 * If B_DELWRI is set, which implies that this request is
	 * due to a klustering operation.
	 *
	 * If this is an async (B_ASYNC) operation and we are not doing
	 * invalidation (B_INVAL) [The current i/o or fsflush will ensure
	 * that the page is written out].
	 */
	if ((flags & B_DELWRI) || ((flags & (B_INVAL | B_ASYNC)) == B_ASYNC)) {
		if (!page_io_trylock(pp)) {
			page_unlock(pp);
			return (0);
		}
	} else {
		page_io_lock(pp);
	}

	/*
	 * If we want to free or invalidate the page then
	 * we need to unload it so that anyone who wants
	 * it will have to take a minor fault to get it.
	 * Otherwise, we're just writing the page back so we
	 * need to sync up the hardware and software mod bit to
	 * detect any future modifications.  We clear the
	 * software mod bit when we put the page on the dirty
	 * list.
	 */
	if (flags & (B_INVAL | B_FREE)) {
		(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
	} else {
		(void) hat_pagesync(pp, HAT_SYNC_ZERORM);
	}

	if (!hat_ismod(pp) || (flags & B_TRUNC)) {
		/*
		 * Don't need to add it to the
		 * list after all.
		 */
		page_io_unlock(pp);
		if (flags & B_INVAL) {
			/*LINTED: constant in conditional context*/
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		} else if (flags & B_FREE) {
			/*LINTED: constant in conditional context*/
			VN_DISPOSE(pp, B_FREE, (flags & B_DONTNEED), kcred);
		} else {
			/*
			 * This is advisory path for the callers
			 * of VOP_PUTPAGE() who prefer freeing the
			 * page _only_ if no one else is accessing it.
			 * E.g. segmap_release()
			 *
			 * The above hat_ismod() check is useless because:
			 * (1) we may not be holding SE_EXCL lock;
			 * (2) we've not unloaded _all_ translations
			 *
			 * Let page_release() do the heavy-lifting.
			 */
			(void) page_release(pp, 1);
		}
		return (0);
	}

	/*
	 * Page is dirty, get it ready for the write back
	 * and add page to the dirty list.
	 */
	hat_clrrefmod(pp);

	/*
	 * If we're going to free the page when we're done
	 * then we can let others try to use it starting now.
	 * We'll detect the fact that they used it when the
	 * i/o is done and avoid freeing the page.
	 */
	if (flags & B_FREE)
		page_downgrade(pp);

	TRACE_1(TR_FAC_VM, TR_PVN_GETDIRTY, "pvn_getdirty:pp %p", pp);

	return (1);
}

/*ARGSUSED*/
static int
marker_constructor(void *buf, void *cdrarg, int kmflags)
{
	page_t *mark = buf;
	bzero(mark, sizeof (page_t));
	mark->p_hash = PVN_VPLIST_HASH_TAG;
	return (0);
}

void
pvn_init()
{
	if (pvn_vmodsort_disable == 0)
		pvn_vmodsort_supported = hat_supported(HAT_VMODSORT, NULL);
	marker_cache = kmem_cache_create("marker_cache",
	    sizeof (page_t), 0, marker_constructor,
	    NULL, NULL, NULL, NULL, 0);
}

/*
 * Process a vnode's page list for all pages whose offset is >= off.
 * Pages are to either be free'd, invalidated, or written back to disk.
 *
 * An "exclusive" lock is acquired for each page if B_INVAL or B_FREE
 * is specified, otherwise they are "shared" locked.
 *
 * Flags are {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_TRUNC}
 *
 * Special marker page_t's are inserted in the list in order
 * to keep track of where we are in the list when locks are dropped.
 *
 * Note the list is circular and insertions can happen only at the
 * head and tail of the list.  The algorithm ensures visiting all pages
 * on the list in the following way:
 *
 * Drop two marker pages at the end of the list.
 *
 * Move one marker page backwards towards the start of the list until
 * it is at the list head, processing the pages passed along the way.
 *
 * Due to race conditions when the vphm mutex is dropped, additional pages
 * can be added to either end of the list, so we'll continue to move
 * the marker and process pages until it is up against the end marker.
 *
 * There is one special exit condition.  If we are processing a VMODSORT
 * vnode and only writing back modified pages, we can stop as soon as
 * we run into an unmodified page.  This makes fsync(3) operations fast.
 */
int
pvn_vplist_dirty(
	vnode_t		*vp,
	u_offset_t	off,
	int		(*putapage)(vnode_t *, page_t *, u_offset_t *,
			size_t *, int, cred_t *),
	int		flags,
	cred_t		*cred)
{
	page_t		*pp;
	page_t		*mark;		/* marker page that moves toward head */
	page_t		*end;		/* marker page at end of list */
	int		err = 0;
	int		error;
	kmutex_t	*vphm;
	se_t		se;
	page_t		**where_to_move;

	ASSERT(vp->v_type != VCHR);

	if (vp->v_pages == NULL)
		return (0);

	/*
	 * Serialize vplist_dirty operations on this vnode by setting VVMLOCK.
	 *
	 * Don't block on VVMLOCK if B_ASYNC is set. This prevents sync()
	 * from getting blocked while flushing pages to a dead NFS server.
	 */
	mutex_enter(&vp->v_lock);
	if ((vp->v_flag & VVMLOCK) && (flags & B_ASYNC)) {
		mutex_exit(&vp->v_lock);
		return (EAGAIN);
	}

	while (vp->v_flag & VVMLOCK)
		cv_wait(&vp->v_cv, &vp->v_lock);

	if (vp->v_pages == NULL) {
		mutex_exit(&vp->v_lock);
		return (0);
	}

	vp->v_flag |= VVMLOCK;
	mutex_exit(&vp->v_lock);

	/*
	 * Set up the marker pages used to walk the list
	 */
	end = kmem_cache_alloc(marker_cache, KM_SLEEP);
	end->p_vnode = vp;
	end->p_offset = (u_offset_t)-2;
	mark = kmem_cache_alloc(marker_cache, KM_SLEEP);
	mark->p_vnode = vp;
	mark->p_offset = (u_offset_t)-1;

	/*
	 * Grab the lock protecting the vnode's page list
	 * note that this lock is dropped at times in the loop.
	 */
	vphm = page_vnode_mutex(vp);
	mutex_enter(vphm);
	if (vp->v_pages == NULL)
		goto leave;

	/*
	 * insert the markers and loop through the list of pages
	 */
	page_vpadd(&vp->v_pages->p_vpprev->p_vpnext, mark);
	page_vpadd(&mark->p_vpnext, end);
	for (;;) {

		/*
		 * If only doing an async write back, then we can
		 * stop as soon as we get to start of the list.
		 */
		if (flags == B_ASYNC && vp->v_pages == mark)
			break;

		/*
		 * otherwise stop when we've gone through all the pages
		 */
		if (mark->p_vpprev == end)
			break;

		pp = mark->p_vpprev;
		if (vp->v_pages == pp)
			where_to_move = &vp->v_pages;
		else
			where_to_move = &pp->p_vpprev->p_vpnext;

		ASSERT(pp->p_vnode == vp);

		/*
		 * If just flushing dirty pages to disk and this vnode
		 * is using a sorted list of pages, we can stop processing
		 * as soon as we find an unmodified page, since all the
		 * modified pages are visited first.
		 */
		if (IS_VMODSORT(vp) &&
		    !(flags & (B_INVAL | B_FREE | B_TRUNC))) {
			if (!hat_ismod(pp) && !page_io_locked(pp)) {
#ifdef	DEBUG
				/*
				 * For debug kernels examine what should be
				 * all the remaining clean pages, asserting
				 * that they are not modified.
				 */
				page_t	*chk = pp;
				int	attr;

				page_vpsub(&vp->v_pages, mark);
				page_vpadd(where_to_move, mark);
				do {
					chk = chk->p_vpprev;
					ASSERT(chk != end);
					if (chk == mark)
						continue;
					attr = hat_page_getattr(chk, P_MOD |
					    P_REF);
					if ((attr & P_MOD) == 0)
						continue;
					panic("v_pages list not all clean: "
					    "page_t*=%p vnode=%p off=%lx "
					    "attr=0x%x last clean page_t*=%p\n",
					    (void *)chk, (void *)chk->p_vnode,
					    (long)chk->p_offset, attr,
					    (void *)pp);
				} while (chk != vp->v_pages);
#endif
				break;
			} else if (!(flags & B_ASYNC) && !hat_ismod(pp)) {
				/*
				 * Couldn't get io lock, wait until IO is done.
				 * Block only for sync IO since we don't want
				 * to block async IO.
				 */
				mutex_exit(vphm);
				page_io_wait(pp);
				mutex_enter(vphm);
				continue;
			}
		}

		/*
		 * Skip this page if the offset is out of the desired range.
		 * Just move the marker and continue.
		 */
		if (pp->p_offset < off) {
			page_vpsub(&vp->v_pages, mark);
			page_vpadd(where_to_move, mark);
			continue;
		}

		/*
		 * If we are supposed to invalidate or free this
		 * page, then we need an exclusive lock.
		 */
		se = (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED;

		/*
		 * We must acquire the page lock for all synchronous
		 * operations (invalidate, free and write).
		 */
		if ((flags & B_INVAL) != 0 || (flags & B_ASYNC) == 0) {
			/*
			 * If the page_lock() drops the mutex
			 * we must retry the loop.
			 */
			if (!page_lock(pp, se, vphm, P_NO_RECLAIM))
				continue;

			/*
			 * It's ok to move the marker page now.
			 */
			page_vpsub(&vp->v_pages, mark);
			page_vpadd(where_to_move, mark);
		} else {

			/*
			 * update the marker page for all remaining cases
			 */
			page_vpsub(&vp->v_pages, mark);
			page_vpadd(where_to_move, mark);

			/*
			 * For write backs, if we can't lock the page, it's
			 * invalid or in the process of being destroyed.  Skip
			 * it, assuming someone else is writing it.
			 */
			if (!page_trylock(pp, se))
				continue;
		}

		ASSERT(pp->p_vnode == vp);

		/*
		 * Successfully locked the page, now figure out what to
		 * do with it. Free pages are easily dealt with, invalidate
		 * if desired or just go on to the next page.
		 */
		if (PP_ISFREE(pp)) {
			if ((flags & B_INVAL) == 0) {
				page_unlock(pp);
				continue;
			}

			/*
			 * Invalidate (destroy) the page.
			 */
			mutex_exit(vphm);
			page_destroy_free(pp);
			mutex_enter(vphm);
			continue;
		}

		/*
		 * pvn_getdirty() figures out what to do with a dirty page.
		 * If the page is dirty, the putapage() routine will write it
		 * and will kluster any other adjacent dirty pages it can.
		 *
		 * pvn_getdirty() and `(*putapage)' unlock the page.
		 */
		mutex_exit(vphm);
		if (pvn_getdirty(pp, flags)) {
			error = (*putapage)(vp, pp, NULL, NULL, flags, cred);
			if (!err)
				err = error;
		}
		mutex_enter(vphm);
	}
	page_vpsub(&vp->v_pages, mark);
	page_vpsub(&vp->v_pages, end);

leave:
	/*
	 * Release v_pages mutex, also VVMLOCK and wakeup blocked thrds
	 */
	mutex_exit(vphm);
	kmem_cache_free(marker_cache, mark);
	kmem_cache_free(marker_cache, end);
	mutex_enter(&vp->v_lock);
	vp->v_flag &= ~VVMLOCK;
	cv_broadcast(&vp->v_cv);
	mutex_exit(&vp->v_lock);
	return (err);
}
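
/*
 * Illustrative sketch (not part of the original source): a typical
 * VOP_PUTPAGE implementation handles the "len == 0" (whole file) case by
 * handing its putapage routine to pvn_vplist_dirty(); xx_putapage is a
 * hypothetical name for that routine.
 *
 *	if (len == 0) {
 *		err = pvn_vplist_dirty(vp, off, xx_putapage, flags, cr);
 *	} else {
 *		... walk [off, off + len) with page_lookup()/pvn_getdirty()
 *		    and call xx_putapage() on each dirty page found ...
 *	}
 */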

/*
 * Walk the vp->v_pages list and call the callback function pointed to by
 * page_check for every page.  If page_check returns non-zero, mark the
 * page as modified and, if VMODSORT is set, move it to the end of the
 * v_pages list.  Moving makes sense only if we have at least two pages -
 * this also avoids having v_pages temporarily being NULL after calling
 * page_vpsub() if there was just one page.
 */
void
pvn_vplist_setdirty(vnode_t *vp, int (*page_check)(page_t *))
{
	page_t	*pp, *next, *end;
	kmutex_t	*vphm;
	int	shuffle;

	vphm = page_vnode_mutex(vp);
	mutex_enter(vphm);

	if (vp->v_pages == NULL) {
		mutex_exit(vphm);
		return;
	}

	end = vp->v_pages->p_vpprev;
	shuffle = IS_VMODSORT(vp) && (vp->v_pages != end);
	pp = vp->v_pages;

	for (;;) {
		next = pp->p_vpnext;
		if (pp->p_hash != PVN_VPLIST_HASH_TAG && page_check(pp)) {
			/*
			 * hat_setmod_only() in contrast to hat_setmod() does
			 * not shuffle the pages and does not grab the mutex
			 * page_vnode_mutex. Exactly what we need.
			 */
			hat_setmod_only(pp);
			if (shuffle) {
				page_vpsub(&vp->v_pages, pp);
				ASSERT(vp->v_pages != NULL);
				page_vpadd(&vp->v_pages->p_vpprev->p_vpnext,
				    pp);
			}
		}
		/* Stop if we have just processed the last page. */
		if (pp == end)
			break;
		pp = next;
	}

	mutex_exit(vphm);
}
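
/*
 * Illustrative sketch (not part of the original source): a file system that
 * decides after the fact that some cached pages must be rewritten can mark
 * them modified in one pass; xx_page_is_stale and stale_off are hypothetical
 * caller-side names.
 *
 *	static int
 *	xx_page_is_stale(page_t *pp)
 *	{
 *		return (pp->p_offset >= stale_off);
 *	}
 *
 *	pvn_vplist_setdirty(vp, xx_page_is_stale);
 */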

/*
 * Zero out zbytes worth of data. Caller should be aware that this
 * routine may enter back into the fs layer (xxx_getpage). Locks
 * that the xxx_getpage routine may need should not be held while
 * calling this.
 */
void
pvn_vpzero(struct vnode *vp, u_offset_t vplen, size_t zbytes)
{
	caddr_t addr;

	ASSERT(vp->v_type != VCHR);

	if (vp->v_pages == NULL)
		return;

	/*
	 * zbytes may be zero but there still may be some portion of
	 * a page which needs clearing (since zbytes is a function
	 * of filesystem block size, not pagesize.)
	 */
	if (zbytes == 0 && (PAGESIZE - (vplen & PAGEOFFSET)) == 0)
		return;

	/*
	 * We get the last page and handle the partial
	 * zeroing via kernel mappings.  This will make the page
	 * dirty so that we know that when this page is written
	 * back, the zeroed information will go out with it.  If
	 * the page is not currently in memory, then the kzero
	 * operation will cause it to be brought in.  We use kzero
	 * instead of bzero so that if the page cannot be read in
	 * for any reason, the system will not panic.  We need
	 * to zero out a minimum of the fs given zbytes, but we
	 * might also have to do more to get the entire last page.
	 */

	if ((zbytes + (vplen & MAXBOFFSET)) > MAXBSIZE)
		panic("pvn_vptrunc zbytes");
	addr = segmap_getmapflt(segkmap, vp, vplen,
	    MAX(zbytes, PAGESIZE - (vplen & PAGEOFFSET)), 1, S_WRITE);
	(void) kzero(addr + (vplen & MAXBOFFSET),
	    MAX(zbytes, PAGESIZE - (vplen & PAGEOFFSET)));
	(void) segmap_release(segkmap, addr, SM_WRITE | SM_ASYNC);
}

/*
 * Handles common work of the VOP_GETPAGE routines when more than
 * one page must be returned by calling a file system specific operation
 * to do most of the work.  Must be called with the vp already locked
 * by the VOP_GETPAGE routine.
 */
int
pvn_getpages(
	int (*getpage)(vnode_t *, u_offset_t, size_t, uint_t *, page_t *[],
		size_t, struct seg *, caddr_t, enum seg_rw, cred_t *),
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cred)
{
	page_t **ppp;
	u_offset_t o, eoff;
	size_t sz, xlen;
	int err;

	ASSERT(plsz >= len);		/* ensure that we have enough space */

	/*
	 * Loop one page at a time and let getapage function fill
	 * in the next page in array.  We only allow one page to be
	 * returned at a time (except for the last page) so that we
	 * don't have any problems with duplicates and other such
	 * painful problems.  This is a very simple minded algorithm,
	 * but it does the job correctly.  We hope that the cost of a
	 * getapage call for a resident page that we might have been
	 * able to get from an earlier call doesn't cost too much.
	 */
	ppp = pl;
	sz = PAGESIZE;
	eoff = off + len;
	xlen = len;
	for (o = off; o < eoff; o += PAGESIZE, addr += PAGESIZE,
	    xlen -= PAGESIZE) {
		if (o + PAGESIZE >= eoff) {
			/*
			 * Last time through - allow all of
			 * what's left of the pl[] array to be used.
			 */
			sz = plsz - (o - off);
		}
		err = (*getpage)(vp, o, xlen, protp, ppp, sz, seg, addr,
		    rw, cred);
		if (err) {
			/*
			 * Release any pages we already got.
			 */
			if (o > off && pl != NULL) {
				for (ppp = pl; *ppp != NULL; *ppp++ = NULL)
					(void) page_release(*ppp, 1);
			}
			break;
		}
		if (pl != NULL)
			ppp++;
	}
	return (err);
}
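
/*
 * Illustrative sketch (not part of the original source): a VOP_GETPAGE
 * routine that only knows how to fetch one page at a time can delegate
 * multi-page requests to pvn_getpages(); xx_getapage is a hypothetical
 * single-page routine with the signature expected above.
 *
 *	if (len <= PAGESIZE) {
 *		err = xx_getapage(vp, off, len, protp, pl, plsz,
 *		    seg, addr, rw, cr);
 *	} else {
 *		err = pvn_getpages(xx_getapage, vp, off, len, protp,
 *		    pl, plsz, seg, addr, rw, cr);
 *	}
 */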

/*
 * Initialize the page list array.
 */
/*ARGSUSED*/
void
pvn_plist_init(page_t *pp, page_t *pl[], size_t plsz,
    u_offset_t off, size_t io_len, enum seg_rw rw)
{
	ssize_t sz;
	page_t *ppcur, **ppp;

	/*
	 * Set up to load plsz worth
	 * starting at the needed page.
	 */
	while (pp != NULL && pp->p_offset != off) {
		/*
		 * Remove page from the i/o list,
		 * release the i/o and the page lock.
		 */
		ppcur = pp;
		page_sub(&pp, ppcur);
		page_io_unlock(ppcur);
		(void) page_release(ppcur, 1);
	}

	if (pp == NULL) {
		pl[0] = NULL;
		return;
	}

	sz = plsz;

	/*
	 * Initialize the page list array.
	 */
	ppp = pl;
	do {
		ppcur = pp;
		*ppp++ = ppcur;
		page_sub(&pp, ppcur);
		page_io_unlock(ppcur);
		if (rw != S_CREATE)
			page_downgrade(ppcur);
		sz -= PAGESIZE;
	} while (sz > 0 && pp != NULL);
	*ppp = NULL;		/* terminate list */

	/*
	 * Now free the remaining pages that weren't
	 * loaded in the page list.
	 */
	while (pp != NULL) {
		ppcur = pp;
		page_sub(&pp, ppcur);
		page_io_unlock(ppcur);
		(void) page_release(ppcur, 1);
	}
}