10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
52418Scth * Common Development and Distribution License (the "License").
62418Scth * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*11066Srafael.vanoni@sun.com * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
230Sstevel@tonic-gate * Use is subject to license terms.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
260Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
270Sstevel@tonic-gate /* All Rights Reserved */
280Sstevel@tonic-gate
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988
310Sstevel@tonic-gate * The Regents of the University of California
320Sstevel@tonic-gate * All Rights Reserved
330Sstevel@tonic-gate *
340Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from
350Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its
360Sstevel@tonic-gate * contributors.
370Sstevel@tonic-gate */
380Sstevel@tonic-gate
390Sstevel@tonic-gate #include <sys/types.h>
400Sstevel@tonic-gate #include <sys/t_lock.h>
410Sstevel@tonic-gate #include <sys/sysmacros.h>
420Sstevel@tonic-gate #include <sys/conf.h>
430Sstevel@tonic-gate #include <sys/cpuvar.h>
440Sstevel@tonic-gate #include <sys/errno.h>
450Sstevel@tonic-gate #include <sys/debug.h>
460Sstevel@tonic-gate #include <sys/buf.h>
470Sstevel@tonic-gate #include <sys/var.h>
480Sstevel@tonic-gate #include <sys/vnode.h>
490Sstevel@tonic-gate #include <sys/bitmap.h>
500Sstevel@tonic-gate #include <sys/cmn_err.h>
510Sstevel@tonic-gate #include <sys/kmem.h>
520Sstevel@tonic-gate #include <sys/vmem.h>
530Sstevel@tonic-gate #include <sys/atomic.h>
540Sstevel@tonic-gate #include <vm/seg_kmem.h>
550Sstevel@tonic-gate #include <vm/page.h>
560Sstevel@tonic-gate #include <vm/pvn.h>
570Sstevel@tonic-gate #include <sys/vtrace.h>
580Sstevel@tonic-gate #include <sys/tnf_probe.h>
590Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
600Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
610Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
620Sstevel@tonic-gate #include <sys/systm.h>
630Sstevel@tonic-gate #include <sys/vfs.h>
640Sstevel@tonic-gate #include <sys/sdt.h>
650Sstevel@tonic-gate
660Sstevel@tonic-gate /* Locks */
670Sstevel@tonic-gate static kmutex_t blist_lock; /* protects b_list */
680Sstevel@tonic-gate static kmutex_t bhdr_lock; /* protects the bhdrlist */
690Sstevel@tonic-gate static kmutex_t bfree_lock; /* protects the bfreelist structure */
700Sstevel@tonic-gate
710Sstevel@tonic-gate struct hbuf *hbuf; /* Hash buckets */
720Sstevel@tonic-gate struct dwbuf *dwbuf; /* Delayed write buckets */
730Sstevel@tonic-gate static struct buf *bhdrlist; /* buf header free list */
740Sstevel@tonic-gate static int nbuf; /* number of buffer headers allocated */
750Sstevel@tonic-gate
760Sstevel@tonic-gate static int lastindex; /* Reference point on where to start */
770Sstevel@tonic-gate /* when looking for free buffers */
780Sstevel@tonic-gate
790Sstevel@tonic-gate #define bio_bhash(dev, bn) (hash2ints((dev), (int)(bn)) & v.v_hmask)
800Sstevel@tonic-gate #define EMPTY_LIST ((struct buf *)-1)
810Sstevel@tonic-gate
820Sstevel@tonic-gate static kcondvar_t bio_mem_cv; /* Condition variables */
830Sstevel@tonic-gate static kcondvar_t bio_flushinval_cv;
840Sstevel@tonic-gate static int bio_doingflush; /* flush in progress */
850Sstevel@tonic-gate static int bio_doinginval; /* inval in progress */
860Sstevel@tonic-gate static int bio_flinv_cv_wanted; /* someone waiting for cv */
870Sstevel@tonic-gate
880Sstevel@tonic-gate /*
890Sstevel@tonic-gate * Statistics on the buffer cache
900Sstevel@tonic-gate */
/*
 * Buffer cache kstat counters; each entry is a kstat_named_t whose
 * name is exported verbatim. Updated without locks (approximate).
 */
struct biostats biostats = {
	{ "buffer_cache_lookups",		KSTAT_DATA_UINT32 },	/* getblk_common() calls */
	{ "buffer_cache_hits",			KSTAT_DATA_UINT32 },	/* lookups satisfied from cache */
	{ "new_buffer_requests",		KSTAT_DATA_UINT32 },	/* buffers newly allocated */
	{ "waits_for_buffer_allocs",		KSTAT_DATA_UINT32 },	/* sleeps waiting for memory */
	{ "buffers_locked_by_someone",		KSTAT_DATA_UINT32 },	/* b_sem already held on lookup */
	{ "duplicate_buffers_found",		KSTAT_DATA_UINT32 }	/* lost race; identical buf raced in */
};
990Sstevel@tonic-gate
1000Sstevel@tonic-gate /*
1010Sstevel@tonic-gate * kstat data
1020Sstevel@tonic-gate */
/* Exported view of the biostats table for kstat registration. */
kstat_named_t *biostats_ptr = (kstat_named_t *)&biostats;
/* Number of named entries in the table, derived from its size. */
uint_t biostats_ndata = (uint_t)(sizeof (biostats) /
	sizeof (kstat_named_t));
1060Sstevel@tonic-gate
1070Sstevel@tonic-gate /*
1080Sstevel@tonic-gate * Statistics on ufs buffer cache
1090Sstevel@tonic-gate * Not protected by locks
1100Sstevel@tonic-gate */
/*
 * UFS buffer-cache statistics. Counters are bumped without locking,
 * so values are approximate.
 */
struct ufsbiostats ub = {
	{ "breads",			KSTAT_DATA_UINT32 },	/* reads via bread_common (no log) */
	{ "bwrites",			KSTAT_DATA_UINT32 },	/* writes via bwrite_common (no log) */
	{ "fbiwrites",			KSTAT_DATA_UINT32 },
	{ "getpages",			KSTAT_DATA_UINT32 },
	{ "getras",			KSTAT_DATA_UINT32 },	/* read-ahead page requests */
	{ "putsyncs",			KSTAT_DATA_UINT32 },
	{ "putasyncs",			KSTAT_DATA_UINT32 },
	{ "putpageios",			KSTAT_DATA_UINT32 },
};
1210Sstevel@tonic-gate
1220Sstevel@tonic-gate /*
1230Sstevel@tonic-gate * more UFS Logging eccentricities...
1240Sstevel@tonic-gate *
1250Sstevel@tonic-gate * required since "#pragma weak ..." doesn't work in reverse order.
1260Sstevel@tonic-gate * i.e.: genunix (bio.c) is loaded before the ufs modules and pointers
1270Sstevel@tonic-gate * to ufs routines don't get plugged into bio.c calls so
1280Sstevel@tonic-gate * we initialize it when setting up the "lufsops" table
1290Sstevel@tonic-gate * in "lufs.c:_init()"
1300Sstevel@tonic-gate */
1310Sstevel@tonic-gate void (*bio_lufs_strategy)(void *, buf_t *);
1320Sstevel@tonic-gate void (*bio_snapshot_strategy)(void *, buf_t *);
1330Sstevel@tonic-gate
1340Sstevel@tonic-gate
1350Sstevel@tonic-gate /* Private routines */
1360Sstevel@tonic-gate static struct buf *bio_getfreeblk(long);
1370Sstevel@tonic-gate static void bio_mem_get(long);
1380Sstevel@tonic-gate static void bio_bhdr_free(struct buf *);
1390Sstevel@tonic-gate static struct buf *bio_bhdr_alloc(void);
1400Sstevel@tonic-gate static void bio_recycle(int, long);
1410Sstevel@tonic-gate static void bio_pageio_done(struct buf *);
1420Sstevel@tonic-gate static int bio_incore(dev_t, daddr_t);
1430Sstevel@tonic-gate
1440Sstevel@tonic-gate /*
1450Sstevel@tonic-gate * Buffer cache constants
1460Sstevel@tonic-gate */
1470Sstevel@tonic-gate #define BIO_BUF_PERCENT (100/2) /* default: 2% of memory */
1480Sstevel@tonic-gate #define BIO_MAX_PERCENT (100/20) /* max is 20% of real memory */
1490Sstevel@tonic-gate #define BIO_BHDR_POOL 100 /* Default bhdr pool size */
1500Sstevel@tonic-gate #define BIO_MIN_HDR 10 /* Minimum number of buffer headers */
1510Sstevel@tonic-gate #define BIO_MIN_HWM (BIO_MIN_HDR * MAXBSIZE / 1024)
1520Sstevel@tonic-gate #define BIO_HASHLEN 4 /* Target length of hash chains */
1530Sstevel@tonic-gate
1540Sstevel@tonic-gate
1550Sstevel@tonic-gate /* Flags for bio_recycle() */
1560Sstevel@tonic-gate #define BIO_HEADER 0x01
1570Sstevel@tonic-gate #define BIO_MEM 0x02
1580Sstevel@tonic-gate
1590Sstevel@tonic-gate extern int bufhwm; /* User tunable - high water mark for mem */
1600Sstevel@tonic-gate extern int bufhwm_pct; /* ditto - given in % of physmem */
1610Sstevel@tonic-gate
1620Sstevel@tonic-gate /*
1630Sstevel@tonic-gate * The following routines allocate and free
1640Sstevel@tonic-gate * buffers with various side effects. In general the
1650Sstevel@tonic-gate * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer
 * to the buffer header; the buffer returned is locked with a
1680Sstevel@tonic-gate * binary semaphore so that no one else can touch it. If the block was
1690Sstevel@tonic-gate * already in core, no I/O need be done; if it is
1700Sstevel@tonic-gate * already locked, the process waits until it becomes free.
1710Sstevel@tonic-gate * The following routines allocate a buffer:
1720Sstevel@tonic-gate * getblk
1730Sstevel@tonic-gate * bread/BREAD
1740Sstevel@tonic-gate * breada
1750Sstevel@tonic-gate * Eventually the buffer must be released, possibly with the
1760Sstevel@tonic-gate * side effect of writing it out, by using one of
1770Sstevel@tonic-gate * bwrite/BWRITE/brwrite
1780Sstevel@tonic-gate * bdwrite/bdrwrite
1790Sstevel@tonic-gate * bawrite
1800Sstevel@tonic-gate * brelse
1810Sstevel@tonic-gate *
1820Sstevel@tonic-gate * The B_WANTED/B_BUSY bits are NOT used by these routines for synchronization.
1830Sstevel@tonic-gate * Instead, a binary semaphore, b_sem is used to gain exclusive access to
1840Sstevel@tonic-gate * a buffer and a binary semaphore, b_io is used for I/O synchronization.
1850Sstevel@tonic-gate * B_DONE is still used to denote a buffer with I/O complete on it.
1860Sstevel@tonic-gate *
 * The bfreelist.b_bcount field is computed every time fsflush runs. It
 * should not be used where a very accurate count of the free buffers is
 * needed.
1900Sstevel@tonic-gate */
1910Sstevel@tonic-gate
1920Sstevel@tonic-gate /*
1930Sstevel@tonic-gate * Read in (if necessary) the block and return a buffer pointer.
1940Sstevel@tonic-gate *
1950Sstevel@tonic-gate * This interface is provided for binary compatibility. Using
1960Sstevel@tonic-gate * BREAD() directly avoids the extra function call overhead invoked
1970Sstevel@tonic-gate * by calling this routine.
1980Sstevel@tonic-gate */
1990Sstevel@tonic-gate struct buf *
bread(dev_t dev,daddr_t blkno,long bsize)2000Sstevel@tonic-gate bread(dev_t dev, daddr_t blkno, long bsize)
2010Sstevel@tonic-gate {
2020Sstevel@tonic-gate return (BREAD(dev, blkno, bsize));
2030Sstevel@tonic-gate }
2040Sstevel@tonic-gate
2050Sstevel@tonic-gate /*
2060Sstevel@tonic-gate * Common code for reading a buffer with various options
2070Sstevel@tonic-gate *
2080Sstevel@tonic-gate * Read in (if necessary) the block and return a buffer pointer.
2090Sstevel@tonic-gate */
struct buf *
bread_common(void *arg, dev_t dev, daddr_t blkno, long bsize)
{
	/* arg is the ufsvfs of the caller, or NULL for non-ufs callers. */
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	klwp_t *lwp = ttolwp(curthread);

	CPU_STATS_ADD_K(sys, lread, 1);
	/* errflg == 1: during panic, bail out rather than block on b_sem. */
	bp = getblk_common(ufsvfsp, dev, blkno, bsize, /* errflg */ 1);
	if (bp->b_flags & B_DONE)
		return (bp);	/* cache hit: data already valid, no I/O */
	bp->b_flags |= B_READ;
	ASSERT(bp->b_bcount == bsize);
	/*
	 * Dispatch the read through the appropriate strategy routine:
	 * raw device, UFS logging, UFS snapshot, or plain UFS.
	 */
	if (ufsvfsp == NULL) {					/* !ufs */
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
							/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
							/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		/* remember when this fs last did I/O */
		ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
		ub.ub_breads.value.ul++;		/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (lwp != NULL)
		lwp->lwp_ru.inblock++;	/* per-lwp resource accounting */
	CPU_STATS_ADD_K(sys, bread, 1);
	/* synchronous read: wait for the I/O to complete */
	(void) biowait(bp);
	return (bp);
}
2420Sstevel@tonic-gate
2430Sstevel@tonic-gate /*
2440Sstevel@tonic-gate * Read in the block, like bread, but also start I/O on the
2450Sstevel@tonic-gate * read-ahead block (which is not allocated to the caller).
2460Sstevel@tonic-gate */
struct buf *
breada(dev_t dev, daddr_t blkno, daddr_t rablkno, long bsize)
{
	struct buf *bp, *rabp;
	klwp_t *lwp = ttolwp(curthread);

	bp = NULL;
	/* Start the primary read only if the block is not already cached. */
	if (!bio_incore(dev, blkno)) {
		CPU_STATS_ADD_K(sys, lread, 1);
		bp = GETBLK(dev, blkno, bsize);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = bsize;
			(void) bdev_strategy(bp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	/*
	 * Fire off the read-ahead block asynchronously, but only when
	 * read-ahead was requested, the free list isn't starved
	 * (b_bcount is a rough heuristic), and the block isn't cached.
	 * The read-ahead buffer is always released; the caller never
	 * gets a handle to it.
	 */
	if (rablkno && bfreelist.b_bcount > 1 &&
	    !bio_incore(dev, rablkno)) {
		rabp = GETBLK(dev, rablkno, bsize);
		if (rabp->b_flags & B_DONE)
			brelse(rabp);	/* already valid; nothing to do */
		else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = bsize;
			(void) bdev_strategy(rabp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	/*
	 * If the primary block was incore when we checked, we never
	 * issued a read for it; fall back to a plain BREAD (which may
	 * find it cached or re-read it if it was stolen meanwhile).
	 */
	if (bp == NULL)
		return (BREAD(dev, blkno, bsize));
	(void) biowait(bp);
	return (bp);
}
2850Sstevel@tonic-gate
2860Sstevel@tonic-gate /*
2870Sstevel@tonic-gate * Common code for writing a buffer with various options.
2880Sstevel@tonic-gate *
2890Sstevel@tonic-gate * force_wait - wait for write completion regardless of B_ASYNC flag
2900Sstevel@tonic-gate * do_relse - release the buffer when we are done
2910Sstevel@tonic-gate * clear_flags - flags to clear from the buffer
2920Sstevel@tonic-gate */
void
bwrite_common(void *arg, struct buf *bp, int force_wait,
	int do_relse, int clear_flags)
{
	register int do_wait;
	/* arg is the ufsvfs of the caller, or NULL for non-ufs callers. */
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	int flag;
	klwp_t *lwp = ttolwp(curthread);
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));
	/* Snapshot the flags before clearing; B_ASYNC decides waiting. */
	flag = bp->b_flags;
	bp->b_flags &= ~clear_flags;
	if (lwp != NULL)
		lwp->lwp_ru.oublock++;	/* per-lwp resource accounting */
	CPU_STATS_ENTER_K();
	cpup = CPU;	/* get pointer AFTER preemption is disabled */
	CPU_STATS_ADDQ(cpup, sys, lwrite, 1);
	CPU_STATS_ADDQ(cpup, sys, bwrite, 1);
	/* Wait unless the caller asked for async (and isn't forcing). */
	do_wait = ((flag & B_ASYNC) == 0 || force_wait);
	if (do_wait == 0)
		CPU_STATS_ADDQ(cpup, sys, bawrite, 1);
	CPU_STATS_EXIT_K();
	/*
	 * Dispatch the write through the appropriate strategy routine:
	 * raw device, UFS logging, UFS snapshot, or plain UFS.
	 */
	if (ufsvfsp == NULL) {
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
		/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
		/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ub.ub_bwrites.value.ul++;	/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (do_wait) {
		(void) biowait(bp);
		if (do_relse) {
			brelse(bp);
		}
	}
}
3350Sstevel@tonic-gate
3360Sstevel@tonic-gate /*
3370Sstevel@tonic-gate * Write the buffer, waiting for completion (unless B_ASYNC is set).
3380Sstevel@tonic-gate * Then release the buffer.
3390Sstevel@tonic-gate * This interface is provided for binary compatibility. Using
3400Sstevel@tonic-gate * BWRITE() directly avoids the extra function call overhead invoked
3410Sstevel@tonic-gate * by calling this routine.
3420Sstevel@tonic-gate */
void
bwrite(struct buf *bp)
{
	/* Compatibility shim: the BWRITE() macro does the real work. */
	BWRITE(bp);
}
3480Sstevel@tonic-gate
3490Sstevel@tonic-gate /*
3500Sstevel@tonic-gate * Write the buffer, waiting for completion.
3510Sstevel@tonic-gate * But don't release the buffer afterwards.
3520Sstevel@tonic-gate * This interface is provided for binary compatibility. Using
3530Sstevel@tonic-gate * BWRITE2() directly avoids the extra function call overhead.
3540Sstevel@tonic-gate */
void
bwrite2(struct buf *bp)
{
	/*
	 * Compatibility shim: BWRITE2() writes and waits but does NOT
	 * release the buffer; the caller still holds b_sem afterwards.
	 */
	BWRITE2(bp);
}
3600Sstevel@tonic-gate
3610Sstevel@tonic-gate /*
3620Sstevel@tonic-gate * Release the buffer, marking it so that if it is grabbed
3630Sstevel@tonic-gate * for another purpose it will be written out before being
3640Sstevel@tonic-gate * given up (e.g. when writing a partial block where it is
3650Sstevel@tonic-gate * assumed that another write for the same block will soon follow).
3660Sstevel@tonic-gate * Also save the time that the block is first marked as delayed
3670Sstevel@tonic-gate * so that it will be written in a reasonable time.
3680Sstevel@tonic-gate */
3690Sstevel@tonic-gate void
bdwrite(struct buf * bp)3700Sstevel@tonic-gate bdwrite(struct buf *bp)
3710Sstevel@tonic-gate {
3720Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
3730Sstevel@tonic-gate CPU_STATS_ADD_K(sys, lwrite, 1);
3740Sstevel@tonic-gate if ((bp->b_flags & B_DELWRI) == 0)
375*11066Srafael.vanoni@sun.com bp->b_start = ddi_get_lbolt();
3760Sstevel@tonic-gate /*
3770Sstevel@tonic-gate * B_DONE allows others to use the buffer, B_DELWRI causes the
3780Sstevel@tonic-gate * buffer to be written before being reused, and setting b_resid
3790Sstevel@tonic-gate * to zero says the buffer is complete.
3800Sstevel@tonic-gate */
3810Sstevel@tonic-gate bp->b_flags |= B_DELWRI | B_DONE;
3820Sstevel@tonic-gate bp->b_resid = 0;
3830Sstevel@tonic-gate brelse(bp);
3840Sstevel@tonic-gate }
3850Sstevel@tonic-gate
3860Sstevel@tonic-gate /*
3870Sstevel@tonic-gate * Release the buffer, start I/O on it, but don't wait for completion.
3880Sstevel@tonic-gate */
3890Sstevel@tonic-gate void
bawrite(struct buf * bp)3900Sstevel@tonic-gate bawrite(struct buf *bp)
3910Sstevel@tonic-gate {
3920Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
3930Sstevel@tonic-gate
3940Sstevel@tonic-gate /* Use bfreelist.b_bcount as a weird-ass heuristic */
3950Sstevel@tonic-gate if (bfreelist.b_bcount > 4)
3960Sstevel@tonic-gate bp->b_flags |= B_ASYNC;
3970Sstevel@tonic-gate BWRITE(bp);
3980Sstevel@tonic-gate }
3990Sstevel@tonic-gate
4000Sstevel@tonic-gate /*
4010Sstevel@tonic-gate * Release the buffer, with no I/O implied.
4020Sstevel@tonic-gate */
void
brelse(struct buf *bp)
{
	struct buf	**backp;
	uint_t		index;
	kmutex_t	*hmp;
	struct	buf	*dp;
	struct	hbuf	*hp;


	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * Clear the retry write flag if the buffer was written without
	 * error. The presence of B_DELWRI means the buffer has not yet
	 * been written and the presence of B_ERROR means that an error
	 * is still occurring.
	 */
	if ((bp->b_flags & (B_ERROR | B_DELWRI | B_RETRYWRI)) == B_RETRYWRI) {
		bp->b_flags &= ~B_RETRYWRI;
	}

	/* Check for anomalous conditions */
	if (bp->b_flags & (B_ERROR|B_NOCACHE)) {
		if (bp->b_flags & B_NOCACHE) {
			/* Don't add to the freelist. Destroy it now */
			kmem_free(bp->b_un.b_addr, bp->b_bufsize);
			sema_destroy(&bp->b_sem);
			sema_destroy(&bp->b_io);
			kmem_free(bp, sizeof (struct buf));
			return;
		}
		/*
		 * If a write failed and we are supposed to retry write,
		 * don't toss the buffer. Keep it around and mark it
		 * delayed write in the hopes that it will eventually
		 * get flushed (and still keep the system running.)
		 */
		if ((bp->b_flags & (B_READ | B_RETRYWRI)) == B_RETRYWRI) {
			bp->b_flags |= B_DELWRI;
			/* keep fsflush from trying continuously to flush */
			bp->b_start = ddi_get_lbolt();
		} else
			bp->b_flags |= B_AGE|B_STALE;
		bp->b_flags &= ~B_ERROR;
		bp->b_error = 0;
	}

	/*
	 * If delayed write is set then put in on the delayed
	 * write list instead of the free buffer list.
	 */
	index = bio_bhash(bp->b_edev, bp->b_blkno);
	hmp = &hbuf[index].b_lock;

	mutex_enter(hmp);
	hp = &hbuf[index];
	dp = (struct buf *)hp;

	/*
	 * Make sure that the number of entries on this list are
	 * Zero <= count <= total # buffers
	 */
	ASSERT(hp->b_length >= 0);
	ASSERT(hp->b_length < nbuf);

	hp->b_length++;		/* We are adding this buffer */

	if (bp->b_flags & B_DELWRI) {
		/*
		 * This buffer goes on the delayed write buffer list
		 */
		dp = (struct buf *)&dwbuf[index];
	}
	ASSERT(bp->b_bufsize > 0);
	ASSERT(bp->b_bcount > 0);
	ASSERT(bp->b_un.b_addr != NULL);

	/*
	 * B_AGE buffers go at the head of the av list (reused first);
	 * otherwise insert at the tail (LRU order).
	 */
	if (bp->b_flags & B_AGE) {
		backp = &dp->av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = dp;
	} else {
		backp = &dp->av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = dp;
	}
	mutex_exit(hmp);

	if (bfreelist.b_flags & B_WANTED) {
		/*
		 * Should come here very very rarely.
		 * Re-check B_WANTED under bfree_lock before waking
		 * sleepers, since it may have been cleared meanwhile.
		 */
		mutex_enter(&bfree_lock);
		if (bfreelist.b_flags & B_WANTED) {
			bfreelist.b_flags &= ~B_WANTED;
			cv_broadcast(&bio_mem_cv);
		}
		mutex_exit(&bfree_lock);
	}

	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC);
	/*
	 * Don't let anyone get the buffer off the freelist before we
	 * release our hold on it.
	 */
	sema_v(&bp->b_sem);
}
5150Sstevel@tonic-gate
5160Sstevel@tonic-gate /*
5170Sstevel@tonic-gate * Return a count of the number of B_BUSY buffers in the system
5180Sstevel@tonic-gate * Can only be used as a good estimate. If 'cleanit' is set,
5190Sstevel@tonic-gate * try to flush all bufs.
5200Sstevel@tonic-gate */
5210Sstevel@tonic-gate int
bio_busy(int cleanit)5220Sstevel@tonic-gate bio_busy(int cleanit)
5230Sstevel@tonic-gate {
5240Sstevel@tonic-gate struct buf *bp, *dp;
5250Sstevel@tonic-gate int busy = 0;
5260Sstevel@tonic-gate int i;
5270Sstevel@tonic-gate kmutex_t *hmp;
5280Sstevel@tonic-gate
5290Sstevel@tonic-gate for (i = 0; i < v.v_hbuf; i++) {
5300Sstevel@tonic-gate vfs_syncprogress();
5310Sstevel@tonic-gate dp = (struct buf *)&hbuf[i];
5320Sstevel@tonic-gate hmp = &hbuf[i].b_lock;
5330Sstevel@tonic-gate
5340Sstevel@tonic-gate mutex_enter(hmp);
5350Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
5360Sstevel@tonic-gate if (bp->b_flags & B_BUSY)
5370Sstevel@tonic-gate busy++;
5380Sstevel@tonic-gate }
5390Sstevel@tonic-gate mutex_exit(hmp);
5400Sstevel@tonic-gate }
5410Sstevel@tonic-gate
5420Sstevel@tonic-gate if (cleanit && busy != 0) {
5430Sstevel@tonic-gate bflush(NODEV);
5440Sstevel@tonic-gate }
5450Sstevel@tonic-gate
5460Sstevel@tonic-gate return (busy);
5470Sstevel@tonic-gate }
5480Sstevel@tonic-gate
5490Sstevel@tonic-gate /*
5500Sstevel@tonic-gate * this interface is provided for binary compatibility.
5510Sstevel@tonic-gate *
5520Sstevel@tonic-gate * Assign a buffer for the given block. If the appropriate
5530Sstevel@tonic-gate * block is already associated, return it; otherwise search
5540Sstevel@tonic-gate * for the oldest non-busy buffer and reassign it.
5550Sstevel@tonic-gate */
5560Sstevel@tonic-gate struct buf *
getblk(dev_t dev,daddr_t blkno,long bsize)5570Sstevel@tonic-gate getblk(dev_t dev, daddr_t blkno, long bsize)
5580Sstevel@tonic-gate {
5590Sstevel@tonic-gate return (getblk_common(/* ufsvfsp */ NULL, dev,
560*11066Srafael.vanoni@sun.com blkno, bsize, /* errflg */ 0));
5610Sstevel@tonic-gate }
5620Sstevel@tonic-gate
5630Sstevel@tonic-gate /*
5640Sstevel@tonic-gate * Assign a buffer for the given block. If the appropriate
5650Sstevel@tonic-gate * block is already associated, return it; otherwise search
5660Sstevel@tonic-gate * for the oldest non-busy buffer and reassign it.
5670Sstevel@tonic-gate */
5680Sstevel@tonic-gate struct buf *
getblk_common(void * arg,dev_t dev,daddr_t blkno,long bsize,int errflg)5690Sstevel@tonic-gate getblk_common(void * arg, dev_t dev, daddr_t blkno, long bsize, int errflg)
5700Sstevel@tonic-gate {
5710Sstevel@tonic-gate ufsvfs_t *ufsvfsp = (struct ufsvfs *)arg;
5720Sstevel@tonic-gate struct buf *bp;
5730Sstevel@tonic-gate struct buf *dp;
5740Sstevel@tonic-gate struct buf *nbp = NULL;
5750Sstevel@tonic-gate struct buf *errbp;
5760Sstevel@tonic-gate uint_t index;
5770Sstevel@tonic-gate kmutex_t *hmp;
5780Sstevel@tonic-gate struct hbuf *hp;
5790Sstevel@tonic-gate
5800Sstevel@tonic-gate if (getmajor(dev) >= devcnt)
5810Sstevel@tonic-gate cmn_err(CE_PANIC, "blkdev");
5820Sstevel@tonic-gate
5830Sstevel@tonic-gate biostats.bio_lookup.value.ui32++;
5840Sstevel@tonic-gate
5850Sstevel@tonic-gate index = bio_bhash(dev, blkno);
5860Sstevel@tonic-gate hp = &hbuf[index];
5870Sstevel@tonic-gate dp = (struct buf *)hp;
5880Sstevel@tonic-gate hmp = &hp->b_lock;
5890Sstevel@tonic-gate
5900Sstevel@tonic-gate mutex_enter(hmp);
5910Sstevel@tonic-gate loop:
5920Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
5930Sstevel@tonic-gate if (bp->b_blkno != blkno || bp->b_edev != dev ||
5940Sstevel@tonic-gate (bp->b_flags & B_STALE))
5950Sstevel@tonic-gate continue;
5960Sstevel@tonic-gate /*
5970Sstevel@tonic-gate * Avoid holding the hash lock in the event that
5980Sstevel@tonic-gate * the buffer is locked by someone. Since the hash chain
5990Sstevel@tonic-gate * may change when we drop the hash lock
6000Sstevel@tonic-gate * we have to start at the beginning of the chain if the
6010Sstevel@tonic-gate * buffer identity/contents aren't valid.
6020Sstevel@tonic-gate */
6030Sstevel@tonic-gate if (!sema_tryp(&bp->b_sem)) {
6040Sstevel@tonic-gate biostats.bio_bufbusy.value.ui32++;
6050Sstevel@tonic-gate mutex_exit(hmp);
6060Sstevel@tonic-gate /*
6070Sstevel@tonic-gate * OK, we are dealing with a busy buffer.
6080Sstevel@tonic-gate * In the case that we are panicking and we
6090Sstevel@tonic-gate * got called from bread(), we have some chance
6100Sstevel@tonic-gate * for error recovery. So better bail out from
6110Sstevel@tonic-gate * here since sema_p() won't block. If we got
6120Sstevel@tonic-gate * called directly from ufs routines, there is
6130Sstevel@tonic-gate * no way to report an error yet.
6140Sstevel@tonic-gate */
6150Sstevel@tonic-gate if (panicstr && errflg)
6160Sstevel@tonic-gate goto errout;
6170Sstevel@tonic-gate /*
6180Sstevel@tonic-gate * For the following line of code to work
6190Sstevel@tonic-gate * correctly never kmem_free the buffer "header".
6200Sstevel@tonic-gate */
6210Sstevel@tonic-gate sema_p(&bp->b_sem);
6220Sstevel@tonic-gate if (bp->b_blkno != blkno || bp->b_edev != dev ||
6230Sstevel@tonic-gate (bp->b_flags & B_STALE)) {
6240Sstevel@tonic-gate sema_v(&bp->b_sem);
6250Sstevel@tonic-gate mutex_enter(hmp);
6260Sstevel@tonic-gate goto loop; /* start over */
6270Sstevel@tonic-gate }
6280Sstevel@tonic-gate mutex_enter(hmp);
6290Sstevel@tonic-gate }
6300Sstevel@tonic-gate /* Found */
6310Sstevel@tonic-gate biostats.bio_hit.value.ui32++;
6320Sstevel@tonic-gate bp->b_flags &= ~B_AGE;
6330Sstevel@tonic-gate
6340Sstevel@tonic-gate /*
6350Sstevel@tonic-gate * Yank it off the free/delayed write lists
6360Sstevel@tonic-gate */
6370Sstevel@tonic-gate hp->b_length--;
6380Sstevel@tonic-gate notavail(bp);
6390Sstevel@tonic-gate mutex_exit(hmp);
6400Sstevel@tonic-gate
6410Sstevel@tonic-gate ASSERT((bp->b_flags & B_NOCACHE) == NULL);
6420Sstevel@tonic-gate
6430Sstevel@tonic-gate if (nbp == NULL) {
6440Sstevel@tonic-gate /*
6450Sstevel@tonic-gate * Make the common path short.
6460Sstevel@tonic-gate */
6470Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
6480Sstevel@tonic-gate return (bp);
6490Sstevel@tonic-gate }
6500Sstevel@tonic-gate
6510Sstevel@tonic-gate biostats.bio_bufdup.value.ui32++;
6520Sstevel@tonic-gate
6530Sstevel@tonic-gate /*
6540Sstevel@tonic-gate * The buffer must have entered during the lock upgrade
6550Sstevel@tonic-gate * so free the new buffer we allocated and return the
6560Sstevel@tonic-gate * found buffer.
6570Sstevel@tonic-gate */
6580Sstevel@tonic-gate kmem_free(nbp->b_un.b_addr, nbp->b_bufsize);
6590Sstevel@tonic-gate nbp->b_un.b_addr = NULL;
6600Sstevel@tonic-gate
6610Sstevel@tonic-gate /*
6620Sstevel@tonic-gate * Account for the memory
6630Sstevel@tonic-gate */
6640Sstevel@tonic-gate mutex_enter(&bfree_lock);
6650Sstevel@tonic-gate bfreelist.b_bufsize += nbp->b_bufsize;
6660Sstevel@tonic-gate mutex_exit(&bfree_lock);
6670Sstevel@tonic-gate
6680Sstevel@tonic-gate /*
6690Sstevel@tonic-gate * Destroy buf identity, and place on avail list
6700Sstevel@tonic-gate */
6710Sstevel@tonic-gate nbp->b_dev = (o_dev_t)NODEV;
6720Sstevel@tonic-gate nbp->b_edev = NODEV;
6730Sstevel@tonic-gate nbp->b_flags = 0;
6740Sstevel@tonic-gate nbp->b_file = NULL;
6750Sstevel@tonic-gate nbp->b_offset = -1;
6760Sstevel@tonic-gate
6770Sstevel@tonic-gate sema_v(&nbp->b_sem);
6780Sstevel@tonic-gate bio_bhdr_free(nbp);
6790Sstevel@tonic-gate
6800Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
6810Sstevel@tonic-gate return (bp);
6820Sstevel@tonic-gate }
6830Sstevel@tonic-gate
6840Sstevel@tonic-gate /*
6850Sstevel@tonic-gate * bio_getfreeblk may block so check the hash chain again.
6860Sstevel@tonic-gate */
6870Sstevel@tonic-gate if (nbp == NULL) {
6880Sstevel@tonic-gate mutex_exit(hmp);
6890Sstevel@tonic-gate nbp = bio_getfreeblk(bsize);
6900Sstevel@tonic-gate mutex_enter(hmp);
6910Sstevel@tonic-gate goto loop;
6920Sstevel@tonic-gate }
6930Sstevel@tonic-gate
6940Sstevel@tonic-gate /*
6950Sstevel@tonic-gate * New buffer. Assign nbp and stick it on the hash.
6960Sstevel@tonic-gate */
6970Sstevel@tonic-gate nbp->b_flags = B_BUSY;
6980Sstevel@tonic-gate nbp->b_edev = dev;
6990Sstevel@tonic-gate nbp->b_dev = (o_dev_t)cmpdev(dev);
7000Sstevel@tonic-gate nbp->b_blkno = blkno;
7010Sstevel@tonic-gate nbp->b_iodone = NULL;
7020Sstevel@tonic-gate nbp->b_bcount = bsize;
7030Sstevel@tonic-gate /*
7040Sstevel@tonic-gate * If we are given a ufsvfsp and the vfs_root field is NULL
7050Sstevel@tonic-gate * then this must be I/O for a superblock. A superblock's
7060Sstevel@tonic-gate * buffer is set up in mountfs() and there is no root vnode
7070Sstevel@tonic-gate * at that point.
7080Sstevel@tonic-gate */
7090Sstevel@tonic-gate if (ufsvfsp && ufsvfsp->vfs_root) {
7100Sstevel@tonic-gate nbp->b_vp = ufsvfsp->vfs_root;
7110Sstevel@tonic-gate } else {
7120Sstevel@tonic-gate nbp->b_vp = NULL;
7130Sstevel@tonic-gate }
7140Sstevel@tonic-gate
7150Sstevel@tonic-gate ASSERT((nbp->b_flags & B_NOCACHE) == NULL);
7160Sstevel@tonic-gate
7170Sstevel@tonic-gate binshash(nbp, dp);
7180Sstevel@tonic-gate mutex_exit(hmp);
7190Sstevel@tonic-gate
7200Sstevel@tonic-gate ASSERT(SEMA_HELD(&nbp->b_sem));
7210Sstevel@tonic-gate
7220Sstevel@tonic-gate return (nbp);
7230Sstevel@tonic-gate
7240Sstevel@tonic-gate
7250Sstevel@tonic-gate /*
7260Sstevel@tonic-gate * Come here in case of an internal error. At this point we couldn't
 * get a buffer, but we have to return one. Hence we allocate some
7280Sstevel@tonic-gate * kind of error reply buffer on the fly. This buffer is marked as
7290Sstevel@tonic-gate * B_NOCACHE | B_AGE | B_ERROR | B_DONE to assure the following:
7300Sstevel@tonic-gate * - B_ERROR will indicate error to the caller.
7310Sstevel@tonic-gate * - B_DONE will prevent us from reading the buffer from
7320Sstevel@tonic-gate * the device.
7330Sstevel@tonic-gate * - B_NOCACHE will cause that this buffer gets free'd in
7340Sstevel@tonic-gate * brelse().
7350Sstevel@tonic-gate */
7360Sstevel@tonic-gate
7370Sstevel@tonic-gate errout:
7380Sstevel@tonic-gate errbp = geteblk();
7390Sstevel@tonic-gate sema_p(&errbp->b_sem);
7400Sstevel@tonic-gate errbp->b_flags &= ~B_BUSY;
7410Sstevel@tonic-gate errbp->b_flags |= (B_ERROR | B_DONE);
7420Sstevel@tonic-gate return (errbp);
7430Sstevel@tonic-gate }
7440Sstevel@tonic-gate
7450Sstevel@tonic-gate /*
7460Sstevel@tonic-gate * Get an empty block, not assigned to any particular device.
7470Sstevel@tonic-gate * Returns a locked buffer that is not on any hash or free list.
7480Sstevel@tonic-gate */
7490Sstevel@tonic-gate struct buf *
ngeteblk(long bsize)7500Sstevel@tonic-gate ngeteblk(long bsize)
7510Sstevel@tonic-gate {
7520Sstevel@tonic-gate struct buf *bp;
7530Sstevel@tonic-gate
7540Sstevel@tonic-gate bp = kmem_alloc(sizeof (struct buf), KM_SLEEP);
7550Sstevel@tonic-gate bioinit(bp);
7560Sstevel@tonic-gate bp->av_forw = bp->av_back = NULL;
7570Sstevel@tonic-gate bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
7580Sstevel@tonic-gate bp->b_bufsize = bsize;
7590Sstevel@tonic-gate bp->b_flags = B_BUSY | B_NOCACHE | B_AGE;
7600Sstevel@tonic-gate bp->b_dev = (o_dev_t)NODEV;
7610Sstevel@tonic-gate bp->b_edev = NODEV;
7620Sstevel@tonic-gate bp->b_lblkno = 0;
7630Sstevel@tonic-gate bp->b_bcount = bsize;
7640Sstevel@tonic-gate bp->b_iodone = NULL;
7650Sstevel@tonic-gate return (bp);
7660Sstevel@tonic-gate }
7670Sstevel@tonic-gate
/*
 * Historical entry point kept intact for driver compatibility: hand
 * back an anonymous 1 KB buffer.  Callers needing any other block
 * size should use ngeteblk() directly.
 */
struct buf *
geteblk(void)
{
	/* 1 KB is the only size the legacy interface ever supported. */
	long bsize = 1024;

	return (ngeteblk(bsize));
}
7770Sstevel@tonic-gate
/*
 * Return a buffer w/o sleeping.
 *
 * Non-blocking lookup of a cached, fully-read (B_DONE) buffer for
 * <dev, blkno>.  Both the hash-chain mutex and the buffer's b_sem are
 * taken with try-locks only, so this never blocks.  On success the
 * buffer is returned locked and removed from the free list; otherwise
 * NULL is returned (buffer absent, stale, busy, lock contended, or
 * I/O not yet complete).
 */
struct buf *
trygetblk(dev_t dev, daddr_t blkno)
{
	struct buf *bp;
	struct buf *dp;
	struct hbuf *hp;
	kmutex_t *hmp;
	uint_t index;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	hmp = &hp->b_lock;

	/* Hash lock is contended; fail rather than block. */
	if (!mutex_tryenter(hmp))
		return (NULL);

	dp = (struct buf *)hp;
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Get access to a valid buffer without sleeping
		 */
		if (sema_tryp(&bp->b_sem)) {
			if (bp->b_flags & B_DONE) {
				/* Yank it off the free list and return it. */
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				return (bp);
			} else {
				/* I/O still in flight; not usable here. */
				sema_v(&bp->b_sem);
				break;
			}
		}
		/* Someone else holds the buffer; give up immediately. */
		break;
	}
	mutex_exit(hmp);
	return (NULL);
}
8210Sstevel@tonic-gate
/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 *
 * Thin wrapper around biowait() that additionally asserts the caller
 * holds the buffer lock (b_sem), as users of this interface must.
 */
int
iowait(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (biowait(bp));
}
8320Sstevel@tonic-gate
/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 *
 * Thin wrapper around biodone() that asserts the buffer lock (b_sem)
 * is held; the return value of biodone() is intentionally discarded.
 */
void
iodone(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	(void) biodone(bp);
}
8430Sstevel@tonic-gate
8440Sstevel@tonic-gate /*
8450Sstevel@tonic-gate * Zero the core associated with a buffer.
8460Sstevel@tonic-gate */
8470Sstevel@tonic-gate void
clrbuf(struct buf * bp)8480Sstevel@tonic-gate clrbuf(struct buf *bp)
8490Sstevel@tonic-gate {
8500Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
8510Sstevel@tonic-gate bzero(bp->b_un.b_addr, bp->b_bcount);
8520Sstevel@tonic-gate bp->b_resid = 0;
8530Sstevel@tonic-gate }
8540Sstevel@tonic-gate
8550Sstevel@tonic-gate
/*
 * Make sure all write-behind blocks on dev (or NODEV for all)
 * are flushed out.
 *
 * Two-phase algorithm:
 *  1) Under the hash locks, walk every per-bucket delayed-write list
 *     and thread matching buffers onto a private delwri_list via the
 *     b_list field.
 *  2) With the hash locks dropped, take each buffer's b_sem (which
 *     may block), re-validate it, and write it back asynchronously.
 * The split is forced by the lock ordering b_sem > hash lock (see
 * brelse): we may not block on b_sem while holding a hash lock.
 * Concurrent flushes/invalidates are serialized via blist_lock and
 * the bio_doingflush/bio_doinginval counters.
 */
void
bflush(dev_t dev)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *delwri_list = EMPTY_LIST;
	int i, index;
	kmutex_t *hmp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any invalidates or flushes ahead of us to finish.
	 * We really could split blist_lock up per device for better
	 * parallelism here.
	 */
	while (bio_doinginval || bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doingflush++;
	/*
	 * Gather all B_DELWRI buffers for the device.
	 * Lock ordering is b_sem > hash lock (brelse).
	 * Since we are finding the buffer via the delayed write list,
	 * it may be busy and we would block trying to get the
	 * b_sem lock while holding hash lock. So transfer all the
	 * candidates on the delwri_list and then drop the hash locks.
	 */
	for (i = 0; i < v.v_hbuf; i++) {
		vfs_syncprogress();
		hmp = &hbuf[i].b_lock;
		dp = (struct buf *)&dwbuf[i];
		mutex_enter(hmp);
		for (bp = dp->av_forw; bp != dp; bp = bp->av_forw) {
			if (dev == NODEV || bp->b_edev == dev) {
				/* b_list == NULL means not yet on any list */
				if (bp->b_list == NULL) {
					bp->b_list = delwri_list;
					delwri_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/*
	 * Now that the hash locks have been dropped grab the semaphores
	 * and write back all the buffers that have B_DELWRI set.
	 */
	while (delwri_list != EMPTY_LIST) {
		vfs_syncprogress();
		bp = delwri_list;

		sema_p(&bp->b_sem);	/* may block */
		/*
		 * Re-validate: the buffer may have been reassigned to a
		 * different device while we were waiting for b_sem, and
		 * during a panic we must not touch buffers mid-I/O.
		 */
		if ((dev != bp->b_edev && dev != NODEV) ||
		    (panicstr && bp->b_flags & B_BUSY)) {
			sema_v(&bp->b_sem);
			delwri_list = bp->b_list;
			bp->b_list = NULL;
			continue;	/* No longer a candidate */
		}
		if (bp->b_flags & B_DELWRI) {
			index = bio_bhash(bp->b_edev, bp->b_blkno);
			hp = &hbuf[index];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			/* Asynchronous write-back; brelse happens in biodone */
			bp->b_flags |= B_ASYNC;
			mutex_enter(hmp);
			hp->b_length--;
			notavail(bp);
			mutex_exit(hmp);
			if (bp->b_vp == NULL) {		/* !ufs */
				BWRITE(bp);
			} else {			/* ufs */
				UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
			}
		} else {
			/* Someone else already wrote it back; just unlock. */
			sema_v(&bp->b_sem);
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doingflush--;
	/* Wake anyone serialized behind this flush. */
	if (bio_flinv_cv_wanted) {
		bio_flinv_cv_wanted = 0;
		cv_broadcast(&bio_flushinval_cv);
	}
	mutex_exit(&blist_lock);
}
9510Sstevel@tonic-gate
/*
 * Ensure that a specified block is up-to-date on disk.
 *
 * Looks up the <dev, blkno> buffer under the hash lock, then — after
 * dropping the hash lock to honor the b_sem > hash lock ordering —
 * acquires b_sem and re-validates the buffer's identity before
 * writing it back.  If the buffer is absent, stale, or not delayed-
 * write, this is a no-op.
 */
void
blkflush(dev_t dev, daddr_t blkno)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *sbp = NULL;
	uint_t index;
	kmutex_t *hmp;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	/*
	 * Identify the buffer in the cache belonging to
	 * this device and blkno (if any).
	 */
	mutex_enter(hmp);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		sbp = bp;
		break;
	}
	mutex_exit(hmp);
	if (sbp == NULL)
		return;
	/*
	 * Now check the buffer we have identified and
	 * make sure it still belongs to the device and is B_DELWRI.
	 * (Its identity may have changed between dropping the hash
	 * lock above and acquiring b_sem here.)
	 */
	sema_p(&sbp->b_sem);
	if (sbp->b_blkno == blkno && sbp->b_edev == dev &&
	    (sbp->b_flags & (B_DELWRI|B_STALE)) == B_DELWRI) {
		mutex_enter(hmp);
		hp->b_length--;
		notavail(sbp);
		mutex_exit(hmp);
		/*
		 * XXX - There is nothing to guarantee a synchronous
		 * write here if the B_ASYNC flag is set. This needs
		 * some investigation.
		 */
		if (sbp->b_vp == NULL) {		/* !ufs */
			BWRITE(sbp);	/* synchronous write */
		} else {			/* ufs */
			UFS_BWRITE(VTOI(sbp->b_vp)->i_ufsvfs, sbp);
		}
	} else {
		/* No longer ours or no longer dirty; just unlock. */
		sema_v(&sbp->b_sem);
	}
}
10090Sstevel@tonic-gate
/*
 * Same as binval, except can force-invalidate delayed-write buffers
 * (which may not already be flushed because of device errors).  Also
 * makes sure that the retry write flag is cleared.
 *
 * Returns 0 on success, or EIO when a delayed-write buffer remained
 * dirty and could not be invalidated (only possible when !force).
 */
int
bfinval(dev_t dev, int force)
{
	struct buf *dp;
	struct buf *bp;
	struct buf *binval_list = EMPTY_LIST;
	int i, error = 0;
	kmutex_t *hmp;
	uint_t index;
	struct buf **backp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any flushes ahead of us to finish, it's ok to
	 * do invalidates in parallel.
	 */
	while (bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doinginval++;

	/*
	 * Gather bp's: thread every buffer for this device onto a
	 * private list via b_list, under the hash locks, so we can
	 * later block on each b_sem without holding any hash lock.
	 */
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = binval_list;
					binval_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/* Invalidate all bp's found */
	while (binval_list != EMPTY_LIST) {
		bp = binval_list;

		sema_p(&bp->b_sem);
		/* Re-check identity: it may have changed before we locked. */
		if (bp->b_edev == dev) {
			if (force && (bp->b_flags & B_DELWRI)) {
				/* clear B_DELWRI, move to non-dw freelist */
				index = bio_bhash(bp->b_edev, bp->b_blkno);
				hmp = &hbuf[index].b_lock;
				dp = (struct buf *)&hbuf[index];
				mutex_enter(hmp);

				/* remove from delayed write freelist */
				notavail(bp);

				/* add to B_AGE side of non-dw freelist */
				backp = &dp->av_forw;
				(*backp)->av_back = bp;
				bp->av_forw = *backp;
				*backp = bp;
				bp->av_back = dp;

				/*
				 * make sure write retries and busy are cleared
				 */
				bp->b_flags &=
				    ~(B_BUSY | B_DELWRI | B_RETRYWRI);
				mutex_exit(hmp);
			}
			/* Still dirty here means we failed to invalidate. */
			if ((bp->b_flags & B_DELWRI) == 0)
				bp->b_flags |= B_STALE|B_AGE;
			else
				error = EIO;
		}
		sema_v(&bp->b_sem);
		binval_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doinginval--;
	/* Wake flushers serialized behind this invalidate. */
	if (bio_flinv_cv_wanted) {
		cv_broadcast(&bio_flushinval_cv);
		bio_flinv_cv_wanted = 0;
	}
	mutex_exit(&blist_lock);
	return (error);
}
11030Sstevel@tonic-gate
11040Sstevel@tonic-gate /*
11050Sstevel@tonic-gate * If possible, invalidate blocks for a dev on demand
11060Sstevel@tonic-gate */
11070Sstevel@tonic-gate void
binval(dev_t dev)11080Sstevel@tonic-gate binval(dev_t dev)
11090Sstevel@tonic-gate {
11100Sstevel@tonic-gate (void) bfinval(dev, 0);
11110Sstevel@tonic-gate }
11120Sstevel@tonic-gate
/*
 * Initialize the buffer I/O system by freeing
 * all buffers and setting all device hash buffer lists to empty.
 *
 * Computes the buffer-cache high-water mark (v.v_bufhwm, in KB) from
 * the bufhwm/bufhwm_pct tunables, clamped to sane bounds; sizes and
 * allocates the hash (hbuf) and delayed-write (dwbuf) bucket arrays;
 * and initializes every list to the empty (self-linked) state.
 */
void
binit(void)
{
	struct buf *bp;
	unsigned int i, pct;
	ulong_t bio_max_hwm, bio_default_hwm;

	/*
	 * Maximum/Default values for bufhwm are set to the smallest of:
	 * - BIO_MAX_PERCENT resp. BIO_BUF_PERCENT of real memory
	 * - 1/4 of kernel virtual memory
	 * - INT32_MAX to prevent overflows of v.v_bufhwm (which is int).
	 * Additionally, in order to allow simple tuning by percentage of
	 * physical memory, bufhwm_pct is used to calculate the default if
	 * the value of this tunable is between 0 and BIO_MAX_PERCENT.
	 *
	 * Since the unit for v.v_bufhwm is kilobytes, this allows for
	 * a maximum of 1024 * 2GB == 2TB memory usage by buffer headers.
	 */
	bio_max_hwm = MIN(physmem / BIO_MAX_PERCENT,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_max_hwm = MIN(INT32_MAX, bio_max_hwm);

	pct = BIO_BUF_PERCENT;
	/* Note: pct here is a divisor (100/percentage), not a percentage. */
	if (bufhwm_pct != 0 &&
	    ((pct = 100 / bufhwm_pct) < BIO_MAX_PERCENT)) {
		pct = BIO_BUF_PERCENT;
		/*
		 * Invalid user specified value, emit a warning.
		 */
		cmn_err(CE_WARN, "binit: bufhwm_pct(%d) out of \
range(1..%d). Using %d as default.",
		    bufhwm_pct,
		    100 / BIO_MAX_PERCENT, 100 / BIO_BUF_PERCENT);
	}

	bio_default_hwm = MIN(physmem / pct,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_default_hwm = MIN(INT32_MAX, bio_default_hwm);

	/* bufhwm == 0 means "not tuned"; fall back to the computed default */
	if ((v.v_bufhwm = bufhwm) == 0)
		v.v_bufhwm = bio_default_hwm;

	if (v.v_bufhwm < BIO_MIN_HWM || v.v_bufhwm > bio_max_hwm) {
		v.v_bufhwm = (int)bio_max_hwm;
		/*
		 * Invalid user specified value, emit a warning.
		 */
		cmn_err(CE_WARN,
		    "binit: bufhwm(%d) out \
of range(%d..%lu). Using %lu as default",
		    bufhwm,
		    BIO_MIN_HWM, bio_max_hwm, bio_max_hwm);
	}

	/*
	 * Determine the number of hash buckets. Default is to
	 * create ~BIO_HASHLEN entries per chain based on MAXBSIZE buffers.
	 * Round up number to the next power of 2.
	 */
	v.v_hbuf = 1 << highbit((((ulong_t)v.v_bufhwm * 1024) / MAXBSIZE) /
	    BIO_HASHLEN);
	v.v_hmask = v.v_hbuf - 1;
	v.v_buf = BIO_BHDR_POOL;

	hbuf = kmem_zalloc(v.v_hbuf * sizeof (struct hbuf), KM_SLEEP);

	dwbuf = kmem_zalloc(v.v_hbuf * sizeof (struct dwbuf), KM_SLEEP);

	bfreelist.b_bufsize = (size_t)v.v_bufhwm * 1024;
	bp = &bfreelist;
	/* Self-linked == empty for these circular lists. */
	bp->b_forw = bp->b_back = bp->av_forw = bp->av_back = bp;

	for (i = 0; i < v.v_hbuf; i++) {
		hbuf[i].b_forw = hbuf[i].b_back = (struct buf *)&hbuf[i];
		hbuf[i].av_forw = hbuf[i].av_back = (struct buf *)&hbuf[i];

		/*
		 * Initialize the delayed write buffer list.
		 */
		dwbuf[i].b_forw = dwbuf[i].b_back = (struct buf *)&dwbuf[i];
		dwbuf[i].av_forw = dwbuf[i].av_back = (struct buf *)&dwbuf[i];
	}
}
12010Sstevel@tonic-gate
/*
 * Wait for I/O completion on the buffer; return error code.
 * If bp was for synchronous I/O, bp is invalid and associated
 * resources are freed on return.
 *
 * The wait time is accounted in the per-CPU iowait statistic and
 * bracketed by the io:::wait-start / io:::wait-done DTrace probes.
 * The caller must hold b_sem.
 */
int
biowait(struct buf *bp)
{
	int error = 0;
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));

	cpup = CPU;
	atomic_add_64(&cpup->cpu_stats.sys.iowait, 1);
	DTRACE_IO1(wait__start, struct buf *, bp);

	/*
	 * In case of panic, busy wait for completion.
	 * (Semaphores cannot be relied on once the system has panicked;
	 * biodone() sets B_DONE, which we poll for instead.)
	 */
	if (panicstr) {
		while ((bp->b_flags & B_DONE) == 0)
			drv_usecwait(10);
	} else
		sema_p(&bp->b_io);

	DTRACE_IO1(wait__done, struct buf *, bp);
	atomic_add_64(&cpup->cpu_stats.sys.iowait, -1);

	error = geterror(bp);
	/* For synchronous I/O, tear down any DVMA/kernel mapping. */
	if ((bp->b_flags & B_ASYNC) == 0) {
		if (bp->b_flags & B_REMAPPED)
			bp_mapout(bp);
	}
	return (error);
}
12380Sstevel@tonic-gate
/*
 * Fire the TNF biodone probe for bp.  Kept as a separate function
 * (rather than inline in biodone()) so the compiler can use the tail
 * call optimization there — see the comment at the call site.
 */
static void
biodone_tnf_probe(struct buf *bp)
{
	/* Kernel probe */
	TNF_PROBE_3(biodone, "io blockio", /* CSTYLED */,
	    tnf_device, device, bp->b_edev,
	    tnf_diskaddr, block, bp->b_lblkno,
	    tnf_opaque, buf, bp);
}
12480Sstevel@tonic-gate
/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 *
 * If b_iodone is set, that callback takes over completion entirely
 * and nothing else is done here.  Otherwise B_DONE is set and the
 * buffer is either released (async: brelse/bio_pageio_done) or the
 * waiter sleeping in biowait() is woken via b_io.
 */
void
biodone(struct buf *bp)
{
	/* Close out the io:::start/io:::done DTrace pair. */
	if (bp->b_flags & B_STARTED) {
		DTRACE_IO1(done, struct buf *, bp);
		bp->b_flags &= ~B_STARTED;
	}

	/*
	 * Call the TNF probe here instead of the inline code
	 * to force our compiler to use the tail call optimization.
	 */
	biodone_tnf_probe(bp);

	/* A private completion routine supersedes default handling. */
	if (bp->b_iodone != NULL) {
		(*(bp->b_iodone))(bp);
		return;
	}
	ASSERT((bp->b_flags & B_DONE) == 0);
	ASSERT(SEMA_HELD(&bp->b_sem));
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_ASYNC) {
		if (bp->b_flags & (B_PAGEIO|B_REMAPPED))
			bio_pageio_done(bp);
		else
			brelse(bp);	/* release bp to freelist */
	} else {
		/* Synchronous: wake the thread blocked in biowait(). */
		sema_v(&bp->b_io);
	}
}
12830Sstevel@tonic-gate
12840Sstevel@tonic-gate /*
12850Sstevel@tonic-gate * Pick up the device's error number and pass it to the user;
12860Sstevel@tonic-gate * if there is an error but the number is 0 set a generalized code.
12870Sstevel@tonic-gate */
12880Sstevel@tonic-gate int
geterror(struct buf * bp)12890Sstevel@tonic-gate geterror(struct buf *bp)
12900Sstevel@tonic-gate {
12910Sstevel@tonic-gate int error = 0;
12920Sstevel@tonic-gate
12930Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
12940Sstevel@tonic-gate if (bp->b_flags & B_ERROR) {
12950Sstevel@tonic-gate error = bp->b_error;
12960Sstevel@tonic-gate if (!error)
12970Sstevel@tonic-gate error = EIO;
12980Sstevel@tonic-gate }
12990Sstevel@tonic-gate return (error);
13000Sstevel@tonic-gate }
13010Sstevel@tonic-gate
/*
 * Support for pageio buffers.
 *
 * This stuff should be generalized to provide a generalized bp
 * header facility that can be used for things other than pageio.
 */

/*
 * Allocate and initialize a buf struct for use with pageio.
 *
 *   pp    - page list to attach (b_pages); used for paging statistics
 *   len   - transfer length in bytes (b_bcount/b_bufsize)
 *   vp    - vnode, held for the lifetime of the buf (released in
 *           pageio_done())
 *   flags - B_READ/B_ASYNC etc., OR'ed into b_flags
 *
 * The returned buffer is locked (b_sem initialized held) and marked
 * B_PAGEIO | B_NOCACHE | B_BUSY.  For synchronous reads the caller's
 * lwp major-fault counters and per-CPU pagein statistics are updated.
 */
struct buf *
pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags)
{
	struct buf *bp;
	struct cpu *cpup;

	if (flags & B_READ) {
		CPU_STATS_ENTER_K();
		cpup = CPU;	/* get pointer AFTER preemption is disabled */
		CPU_STATS_ADDQ(cpup, vm, pgin, 1);
		CPU_STATS_ADDQ(cpup, vm, pgpgin, btopr(len));
		if ((flags & B_ASYNC) == 0) {
			klwp_t *lwp = ttolwp(curthread);
			/* lwp may be NULL for pure kernel threads */
			if (lwp != NULL)
				lwp->lwp_ru.majflt++;
			CPU_STATS_ADDQ(cpup, vm, maj_fault, 1);
			/*
			 * NOTE(review): this probe dereferences pp before
			 * the pp != NULL check below — presumably sync
			 * reads always pass a page list; confirm callers.
			 */
			/* Kernel probe */
			TNF_PROBE_2(major_fault, "vm pagefault", /* CSTYLED */,
			    tnf_opaque, vnode, pp->p_vnode,
			    tnf_offset, offset, pp->p_offset);
		}
		/*
		 * Update statistics for pages being paged in:
		 * anonymous (swapfs), executable, or regular file pages.
		 */
		if (pp != NULL && pp->p_vnode != NULL) {
			if (IS_SWAPFSVP(pp->p_vnode)) {
				CPU_STATS_ADDQ(cpup, vm, anonpgin, btopr(len));
			} else {
				if (pp->p_vnode->v_flag & VVMEXEC) {
					CPU_STATS_ADDQ(cpup, vm, execpgin,
					    btopr(len));
				} else {
					CPU_STATS_ADDQ(cpup, vm, fspgin,
					    btopr(len));
				}
			}
		}
		CPU_STATS_EXIT_K();
		TRACE_1(TR_FAC_VM, TR_PAGE_WS_IN,
		    "page_ws_in:pp %p", pp);
		/* Kernel probe */
		TNF_PROBE_3(pagein, "vm pageio io", /* CSTYLED */,
		    tnf_opaque, vnode, pp->p_vnode,
		    tnf_offset, offset, pp->p_offset,
		    tnf_size, size, len);
	}

	bp = kmem_zalloc(sizeof (struct buf), KM_SLEEP);
	bp->b_bcount = len;
	bp->b_bufsize = len;
	bp->b_pages = pp;
	bp->b_flags = B_PAGEIO | B_NOCACHE | B_BUSY | flags;
	bp->b_offset = -1;
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	/* Initialize bp->b_sem in "locked" state */
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);

	VN_HOLD(vp);
	bp->b_vp = vp;
	THREAD_KPRI_RELEASE_N(btopr(len)); /* release kpri from page_locks */

	/*
	 * Caller sets dev & blkno and can adjust
	 * b_addr for page offset and can use bp_mapin
	 * to make pages kernel addressable.
	 */
	return (bp);
}
13810Sstevel@tonic-gate
13820Sstevel@tonic-gate void
pageio_done(struct buf * bp)13830Sstevel@tonic-gate pageio_done(struct buf *bp)
13840Sstevel@tonic-gate {
13850Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
13860Sstevel@tonic-gate if (bp->b_flags & B_REMAPPED)
13870Sstevel@tonic-gate bp_mapout(bp);
13880Sstevel@tonic-gate VN_RELE(bp->b_vp);
13890Sstevel@tonic-gate bp->b_vp = NULL;
13900Sstevel@tonic-gate ASSERT((bp->b_flags & B_NOCACHE) != 0);
13910Sstevel@tonic-gate
13920Sstevel@tonic-gate /* A sema_v(bp->b_sem) is implied if we are destroying it */
13930Sstevel@tonic-gate sema_destroy(&bp->b_sem);
13940Sstevel@tonic-gate sema_destroy(&bp->b_io);
13950Sstevel@tonic-gate kmem_free(bp, sizeof (struct buf));
13960Sstevel@tonic-gate }
13970Sstevel@tonic-gate
13980Sstevel@tonic-gate /*
13990Sstevel@tonic-gate * Check to see whether the buffers, except the one pointed by sbp,
14000Sstevel@tonic-gate * associated with the device are busy.
14010Sstevel@tonic-gate * NOTE: This expensive operation shall be improved together with ufs_icheck().
14020Sstevel@tonic-gate */
14030Sstevel@tonic-gate int
bcheck(dev_t dev,struct buf * sbp)14040Sstevel@tonic-gate bcheck(dev_t dev, struct buf *sbp)
14050Sstevel@tonic-gate {
14060Sstevel@tonic-gate struct buf *bp;
14070Sstevel@tonic-gate struct buf *dp;
14080Sstevel@tonic-gate int i;
14090Sstevel@tonic-gate kmutex_t *hmp;
14100Sstevel@tonic-gate
14110Sstevel@tonic-gate /*
14120Sstevel@tonic-gate * check for busy bufs for this filesystem
14130Sstevel@tonic-gate */
14140Sstevel@tonic-gate for (i = 0; i < v.v_hbuf; i++) {
14150Sstevel@tonic-gate dp = (struct buf *)&hbuf[i];
14160Sstevel@tonic-gate hmp = &hbuf[i].b_lock;
14170Sstevel@tonic-gate
14180Sstevel@tonic-gate mutex_enter(hmp);
14190Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
14200Sstevel@tonic-gate /*
14210Sstevel@tonic-gate * if buf is busy or dirty, then filesystem is busy
14220Sstevel@tonic-gate */
14230Sstevel@tonic-gate if ((bp->b_edev == dev) &&
14240Sstevel@tonic-gate ((bp->b_flags & B_STALE) == 0) &&
14250Sstevel@tonic-gate (bp->b_flags & (B_DELWRI|B_BUSY)) &&
14260Sstevel@tonic-gate (bp != sbp)) {
14270Sstevel@tonic-gate mutex_exit(hmp);
14280Sstevel@tonic-gate return (1);
14290Sstevel@tonic-gate }
14300Sstevel@tonic-gate }
14310Sstevel@tonic-gate mutex_exit(hmp);
14320Sstevel@tonic-gate }
14330Sstevel@tonic-gate return (0);
14340Sstevel@tonic-gate }
14350Sstevel@tonic-gate
14360Sstevel@tonic-gate /*
14370Sstevel@tonic-gate * Hash two 32 bit entities.
14380Sstevel@tonic-gate */
/*
 * Hash two 32 bit entities.
 *
 * Each of x and y is folded into the accumulator a byte-ish chunk at a
 * time using the recurrence h = h * 7 + chunk - 1, seeded with x - 1.
 */
int
hash2ints(int x, int y)
{
	static const int shift[] = { 8, 16, 24 };
	int h;
	int i;

	h = x - 1;
	for (i = 0; i < 3; i++)
		h = ((h * 7) + (x >> shift[i])) - 1;
	h = ((h * 7) + y) - 1;
	for (i = 0; i < 3; i++)
		h = ((h * 7) + (y >> shift[i])) - 1;

	return (h);
}
14550Sstevel@tonic-gate
14560Sstevel@tonic-gate
14570Sstevel@tonic-gate /*
14580Sstevel@tonic-gate * Return a new buffer struct.
14590Sstevel@tonic-gate * Create a new buffer if we haven't gone over our high water
14600Sstevel@tonic-gate * mark for memory, otherwise try to get one off the freelist.
14610Sstevel@tonic-gate *
14620Sstevel@tonic-gate * Returns a locked buf that has no id and is not on any hash or free
14630Sstevel@tonic-gate * list.
14640Sstevel@tonic-gate */
static struct buf *
bio_getfreeblk(long bsize)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	kmutex_t *hmp;
	uint_t start, end;

	/*
	 * bfreelist.b_bufsize represents the amount of memory
	 * we are allowed to allocate in the cache before we hit our hwm;
	 * references to it are protected by bfree_lock
	 * (mutex_enter(&bfree_lock) / mutex_exit(&bfree_lock)).
	 */
	bio_mem_get(bsize);	/* Account for our memory request */

again:
	bp = bio_bhdr_alloc();	/* Get a buf hdr */
	sema_p(&bp->b_sem);	/* Should never fail */

	ASSERT(bp->b_un.b_addr == NULL);
	bp->b_un.b_addr = kmem_alloc(bsize, KM_NOSLEEP);
	if (bp->b_un.b_addr != NULL) {
		/*
		 * Make the common path short
		 */
		bp->b_bufsize = bsize;
		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	} else {
		struct buf *save;

		save = bp;	/* Save bp we allocated */
		start = end = lastindex;

		biostats.bio_bufwant.value.ui32++;

		/*
		 * Memory isn't available from the system now. Scan
		 * the hash buckets till enough space is found: steal a
		 * cached buf of exactly the right size and reuse its memory.
		 */
		do {
			hp = &hbuf[start];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			mutex_enter(hmp);
			bp = dp->av_forw;

			while (bp != dp) {

				ASSERT(bp != NULL);

				/* Skip bufs someone else holds locked. */
				if (!sema_tryp(&bp->b_sem)) {
					bp = bp->av_forw;
					continue;
				}

				/*
				 * Since we are going down the freelist
				 * associated with this hash bucket the
				 * B_DELWRI flag should not be set.
				 */
				ASSERT(!(bp->b_flags & B_DELWRI));

				if (bp->b_bufsize == bsize) {
					/* Take it off the free and hash lists */
					hp->b_length--;
					notavail(bp);
					bremhash(bp);
					mutex_exit(hmp);

					/*
					 * Didn't kmem_alloc any more, so don't
					 * count it twice.
					 */
					mutex_enter(&bfree_lock);
					bfreelist.b_bufsize += bsize;
					mutex_exit(&bfree_lock);

					/*
					 * Update the lastindex value.
					 */
					lastindex = start;

					/*
					 * Put our saved bp back on the list
					 */
					sema_v(&save->b_sem);
					bio_bhdr_free(save);
					ASSERT(SEMA_HELD(&bp->b_sem));
					return (bp);
				}
				sema_v(&bp->b_sem);
				bp = bp->av_forw;
			}
			mutex_exit(hmp);
			start = ((start + 1) % v.v_hbuf);
		} while (start != end);

		/*
		 * Nothing reusable found; fall back to a sleeping
		 * allocation on the header we already have.
		 */
		biostats.bio_bufwait.value.ui32++;
		bp = save;	/* Use original bp */
		bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
	}

	bp->b_bufsize = bsize;
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (bp);
}
15730Sstevel@tonic-gate
15740Sstevel@tonic-gate /*
15750Sstevel@tonic-gate * Allocate a buffer header. If none currently available, allocate
15760Sstevel@tonic-gate * a new pool.
15770Sstevel@tonic-gate */
static struct buf *
bio_bhdr_alloc(void)
{
	struct buf *dp, *sdp;
	struct buf *bp;
	int i;

	/* Loop until a header can be popped off the global bhdrlist. */
	for (;;) {
		mutex_enter(&bhdr_lock);
		if (bhdrlist != NULL) {
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);
			bp->av_forw = NULL;
			return (bp);
		}
		mutex_exit(&bhdr_lock);

		/*
		 * Need to allocate a new pool. If the system is currently
		 * out of memory, then try freeing things on the freelist.
		 */
		dp = kmem_zalloc(sizeof (struct buf) * v.v_buf, KM_NOSLEEP);
		if (dp == NULL) {
			/*
			 * System can't give us a pool of headers, try
			 * recycling from the free lists.
			 */
			bio_recycle(BIO_HEADER, 0);
		} else {
			/* Initialize each header and chain them via av_forw. */
			sdp = dp;
			for (i = 0; i < v.v_buf; i++, dp++) {
				/*
				 * The next two lines are needed since NODEV
				 * is -1 and not NULL
				 */
				dp->b_dev = (o_dev_t)NODEV;
				dp->b_edev = NODEV;
				dp->av_forw = dp + 1;
				sema_init(&dp->b_sem, 1, NULL, SEMA_DEFAULT,
				    NULL);
				sema_init(&dp->b_io, 0, NULL, SEMA_DEFAULT,
				    NULL);
				dp->b_offset = -1;
			}
			/* Splice the new pool onto bhdrlist and take one. */
			mutex_enter(&bhdr_lock);
			(--dp)->av_forw = bhdrlist;	/* Fix last pointer */
			bhdrlist = sdp;
			nbuf += v.v_buf;
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);

			bp->av_forw = NULL;
			return (bp);
		}
	}
}
16360Sstevel@tonic-gate
/*
 * Return a buf header to the global bhdrlist free pool.
 * The header must be fully disconnected: off every hash/free list,
 * with no data buffer attached and all flags clear.
 */
static void
bio_bhdr_free(struct buf *bp)
{
	ASSERT(bp->b_back == NULL);
	ASSERT(bp->b_forw == NULL);
	ASSERT(bp->av_back == NULL);
	ASSERT(bp->av_forw == NULL);
	ASSERT(bp->b_un.b_addr == NULL);
	ASSERT(bp->b_dev == (o_dev_t)NODEV);
	ASSERT(bp->b_edev == NODEV);
	ASSERT(bp->b_flags == 0);

	/* Push onto the header freelist under bhdr_lock. */
	mutex_enter(&bhdr_lock);
	bp->av_forw = bhdrlist;
	bhdrlist = bp;
	mutex_exit(&bhdr_lock);
}
16540Sstevel@tonic-gate
16550Sstevel@tonic-gate /*
16560Sstevel@tonic-gate * If we haven't gone over the high water mark, it's o.k. to
16570Sstevel@tonic-gate * allocate more buffer space, otherwise recycle buffers
16580Sstevel@tonic-gate * from the freelist until enough memory is free for a bsize request.
16590Sstevel@tonic-gate *
16600Sstevel@tonic-gate * We account for this memory, even though
16610Sstevel@tonic-gate * we don't allocate it here.
16620Sstevel@tonic-gate */
16630Sstevel@tonic-gate static void
bio_mem_get(long bsize)16640Sstevel@tonic-gate bio_mem_get(long bsize)
16650Sstevel@tonic-gate {
16660Sstevel@tonic-gate mutex_enter(&bfree_lock);
16670Sstevel@tonic-gate if (bfreelist.b_bufsize > bsize) {
16680Sstevel@tonic-gate bfreelist.b_bufsize -= bsize;
16690Sstevel@tonic-gate mutex_exit(&bfree_lock);
16700Sstevel@tonic-gate return;
16710Sstevel@tonic-gate }
16720Sstevel@tonic-gate mutex_exit(&bfree_lock);
16730Sstevel@tonic-gate bio_recycle(BIO_MEM, bsize);
16740Sstevel@tonic-gate }
16750Sstevel@tonic-gate
16760Sstevel@tonic-gate /*
16770Sstevel@tonic-gate * flush a list of delayed write buffers.
16780Sstevel@tonic-gate * (currently used only by bio_recycle below.)
16790Sstevel@tonic-gate */
16800Sstevel@tonic-gate static void
bio_flushlist(struct buf * delwri_list)16810Sstevel@tonic-gate bio_flushlist(struct buf *delwri_list)
16820Sstevel@tonic-gate {
16830Sstevel@tonic-gate struct buf *bp;
16840Sstevel@tonic-gate
16850Sstevel@tonic-gate while (delwri_list != EMPTY_LIST) {
16860Sstevel@tonic-gate bp = delwri_list;
16870Sstevel@tonic-gate bp->b_flags |= B_AGE | B_ASYNC;
16880Sstevel@tonic-gate if (bp->b_vp == NULL) { /* !ufs */
16890Sstevel@tonic-gate BWRITE(bp);
16900Sstevel@tonic-gate } else { /* ufs */
16910Sstevel@tonic-gate UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
16920Sstevel@tonic-gate }
16930Sstevel@tonic-gate delwri_list = bp->b_list;
16940Sstevel@tonic-gate bp->b_list = NULL;
16950Sstevel@tonic-gate }
16960Sstevel@tonic-gate }
16970Sstevel@tonic-gate
16980Sstevel@tonic-gate /*
16990Sstevel@tonic-gate * Start recycling buffers on the freelist for one of 2 reasons:
17000Sstevel@tonic-gate * - we need a buffer header
17010Sstevel@tonic-gate * - we need to free up memory
17020Sstevel@tonic-gate * Once started we continue to recycle buffers until the B_AGE
17030Sstevel@tonic-gate * buffers are gone.
17040Sstevel@tonic-gate */
static void
bio_recycle(int want, long bsize)
{
	struct buf *bp, *dp, *dwp, *nbp;
	struct hbuf *hp;
	int found = 0;
	kmutex_t *hmp;
	int start, end;
	struct buf *delwri_list = EMPTY_LIST;

	/*
	 * Recycle buffers: sweep every hash bucket starting at lastindex,
	 * freeing clean B_AGE bufs (and flushing delayed writes) until
	 * the request — a header (BIO_HEADER) or bsize bytes of memory
	 * (BIO_MEM) — is satisfied and the B_AGE bufs are gone.
	 */
top:
	start = end = lastindex;
	do {
		hp = &hbuf[start];
		hmp = &hp->b_lock;
		dp = (struct buf *)hp;

		mutex_enter(hmp);
		bp = dp->av_forw;

		while (bp != dp) {

			ASSERT(bp != NULL);

			/* Skip bufs currently locked by someone else. */
			if (!sema_tryp(&bp->b_sem)) {
				bp = bp->av_forw;
				continue;
			}
			/*
			 * Do we really want to nuke all of the B_AGE stuff??
			 */
			if ((bp->b_flags & B_AGE) == 0 && found) {
				sema_v(&bp->b_sem);
				mutex_exit(hmp);
				lastindex = start;
				return;	/* All done */
			}

			ASSERT(MUTEX_HELD(&hp->b_lock));
			ASSERT(!(bp->b_flags & B_DELWRI));
			hp->b_length--;
			notavail(bp);

			/*
			 * Remove bhdr from cache, free up memory,
			 * and add the hdr to the freelist.
			 */
			bremhash(bp);
			mutex_exit(hmp);

			if (bp->b_bufsize) {
				kmem_free(bp->b_un.b_addr, bp->b_bufsize);
				bp->b_un.b_addr = NULL;
				/* Credit the freed bytes back to the allowance */
				mutex_enter(&bfree_lock);
				bfreelist.b_bufsize += bp->b_bufsize;
				mutex_exit(&bfree_lock);
			}

			bp->b_dev = (o_dev_t)NODEV;
			bp->b_edev = NODEV;
			bp->b_flags = 0;
			sema_v(&bp->b_sem);
			bio_bhdr_free(bp);
			if (want == BIO_HEADER) {
				found = 1;
			} else {
				ASSERT(want == BIO_MEM);
				if (!found && bfreelist.b_bufsize >= bsize) {
					/* Account for the memory we want */
					mutex_enter(&bfree_lock);
					/* re-check under the lock */
					if (bfreelist.b_bufsize >= bsize) {
						bfreelist.b_bufsize -= bsize;
						found = 1;
					}
					mutex_exit(&bfree_lock);
				}
			}

			/*
			 * Since we dropped hmp start from the
			 * begining.
			 */
			mutex_enter(hmp);
			bp = dp->av_forw;
		}
		mutex_exit(hmp);

		/*
		 * Look at the delayed write list.
		 * First gather into a private list, then write them.
		 */
		dwp = (struct buf *)&dwbuf[start];
		mutex_enter(&blist_lock);
		bio_doingflush++;
		mutex_enter(hmp);
		for (bp = dwp->av_forw; bp != dwp; bp = nbp) {

			ASSERT(bp != NULL);
			nbp = bp->av_forw;

			if (!sema_tryp(&bp->b_sem))
				continue;
			ASSERT(bp->b_flags & B_DELWRI);
			/*
			 * Do we really want to nuke all of the B_AGE stuff??
			 */

			if ((bp->b_flags & B_AGE) == 0 && found) {
				/*
				 * Done early: flush what was gathered so far,
				 * then wake anyone waiting on the flush count.
				 */
				sema_v(&bp->b_sem);
				mutex_exit(hmp);
				lastindex = start;
				mutex_exit(&blist_lock);
				bio_flushlist(delwri_list);
				mutex_enter(&blist_lock);
				bio_doingflush--;
				if (bio_flinv_cv_wanted) {
					bio_flinv_cv_wanted = 0;
					cv_broadcast(&bio_flushinval_cv);
				}
				mutex_exit(&blist_lock);
				return;	/* All done */
			}

			/*
			 * If the buffer is already on a flush or
			 * invalidate list then just skip it.
			 */
			if (bp->b_list != NULL) {
				sema_v(&bp->b_sem);
				continue;
			}
			/*
			 * We are still on the same bucket.
			 */
			hp->b_length--;
			notavail(bp);
			bp->b_list = delwri_list;
			delwri_list = bp;
		}
		mutex_exit(hmp);
		mutex_exit(&blist_lock);
		bio_flushlist(delwri_list);
		delwri_list = EMPTY_LIST;
		mutex_enter(&blist_lock);
		bio_doingflush--;
		if (bio_flinv_cv_wanted) {
			bio_flinv_cv_wanted = 0;
			cv_broadcast(&bio_flushinval_cv);
		}
		mutex_exit(&blist_lock);
		start = (start + 1) % v.v_hbuf;

	} while (start != end);

	if (found)
		return;

	/*
	 * Free lists exhausted and we haven't satisfied the request.
	 * Wait here for more entries to be added to freelist.
	 * Because this might have just happened, make it timed.
	 */
	mutex_enter(&bfree_lock);
	bfreelist.b_flags |= B_WANTED;
	(void) cv_reltimedwait(&bio_mem_cv, &bfree_lock, hz, TR_CLOCK_TICK);
	mutex_exit(&bfree_lock);
	goto top;
}
18760Sstevel@tonic-gate
18770Sstevel@tonic-gate /*
18780Sstevel@tonic-gate * See if the block is associated with some buffer
18790Sstevel@tonic-gate * (mainly to avoid getting hung up on a wait in breada).
18800Sstevel@tonic-gate */
18810Sstevel@tonic-gate static int
bio_incore(dev_t dev,daddr_t blkno)18820Sstevel@tonic-gate bio_incore(dev_t dev, daddr_t blkno)
18830Sstevel@tonic-gate {
18840Sstevel@tonic-gate struct buf *bp;
18850Sstevel@tonic-gate struct buf *dp;
18860Sstevel@tonic-gate uint_t index;
18870Sstevel@tonic-gate kmutex_t *hmp;
18880Sstevel@tonic-gate
18890Sstevel@tonic-gate index = bio_bhash(dev, blkno);
18900Sstevel@tonic-gate dp = (struct buf *)&hbuf[index];
18910Sstevel@tonic-gate hmp = &hbuf[index].b_lock;
18920Sstevel@tonic-gate
18930Sstevel@tonic-gate mutex_enter(hmp);
18940Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
18950Sstevel@tonic-gate if (bp->b_blkno == blkno && bp->b_edev == dev &&
18960Sstevel@tonic-gate (bp->b_flags & B_STALE) == 0) {
18970Sstevel@tonic-gate mutex_exit(hmp);
18980Sstevel@tonic-gate return (1);
18990Sstevel@tonic-gate }
19000Sstevel@tonic-gate }
19010Sstevel@tonic-gate mutex_exit(hmp);
19020Sstevel@tonic-gate return (0);
19030Sstevel@tonic-gate }
19040Sstevel@tonic-gate
19050Sstevel@tonic-gate static void
bio_pageio_done(struct buf * bp)19060Sstevel@tonic-gate bio_pageio_done(struct buf *bp)
19070Sstevel@tonic-gate {
19080Sstevel@tonic-gate if (bp->b_flags & B_PAGEIO) {
19090Sstevel@tonic-gate
19100Sstevel@tonic-gate if (bp->b_flags & B_REMAPPED)
19110Sstevel@tonic-gate bp_mapout(bp);
19120Sstevel@tonic-gate
19130Sstevel@tonic-gate if (bp->b_flags & B_READ)
19140Sstevel@tonic-gate pvn_read_done(bp->b_pages, bp->b_flags);
19150Sstevel@tonic-gate else
19160Sstevel@tonic-gate pvn_write_done(bp->b_pages, B_WRITE | bp->b_flags);
19170Sstevel@tonic-gate pageio_done(bp);
19180Sstevel@tonic-gate } else {
19190Sstevel@tonic-gate ASSERT(bp->b_flags & B_REMAPPED);
19200Sstevel@tonic-gate bp_mapout(bp);
19210Sstevel@tonic-gate brelse(bp);
19220Sstevel@tonic-gate }
19230Sstevel@tonic-gate }
19240Sstevel@tonic-gate
19250Sstevel@tonic-gate /*
19260Sstevel@tonic-gate * bioerror(9F) - indicate error in buffer header
19270Sstevel@tonic-gate * If 'error' is zero, remove the error indication.
19280Sstevel@tonic-gate */
19290Sstevel@tonic-gate void
bioerror(struct buf * bp,int error)19300Sstevel@tonic-gate bioerror(struct buf *bp, int error)
19310Sstevel@tonic-gate {
19320Sstevel@tonic-gate ASSERT(bp != NULL);
19330Sstevel@tonic-gate ASSERT(error >= 0);
19340Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
19350Sstevel@tonic-gate
19360Sstevel@tonic-gate if (error != 0) {
19370Sstevel@tonic-gate bp->b_flags |= B_ERROR;
19380Sstevel@tonic-gate } else {
19390Sstevel@tonic-gate bp->b_flags &= ~B_ERROR;
19400Sstevel@tonic-gate }
19410Sstevel@tonic-gate bp->b_error = error;
19420Sstevel@tonic-gate }
19430Sstevel@tonic-gate
19440Sstevel@tonic-gate /*
19450Sstevel@tonic-gate * bioreset(9F) - reuse a private buffer header after I/O is complete
19460Sstevel@tonic-gate */
/*
 * bioreset(9F) - reuse a private buffer header after I/O is complete
 * Equivalent to tearing the buf down (biofini) and re-initializing
 * it from scratch (bioinit).
 */
void
bioreset(struct buf *bp)
{
	ASSERT(bp != NULL);

	biofini(bp);
	bioinit(bp);
}
19550Sstevel@tonic-gate
19560Sstevel@tonic-gate /*
19570Sstevel@tonic-gate * biosize(9F) - return size of a buffer header
19580Sstevel@tonic-gate */
/*
 * biosize(9F) - return size of a buffer header
 * Lets drivers allocate buf storage without knowing sizeof (struct buf).
 */
size_t
biosize(void)
{
	return (sizeof (struct buf));
}
19640Sstevel@tonic-gate
19650Sstevel@tonic-gate /*
19660Sstevel@tonic-gate * biomodified(9F) - check if buffer is modified
19670Sstevel@tonic-gate */
19680Sstevel@tonic-gate int
biomodified(struct buf * bp)19690Sstevel@tonic-gate biomodified(struct buf *bp)
19700Sstevel@tonic-gate {
19710Sstevel@tonic-gate int npf;
19720Sstevel@tonic-gate int ppattr;
19730Sstevel@tonic-gate struct page *pp;
19740Sstevel@tonic-gate
19750Sstevel@tonic-gate ASSERT(bp != NULL);
19760Sstevel@tonic-gate
19770Sstevel@tonic-gate if ((bp->b_flags & B_PAGEIO) == 0) {
19780Sstevel@tonic-gate return (-1);
19790Sstevel@tonic-gate }
19800Sstevel@tonic-gate pp = bp->b_pages;
19810Sstevel@tonic-gate npf = btopr(bp->b_bcount + ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET));
19820Sstevel@tonic-gate
19830Sstevel@tonic-gate while (npf > 0) {
19840Sstevel@tonic-gate ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
1985*11066Srafael.vanoni@sun.com HAT_SYNC_STOPON_MOD);
19860Sstevel@tonic-gate if (ppattr & P_MOD)
19870Sstevel@tonic-gate return (1);
19880Sstevel@tonic-gate pp = pp->p_next;
19890Sstevel@tonic-gate npf--;
19900Sstevel@tonic-gate }
19910Sstevel@tonic-gate
19920Sstevel@tonic-gate return (0);
19930Sstevel@tonic-gate }
19940Sstevel@tonic-gate
19950Sstevel@tonic-gate /*
19960Sstevel@tonic-gate * bioinit(9F) - initialize a buffer structure
19970Sstevel@tonic-gate */
19980Sstevel@tonic-gate void
bioinit(struct buf * bp)19990Sstevel@tonic-gate bioinit(struct buf *bp)
20000Sstevel@tonic-gate {
20010Sstevel@tonic-gate bzero(bp, sizeof (struct buf));
20020Sstevel@tonic-gate sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
20030Sstevel@tonic-gate sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
20040Sstevel@tonic-gate bp->b_offset = -1;
20050Sstevel@tonic-gate }
20060Sstevel@tonic-gate
20070Sstevel@tonic-gate /*
20080Sstevel@tonic-gate * biofini(9F) - uninitialize a buffer structure
20090Sstevel@tonic-gate */
/*
 * biofini(9F) - uninitialize a buffer structure
 * Destroys the semaphores created by bioinit(); the inverse of bioinit.
 */
void
biofini(struct buf *bp)
{
	sema_destroy(&bp->b_io);
	sema_destroy(&bp->b_sem);
}
20160Sstevel@tonic-gate
20170Sstevel@tonic-gate /*
20180Sstevel@tonic-gate * bioclone(9F) - clone a buffer
20190Sstevel@tonic-gate */
struct buf *
bioclone(struct buf *bp, off_t off, size_t len, dev_t dev, daddr_t blkno,
    int (*iodone)(struct buf *), struct buf *bp_mem, int sleep)
{
	struct buf *bufp;

	ASSERT(bp);
	/* Use caller-supplied storage if given, else allocate a new buf. */
	if (bp_mem == NULL) {
		bufp = kmem_alloc(sizeof (struct buf), sleep);
		if (bufp == NULL) {
			/* Allocation failed (sleep == KM_NOSLEEP case). */
			return (NULL);
		}
		bioinit(bufp);
	} else {
		bufp = bp_mem;
		bioreset(bufp);
	}

#define	BUF_CLONE_FLAGS	(B_READ|B_WRITE|B_SHADOW|B_PHYS|B_PAGEIO|B_FAILFAST|\
	B_ABRWRITE)

	/*
	 * The cloned buffer does not inherit the B_REMAPPED flag.
	 */
	bufp->b_flags = (bp->b_flags & BUF_CLONE_FLAGS) | B_BUSY;
	bufp->b_bcount = len;
	bufp->b_blkno = blkno;
	bufp->b_iodone = iodone;
	bufp->b_proc = bp->b_proc;
	bufp->b_edev = dev;
	bufp->b_file = bp->b_file;
	bufp->b_offset = bp->b_offset;

	if (bp->b_flags & B_SHADOW) {
		ASSERT(bp->b_shadow);
		ASSERT(bp->b_flags & B_PHYS);

		/* Advance the shadow page-frame list by whole pages of off */
		bufp->b_shadow = bp->b_shadow +
		    btop(((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off);
		bufp->b_un.b_addr = (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
		if (bp->b_flags & B_REMAPPED)
			bufp->b_proc = NULL;
	} else {
		if (bp->b_flags & B_PAGEIO) {
			struct page *pp;
			off_t o;
			int i;

			/*
			 * Walk forward through the page list to the page
			 * containing the clone's starting offset; keep only
			 * the within-page remainder in b_addr.
			 */
			pp = bp->b_pages;
			o = ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off;
			for (i = btop(o); i > 0; i--) {
				pp = pp->p_next;
			}
			bufp->b_pages = pp;
			bufp->b_un.b_addr = (caddr_t)(o & PAGEOFFSET);
		} else {
			bufp->b_un.b_addr =
			    (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
			if (bp->b_flags & B_REMAPPED)
				bufp->b_proc = NULL;
		}
	}
	return (bufp);
}
2084