10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*2418Scth * Common Development and Distribution License (the "License"). 6*2418Scth * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*2418Scth * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 
240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 270Sstevel@tonic-gate /* All Rights Reserved */ 280Sstevel@tonic-gate 290Sstevel@tonic-gate /* 300Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 310Sstevel@tonic-gate * The Regents of the University of California 320Sstevel@tonic-gate * All Rights Reserved 330Sstevel@tonic-gate * 340Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 350Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 360Sstevel@tonic-gate * contributors. 370Sstevel@tonic-gate */ 380Sstevel@tonic-gate 390Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 400Sstevel@tonic-gate 410Sstevel@tonic-gate #include <sys/types.h> 420Sstevel@tonic-gate #include <sys/t_lock.h> 430Sstevel@tonic-gate #include <sys/sysmacros.h> 440Sstevel@tonic-gate #include <sys/conf.h> 450Sstevel@tonic-gate #include <sys/cpuvar.h> 460Sstevel@tonic-gate #include <sys/errno.h> 470Sstevel@tonic-gate #include <sys/debug.h> 480Sstevel@tonic-gate #include <sys/buf.h> 490Sstevel@tonic-gate #include <sys/var.h> 500Sstevel@tonic-gate #include <sys/vnode.h> 510Sstevel@tonic-gate #include <sys/bitmap.h> 520Sstevel@tonic-gate #include <sys/cmn_err.h> 530Sstevel@tonic-gate #include <sys/kmem.h> 540Sstevel@tonic-gate #include <sys/vmem.h> 550Sstevel@tonic-gate #include <sys/atomic.h> 560Sstevel@tonic-gate #include <vm/seg_kmem.h> 570Sstevel@tonic-gate #include <vm/page.h> 580Sstevel@tonic-gate #include <vm/pvn.h> 590Sstevel@tonic-gate #include <sys/vtrace.h> 600Sstevel@tonic-gate #include <sys/tnf_probe.h> 610Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 620Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 630Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 640Sstevel@tonic-gate #include <sys/systm.h> 650Sstevel@tonic-gate #include <sys/vfs.h> 660Sstevel@tonic-gate #include <sys/sdt.h> 670Sstevel@tonic-gate 
/* Locks */
static kmutex_t	blist_lock;	/* protects b_list */
static kmutex_t	bhdr_lock;	/* protects the bhdrlist */
static kmutex_t	bfree_lock;	/* protects the bfreelist structure */

struct hbuf	*hbuf;			/* Hash buckets */
struct dwbuf	*dwbuf;			/* Delayed write buckets */
static struct buf *bhdrlist;		/* buf header free list */
static int	nbuf;			/* number of buffer headers allocated */

static int	lastindex;		/* Reference point on where to start */
					/* when looking for free buffers */

/* Hash a (device, block number) pair into a bucket index. */
#define	bio_bhash(dev, bn)	(hash2ints((dev), (int)(bn)) & v.v_hmask)
#define	EMPTY_LIST	((struct buf *)-1)

static kcondvar_t	bio_mem_cv;	/* Condition variables */
static kcondvar_t	bio_flushinval_cv;
static int	bio_doingflush;		/* flush in progress */
static int	bio_doinginval;		/* inval in progress */
static int	bio_flinv_cv_wanted;	/* someone waiting for cv */

/*
 * Statistics on the buffer cache
 */
struct biostats biostats = {
	{ "buffer_cache_lookups",		KSTAT_DATA_UINT32 },
	{ "buffer_cache_hits",			KSTAT_DATA_UINT32 },
	{ "new_buffer_requests",		KSTAT_DATA_UINT32 },
	{ "waits_for_buffer_allocs",		KSTAT_DATA_UINT32 },
	{ "buffers_locked_by_someone",		KSTAT_DATA_UINT32 },
	{ "duplicate_buffers_found",		KSTAT_DATA_UINT32 }
};

/*
 * kstat data
 */
kstat_named_t	*biostats_ptr = (kstat_named_t *)&biostats;
uint_t		biostats_ndata = (uint_t)(sizeof (biostats) /
					sizeof (kstat_named_t));

/*
 * Statistics on ufs buffer cache
 * Not protected by locks
 */
struct ufsbiostats ub = {
	{ "breads",			KSTAT_DATA_UINT32 },
	{ "bwrites",			KSTAT_DATA_UINT32 },
	{ "fbiwrites",			KSTAT_DATA_UINT32 },
	{ "getpages",			KSTAT_DATA_UINT32 },
	{ "getras",			KSTAT_DATA_UINT32 },
	{ "putsyncs",			KSTAT_DATA_UINT32 },
	{ "putasyncs",			KSTAT_DATA_UINT32 },
	{ "putpageios",			KSTAT_DATA_UINT32 },
};

/*
 * more UFS Logging eccentricities...
 *
 * required since "#pragma weak ..." doesn't work in reverse order.
 * i.e.: genunix (bio.c) is loaded before the ufs modules and pointers
 *       to ufs routines don't get plugged into bio.c calls so
 *       we initialize it when setting up the "lufsops" table
 *       in "lufs.c:_init()"
 */
void (*bio_lufs_strategy)(void *, buf_t *);
void (*bio_snapshot_strategy)(void *, buf_t *);


/* Private routines */
static struct buf	*bio_getfreeblk(long);
static void		bio_mem_get(long);
static void		bio_bhdr_free(struct buf *);
static struct buf	*bio_bhdr_alloc(void);
static void		bio_recycle(int, long);
static void		bio_pageio_done(struct buf *);
static int		bio_incore(dev_t, daddr_t);

/*
 * Buffer cache constants
 * (the *_PERCENT values are divisors of physical memory, so
 * 100/2 == 50 yields 2% and 100/20 == 5 yields 20%)
 */
#define	BIO_BUF_PERCENT	(100/2)		/* default: 2% of memory */
#define	BIO_MAX_PERCENT	(100/20)	/* max is 20% of real memory */
#define	BIO_BHDR_POOL	100		/* Default bhdr pool size */
#define	BIO_MIN_HDR	10		/* Minimum number of buffer headers */
#define	BIO_MIN_HWM	(BIO_MIN_HDR * MAXBSIZE / 1024)
#define	BIO_HASHLEN	4		/* Target length of hash chains */


/* Flags for bio_recycle() */
#define	BIO_HEADER	0x01
#define	BIO_MEM		0x02

extern	int bufhwm;		/* User tunable - high water mark for mem */
extern	int bufhwm_pct;		/* ditto - given in % of physmem */

/*
 * The following routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer
 * to the buffer header; the buffer returned is locked with a
 * binary semaphore so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already locked, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread/BREAD
 *	breada
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite/BWRITE/brwrite
 *	bdwrite/bdrwrite
 *	bawrite
 *	brelse
 *
 * The B_WANTED/B_BUSY bits are NOT used by these routines for synchronization.
 * Instead, a binary semaphore, b_sem is used to gain exclusive access to
 * a buffer and a binary semaphore, b_io is used for I/O synchronization.
 * B_DONE is still used to denote a buffer with I/O complete on it.
 *
 * The bfreelist.b_bcount field is computed every time fsflush runs.  It
 * should not be used where a very accurate count of the free buffers is
 * needed.
 */

/*
 * Read in (if necessary) the block and return a buffer pointer.
 *
 * This interface is provided for binary compatibility.  Using
 * BREAD() directly avoids the extra function call overhead invoked
 * by calling this routine.
 */
struct buf *
bread(dev_t dev, daddr_t blkno, long bsize)
{
	return (BREAD(dev, blkno, bsize));
}

/*
 * Common code for reading a buffer with various options
 *
 * Read in (if necessary) the block and return a buffer pointer.
 *
 * arg is either NULL (non-ufs caller) or a struct ufsvfs *; for ufs the
 * I/O may be routed through the logging or snapshot strategy hooks.
 * Returns with the buffer's b_sem held (via getblk_common).
 */
struct buf *
bread_common(void *arg, dev_t dev, daddr_t blkno, long bsize)
{
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	klwp_t *lwp = ttolwp(curthread);

	CPU_STATS_ADD_K(sys, lread, 1);
	bp = getblk_common(ufsvfsp, dev, blkno, bsize, /* errflg */ 1);
	if (bp->b_flags & B_DONE)
		/* already valid in the cache; no device I/O needed */
		return (bp);
	bp->b_flags |= B_READ;
	ASSERT(bp->b_bcount == bsize);
	if (ufsvfsp == NULL) {					/* !ufs */
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
		/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
		/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ufsvfsp->vfs_iotstamp = lbolt;
		ub.ub_breads.value.ul++;		/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (lwp != NULL)
		lwp->lwp_ru.inblock++;
	CPU_STATS_ADD_K(sys, bread, 1);
	(void) biowait(bp);
	return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev_t dev, daddr_t blkno, daddr_t rablkno, long bsize)
{
	struct buf *bp, *rabp;
	klwp_t *lwp = ttolwp(curthread);

	bp = NULL;
	if (!bio_incore(dev, blkno)) {
		CPU_STATS_ADD_K(sys, lread, 1);
		bp = GETBLK(dev, blkno, bsize);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = bsize;
			(void) bdev_strategy(bp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	/*
	 * Only start the read-ahead if a free-ish buffer is likely
	 * available (bfreelist.b_bcount is an estimate, see above).
	 */
	if (rablkno && bfreelist.b_bcount > 1 &&
	    !bio_incore(dev, rablkno)) {
		rabp = GETBLK(dev, rablkno, bsize);
		if (rabp->b_flags & B_DONE)
			/* already valid; nobody is waiting for it */
			brelse(rabp);
		else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = bsize;
			(void) bdev_strategy(rabp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	if (bp == NULL)
		/* primary block was in core; fall back to plain BREAD */
		return (BREAD(dev, blkno, bsize));
	(void) biowait(bp);
	return (bp);
}

/*
 * Common code for writing a buffer with various options.
 *
 * force_wait  - wait for write completion regardless of B_ASYNC flag
 * do_relse    - release the buffer when we are done
 * clear_flags - flags to clear from the buffer
 *
 * Caller must hold the buffer's b_sem (asserted below).
 */
void
bwrite_common(void *arg, struct buf *bp, int force_wait,
    int do_relse, int clear_flags)
{
	register int do_wait;
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	int flag;
	klwp_t *lwp = ttolwp(curthread);
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));
	flag = bp->b_flags;	/* snapshot before clearing */
	bp->b_flags &= ~clear_flags;
	if (lwp != NULL)
		lwp->lwp_ru.oublock++;
	CPU_STATS_ENTER_K();
	cpup = CPU;		/* get pointer AFTER preemption is disabled */
	CPU_STATS_ADDQ(cpup, sys, lwrite, 1);
	CPU_STATS_ADDQ(cpup, sys, bwrite, 1);
	do_wait = ((flag & B_ASYNC) == 0 || force_wait);
	if (do_wait == 0)
		CPU_STATS_ADDQ(cpup, sys, bawrite, 1);
	CPU_STATS_EXIT_K();
	if (ufsvfsp == NULL) {
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
		/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
		/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ub.ub_bwrites.value.ul++;	/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (do_wait) {
		(void) biowait(bp);
		if (do_relse) {
			brelse(bp);
		}
	}
}

/*
 * Write the buffer, waiting for completion (unless B_ASYNC is set).
 * Then release the buffer.
 * This interface is provided for binary compatibility.  Using
 * BWRITE() directly avoids the extra function call overhead invoked
 * by calling this routine.
 */
void
bwrite(struct buf *bp)
{
	BWRITE(bp);
}

/*
 * Write the buffer, waiting for completion.
 * But don't release the buffer afterwards.
 * This interface is provided for binary compatibility.  Using
 * BWRITE2() directly avoids the extra function call overhead.
 */
void
bwrite2(struct buf *bp)
{
	BWRITE2(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * Also save the time that the block is first marked as delayed
 * so that it will be written in a reasonable time.
 */
void
bdwrite(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	CPU_STATS_ADD_K(sys, lwrite, 1);
	if ((bp->b_flags & B_DELWRI) == 0)
		/* first time delayed: stamp so fsflush writes it later */
		bp->b_start = lbolt;
	/*
	 * B_DONE allows others to use the buffer, B_DELWRI causes the
	 * buffer to be written before being reused, and setting b_resid
	 * to zero says the buffer is complete.
	 */
	bp->b_flags |= B_DELWRI | B_DONE;
	bp->b_resid = 0;
	brelse(bp);
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
void
bawrite(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));

	/* Use bfreelist.b_bcount as a weird-ass heuristic */
	if (bfreelist.b_bcount > 4)
		bp->b_flags |= B_ASYNC;
	BWRITE(bp);
}

/*
 * Release the buffer, with no I/O implied.
 *
 * Puts the buffer back on the appropriate hash bucket's free or
 * delayed-write list and drops b_sem.  B_NOCACHE buffers (e.g. those
 * made by ngeteblk/geteblk) are destroyed instead of being cached.
 */
void
brelse(struct buf *bp)
{
	struct buf	**backp;
	uint_t		index;
	kmutex_t	*hmp;
	struct buf	*dp;
	struct hbuf	*hp;


	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * Clear the retry write flag if the buffer was written without
	 * error.  The presence of B_DELWRI means the buffer has not yet
	 * been written and the presence of B_ERROR means that an error
	 * is still occurring.
	 */
	if ((bp->b_flags & (B_ERROR | B_DELWRI | B_RETRYWRI)) == B_RETRYWRI) {
		bp->b_flags &= ~B_RETRYWRI;
	}

	/* Check for anomalous conditions */
	if (bp->b_flags & (B_ERROR|B_NOCACHE)) {
		if (bp->b_flags & B_NOCACHE) {
			/* Don't add to the freelist.  Destroy it now */
			kmem_free(bp->b_un.b_addr, bp->b_bufsize);
			sema_destroy(&bp->b_sem);
			sema_destroy(&bp->b_io);
			kmem_free(bp, sizeof (struct buf));
			return;
		}
		/*
		 * If a write failed and we are supposed to retry write,
		 * don't toss the buffer.  Keep it around and mark it
		 * delayed write in the hopes that it will eventually
		 * get flushed (and still keep the system running.)
		 */
		if ((bp->b_flags & (B_READ | B_RETRYWRI)) == B_RETRYWRI) {
			bp->b_flags |= B_DELWRI;
			/* keep fsflush from trying continuously to flush */
			bp->b_start = lbolt;
		} else
			bp->b_flags |= B_AGE|B_STALE;
		bp->b_flags &= ~B_ERROR;
		bp->b_error = 0;
	}

	/*
	 * If delayed write is set then put it on the delayed
	 * write list instead of the free buffer list.
	 */
	index = bio_bhash(bp->b_edev, bp->b_blkno);
	hmp = &hbuf[index].b_lock;

	mutex_enter(hmp);
	hp = &hbuf[index];
	dp = (struct buf *)hp;

	/*
	 * Make sure that the number of entries on this list are
	 * Zero <= count <= total # buffers
	 */
	ASSERT(hp->b_length >= 0);
	ASSERT(hp->b_length < nbuf);

	hp->b_length++;		/* We are adding this buffer */

	if (bp->b_flags & B_DELWRI) {
		/*
		 * This buffer goes on the delayed write buffer list
		 */
		dp = (struct buf *)&dwbuf[index];
	}
	ASSERT(bp->b_bufsize > 0);
	ASSERT(bp->b_bcount > 0);
	ASSERT(bp->b_un.b_addr != NULL);

	if (bp->b_flags & B_AGE) {
		/* B_AGE: insert at the head so it is reused first */
		backp = &dp->av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = dp;
	} else {
		/* normal case: insert at the tail (LRU order) */
		backp = &dp->av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = dp;
	}
	mutex_exit(hmp);

	if (bfreelist.b_flags & B_WANTED) {
		/*
		 * Should come here very very rarely.
		 * (flag is re-checked under bfree_lock before waking)
		 */
		mutex_enter(&bfree_lock);
		if (bfreelist.b_flags & B_WANTED) {
			bfreelist.b_flags &= ~B_WANTED;
			cv_broadcast(&bio_mem_cv);
		}
		mutex_exit(&bfree_lock);
	}

	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC);
	/*
	 * Don't let anyone get the buffer off the freelist before we
	 * release our hold on it.
	 */
	sema_v(&bp->b_sem);
}

/*
 * Return a count of the number of B_BUSY buffers in the system
 * Can only be used as a good estimate.  If 'cleanit' is set,
 * try to flush all bufs.
 */
int
bio_busy(int cleanit)
{
	struct buf *bp, *dp;
	int busy = 0;
	int i;
	kmutex_t *hmp;

	for (i = 0; i < v.v_hbuf; i++) {
		vfs_syncprogress();
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_flags & B_BUSY)
				busy++;
		}
		mutex_exit(hmp);
	}

	if (cleanit && busy != 0) {
		bflush(NODEV);
	}

	return (busy);
}

5510Sstevel@tonic-gate /* 5520Sstevel@tonic-gate * this interface is provided for binary compatibility. 5530Sstevel@tonic-gate * 5540Sstevel@tonic-gate * Assign a buffer for the given block. If the appropriate 5550Sstevel@tonic-gate * block is already associated, return it; otherwise search 5560Sstevel@tonic-gate * for the oldest non-busy buffer and reassign it. 5570Sstevel@tonic-gate */ 5580Sstevel@tonic-gate struct buf * 5590Sstevel@tonic-gate getblk(dev_t dev, daddr_t blkno, long bsize) 5600Sstevel@tonic-gate { 5610Sstevel@tonic-gate return (getblk_common(/* ufsvfsp */ NULL, dev, 5620Sstevel@tonic-gate blkno, bsize, /* errflg */ 0)); 5630Sstevel@tonic-gate } 5640Sstevel@tonic-gate 5650Sstevel@tonic-gate /* 5660Sstevel@tonic-gate * Assign a buffer for the given block. If the appropriate 5670Sstevel@tonic-gate * block is already associated, return it; otherwise search 5680Sstevel@tonic-gate * for the oldest non-busy buffer and reassign it. 5690Sstevel@tonic-gate */ 5700Sstevel@tonic-gate struct buf * 5710Sstevel@tonic-gate getblk_common(void * arg, dev_t dev, daddr_t blkno, long bsize, int errflg) 5720Sstevel@tonic-gate { 5730Sstevel@tonic-gate ufsvfs_t *ufsvfsp = (struct ufsvfs *)arg; 5740Sstevel@tonic-gate struct buf *bp; 5750Sstevel@tonic-gate struct buf *dp; 5760Sstevel@tonic-gate struct buf *nbp = NULL; 5770Sstevel@tonic-gate struct buf *errbp; 5780Sstevel@tonic-gate uint_t index; 5790Sstevel@tonic-gate kmutex_t *hmp; 5800Sstevel@tonic-gate struct hbuf *hp; 5810Sstevel@tonic-gate 5820Sstevel@tonic-gate if (getmajor(dev) >= devcnt) 5830Sstevel@tonic-gate cmn_err(CE_PANIC, "blkdev"); 5840Sstevel@tonic-gate 5850Sstevel@tonic-gate biostats.bio_lookup.value.ui32++; 5860Sstevel@tonic-gate 5870Sstevel@tonic-gate index = bio_bhash(dev, blkno); 5880Sstevel@tonic-gate hp = &hbuf[index]; 5890Sstevel@tonic-gate dp = (struct buf *)hp; 5900Sstevel@tonic-gate hmp = &hp->b_lock; 5910Sstevel@tonic-gate 5920Sstevel@tonic-gate mutex_enter(hmp); 5930Sstevel@tonic-gate loop: 
5940Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { 5950Sstevel@tonic-gate if (bp->b_blkno != blkno || bp->b_edev != dev || 5960Sstevel@tonic-gate (bp->b_flags & B_STALE)) 5970Sstevel@tonic-gate continue; 5980Sstevel@tonic-gate /* 5990Sstevel@tonic-gate * Avoid holding the hash lock in the event that 6000Sstevel@tonic-gate * the buffer is locked by someone. Since the hash chain 6010Sstevel@tonic-gate * may change when we drop the hash lock 6020Sstevel@tonic-gate * we have to start at the beginning of the chain if the 6030Sstevel@tonic-gate * buffer identity/contents aren't valid. 6040Sstevel@tonic-gate */ 6050Sstevel@tonic-gate if (!sema_tryp(&bp->b_sem)) { 6060Sstevel@tonic-gate biostats.bio_bufbusy.value.ui32++; 6070Sstevel@tonic-gate mutex_exit(hmp); 6080Sstevel@tonic-gate /* 6090Sstevel@tonic-gate * OK, we are dealing with a busy buffer. 6100Sstevel@tonic-gate * In the case that we are panicking and we 6110Sstevel@tonic-gate * got called from bread(), we have some chance 6120Sstevel@tonic-gate * for error recovery. So better bail out from 6130Sstevel@tonic-gate * here since sema_p() won't block. If we got 6140Sstevel@tonic-gate * called directly from ufs routines, there is 6150Sstevel@tonic-gate * no way to report an error yet. 6160Sstevel@tonic-gate */ 6170Sstevel@tonic-gate if (panicstr && errflg) 6180Sstevel@tonic-gate goto errout; 6190Sstevel@tonic-gate /* 6200Sstevel@tonic-gate * For the following line of code to work 6210Sstevel@tonic-gate * correctly never kmem_free the buffer "header". 
6220Sstevel@tonic-gate */ 6230Sstevel@tonic-gate sema_p(&bp->b_sem); 6240Sstevel@tonic-gate if (bp->b_blkno != blkno || bp->b_edev != dev || 6250Sstevel@tonic-gate (bp->b_flags & B_STALE)) { 6260Sstevel@tonic-gate sema_v(&bp->b_sem); 6270Sstevel@tonic-gate mutex_enter(hmp); 6280Sstevel@tonic-gate goto loop; /* start over */ 6290Sstevel@tonic-gate } 6300Sstevel@tonic-gate mutex_enter(hmp); 6310Sstevel@tonic-gate } 6320Sstevel@tonic-gate /* Found */ 6330Sstevel@tonic-gate biostats.bio_hit.value.ui32++; 6340Sstevel@tonic-gate bp->b_flags &= ~B_AGE; 6350Sstevel@tonic-gate 6360Sstevel@tonic-gate /* 6370Sstevel@tonic-gate * Yank it off the free/delayed write lists 6380Sstevel@tonic-gate */ 6390Sstevel@tonic-gate hp->b_length--; 6400Sstevel@tonic-gate notavail(bp); 6410Sstevel@tonic-gate mutex_exit(hmp); 6420Sstevel@tonic-gate 6430Sstevel@tonic-gate ASSERT((bp->b_flags & B_NOCACHE) == NULL); 6440Sstevel@tonic-gate 6450Sstevel@tonic-gate if (nbp == NULL) { 6460Sstevel@tonic-gate /* 6470Sstevel@tonic-gate * Make the common path short. 6480Sstevel@tonic-gate */ 6490Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 6500Sstevel@tonic-gate return (bp); 6510Sstevel@tonic-gate } 6520Sstevel@tonic-gate 6530Sstevel@tonic-gate biostats.bio_bufdup.value.ui32++; 6540Sstevel@tonic-gate 6550Sstevel@tonic-gate /* 6560Sstevel@tonic-gate * The buffer must have entered during the lock upgrade 6570Sstevel@tonic-gate * so free the new buffer we allocated and return the 6580Sstevel@tonic-gate * found buffer. 
6590Sstevel@tonic-gate */ 6600Sstevel@tonic-gate kmem_free(nbp->b_un.b_addr, nbp->b_bufsize); 6610Sstevel@tonic-gate nbp->b_un.b_addr = NULL; 6620Sstevel@tonic-gate 6630Sstevel@tonic-gate /* 6640Sstevel@tonic-gate * Account for the memory 6650Sstevel@tonic-gate */ 6660Sstevel@tonic-gate mutex_enter(&bfree_lock); 6670Sstevel@tonic-gate bfreelist.b_bufsize += nbp->b_bufsize; 6680Sstevel@tonic-gate mutex_exit(&bfree_lock); 6690Sstevel@tonic-gate 6700Sstevel@tonic-gate /* 6710Sstevel@tonic-gate * Destroy buf identity, and place on avail list 6720Sstevel@tonic-gate */ 6730Sstevel@tonic-gate nbp->b_dev = (o_dev_t)NODEV; 6740Sstevel@tonic-gate nbp->b_edev = NODEV; 6750Sstevel@tonic-gate nbp->b_flags = 0; 6760Sstevel@tonic-gate nbp->b_file = NULL; 6770Sstevel@tonic-gate nbp->b_offset = -1; 6780Sstevel@tonic-gate 6790Sstevel@tonic-gate sema_v(&nbp->b_sem); 6800Sstevel@tonic-gate bio_bhdr_free(nbp); 6810Sstevel@tonic-gate 6820Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 6830Sstevel@tonic-gate return (bp); 6840Sstevel@tonic-gate } 6850Sstevel@tonic-gate 6860Sstevel@tonic-gate /* 6870Sstevel@tonic-gate * bio_getfreeblk may block so check the hash chain again. 6880Sstevel@tonic-gate */ 6890Sstevel@tonic-gate if (nbp == NULL) { 6900Sstevel@tonic-gate mutex_exit(hmp); 6910Sstevel@tonic-gate nbp = bio_getfreeblk(bsize); 6920Sstevel@tonic-gate mutex_enter(hmp); 6930Sstevel@tonic-gate goto loop; 6940Sstevel@tonic-gate } 6950Sstevel@tonic-gate 6960Sstevel@tonic-gate /* 6970Sstevel@tonic-gate * New buffer. Assign nbp and stick it on the hash. 
6980Sstevel@tonic-gate */ 6990Sstevel@tonic-gate nbp->b_flags = B_BUSY; 7000Sstevel@tonic-gate nbp->b_edev = dev; 7010Sstevel@tonic-gate nbp->b_dev = (o_dev_t)cmpdev(dev); 7020Sstevel@tonic-gate nbp->b_blkno = blkno; 7030Sstevel@tonic-gate nbp->b_iodone = NULL; 7040Sstevel@tonic-gate nbp->b_bcount = bsize; 7050Sstevel@tonic-gate /* 7060Sstevel@tonic-gate * If we are given a ufsvfsp and the vfs_root field is NULL 7070Sstevel@tonic-gate * then this must be I/O for a superblock. A superblock's 7080Sstevel@tonic-gate * buffer is set up in mountfs() and there is no root vnode 7090Sstevel@tonic-gate * at that point. 7100Sstevel@tonic-gate */ 7110Sstevel@tonic-gate if (ufsvfsp && ufsvfsp->vfs_root) { 7120Sstevel@tonic-gate nbp->b_vp = ufsvfsp->vfs_root; 7130Sstevel@tonic-gate } else { 7140Sstevel@tonic-gate nbp->b_vp = NULL; 7150Sstevel@tonic-gate } 7160Sstevel@tonic-gate 7170Sstevel@tonic-gate ASSERT((nbp->b_flags & B_NOCACHE) == NULL); 7180Sstevel@tonic-gate 7190Sstevel@tonic-gate binshash(nbp, dp); 7200Sstevel@tonic-gate mutex_exit(hmp); 7210Sstevel@tonic-gate 7220Sstevel@tonic-gate ASSERT(SEMA_HELD(&nbp->b_sem)); 7230Sstevel@tonic-gate 7240Sstevel@tonic-gate return (nbp); 7250Sstevel@tonic-gate 7260Sstevel@tonic-gate 7270Sstevel@tonic-gate /* 7280Sstevel@tonic-gate * Come here in case of an internal error. At this point we couldn't 7290Sstevel@tonic-gate * get a buffer, but he have to return one. Hence we allocate some 7300Sstevel@tonic-gate * kind of error reply buffer on the fly. This buffer is marked as 7310Sstevel@tonic-gate * B_NOCACHE | B_AGE | B_ERROR | B_DONE to assure the following: 7320Sstevel@tonic-gate * - B_ERROR will indicate error to the caller. 7330Sstevel@tonic-gate * - B_DONE will prevent us from reading the buffer from 7340Sstevel@tonic-gate * the device. 7350Sstevel@tonic-gate * - B_NOCACHE will cause that this buffer gets free'd in 7360Sstevel@tonic-gate * brelse(). 
 */

errout:
	/*
	 * Internal-error reply path: hand back a throw-away buffer that is
	 * pre-marked so the caller sees the failure (B_ERROR), no device
	 * read is attempted (B_DONE), and brelse() will free it (B_NOCACHE,
	 * set by geteblk()) -- see the block comment above.
	 */
	errbp = geteblk();
	sema_p(&errbp->b_sem);
	errbp->b_flags &= ~B_BUSY;
	errbp->b_flags |= (B_ERROR | B_DONE);
	return (errbp);
}

/*
 * Get an empty block, not assigned to any particular device.
 * Returns a locked buffer that is not on any hash or free list.
 */
struct buf *
ngeteblk(long bsize)
{
	struct buf *bp;

	/* KM_SLEEP allocations cannot fail; no NULL checks needed. */
	bp = kmem_alloc(sizeof (struct buf), KM_SLEEP);
	/*
	 * bioinit() presumably leaves b_sem held, since the contract above
	 * promises a locked buffer -- TODO(review): confirm in bioinit().
	 */
	bioinit(bp);
	bp->av_forw = bp->av_back = NULL;
	bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
	bp->b_bufsize = bsize;
	/* B_NOCACHE | B_AGE: brelse() will free this buffer, not cache it */
	bp->b_flags = B_BUSY | B_NOCACHE | B_AGE;
	bp->b_dev = (o_dev_t)NODEV;	/* not bound to any device */
	bp->b_edev = NODEV;
	bp->b_lblkno = 0;
	bp->b_bcount = bsize;
	bp->b_iodone = NULL;
	return (bp);
}

/*
 * Interface of geteblk() is kept intact to maintain driver compatibility.
 * Use ngeteblk() to allocate block size other than 1 KB.
 */
struct buf *
geteblk(void)
{
	/* Fixed 1 KB block size for historical driver compatibility. */
	return (ngeteblk((long)1024));
}

/*
 * Return a buffer w/o sleeping: look up (dev, blkno) in the buffer
 * cache and return the buffer locked only if both the hash-chain lock
 * and the buffer's b_sem can be taken without blocking and the buffer
 * contents are valid (B_DONE).  Returns NULL in every other case.
 */
struct buf *
trygetblk(dev_t dev, daddr_t blkno)
{
	struct buf	*bp;
	struct buf	*dp;
	struct hbuf	*hp;
	kmutex_t	*hmp;
	uint_t		index;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	hmp = &hp->b_lock;

	/* Hash-chain lock contended: give up rather than sleep. */
	if (!mutex_tryenter(hmp))
		return (NULL);

	dp = (struct buf *)hp;
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Get access to a valid buffer without sleeping
		 */
		if (sema_tryp(&bp->b_sem)) {
			if (bp->b_flags & B_DONE) {
				/* Pull it off the free list and return it. */
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				return (bp);
			} else {
				/* I/O not complete: not usable here. */
				sema_v(&bp->b_sem);
				break;
			}
		}
		/* b_sem busy -- the matching buffer is in use; fail. */
		break;
	}
	mutex_exit(hmp);
	return (NULL);
}

/*
 * Wait for I/O
 * completion on the buffer; return errors
 * to the user.
 */
int
iowait(struct buf *bp)
{
	/* Thin wrapper over biowait(); caller must hold b_sem. */
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (biowait(bp));
}

/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 */
void
iodone(struct buf *bp)
{
	/* Thin wrapper over biodone(); caller must hold b_sem. */
	ASSERT(SEMA_HELD(&bp->b_sem));
	(void) biodone(bp);
}

/*
 * Zero the core associated with a buffer.
 */
void
clrbuf(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	bzero(bp->b_un.b_addr, bp->b_bcount);
	bp->b_resid = 0;	/* whole buffer is now "transferred" */
}


/*
 * Make sure all write-behind blocks on dev (or NODEV for all)
 * are flushed out.
 */
void
bflush(dev_t dev)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *delwri_list = EMPTY_LIST;
	int i, index;
	kmutex_t *hmp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any invalidates or flushes ahead of us to finish.
	 * We really could split blist_lock up per device for better
	 * parallelism here.
	 */
	while (bio_doinginval || bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doingflush++;
	/*
	 * Gather all B_DELWRI buffer for device.
	 * Lock ordering is b_sem > hash lock (brelse).
	 * Since we are finding the buffer via the delayed write list,
	 * it may be busy and we would block trying to get the
	 * b_sem lock while holding hash lock. So transfer all the
	 * candidates on the delwri_list and then drop the hash locks.
	 */
	for (i = 0; i < v.v_hbuf; i++) {
		vfs_syncprogress();
		hmp = &hbuf[i].b_lock;
		dp = (struct buf *)&dwbuf[i];
		mutex_enter(hmp);
		for (bp = dp->av_forw; bp != dp; bp = bp->av_forw) {
			if (dev == NODEV || bp->b_edev == dev) {
				/*
				 * b_list == NULL means not yet claimed by
				 * another concurrent gather pass.
				 */
				if (bp->b_list == NULL) {
					bp->b_list = delwri_list;
					delwri_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/*
	 * Now that the hash locks have been dropped grab the semaphores
	 * and write back all the buffers that have B_DELWRI set.
	 */
	while (delwri_list != EMPTY_LIST) {
		vfs_syncprogress();
		bp = delwri_list;

		sema_p(&bp->b_sem);	/* may block */
		/*
		 * Re-validate after the blocking acquire: the buffer may
		 * have been reassigned, or we may be in panic context
		 * where a busy buffer must be left alone.
		 */
		if ((dev != bp->b_edev && dev != NODEV) ||
		    (panicstr && bp->b_flags & B_BUSY)) {
			sema_v(&bp->b_sem);
			delwri_list = bp->b_list;
			bp->b_list = NULL;
			continue;	/* No longer a candidate */
		}
		if (bp->b_flags & B_DELWRI) {
			index = bio_bhash(bp->b_edev, bp->b_blkno);
			hp = &hbuf[index];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			/* Asynchronous write-back; don't wait per buffer. */
			bp->b_flags |= B_ASYNC;
			mutex_enter(hmp);
			hp->b_length--;
			notavail(bp);
			mutex_exit(hmp);
			if (bp->b_vp == NULL) {		/* !ufs */
				BWRITE(bp);
			} else {			/* ufs */
				UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
			}
		} else {
			/* Someone else already flushed it; just release. */
			sema_v(&bp->b_sem);
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doingflush--;
	/* Wake any flush/invalidate waiting on us (see loop above). */
	if (bio_flinv_cv_wanted) {
		bio_flinv_cv_wanted = 0;
		cv_broadcast(&bio_flushinval_cv);
	}
	mutex_exit(&blist_lock);
}

/*
 * Ensure that a specified block is up-to-date on disk.
 */
void
blkflush(dev_t dev, daddr_t blkno)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *sbp = NULL;
	uint_t index;
	kmutex_t *hmp;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	/*
	 * Identify the buffer in the cache belonging to
	 * this device and blkno (if any).
	 */
	mutex_enter(hmp);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		sbp = bp;
		break;
	}
	mutex_exit(hmp);
	if (sbp == NULL)
		return;
	/*
	 * Now check the buffer we have identified and
	 * make sure it still belongs to the device and is B_DELWRI.
	 * (The identity may have changed while we slept on b_sem
	 * after dropping the hash lock above.)
	 */
	sema_p(&sbp->b_sem);
	if (sbp->b_blkno == blkno && sbp->b_edev == dev &&
	    (sbp->b_flags & (B_DELWRI|B_STALE)) == B_DELWRI) {
		mutex_enter(hmp);
		hp->b_length--;
		notavail(sbp);
		mutex_exit(hmp);
		/*
		 * XXX - There is nothing to guarantee a synchronous
		 * write here if the B_ASYNC flag is set. This needs
		 * some investigation.
		 */
		if (sbp->b_vp == NULL) {	/* !ufs */
			BWRITE(sbp);		/* synchronous write */
		} else {			/* ufs */
			UFS_BWRITE(VTOI(sbp->b_vp)->i_ufsvfs, sbp);
		}
	} else {
		/* Not (or no longer) a dirty buffer for us; release. */
		sema_v(&sbp->b_sem);
	}
}

/*
 * Same as binval, except can force-invalidate delayed-write buffers
 * (which are not be already flushed because of device errors). Also
 * makes sure that the retry write flag is cleared.
 *
 * Returns 0 on success, or EIO if a dirty (B_DELWRI) buffer for the
 * device could not be invalidated (only possible when !force).
 */
int
bfinval(dev_t dev, int force)
{
	struct buf *dp;
	struct buf *bp;
	struct buf *binval_list = EMPTY_LIST;
	int i, error = 0;
	kmutex_t *hmp;
	uint_t index;
	struct buf **backp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any flushes ahead of us to finish, it's ok to
	 * do invalidates in parallel.
	 */
	while (bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doinginval++;

	/* Gather bp's (threaded through b_list, same scheme as bflush) */
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = binval_list;
					binval_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/* Invalidate all bp's found */
	while (binval_list != EMPTY_LIST) {
		bp = binval_list;

		sema_p(&bp->b_sem);
		/* Re-check identity: may have been reused while we slept. */
		if (bp->b_edev == dev) {
			if (force && (bp->b_flags & B_DELWRI)) {
				/* clear B_DELWRI, move to non-dw freelist */
				index = bio_bhash(bp->b_edev, bp->b_blkno);
				hmp = &hbuf[index].b_lock;
				dp = (struct buf *)&hbuf[index];
				mutex_enter(hmp);

				/* remove from delayed write freelist */
				notavail(bp);

				/* add to B_AGE side of non-dw freelist */
				backp = &dp->av_forw;
				(*backp)->av_back = bp;
				bp->av_forw = *backp;
				*backp = bp;
				bp->av_back = dp;

				/*
				 * make sure write retries and busy are cleared
				 */
				bp->b_flags &=
				    ~(B_BUSY | B_DELWRI | B_RETRYWRI);
				mutex_exit(hmp);
			}
			/*
			 * If still dirty here, we were not allowed to force
			 * it out -- report EIO to the caller.
			 */
			if ((bp->b_flags & B_DELWRI) == 0)
				bp->b_flags |= B_STALE|B_AGE;
			else
				error = EIO;
		}
		sema_v(&bp->b_sem);
		binval_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doinginval--;
	if (bio_flinv_cv_wanted) {
		cv_broadcast(&bio_flushinval_cv);
		bio_flinv_cv_wanted = 0;
	}
	mutex_exit(&blist_lock);
	return (error);
}

/*
 * If possible, invalidate blocks for a dev on demand
 */
void
binval(dev_t dev)
{
	/* Non-forcing variant: dirty buffers are left alone. */
	(void) bfinval(dev, 0);
}

/*
 * Initialize the buffer I/O system by freeing
 * all buffers and setting all device hash buffer lists to empty.
 */
void
binit(void)
{
	struct buf *bp;
	unsigned int i, pct;
	ulong_t bio_max_hwm, bio_default_hwm;

	/*
	 * Maximum/Default values for bufhwm are set to the smallest of:
	 *	- BIO_MAX_PERCENT resp. BIO_BUF_PERCENT of real memory
	 *	- 1/4 of kernel virtual memory
	 *	- INT32_MAX to prevent overflows of v.v_bufhwm (which is int).
	 * Additionally, in order to allow simple tuning by percentage of
	 * physical memory, bufhwm_pct is used to calculate the default if
	 * the value of this tunable is between 0 and BIO_MAX_PERCENT.
	 *
	 * Since the unit for v.v_bufhwm is kilobytes, this allows for
	 * a maximum of 1024 * 2GB == 2TB memory usage by buffer headers.
	 */
	bio_max_hwm = MIN(physmem / BIO_MAX_PERCENT,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_max_hwm = MIN(INT32_MAX, bio_max_hwm);

	pct = BIO_BUF_PERCENT;
	/*
	 * Note: pct is a divisor ("1/pct of physmem"), so a *smaller*
	 * quotient 100 / bufhwm_pct means the user asked for *more* than
	 * the allowed maximum fraction of memory.
	 */
	if (bufhwm_pct != 0 &&
	    ((pct = 100 / bufhwm_pct) < BIO_MAX_PERCENT)) {
		pct = BIO_BUF_PERCENT;
		/*
		 * Invalid user specified value, emit a warning.
		 */
		cmn_err(CE_WARN, "binit: bufhwm_pct(%d) out of \
range(1..%d). Using %d as default.",
		    bufhwm_pct,
		    100 / BIO_MAX_PERCENT, 100 / BIO_BUF_PERCENT);
	}

	bio_default_hwm = MIN(physmem / pct,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_default_hwm = MIN(INT32_MAX, bio_default_hwm);

	/* bufhwm is the /etc/system tunable; 0 means "use the default". */
	if ((v.v_bufhwm = bufhwm) == 0)
		v.v_bufhwm = bio_default_hwm;

	if (v.v_bufhwm < BIO_MIN_HWM || v.v_bufhwm > bio_max_hwm) {
		v.v_bufhwm = (int)bio_max_hwm;
		/*
		 * Invalid user specified value, emit a warning.
		 */
		cmn_err(CE_WARN,
		    "binit: bufhwm(%d) out \
of range(%d..%lu). Using %lu as default",
		    bufhwm,
		    BIO_MIN_HWM, bio_max_hwm, bio_max_hwm);
	}

	/*
	 * Determine the number of hash buckets. Default is to
	 * create ~BIO_HASHLEN entries per chain based on MAXBSIZE buffers.
	 * Round up number to the next power of 2.
	 */
	v.v_hbuf = 1 << highbit((((ulong_t)v.v_bufhwm * 1024) / MAXBSIZE) /
	    BIO_HASHLEN);
	v.v_hmask = v.v_hbuf - 1;	/* power of 2 => mask for bio_bhash */
	v.v_buf = BIO_BHDR_POOL;

	hbuf = kmem_zalloc(v.v_hbuf * sizeof (struct hbuf), KM_SLEEP);

	dwbuf = kmem_zalloc(v.v_hbuf * sizeof (struct dwbuf), KM_SLEEP);

	/* Entire high-water allowance starts out as "free". */
	bfreelist.b_bufsize = (size_t)v.v_bufhwm * 1024;
	bp = &bfreelist;
	bp->b_forw = bp->b_back = bp->av_forw = bp->av_back = bp;

	/* Each hash/delayed-write header doubles as an empty circular list. */
	for (i = 0; i < v.v_hbuf; i++) {
		hbuf[i].b_forw = hbuf[i].b_back = (struct buf *)&hbuf[i];
		hbuf[i].av_forw = hbuf[i].av_back = (struct buf *)&hbuf[i];

		/*
		 * Initialize the delayed write buffer list.
		 */
		dwbuf[i].b_forw = dwbuf[i].b_back = (struct buf *)&dwbuf[i];
		dwbuf[i].av_forw = dwbuf[i].av_back = (struct buf *)&dwbuf[i];
	}
}

/*
 * Wait for I/O completion on the buffer; return error code.
 * If bp was for synchronous I/O, bp is invalid and associated
 * resources are freed on return.
 */
int
biowait(struct buf *bp)
{
	int error = 0;
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));

	/* Account this thread as waiting on I/O for this CPU. */
	cpup = CPU;
	atomic_add_64(&cpup->cpu_stats.sys.iowait, 1);
	DTRACE_IO1(wait__start, struct buf *, bp);

	/*
	 * In case of panic, busy wait for completion
	 * (the semaphore wakeup path cannot be trusted then).
	 */
	if (panicstr) {
		while ((bp->b_flags & B_DONE) == 0)
			drv_usecwait(10);
	} else
		sema_p(&bp->b_io);	/* posted by biodone() */

	DTRACE_IO1(wait__done, struct buf *, bp);
	atomic_add_64(&cpup->cpu_stats.sys.iowait, -1);

	error = geterror(bp);
	if ((bp->b_flags & B_ASYNC) == 0) {
		/* Synchronous I/O: tear down any kernel mapping. */
		if (bp->b_flags & B_REMAPPED)
			bp_mapout(bp);
	}
	return (error);
}

/*
 * Fire the TNF biodone probe for bp (see comment in biodone()).
 */
static void
biodone_tnf_probe(struct buf *bp)
{
	/* Kernel probe */
	TNF_PROBE_3(biodone, "io blockio", /* CSTYLED */,
	    tnf_device, device, bp->b_edev,
	    tnf_diskaddr, block, bp->b_lblkno,
	    tnf_opaque, buf, bp);
}

/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 */
void
biodone(struct buf *bp)
{
	if (bp->b_flags & B_STARTED) {
		DTRACE_IO1(done, struct buf *, bp);
		bp->b_flags &= ~B_STARTED;
	}

	/*
	 * Call the TNF probe here instead of the inline code
	 * to force our compiler to use the tail call optimization.
	 */
	biodone_tnf_probe(bp);

	/* A private completion routine takes over entirely if present. */
	if (bp->b_iodone != NULL) {
		(*(bp->b_iodone))(bp);
		return;
	}
	ASSERT((bp->b_flags & B_DONE) == 0);
	ASSERT(SEMA_HELD(&bp->b_sem));
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_ASYNC) {
		/* Nobody is waiting; release the buffer ourselves. */
		if (bp->b_flags & (B_PAGEIO|B_REMAPPED))
			bio_pageio_done(bp);
		else
			brelse(bp);	/* release bp to freelist */
	} else {
		sema_v(&bp->b_io);	/* wake the biowait()er */
	}
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized code.
 */
int
geterror(struct buf *bp)
{
	int error = 0;

	ASSERT(SEMA_HELD(&bp->b_sem));
	if (bp->b_flags & B_ERROR) {
		error = bp->b_error;
		/* Error flagged but no code recorded: report generic EIO. */
		if (!error)
			error = EIO;
	}
	return (error);
}

/*
 * Support for pageio buffers.
 *
 * This stuff should be generalized to provide a generalized bp
 * header facility that can be used for things other than pageio.
 */

/*
 * Allocate and initialize a buf struct for use with pageio.
 * The returned buffer is B_BUSY with b_sem held ("locked") and is
 * not on any hash or free list; the caller fills in dev/blkno.
 */
struct buf *
pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags)
{
	struct buf *bp;
	struct cpu *cpup;

	if (flags & B_READ) {
		CPU_STATS_ENTER_K();
		cpup = CPU;	/* get pointer AFTER preemption is disabled */
		CPU_STATS_ADDQ(cpup, vm, pgin, 1);
		CPU_STATS_ADDQ(cpup, vm, pgpgin, btopr(len));
		if ((flags & B_ASYNC) == 0) {
			klwp_t *lwp = ttolwp(curthread);
			if (lwp != NULL)
				lwp->lwp_ru.majflt++;
			CPU_STATS_ADDQ(cpup, vm, maj_fault, 1);
			/*
			 * Kernel probe.
			 * NOTE(review): pp is dereferenced here (and by the
			 * probes below) without the pp != NULL guard used
			 * on the stats path -- callers presumably always
			 * pass a valid page list for B_READ; confirm.
			 */
			TNF_PROBE_2(major_fault, "vm pagefault", /* CSTYLED */,
			    tnf_opaque, vnode, pp->p_vnode,
			    tnf_offset, offset, pp->p_offset);
		}
		/*
		 * Update statistics for pages being paged in
		 */
		if (pp != NULL && pp->p_vnode != NULL) {
			if (IS_SWAPFSVP(pp->p_vnode)) {
				CPU_STATS_ADDQ(cpup, vm, anonpgin,
				    btopr(len));
			} else {
				if (pp->p_vnode->v_flag & VVMEXEC) {
					CPU_STATS_ADDQ(cpup, vm, execpgin,
					    btopr(len));
				} else {
					CPU_STATS_ADDQ(cpup, vm, fspgin,
					    btopr(len));
				}
			}
		}
		CPU_STATS_EXIT_K();
		TRACE_1(TR_FAC_VM, TR_PAGE_WS_IN,
		    "page_ws_in:pp %p", pp);
		/* Kernel probe */
		TNF_PROBE_3(pagein, "vm pageio io", /* CSTYLED */,
		    tnf_opaque, vnode, pp->p_vnode,
		    tnf_offset, offset, pp->p_offset,
		    tnf_size, size, len);
	}

	bp = kmem_zalloc(sizeof (struct buf), KM_SLEEP);
	bp->b_bcount = len;
	bp->b_bufsize = len;
	bp->b_pages = pp;
	bp->b_flags = B_PAGEIO | B_NOCACHE | B_BUSY | flags;
	bp->b_offset = -1;
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	/* Initialize bp->b_sem in "locked" state */
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);

	/* Hold the vnode for the life of the buffer (see pageio_done()). */
	VN_HOLD(vp);
	bp->b_vp = vp;
	THREAD_KPRI_RELEASE_N(btopr(len)); /* release kpri from page_locks */

	/*
	 * Caller sets dev & blkno and can adjust
	 * b_addr for page offset and can use bp_mapin
	 * to make pages kernel addressable.
	 */
	return (bp);
}

/*
 * Tear down a buffer created by pageio_setup(): unmap, drop the vnode
 * hold, and free the header.  bp is invalid on return.
 */
void
pageio_done(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);
	VN_RELE(bp->b_vp);	/* matches VN_HOLD in pageio_setup() */
	bp->b_vp = NULL;
	ASSERT((bp->b_flags & B_NOCACHE) != 0);

	/* A sema_v(bp->b_sem) is implied if we are destroying it */
	sema_destroy(&bp->b_sem);
	sema_destroy(&bp->b_io);
	kmem_free(bp, sizeof (struct buf));
}

/*
 * Check to see whether the buffers, except the one pointed by sbp,
 * associated with the device are busy.
 * NOTE: This expensive operation shall be improved together with ufs_icheck().
14050Sstevel@tonic-gate */ 14060Sstevel@tonic-gate int 14070Sstevel@tonic-gate bcheck(dev_t dev, struct buf *sbp) 14080Sstevel@tonic-gate { 14090Sstevel@tonic-gate struct buf *bp; 14100Sstevel@tonic-gate struct buf *dp; 14110Sstevel@tonic-gate int i; 14120Sstevel@tonic-gate kmutex_t *hmp; 14130Sstevel@tonic-gate 14140Sstevel@tonic-gate /* 14150Sstevel@tonic-gate * check for busy bufs for this filesystem 14160Sstevel@tonic-gate */ 14170Sstevel@tonic-gate for (i = 0; i < v.v_hbuf; i++) { 14180Sstevel@tonic-gate dp = (struct buf *)&hbuf[i]; 14190Sstevel@tonic-gate hmp = &hbuf[i].b_lock; 14200Sstevel@tonic-gate 14210Sstevel@tonic-gate mutex_enter(hmp); 14220Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { 14230Sstevel@tonic-gate /* 14240Sstevel@tonic-gate * if buf is busy or dirty, then filesystem is busy 14250Sstevel@tonic-gate */ 14260Sstevel@tonic-gate if ((bp->b_edev == dev) && 14270Sstevel@tonic-gate ((bp->b_flags & B_STALE) == 0) && 14280Sstevel@tonic-gate (bp->b_flags & (B_DELWRI|B_BUSY)) && 14290Sstevel@tonic-gate (bp != sbp)) { 14300Sstevel@tonic-gate mutex_exit(hmp); 14310Sstevel@tonic-gate return (1); 14320Sstevel@tonic-gate } 14330Sstevel@tonic-gate } 14340Sstevel@tonic-gate mutex_exit(hmp); 14350Sstevel@tonic-gate } 14360Sstevel@tonic-gate return (0); 14370Sstevel@tonic-gate } 14380Sstevel@tonic-gate 14390Sstevel@tonic-gate /* 14400Sstevel@tonic-gate * Hash two 32 bit entities. 
 */
/*
 * Fold the four bytes of 'x', then the four bytes of 'y', into one int
 * with repeated "hash = hash * 7 + byte - 1" steps.  The result may be
 * negative; callers presumably reduce it to a table index — TODO
 * confirm against callers (not visible in this chunk).
 */
int
hash2ints(int x, int y)
{
	int hash = 0;

	hash = x - 1;
	hash = ((hash * 7) + (x >> 8)) - 1;
	hash = ((hash * 7) + (x >> 16)) - 1;
	hash = ((hash * 7) + (x >> 24)) - 1;
	hash = ((hash * 7) + y) - 1;
	hash = ((hash * 7) + (y >> 8)) - 1;
	hash = ((hash * 7) + (y >> 16)) - 1;
	hash = ((hash * 7) + (y >> 24)) - 1;

	return (hash);
}


/*
 * Return a new buffer struct.
 *	Create a new buffer if we haven't gone over our high water
 *	mark for memory, otherwise try to get one off the freelist.
 *
 * Returns a locked buf that has no id and is not on any hash or free
 * list.
 */
static struct buf *
bio_getfreeblk(long bsize)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	kmutex_t *hmp;
	uint_t start, end;

	/*
	 * bfreelist.b_bufsize (protected by bfree_lock) represents the
	 * amount of memory we are allowed to allocate in the cache
	 * before we hit our high water mark.
	 */
	bio_mem_get(bsize);	/* Account for our memory request */

again:
	/* NOTE(review): no goto to this label is visible in this chunk */
	bp = bio_bhdr_alloc();	/* Get a buf hdr */
	sema_p(&bp->b_sem);	/* Should never fail */

	ASSERT(bp->b_un.b_addr == NULL);
	bp->b_un.b_addr = kmem_alloc(bsize, KM_NOSLEEP);
	if (bp->b_un.b_addr != NULL) {
		/*
		 * Make the common path short
		 */
		bp->b_bufsize = bsize;
		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	} else {
		struct buf *save;

		save = bp;	/* Save bp we allocated */
		start = end = lastindex;

		biostats.bio_bufwant.value.ui32++;

		/*
		 * Memory isn't available from the system now. Scan
		 * the hash buckets till enough space is found.
		 */
		do {
			hp = &hbuf[start];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			mutex_enter(hmp);
			bp = dp->av_forw;

			while (bp != dp) {

				ASSERT(bp != NULL);

				/* skip bufs someone else has locked */
				if (!sema_tryp(&bp->b_sem)) {
					bp = bp->av_forw;
					continue;
				}

				/*
				 * Since we are going down the freelist
				 * associated with this hash bucket the
				 * B_DELWRI flag should not be set.
				 */
				ASSERT(!(bp->b_flags & B_DELWRI));

				/*
				 * Steal a free buf whose data buffer is
				 * already exactly bsize bytes.
				 */
				if (bp->b_bufsize == bsize) {
					hp->b_length--;
					notavail(bp);
					bremhash(bp);
					mutex_exit(hmp);

					/*
					 * Didn't kmem_alloc any more, so don't
					 * count it twice.
					 */
					mutex_enter(&bfree_lock);
					bfreelist.b_bufsize += bsize;
					mutex_exit(&bfree_lock);

					/*
					 * Update the lastindex value.
					 */
					lastindex = start;

					/*
					 * Put our saved bp back on the list
					 */
					sema_v(&save->b_sem);
					bio_bhdr_free(save);
					ASSERT(SEMA_HELD(&bp->b_sem));
					return (bp);
				}
				sema_v(&bp->b_sem);
				bp = bp->av_forw;
			}
			mutex_exit(hmp);
			start = ((start + 1) % v.v_hbuf);
		} while (start != end);

		/*
		 * Nothing of the right size on any freelist: block in
		 * the allocator until memory shows up.
		 */
		biostats.bio_bufwait.value.ui32++;
		bp = save;		/* Use original bp */
		bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
	}

	bp->b_bufsize = bsize;
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (bp);
}

/*
 * Allocate a buffer header. If none currently available, allocate
 * a new pool.
 */
static struct buf *
bio_bhdr_alloc(void)
{
	struct buf *dp, *sdp;
	struct buf *bp;
	int i;

	/* Loop until a header can be popped off bhdrlist */
	for (;;) {
		mutex_enter(&bhdr_lock);
		if (bhdrlist != NULL) {
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);
			bp->av_forw = NULL;
			return (bp);
		}
		mutex_exit(&bhdr_lock);

		/*
		 * Need to allocate a new pool. If the system is currently
		 * out of memory, then try freeing things on the freelist.
		 */
		dp = kmem_zalloc(sizeof (struct buf) * v.v_buf, KM_NOSLEEP);
		if (dp == NULL) {
			/*
			 * System can't give us a pool of headers, try
			 * recycling from the free lists.
			 */
			bio_recycle(BIO_HEADER, 0);
		} else {
			/* Initialize the new pool and link it into a chain */
			sdp = dp;
			for (i = 0; i < v.v_buf; i++, dp++) {
				/*
				 * The next two lines are needed since NODEV
				 * is -1 and not NULL
				 */
				dp->b_dev = (o_dev_t)NODEV;
				dp->b_edev = NODEV;
				dp->av_forw = dp + 1;
				sema_init(&dp->b_sem, 1, NULL, SEMA_DEFAULT,
				    NULL);
				sema_init(&dp->b_io, 0, NULL, SEMA_DEFAULT,
				    NULL);
				dp->b_offset = -1;
			}
			/* Prepend the pool to bhdrlist and take one header */
			mutex_enter(&bhdr_lock);
			(--dp)->av_forw = bhdrlist;	/* Fix last pointer */
			bhdrlist = sdp;
			nbuf += v.v_buf;
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);

			bp->av_forw = NULL;
			return (bp);
		}
	}
}

/*
 * Return a disassociated buf header to the global header freelist.
 * The caller must already have unlinked it and cleared its identity
 * (all asserted below).
 */
static void
bio_bhdr_free(struct buf *bp)
{
	ASSERT(bp->b_back == NULL);
	ASSERT(bp->b_forw == NULL);
	ASSERT(bp->av_back == NULL);
	ASSERT(bp->av_forw == NULL);
	ASSERT(bp->b_un.b_addr == NULL);
	ASSERT(bp->b_dev == (o_dev_t)NODEV);
	ASSERT(bp->b_edev == NODEV);
	ASSERT(bp->b_flags == 0);

	mutex_enter(&bhdr_lock);
	bp->av_forw = bhdrlist;
	bhdrlist = bp;
	mutex_exit(&bhdr_lock);
}

/*
 * If we haven't gone over the high water mark, it's o.k. to
 * allocate more buffer space, otherwise recycle buffers
 * from the freelist until enough memory is free for a bsize request.
 *
 * We account for this memory, even though
 * we don't allocate it here.
 */
static void
bio_mem_get(long bsize)
{
	mutex_enter(&bfree_lock);
	if (bfreelist.b_bufsize > bsize) {
		/* Under the high water mark: just charge the quota */
		bfreelist.b_bufsize -= bsize;
		mutex_exit(&bfree_lock);
		return;
	}
	mutex_exit(&bfree_lock);
	/* Over the mark: recycle cached buffers until bsize is available */
	bio_recycle(BIO_MEM, bsize);
}

/*
 * flush a list of delayed write buffers.
 * (currently used only by bio_recycle below.)
 */
static void
bio_flushlist(struct buf *delwri_list)
{
	struct buf *bp;

	/*
	 * Walk the b_list-linked chain (terminated by the EMPTY_LIST
	 * sentinel, defined elsewhere in this file) and issue an async
	 * write for each buffer; UFS buffers go through the logging-aware
	 * UFS_BWRITE path.
	 */
	while (delwri_list != EMPTY_LIST) {
		bp = delwri_list;
		bp->b_flags |= B_AGE | B_ASYNC;
		if (bp->b_vp == NULL) {		/* !ufs */
			BWRITE(bp);
		} else {			/* ufs */
			UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
}

/*
 * Start recycling buffers on the freelist for one of 2 reasons:
 *	- we need a buffer header
 *	- we need to free up memory
 * Once started we continue to recycle buffers until the B_AGE
 * buffers are gone.
 */
static void
bio_recycle(int want, long bsize)
{
	struct buf *bp, *dp, *dwp, *nbp;
	struct hbuf *hp;
	int found = 0;
	kmutex_t *hmp;
	int start, end;
	struct buf *delwri_list = EMPTY_LIST;

	/*
	 * Recycle buffers.
	 */
top:
	start = end = lastindex;
	do {
		hp = &hbuf[start];
		hmp = &hp->b_lock;
		dp = (struct buf *)hp;

		/* Phase 1: reclaim clean buffers off this bucket's freelist */
		mutex_enter(hmp);
		bp = dp->av_forw;

		while (bp != dp) {

			ASSERT(bp != NULL);

			/* skip buffers someone else holds locked */
			if (!sema_tryp(&bp->b_sem)) {
				bp = bp->av_forw;
				continue;
			}
			/*
			 * Do we really want to nuke all of the B_AGE stuff??
			 */
			if ((bp->b_flags & B_AGE) == 0 && found) {
				sema_v(&bp->b_sem);
				mutex_exit(hmp);
				lastindex = start;
				return;	/* All done */
			}

			ASSERT(MUTEX_HELD(&hp->b_lock));
			ASSERT(!(bp->b_flags & B_DELWRI));
			hp->b_length--;
			notavail(bp);

			/*
			 * Remove bhdr from cache, free up memory,
			 * and add the hdr to the freelist.
			 */
			bremhash(bp);
			mutex_exit(hmp);

			if (bp->b_bufsize) {
				kmem_free(bp->b_un.b_addr, bp->b_bufsize);
				bp->b_un.b_addr = NULL;
				/* credit the freed memory back to the quota */
				mutex_enter(&bfree_lock);
				bfreelist.b_bufsize += bp->b_bufsize;
				mutex_exit(&bfree_lock);
			}

			/* clear identity before returning the header */
			bp->b_dev = (o_dev_t)NODEV;
			bp->b_edev = NODEV;
			bp->b_flags = 0;
			sema_v(&bp->b_sem);
			bio_bhdr_free(bp);
			if (want == BIO_HEADER) {
				found = 1;
			} else {
				ASSERT(want == BIO_MEM);
				/* double-checked: re-test under bfree_lock */
				if (!found && bfreelist.b_bufsize >= bsize) {
					/* Account for the memory we want */
					mutex_enter(&bfree_lock);
					if (bfreelist.b_bufsize >= bsize) {
						bfreelist.b_bufsize -= bsize;
						found = 1;
					}
					mutex_exit(&bfree_lock);
				}
			}

			/*
			 * Since we dropped hmp start from the
			 * begining.
			 */
			mutex_enter(hmp);
			bp = dp->av_forw;
		}
		mutex_exit(hmp);

		/*
		 * Phase 2: look at the delayed write list.
		 * First gather into a private list, then write them.
		 */
		dwp = (struct buf *)&dwbuf[start];
		mutex_enter(&blist_lock);
		bio_doingflush++;
		mutex_enter(hmp);
		for (bp = dwp->av_forw; bp != dwp; bp = nbp) {

			ASSERT(bp != NULL);
			nbp = bp->av_forw;

			if (!sema_tryp(&bp->b_sem))
				continue;
			ASSERT(bp->b_flags & B_DELWRI);
			/*
			 * Do we really want to nuke all of the B_AGE stuff??
			 */

			if ((bp->b_flags & B_AGE) == 0 && found) {
				/* satisfied: flush what we gathered, wake
				 * any waiters, and quit */
				sema_v(&bp->b_sem);
				mutex_exit(hmp);
				lastindex = start;
				mutex_exit(&blist_lock);
				bio_flushlist(delwri_list);
				mutex_enter(&blist_lock);
				bio_doingflush--;
				if (bio_flinv_cv_wanted) {
					bio_flinv_cv_wanted = 0;
					cv_broadcast(&bio_flushinval_cv);
				}
				mutex_exit(&blist_lock);
				return; /* All done */
			}

			/*
			 * If the buffer is already on a flush or
			 * invalidate list then just skip it.
			 */
			if (bp->b_list != NULL) {
				sema_v(&bp->b_sem);
				continue;
			}
			/*
			 * We are still on the same bucket.
			 */
			hp->b_length--;
			notavail(bp);
			bp->b_list = delwri_list;
			delwri_list = bp;
		}
		mutex_exit(hmp);
		mutex_exit(&blist_lock);
		bio_flushlist(delwri_list);
		delwri_list = EMPTY_LIST;
		mutex_enter(&blist_lock);
		bio_doingflush--;
		if (bio_flinv_cv_wanted) {
			bio_flinv_cv_wanted = 0;
			cv_broadcast(&bio_flushinval_cv);
		}
		mutex_exit(&blist_lock);
		start = (start + 1) % v.v_hbuf;

	} while (start != end);

	if (found)
		return;

	/*
	 * Free lists exhausted and we haven't satisfied the request.
	 * Wait here for more entries to be added to freelist.
	 * Because this might have just happened, make it timed.
	 */
	mutex_enter(&bfree_lock);
	bfreelist.b_flags |= B_WANTED;
	(void) cv_timedwait(&bio_mem_cv, &bfree_lock, lbolt+hz);
	mutex_exit(&bfree_lock);
	goto top;
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
18830Sstevel@tonic-gate */ 18840Sstevel@tonic-gate static int 18850Sstevel@tonic-gate bio_incore(dev_t dev, daddr_t blkno) 18860Sstevel@tonic-gate { 18870Sstevel@tonic-gate struct buf *bp; 18880Sstevel@tonic-gate struct buf *dp; 18890Sstevel@tonic-gate uint_t index; 18900Sstevel@tonic-gate kmutex_t *hmp; 18910Sstevel@tonic-gate 18920Sstevel@tonic-gate index = bio_bhash(dev, blkno); 18930Sstevel@tonic-gate dp = (struct buf *)&hbuf[index]; 18940Sstevel@tonic-gate hmp = &hbuf[index].b_lock; 18950Sstevel@tonic-gate 18960Sstevel@tonic-gate mutex_enter(hmp); 18970Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { 18980Sstevel@tonic-gate if (bp->b_blkno == blkno && bp->b_edev == dev && 18990Sstevel@tonic-gate (bp->b_flags & B_STALE) == 0) { 19000Sstevel@tonic-gate mutex_exit(hmp); 19010Sstevel@tonic-gate return (1); 19020Sstevel@tonic-gate } 19030Sstevel@tonic-gate } 19040Sstevel@tonic-gate mutex_exit(hmp); 19050Sstevel@tonic-gate return (0); 19060Sstevel@tonic-gate } 19070Sstevel@tonic-gate 19080Sstevel@tonic-gate static void 19090Sstevel@tonic-gate bio_pageio_done(struct buf *bp) 19100Sstevel@tonic-gate { 19110Sstevel@tonic-gate if (bp->b_flags & B_PAGEIO) { 19120Sstevel@tonic-gate 19130Sstevel@tonic-gate if (bp->b_flags & B_REMAPPED) 19140Sstevel@tonic-gate bp_mapout(bp); 19150Sstevel@tonic-gate 19160Sstevel@tonic-gate if (bp->b_flags & B_READ) 19170Sstevel@tonic-gate pvn_read_done(bp->b_pages, bp->b_flags); 19180Sstevel@tonic-gate else 19190Sstevel@tonic-gate pvn_write_done(bp->b_pages, B_WRITE | bp->b_flags); 19200Sstevel@tonic-gate pageio_done(bp); 19210Sstevel@tonic-gate } else { 19220Sstevel@tonic-gate ASSERT(bp->b_flags & B_REMAPPED); 19230Sstevel@tonic-gate bp_mapout(bp); 19240Sstevel@tonic-gate brelse(bp); 19250Sstevel@tonic-gate } 19260Sstevel@tonic-gate } 19270Sstevel@tonic-gate 19280Sstevel@tonic-gate /* 19290Sstevel@tonic-gate * bioerror(9F) - indicate error in buffer header 19300Sstevel@tonic-gate * If 'error' is zero, remove the error 
indication.
 */
void
bioerror(struct buf *bp, int error)
{
	ASSERT(bp != NULL);
	ASSERT(error >= 0);
	ASSERT(SEMA_HELD(&bp->b_sem));

	/* set or clear B_ERROR to match the errno being recorded */
	if (error != 0) {
		bp->b_flags |= B_ERROR;
	} else {
		bp->b_flags &= ~B_ERROR;
	}
	bp->b_error = error;
}

/*
 * bioreset(9F) - reuse a private buffer header after I/O is complete
 * Equivalent to biofini() followed by bioinit().
 */
void
bioreset(struct buf *bp)
{
	ASSERT(bp != NULL);

	biofini(bp);
	bioinit(bp);
}

/*
 * biosize(9F) - return size of a buffer header
 * Lets drivers allocate buf storage without knowing sizeof (struct buf).
 */
size_t
biosize(void)
{
	return (sizeof (struct buf));
}

/*
 * biomodified(9F) - check if buffer is modified
 * Returns 1 if any page backing a B_PAGEIO buf has been modified,
 * 0 if none have, and -1 if the buf is not a paged-I/O buf.
 */
int
biomodified(struct buf *bp)
{
	int npf;
	int ppattr;
	struct page *pp;

	ASSERT(bp != NULL);

	if ((bp->b_flags & B_PAGEIO) == 0) {
		return (-1);
	}
	pp = bp->b_pages;
	/* number of pages spanned, including any leading page offset */
	npf = btopr(bp->b_bcount + ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET));

	while (npf > 0) {
		/* sync HAT ref/mod state; stop early once a mod is seen */
		ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
		    HAT_SYNC_STOPON_MOD);
		if (ppattr & P_MOD)
			return (1);
		pp = pp->p_next;
		npf--;
	}

	return (0);
}

/*
 * bioinit(9F) - initialize a buffer structure
 * Zeroes the buf and creates b_sem (held) and b_io semaphores.
 */
void
bioinit(struct buf *bp)
{
	bzero(bp, sizeof (struct buf));
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	bp->b_offset = -1;
}

/*
 * biofini(9F) - uninitialize a buffer structure
 * Destroys the semaphores created by bioinit().
 */
void
biofini(struct buf *bp)
{
	sema_destroy(&bp->b_io);
	sema_destroy(&bp->b_sem);
}

/*
 * bioclone(9F) - clone a buffer
 *
 * Build a buf describing the [off, off+len) sub-range of 'bp', directed
 * at <dev, blkno> with completion routine 'iodone'.  If 'bp_mem' is NULL
 * a new buf is allocated with 'sleep' (KM_SLEEP/KM_NOSLEEP) semantics —
 * NULL is returned only when that KM_NOSLEEP allocation fails; otherwise
 * 'bp_mem' is reset and reused.
 */
struct buf *
bioclone(struct buf *bp, off_t off, size_t len, dev_t dev, daddr_t blkno,
	int (*iodone)(struct buf *), struct buf *bp_mem, int sleep)
{
	struct buf *bufp;

	ASSERT(bp);
	if (bp_mem == NULL) {
		bufp = kmem_alloc(sizeof (struct buf), sleep);
		if (bufp == NULL) {
			return (NULL);
		}
		bioinit(bufp);
	} else {
		bufp = bp_mem;
		bioreset(bufp);
	}

#define	BUF_CLONE_FLAGS	(B_READ|B_WRITE|B_SHADOW|B_PHYS|B_PAGEIO|B_FAILFAST|\
	B_ABRWRITE)

	/*
	 * The cloned buffer does not inherit the B_REMAPPED flag.
	 */
	bufp->b_flags = (bp->b_flags & BUF_CLONE_FLAGS) | B_BUSY;
	bufp->b_bcount = len;
	bufp->b_blkno = blkno;
	bufp->b_iodone = iodone;
	bufp->b_proc = bp->b_proc;
	bufp->b_edev = dev;
	bufp->b_file = bp->b_file;
	bufp->b_offset = bp->b_offset;

	if (bp->b_flags & B_SHADOW) {
		ASSERT(bp->b_shadow);
		ASSERT(bp->b_flags & B_PHYS);

		/* advance the shadow page-frame list to the page holding off */
		bufp->b_shadow = bp->b_shadow +
		    btop(((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off);
		bufp->b_un.b_addr = (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
		/* kernel-mapped (B_REMAPPED) buffers carry no user proc */
		if (bp->b_flags & B_REMAPPED)
			bufp->b_proc = NULL;
	} else {
		if (bp->b_flags & B_PAGEIO) {
			struct page *pp;
			off_t o;
			int i;

			/* walk the page list to the page containing 'off' */
			pp = bp->b_pages;
			o = ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off;
			for (i = btop(o); i > 0; i--) {
				pp = pp->p_next;
			}
			bufp->b_pages = pp;
			/* b_addr holds only the intra-page offset for pageio */
			bufp->b_un.b_addr = (caddr_t)(o & PAGEOFFSET);
		} else {
			bufp->b_un.b_addr =
			    (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
			if (bp->b_flags & B_REMAPPED)
				bufp->b_proc = NULL;
		}
	}
	return (bufp);
}