/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/scsi/scsi.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/thread.h>
#include <sys/var.h>

#include "sd_xbuf.h"

/*
 * xbuf.c: buf(9S) extension facility.
 *
 * The buf(9S) extension facility is intended to allow block drivers to
 * allocate additional memory that is associated with a particular buf(9S)
 * struct.  It is further intended to help in addressing the usual set of
 * problems associated with such allocations, in particular those involving
 * recovery from allocation failures, especially in code paths that the
 * system relies on to free memory.
 *
 * CAVEAT: Currently this code is completely private to the sd driver and in
 * NO WAY constitutes a public or supported interface of any kind. It is
 * envisioned that this may one day migrate into the Solaris DDI, but until
 * that time this ought to be considered completely unstable and is subject
 * to change without notice. This code may NOT in any way be utilized by
 * ANY code outside the sd driver.
 */
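
/*
 * For orientation, a minimal usage sketch follows.  This is illustrative
 * only: the my_* names are hypothetical and not part of sd.  The flow is
 * to create the attribute handle once, funnel each buf through
 * ddi_xbuf_qstrategy(), start the real transfer from the xa_strategy
 * callback, and run ddi_xbuf_done() in the completion path just before
 * biodone(9F).
 *
 *	static ddi_xbuf_attr_t my_xattr;
 *
 *	static void
 *	my_iostart(struct buf *bp, ddi_xbuf_t xp, void *arg)
 *	{
 *		// xp is this buf's extension area; issue the transfer
 *	}
 *
 *	// attach(9E): 256 active cmds max, 16 xbufs held in reserve
 *	my_xattr = ddi_xbuf_attr_create(sizeof (struct my_xbuf),
 *	    my_iostart, my_state, 256, 16, my_major, 0);
 *
 *	// strategy(9E):
 *	return (ddi_xbuf_qstrategy(bp, my_xattr));
 *
 *	// completion path (see ddi_xbuf_done() below):
 *	(void) ddi_xbuf_done(bp, my_xattr);
 *	biodone(bp);
 *
 *	// detach(9E):
 *	ddi_xbuf_flushq(my_xattr, NULL);
 *	ddi_xbuf_attr_destroy(my_xattr);
 */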


static int xbuf_iostart(ddi_xbuf_attr_t xap);
static void xbuf_dispatch(ddi_xbuf_attr_t xap);
static void xbuf_restart_callback(void *arg);
static void xbuf_enqueue(struct buf *bp, ddi_xbuf_attr_t xap);
static int xbuf_brk_done(struct buf *bp);


/*
 * Note: Should this be exposed to the caller?  That is, do we want to give
 * the caller the flexibility of specifying the parameters for the thread
 * pool?
 * Note: these values are just estimates at this time, based upon what
 * seems reasonable for the sd driver. It may be preferable to make these
 * parameters self-scaling in a real (future) implementation.
 */
#define	XBUF_TQ_MINALLOC	64
#define	XBUF_TQ_MAXALLOC	512
#define	XBUF_DISPATCH_DELAY	(drv_usectohz(50000))	/* 50 msec */

static taskq_t *xbuf_tq = NULL;
static int xbuf_attr_tq_minalloc = XBUF_TQ_MINALLOC;
static int xbuf_attr_tq_maxalloc = XBUF_TQ_MAXALLOC;

static kmutex_t	xbuf_mutex = { 0 };
static uint32_t	xbuf_refcount = 0;

/*
 * Private wrapper for bufs cloned via ddi_xbuf_qstrategy()
 */
struct xbuf_brk {
	kmutex_t mutex;
	struct buf *bp0;
	uint8_t nbufs;	/* number of bufs allocated */
	uint8_t active; /* number of active xfers */

	size_t brksize;	/* break size used for this buf */
	int brkblk;

	/* xfer position */
	off_t off;
	off_t noff;
	daddr_t blkno;
};

_NOTE(DATA_READABLE_WITHOUT_LOCK(xbuf_brk::off))

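/*
 * Breakup bookkeeping by example (hypothetical numbers): with a 64K
 * xa_brksize and a 160K buf, ddi_xbuf_qstrategy() allocates one xbuf_brk
 * with off = 0, noff = 64K and blkno = bp->b_blkno, and xbuf_iostart()
 * then issues three clones via bioclone(9F):
 *
 *	clone 1: offset   0K, length 64K, blkno = b_blkno
 *	clone 2: offset  64K, length 64K, blkno = b_blkno + brkblk
 *	clone 3: offset 128K, length 32K, blkno = b_blkno + 2 * brkblk
 *
 * off/noff/blkno advance as each clone is issued.  The original buf (bp0)
 * is biodone()'d from xbuf_brk_done() only after the last clone completes.
 */
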
/*
 * Hack needed in the prototype so buf breakup will work.
 * Here we can rely on the sd code not changing the value in
 * b_forw.
 */
#define	b_clone_private b_forw


/* ARGSUSED */
DDII ddi_xbuf_attr_t
ddi_xbuf_attr_create(size_t xsize,
	void (*xa_strategy)(struct buf *bp, ddi_xbuf_t xp, void *attr_arg),
	void *attr_arg, uint32_t active_limit, uint32_t reserve_limit,
	major_t major, int flags)
{
	ddi_xbuf_attr_t	xap;

	xap = kmem_zalloc(sizeof (struct __ddi_xbuf_attr), KM_SLEEP);

	mutex_init(&xap->xa_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&xap->xa_reserve_mutex, NULL, MUTEX_DRIVER, NULL);

	/* Future: Allow the caller to specify alignment requirements? */
	xap->xa_allocsize	= max(xsize, sizeof (void *));
	xap->xa_active_limit	= active_limit;
	xap->xa_active_lowater	= xap->xa_active_limit / 2;
	xap->xa_reserve_limit	= reserve_limit;
	xap->xa_strategy	= xa_strategy;
	xap->xa_attr_arg	= attr_arg;

	mutex_enter(&xbuf_mutex);
	if (xbuf_refcount == 0) {
		ASSERT(xbuf_tq == NULL);
		/*
		 * Note: Would be nice if: (1) #threads in the taskq pool (set
		 * to the value of 'ncpus' at the time the taskq is created)
		 * could adjust automatically with DR; (2) the taskq
		 * minalloc/maxalloc counts could be grown/shrunk on the fly.
		 */
		xbuf_tq = taskq_create("xbuf_taskq", ncpus,
		    (v.v_maxsyspri - 2), xbuf_attr_tq_minalloc,
		    xbuf_attr_tq_maxalloc, TASKQ_PREPOPULATE);
	}
	xbuf_refcount++;
	mutex_exit(&xbuf_mutex);

	/* In this prototype we just always use the global system pool. */
	xap->xa_tq = xbuf_tq;

	return (xap);
}


DDII void
ddi_xbuf_attr_destroy(ddi_xbuf_attr_t xap)
{
	ddi_xbuf_t	xp;

	mutex_destroy(&xap->xa_mutex);
	mutex_destroy(&xap->xa_reserve_mutex);

	/* Free any xbufs on the reserve list */
	while (xap->xa_reserve_count != 0) {
		xp = xap->xa_reserve_headp;
		xap->xa_reserve_headp = *((void **)xp);
		xap->xa_reserve_count--;
		kmem_free(xp, xap->xa_allocsize);
	}
	ASSERT(xap->xa_reserve_headp == NULL);

	mutex_enter(&xbuf_mutex);
	ASSERT((xbuf_refcount != 0) && (xbuf_tq != NULL));
	xbuf_refcount--;
	if (xbuf_refcount == 0) {
		taskq_destroy(xbuf_tq);
		xbuf_tq = NULL;
	}
	mutex_exit(&xbuf_mutex);

	kmem_free(xap, sizeof (struct __ddi_xbuf_attr));
}


/* ARGSUSED */
DDII void
ddi_xbuf_attr_register_devinfo(ddi_xbuf_attr_t xbuf_attr, dev_info_t *dip)
{
	/* Currently a no-op in this prototype */
}


/* ARGSUSED */
DDII void
ddi_xbuf_attr_unregister_devinfo(ddi_xbuf_attr_t xbuf_attr, dev_info_t *dip)
{
	/* Currently a no-op in this prototype */
}

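/*
 * Configure the buf breakup threshold.  The size is rounded down to a
 * DEV_BSIZE multiple; for example (illustrative values), with DEV_BSIZE
 * at 512 a requested size of 0x1fff yields an xa_brksize of 0x1e00, and
 * any size smaller than DEV_BSIZE is rejected, leaving xa_brksize
 * unchanged.
 */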
DDII int
ddi_xbuf_attr_setup_brk(ddi_xbuf_attr_t xap, size_t size)
{
	if (size < DEV_BSIZE)
		return (0);

	mutex_enter(&xap->xa_mutex);
	xap->xa_brksize = size & ~(DEV_BSIZE - 1);
	mutex_exit(&xap->xa_mutex);
	return (1);
}



/*
 * Enqueue the given buf and attempt to initiate IO.
 * Called from the driver strategy(9E) routine.
 */

DDII int
ddi_xbuf_qstrategy(struct buf *bp, ddi_xbuf_attr_t xap)
{
	ASSERT(xap != NULL);
	ASSERT(!mutex_owned(&xap->xa_mutex));
	ASSERT(!mutex_owned(&xap->xa_reserve_mutex));

	mutex_enter(&xap->xa_mutex);

	ASSERT((bp->b_bcount & (DEV_BSIZE - 1)) == 0);

	/*
	 * Break up the buf if necessary. bp->b_private is temporarily
	 * used to save the xbuf_brk pointer.
	 */
	if (xap->xa_brksize && bp->b_bcount > xap->xa_brksize) {
		struct xbuf_brk *brkp;

		brkp = kmem_zalloc(sizeof (struct xbuf_brk), KM_SLEEP);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*brkp))
		mutex_init(&brkp->mutex, NULL, MUTEX_DRIVER, NULL);
		brkp->bp0 = bp;
		brkp->brksize = xap->xa_brksize;
		brkp->brkblk = btodt(xap->xa_brksize);
		brkp->noff = xap->xa_brksize;
		brkp->blkno = bp->b_blkno;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*brkp))
		bp->b_private = brkp;
	} else {
		bp->b_private = NULL;
	}

	/* Enqueue buf */
	if (xap->xa_headp == NULL) {
		xap->xa_headp = xap->xa_tailp = bp;
	} else {
		xap->xa_tailp->av_forw = bp;
		xap->xa_tailp = bp;
	}
	bp->av_forw = NULL;

	xap->xa_pending++;
	mutex_exit(&xap->xa_mutex);
	return (xbuf_iostart(xap));
}


/*
 * Drivers call this immediately before calling biodone(9F), to notify the
 * framework that the indicated xbuf is no longer being used by the driver.
 * May be called from interrupt context.  Returns nonzero when this
 * completion also completes the original request: always for an unbroken
 * buf, but only on the final outstanding segment of a buf that was broken
 * up.  (The biodone(9F) call that follows runs xbuf_brk_done() for a
 * clone, which frees the clone and completes the original buf once all of
 * its segments are in.)
 */
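/*
 * A plausible completion-path sketch (illustrative; my_* names are
 * hypothetical, and this is not necessarily sd's exact code):
 *
 *	if (ddi_xbuf_done(bp, my_xattr)) {
 *		// per-request bookkeeping: reached exactly once per
 *		// original buf, even when it was broken into clones
 *	}
 *	biodone(bp);	// for a clone this runs xbuf_brk_done()
 */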

DDII int
ddi_xbuf_done(struct buf *bp, ddi_xbuf_attr_t xap)
{
	ddi_xbuf_t xp;
	int done;

	ASSERT(bp != NULL);
	ASSERT(xap != NULL);
	ASSERT(!mutex_owned(&xap->xa_mutex));
	ASSERT(!mutex_owned(&xap->xa_reserve_mutex));

	xp = ddi_xbuf_get(bp, xap);

	mutex_enter(&xap->xa_mutex);

#ifdef	SDDEBUG
	if (xap->xa_active_limit != 0) {
		ASSERT(xap->xa_active_count > 0);
	}
#endif
	xap->xa_active_count--;

	if (xap->xa_reserve_limit != 0) {
		mutex_enter(&xap->xa_reserve_mutex);
		if (xap->xa_reserve_count < xap->xa_reserve_limit) {
			/* Put this xbuf onto the reserve list & exit */
			*((void **)xp) = xap->xa_reserve_headp;
			xap->xa_reserve_headp = xp;
			xap->xa_reserve_count++;
			mutex_exit(&xap->xa_reserve_mutex);
			goto done;
		}
		mutex_exit(&xap->xa_reserve_mutex);
	}

	kmem_free(xp, xap->xa_allocsize);	/* return it to the system */

done:
	if (bp->b_iodone == xbuf_brk_done) {
		struct xbuf_brk *brkp = (struct xbuf_brk *)bp->b_clone_private;

		brkp->active--;
		if (brkp->active || xap->xa_headp == brkp->bp0) {
			done = 0;
		} else {
			brkp->off = -1;	/* mark bp0 as completed */
			done = 1;
		}
	} else {
		done = 1;
	}

	if ((xap->xa_active_limit == 0) ||
	    (xap->xa_active_count <= xap->xa_active_lowater)) {
		xbuf_dispatch(xap);
	}

	mutex_exit(&xap->xa_mutex);
	return (done);
}

static int
xbuf_brk_done(struct buf *bp)
{
	struct xbuf_brk *brkp = (struct xbuf_brk *)bp->b_clone_private;
	struct buf *bp0 = brkp->bp0;
	int done;

	mutex_enter(&brkp->mutex);
	if (bp->b_flags & B_ERROR && !(bp0->b_flags & B_ERROR)) {
		bp0->b_flags |= B_ERROR;
		bp0->b_error = bp->b_error;
	}
	if (bp->b_resid)
		bp0->b_resid = bp0->b_bcount;

	freerbuf(bp);
	brkp->nbufs--;

	done = (brkp->off == -1 && brkp->nbufs == 0);
	mutex_exit(&brkp->mutex);

	/* All buf segments done */
	if (done) {
		mutex_destroy(&brkp->mutex);
		kmem_free(brkp, sizeof (struct xbuf_brk));
		biodone(bp0);
	}
	return (0);
}

DDII void
ddi_xbuf_dispatch(ddi_xbuf_attr_t xap)
{
	mutex_enter(&xap->xa_mutex);
	if ((xap->xa_active_limit == 0) ||
	    (xap->xa_active_count <= xap->xa_active_lowater)) {
		xbuf_dispatch(xap);
	}
	mutex_exit(&xap->xa_mutex);
}


/*
 * ISSUE: in this prototype we cannot really implement ddi_xbuf_get()
 * unless we explicitly hide the xbuf pointer somewhere in the buf
 * during allocation, and then rely on the driver never changing it.
 * We can probably get away with using b_private for this for now,
 * though it really is kinda gnarly.
 */

/* ARGSUSED */
DDII ddi_xbuf_t
ddi_xbuf_get(struct buf *bp, ddi_xbuf_attr_t xap)
{
	return (bp->b_private);
}


/*
 * Initiate IOs for bufs on the queue.  Called from kernel thread or taskq
 * thread context. May execute concurrently for the same ddi_xbuf_attr_t.
 */

static int
xbuf_iostart(ddi_xbuf_attr_t xap)
{
	struct buf *bp;
	ddi_xbuf_t xp;

	ASSERT(xap != NULL);
	ASSERT(!mutex_owned(&xap->xa_mutex));
	ASSERT(!mutex_owned(&xap->xa_reserve_mutex));

	/*
	 * For each request on the queue, attempt to allocate the specified
	 * xbuf extension area, and call the driver's iostart() routine.
	 * We process as many requests on the queue as we can, until either
	 * (1) we run out of requests; or
	 * (2) we run out of resources; or
	 * (3) we reach the maximum limit for the given ddi_xbuf_attr_t.
	 */
	for (;;) {
		mutex_enter(&xap->xa_mutex);

		if ((bp = xap->xa_headp) == NULL) {
			break;	/* queue empty */
		}

		if ((xap->xa_active_limit != 0) &&
		    (xap->xa_active_count >= xap->xa_active_limit)) {
			break;	/* allocation limit reached */
		}

		/*
		 * If the reserve_limit is non-zero then work with the
		 * reserve, else always allocate a new struct.
		 */
		if (xap->xa_reserve_limit != 0) {
			/*
			 * Don't penalize EVERY I/O by always allocating a new
			 * struct for the sake of maintaining (and not
			 * touching) a reserve for a pathological condition
			 * that may never happen. Use the reserve entries
			 * first; this uses the reserve like a local pool
			 * rather than a stash that goes untouched. Make sure
			 * it's re-populated whenever it gets fully depleted,
			 * just in case it really is needed. This is safe
			 * because under the pathological condition, when the
			 * system runs out of memory such that the allocs
			 * below fail, the reserve will still be available
			 * whether the entries are saved away on the queue
			 * unused or in-transport somewhere. Thus progress can
			 * still continue, however slowly.
			 */
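			/*
			 * Note: the reserve list is threaded through the
			 * free xbufs themselves: the first pointer-sized
			 * word of each free xbuf holds the next entry,
			 * which is why ddi_xbuf_attr_create() pads
			 * xa_allocsize to at least sizeof (void *).
			 */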
			mutex_enter(&xap->xa_reserve_mutex);
			if (xap->xa_reserve_count != 0) {
				ASSERT(xap->xa_reserve_headp != NULL);
				/* Grab an xbuf from the reserve */
				xp = xap->xa_reserve_headp;
				xap->xa_reserve_headp = *((void **)xp);
				ASSERT(xap->xa_reserve_count > 0);
				xap->xa_reserve_count--;
			} else {
				/*
				 * Either this is the first time through,
				 * or the reserve has been totally depleted.
				 * Re-populate the reserve (pool). Excess
				 * structs get released in the done path.
				 */
				while (xap->xa_reserve_count <
				    xap->xa_reserve_limit) {
					xp = kmem_alloc(xap->xa_allocsize,
					    KM_NOSLEEP);
					if (xp == NULL) {
						break;
					}
					*((void **)xp) = xap->xa_reserve_headp;
					xap->xa_reserve_headp = xp;
					xap->xa_reserve_count++;
				}
				/* And one more to use right now. */
				xp = kmem_alloc(xap->xa_allocsize, KM_NOSLEEP);
			}
			mutex_exit(&xap->xa_reserve_mutex);
		} else {
			/*
			 * Try to alloc a new xbuf struct. If this fails just
			 * exit for now. We'll get back here again either upon
			 * cmd completion or via the timer handler.
			 * Question: what if the allocation attempt for the
			 * very first cmd fails? There are no outstanding cmds
			 * so how do we get back here?
			 * We should look at un_ncmds_in_transport: if it's
			 * zero, then schedule xbuf_restart_callback via the
			 * timer. Although that breaks the architecture by
			 * bringing softstate data into this code.
			 */
			xp = kmem_alloc(xap->xa_allocsize, KM_NOSLEEP);
		}
		if (xp == NULL) {
			break; /* Can't process a cmd right now. */
		}

		/*
		 * Always run the counter. It's used/needed when
		 * xa_active_limit is non-zero, which is the typical
		 * (and right now only) case.
		 */
		xap->xa_active_count++;

		if (bp->b_private) {
			struct xbuf_brk *brkp = bp->b_private;
			struct buf *bp0 = bp;

			brkp->active++;

			mutex_enter(&brkp->mutex);
			brkp->nbufs++;
			mutex_exit(&brkp->mutex);

			if (brkp->noff < bp0->b_bcount) {
				bp = bioclone(bp0, brkp->off, brkp->brksize,
				    bp0->b_edev, brkp->blkno, xbuf_brk_done,
				    NULL, KM_SLEEP);

				/* update xfer position */
				brkp->off = brkp->noff;
				brkp->noff += brkp->brksize;
				brkp->blkno += brkp->brkblk;
			} else {
				bp = bioclone(bp0, brkp->off,
				    bp0->b_bcount - brkp->off, bp0->b_edev,
				    brkp->blkno, xbuf_brk_done, NULL, KM_SLEEP);

				/* unlink the buf from the list */
				xap->xa_headp = bp0->av_forw;
				bp0->av_forw = NULL;
			}
			bp->b_clone_private = (struct buf *)brkp;
		} else {
			/* unlink the buf from the list */
			xap->xa_headp = bp->av_forw;
			bp->av_forw = NULL;
		}

		/*
		 * Hack needed in the prototype so ddi_xbuf_get() will work.
		 * Here we can rely on the sd code not changing the value in
		 * b_private (in fact it wants it there). See ddi_xbuf_get().
		 */
		bp->b_private = xp;

		/* call the driver's iostart routine */
		mutex_exit(&xap->xa_mutex);
		(*(xap->xa_strategy))(bp, xp, xap->xa_attr_arg);
	}

	ASSERT(xap->xa_pending > 0);
	xap->xa_pending--;
	mutex_exit(&xap->xa_mutex);
	return (0);
}


/*
 * Re-start IO processing if there is anything on the queue, AND if the
 * restart function is not already running/pending for this ddi_xbuf_attr_t
 */
static void
xbuf_dispatch(ddi_xbuf_attr_t xap)
{
	ASSERT(xap != NULL);
	ASSERT(xap->xa_tq != NULL);
	ASSERT(mutex_owned(&xap->xa_mutex));

	if ((xap->xa_headp != NULL) && (xap->xa_timeid == NULL) &&
	    (xap->xa_pending == 0)) {
		/*
		 * First try to see if we can dispatch the restart function
		 * immediately, in a taskq thread.  If this fails, then
		 * schedule a timeout(9F) callback to try again later.
		 */
		if (taskq_dispatch(xap->xa_tq,
		    (void (*)(void *)) xbuf_iostart, xap, KM_NOSLEEP) == 0) {
			/*
			 * Unable to enqueue the request for the taskq thread,
			 * try again later.  Note that this will keep re-trying
			 * until taskq_dispatch() succeeds.
			 */
			xap->xa_timeid = timeout(xbuf_restart_callback, xap,
			    XBUF_DISPATCH_DELAY);
		} else {
			/*
			 * This indicates that xbuf_iostart() will soon be
			 * run for this ddi_xbuf_attr_t, and we do not need to
			 * schedule another invocation via timeout/taskq
			 */
			xap->xa_pending++;
		}
	}
}

/* timeout(9F) callback routine for xbuf restart mechanism. */
static void
xbuf_restart_callback(void *arg)
{
	ddi_xbuf_attr_t	xap = arg;

	ASSERT(xap != NULL);
	ASSERT(xap->xa_tq != NULL);
	ASSERT(!mutex_owned(&xap->xa_mutex));

	mutex_enter(&xap->xa_mutex);
	xap->xa_timeid = NULL;
	xbuf_dispatch(xap);
	mutex_exit(&xap->xa_mutex);
}


DDII void
ddi_xbuf_flushq(ddi_xbuf_attr_t xap, int (*funcp)(struct buf *))
{
	struct buf *bp;
	struct buf *next_bp;
	struct buf *prev_bp = NULL;

	ASSERT(xap != NULL);
	ASSERT(xap->xa_tq != NULL);
	ASSERT(!mutex_owned(&xap->xa_mutex));

	mutex_enter(&xap->xa_mutex);

	for (bp = xap->xa_headp; bp != NULL; bp = next_bp) {

		next_bp = bp->av_forw;	/* Save for next iteration */

		/*
		 * If the user-supplied function is non-NULL and returns
		 * FALSE, then just leave the current bp on the queue.
		 */
		if ((funcp != NULL) && (!(*funcp)(bp))) {
			prev_bp = bp;
			continue;
		}

		/* de-queue the bp */
		if (bp == xap->xa_headp) {
			xap->xa_headp = next_bp;
			if (xap->xa_headp == NULL) {
				xap->xa_tailp = NULL;
			}
		} else {
			ASSERT(xap->xa_headp != NULL);
			ASSERT(prev_bp != NULL);
			if (bp == xap->xa_tailp) {
				ASSERT(next_bp == NULL);
				xap->xa_tailp = prev_bp;
			}
			prev_bp->av_forw = next_bp;
		}
		bp->av_forw = NULL;

		/* Add the bp to the flush queue */
		if (xap->xa_flush_headp == NULL) {
			ASSERT(xap->xa_flush_tailp == NULL);
			xap->xa_flush_headp = xap->xa_flush_tailp = bp;
		} else {
			ASSERT(xap->xa_flush_tailp != NULL);
			xap->xa_flush_tailp->av_forw = bp;
			xap->xa_flush_tailp = bp;
		}
	}

	while ((bp = xap->xa_flush_headp) != NULL) {
		xap->xa_flush_headp = bp->av_forw;
		if (xap->xa_flush_headp == NULL) {
			xap->xa_flush_tailp = NULL;
		}
		mutex_exit(&xap->xa_mutex);
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		mutex_enter(&xap->xa_mutex);
	}

	mutex_exit(&xap->xa_mutex);
}
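
/*
 * Flush-queue usage note: passing NULL as funcp errors out every queued
 * buf with EIO.  A filter callback (hypothetical example below; my_dev is
 * illustrative) flushes only the bufs for which it returns nonzero:
 *
 *	static int
 *	my_match_dev(struct buf *bp)
 *	{
 *		return (bp->b_edev == my_dev);
 *	}
 *
 *	ddi_xbuf_flushq(my_xattr, my_match_dev);
 */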