10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5*8551SPeng.L@Sun.COM * Common Development and Distribution License (the "License").
6*8551SPeng.L@Sun.COM * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*8551SPeng.L@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
230Sstevel@tonic-gate * Use is subject to license terms.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
260Sstevel@tonic-gate #include <sys/scsi/scsi.h>
270Sstevel@tonic-gate #include <sys/ddi.h>
280Sstevel@tonic-gate #include <sys/sunddi.h>
290Sstevel@tonic-gate #include <sys/thread.h>
300Sstevel@tonic-gate #include <sys/var.h>
310Sstevel@tonic-gate
320Sstevel@tonic-gate #include "sd_xbuf.h"
330Sstevel@tonic-gate
340Sstevel@tonic-gate /*
350Sstevel@tonic-gate * xbuf.c: buf(9s) extension facility.
360Sstevel@tonic-gate *
370Sstevel@tonic-gate * The buf(9S) extension facility is intended to allow block drivers to
380Sstevel@tonic-gate * allocate additional memory that is associated with a particular buf(9S)
390Sstevel@tonic-gate * struct. It is further intended to help in addressing the usual set of
400Sstevel@tonic-gate * problems associated with such allocations, in particular those involving
410Sstevel@tonic-gate * recovery from allocation failures, especially in code paths that the
420Sstevel@tonic-gate * system relies on to free memory.
430Sstevel@tonic-gate *
440Sstevel@tonic-gate * CAVEAT: Currently this code is completely private to the sd driver and in
450Sstevel@tonic-gate * NO WAY constitutes a public or supported interface of any kind. It is
460Sstevel@tonic-gate * envisioned that this may one day migrate into the Solaris DDI, but until
470Sstevel@tonic-gate * that time this ought to be considered completely unstable and is subject
480Sstevel@tonic-gate * to change without notice. This code may NOT in any way be utilized by
490Sstevel@tonic-gate * ANY code outside the sd driver.
500Sstevel@tonic-gate */
510Sstevel@tonic-gate

/* Forward declarations for the file-private worker routines below. */
static int xbuf_iostart(ddi_xbuf_attr_t xap);
static void xbuf_dispatch(ddi_xbuf_attr_t xap);
static void xbuf_restart_callback(void *arg);
/* NOTE(review): xbuf_enqueue is declared but not defined in this file — confirm. */
static void xbuf_enqueue(struct buf *bp, ddi_xbuf_attr_t xap);
static int xbuf_brk_done(struct buf *bp);


/*
 * Note: Should this be exposed to the caller.... do we want to give the
 * caller the flexibility of specifying the parameters for the thread pool?
 * Note: these values are just estimates at this time, based upon what
 * seems reasonable for the sd driver. It may be preferable to make these
 * parameters self-scaling in a real (future) implementation.
 */
#define	XBUF_TQ_MINALLOC	64
#define	XBUF_TQ_MAXALLOC	512
#define	XBUF_DISPATCH_DELAY	(drv_usectohz(50000))	/* 50 msec */

/* Single taskq shared by every attr handle; created on first attr_create(). */
static taskq_t *xbuf_tq = NULL;
static int xbuf_attr_tq_minalloc = XBUF_TQ_MINALLOC;
static int xbuf_attr_tq_maxalloc = XBUF_TQ_MAXALLOC;

/* Protects xbuf_refcount, and thereby the create/destroy of xbuf_tq. */
static kmutex_t xbuf_mutex = { 0 };
static uint32_t xbuf_refcount = 0;
770Sstevel@tonic-gate
/*
 * Private wrapper for buf cloned via ddi_xbuf_qstrategy().
 *
 * One xbuf_brk is shared by all clone segments generated from a single
 * oversized parent buf (bp0).  Fields 'nbufs' and 'off' are also touched
 * from the clone-completion path; see xbuf_brk_done() and ddi_xbuf_done()
 * for which lock covers which field.
 */
struct xbuf_brk {
	kmutex_t mutex;		/* serializes clone completion accounting */
	struct buf *bp0;	/* the original (parent) buf */
	uint8_t nbufs;		/* number of buf allocated */
	uint8_t active;		/* number of active xfer */

	size_t brksize;		/* break size used for this buf */
	int brkblk;		/* break size in blocks (btodt(brksize)) */

	/* xfer position; off is set to -1 once bp0 is fully issued/completed */
	off_t off;		/* offset of the segment currently in flight */
	off_t noff;		/* offset of the next segment to clone */
	daddr_t blkno;		/* starting block number of the next segment */
};
95*8551SPeng.L@Sun.COM
/* Lint annotation: xbuf_brk::off is intentionally read without its mutex. */
_NOTE(DATA_READABLE_WITHOUT_LOCK(xbuf_brk::off))

/*
 * Hack needed in the prototype so buf breakup will work.
 * Here we can rely on the sd code not changing the value in
 * b_forw.
 */
#define	b_clone_private	b_forw
104*8551SPeng.L@Sun.COM
1050Sstevel@tonic-gate
/*
 * Create and initialize a buf(9S) extension attribute handle.
 *
 * xsize		size of the per-buf extension area (rounded up to at
 *			least sizeof (void *) so a free-list link fits in it)
 * xa_strategy		driver iostart routine invoked by xbuf_iostart()
 * attr_arg		opaque argument passed through to xa_strategy
 * active_limit		max concurrently active xbufs (0 == unlimited)
 * reserve_limit	size of the private reserve pool (0 == no reserve)
 * major, flags		currently unused in this prototype
 *
 * The first create also builds the global shared taskq; subsequent creates
 * just bump the refcount.  Returns the new handle (KM_SLEEP: never NULL).
 */
/* ARGSUSED */
DDII ddi_xbuf_attr_t
ddi_xbuf_attr_create(size_t xsize,
    void (*xa_strategy)(struct buf *bp, ddi_xbuf_t xp, void *attr_arg),
    void *attr_arg, uint32_t active_limit, uint32_t reserve_limit,
    major_t major, int flags)
{
	ddi_xbuf_attr_t xap;

	xap = kmem_zalloc(sizeof (struct __ddi_xbuf_attr), KM_SLEEP);

	mutex_init(&xap->xa_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&xap->xa_reserve_mutex, NULL, MUTEX_DRIVER, NULL);

	/* Future: Allow the caller to specify alignment requirements? */
	xap->xa_allocsize = max(xsize, sizeof (void *));
	xap->xa_active_limit = active_limit;
	xap->xa_active_lowater = xap->xa_active_limit / 2;
	xap->xa_reserve_limit = reserve_limit;
	xap->xa_strategy = xa_strategy;
	xap->xa_attr_arg = attr_arg;

	mutex_enter(&xbuf_mutex);
	if (xbuf_refcount == 0) {
		ASSERT(xbuf_tq == NULL);
		/*
		 * Note: Would be nice if: (1) #threads in the taskq pool (set
		 * to the value of 'ncpus' at the time the taskq is created)
		 * could adjust automatically with DR; (2) the taskq
		 * minalloc/maxalloc counts could be grown/shrunk on the fly.
		 */
		xbuf_tq = taskq_create("xbuf_taskq", ncpus,
		    (v.v_maxsyspri - 2), xbuf_attr_tq_minalloc,
		    xbuf_attr_tq_maxalloc, TASKQ_PREPOPULATE);
	}
	xbuf_refcount++;
	mutex_exit(&xbuf_mutex);

	/* In this prototype we just always use the global system pool. */
	xap->xa_tq = xbuf_tq;

	return (xap);
}
1490Sstevel@tonic-gate
1500Sstevel@tonic-gate
/*
 * Tear down an attribute handle created by ddi_xbuf_attr_create().
 *
 * Caller must guarantee no IO is outstanding and no xbufs remain in use;
 * this routine frees the reserve pool, drops the global taskq reference
 * (destroying the taskq on the last drop), and frees the handle itself.
 */
DDII void
ddi_xbuf_attr_destroy(ddi_xbuf_attr_t xap)
{
	ddi_xbuf_t xp;

	mutex_destroy(&xap->xa_mutex);
	mutex_destroy(&xap->xa_reserve_mutex);

	/* Free any xbufs on the reserve list */
	while (xap->xa_reserve_count != 0) {
		xp = xap->xa_reserve_headp;
		/* First word of each reserve entry links to the next one. */
		xap->xa_reserve_headp = *((void **)xp);
		xap->xa_reserve_count--;
		kmem_free(xp, xap->xa_allocsize);
	}
	ASSERT(xap->xa_reserve_headp == NULL);

	mutex_enter(&xbuf_mutex);
	ASSERT((xbuf_refcount != 0) && (xbuf_tq != NULL));
	xbuf_refcount--;
	if (xbuf_refcount == 0) {
		/* Last handle gone: retire the shared taskq. */
		taskq_destroy(xbuf_tq);
		xbuf_tq = NULL;
	}
	mutex_exit(&xbuf_mutex);

	kmem_free(xap, sizeof (struct __ddi_xbuf_attr));
}
1790Sstevel@tonic-gate
1800Sstevel@tonic-gate
/*
 * Associate a dev_info node with an attribute handle.
 * Currently a no-op in this prototype; kept so the interface is complete.
 */
/* ARGSUSED */
DDII void
ddi_xbuf_attr_register_devinfo(ddi_xbuf_attr_t xbuf_attr, dev_info_t *dip)
{
	/* Currently a no-op in this prototype */
}
1870Sstevel@tonic-gate
1880Sstevel@tonic-gate
/*
 * Dissociate a dev_info node from an attribute handle.
 * Currently a no-op in this prototype; kept so the interface is complete.
 */
/* ARGSUSED */
DDII void
ddi_xbuf_attr_unregister_devinfo(ddi_xbuf_attr_t xbuf_attr, dev_info_t *dip)
{
	/* Currently a no-op in this prototype */
}
1950Sstevel@tonic-gate
196*8551SPeng.L@Sun.COM DDII int
ddi_xbuf_attr_setup_brk(ddi_xbuf_attr_t xap,size_t size)197*8551SPeng.L@Sun.COM ddi_xbuf_attr_setup_brk(ddi_xbuf_attr_t xap, size_t size)
198*8551SPeng.L@Sun.COM {
199*8551SPeng.L@Sun.COM if (size < DEV_BSIZE)
200*8551SPeng.L@Sun.COM return (0);
201*8551SPeng.L@Sun.COM
202*8551SPeng.L@Sun.COM mutex_enter(&xap->xa_mutex);
203*8551SPeng.L@Sun.COM xap->xa_brksize = size & ~(DEV_BSIZE - 1);
204*8551SPeng.L@Sun.COM mutex_exit(&xap->xa_mutex);
205*8551SPeng.L@Sun.COM return (1);
206*8551SPeng.L@Sun.COM }
207*8551SPeng.L@Sun.COM
2080Sstevel@tonic-gate
2090Sstevel@tonic-gate
/*
 * Enqueue the given buf and attempt to initiate IO.
 * Called from the driver strategy(9E) routine.
 *
 * If breakup is enabled (xa_brksize != 0) and the buf exceeds the break
 * size, an xbuf_brk wrapper is allocated and parked in bp->b_private;
 * xbuf_iostart() later consumes it and clones the buf into segments.
 * Otherwise b_private is cleared (xbuf_iostart() keys off it).
 * Returns the value of xbuf_iostart() (currently always 0).
 */

DDII int
ddi_xbuf_qstrategy(struct buf *bp, ddi_xbuf_attr_t xap)
{
	ASSERT(xap != NULL);
	ASSERT(!mutex_owned(&xap->xa_mutex));
	ASSERT(!mutex_owned(&xap->xa_reserve_mutex));

	mutex_enter(&xap->xa_mutex);

	/* Breakup arithmetic below requires whole-block transfer lengths. */
	ASSERT((bp->b_bcount & (DEV_BSIZE - 1)) == 0);

	/*
	 * Breakup buf if necessary. bp->b_private is temporarily
	 * used to save xbuf_brk
	 */
	if (xap->xa_brksize && bp->b_bcount > xap->xa_brksize) {
		struct xbuf_brk *brkp;

		/*
		 * KM_SLEEP while holding xa_mutex: legal for an adaptive
		 * kernel mutex, though it can stall other enqueuers.
		 */
		brkp = kmem_zalloc(sizeof (struct xbuf_brk), KM_SLEEP);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*brkp))
		mutex_init(&brkp->mutex, NULL, MUTEX_DRIVER, NULL);
		brkp->bp0 = bp;
		brkp->brksize = xap->xa_brksize;
		brkp->brkblk = btodt(xap->xa_brksize);
		/* First segment starts at offset 0; noff is the next one. */
		brkp->noff = xap->xa_brksize;
		brkp->blkno = bp->b_blkno;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*brkp))
		bp->b_private = brkp;
	} else {
		bp->b_private = NULL;
	}

	/* Enqueue buf */
	if (xap->xa_headp == NULL) {
		xap->xa_headp = xap->xa_tailp = bp;
	} else {
		xap->xa_tailp->av_forw = bp;
		xap->xa_tailp = bp;
	}
	bp->av_forw = NULL;

	/* Marks an xbuf_iostart() pass as pending; see xbuf_dispatch(). */
	xap->xa_pending++;
	mutex_exit(&xap->xa_mutex);
	return (xbuf_iostart(xap));
}
2600Sstevel@tonic-gate
2610Sstevel@tonic-gate
/*
 * Drivers call this immediately before calling biodone(9F), to notify the
 * framework that the indicated xbuf is no longer being used by the driver.
 * May be called under interrupt context.
 *
 * Recycles the xbuf extension (to the reserve pool when there is room,
 * otherwise back to kmem) and drops the active count.  Returns 1 when the
 * caller should complete the buf, 0 when this is a breakup segment whose
 * siblings are still outstanding (completion then happens in
 * xbuf_brk_done() once the last segment finishes).
 */

DDII int
ddi_xbuf_done(struct buf *bp, ddi_xbuf_attr_t xap)
{
	ddi_xbuf_t xp;
	int done;

	ASSERT(bp != NULL);
	ASSERT(xap != NULL);
	ASSERT(!mutex_owned(&xap->xa_mutex));
	ASSERT(!mutex_owned(&xap->xa_reserve_mutex));

	xp = ddi_xbuf_get(bp, xap);

	mutex_enter(&xap->xa_mutex);

#ifdef SDDEBUG
	if (xap->xa_active_limit != 0) {
		ASSERT(xap->xa_active_count > 0);
	}
#endif
	xap->xa_active_count--;

	if (xap->xa_reserve_limit != 0) {
		mutex_enter(&xap->xa_reserve_mutex);
		if (xap->xa_reserve_count < xap->xa_reserve_limit) {
			/* Put this xbuf onto the reserve list & exit */
			*((void **)xp) = xap->xa_reserve_headp;
			xap->xa_reserve_headp = xp;
			xap->xa_reserve_count++;
			mutex_exit(&xap->xa_reserve_mutex);
			goto done;
		}
		mutex_exit(&xap->xa_reserve_mutex);
	}

	kmem_free(xp, xap->xa_allocsize); /* return it to the system */

done:
	/* b_iodone identifies bufs cloned by the breakup path. */
	if (bp->b_iodone == xbuf_brk_done) {
		struct xbuf_brk *brkp = (struct xbuf_brk *)bp->b_clone_private;

		/*
		 * 'active' is covered by xa_mutex here (not brkp->mutex);
		 * xbuf_iostart() increments it under the same lock.
		 */
		brkp->active--;
		if (brkp->active || xap->xa_headp == brkp->bp0) {
			/* Siblings in flight, or bp0 not fully issued yet. */
			done = 0;
		} else {
			brkp->off = -1; /* mark bp0 as completed */
			done = 1;
		}
	} else {
		done = 1;
	}

	/* Kick the queue if we have drained below the low-water mark. */
	if ((xap->xa_active_limit == 0) ||
	    (xap->xa_active_count <= xap->xa_active_lowater)) {
		xbuf_dispatch(xap);
	}

	mutex_exit(&xap->xa_mutex);
	return (done);
}
328*8551SPeng.L@Sun.COM
/*
 * biodone(9F) callback for a clone segment created by xbuf_iostart().
 *
 * Propagates error/resid state from the segment to the parent buf (bp0),
 * frees the clone, and — once all segments have been issued (off == -1)
 * and completed (nbufs == 0) — frees the xbuf_brk and biodones bp0.
 */
static int
xbuf_brk_done(struct buf *bp)
{
	struct xbuf_brk *brkp = (struct xbuf_brk *)bp->b_clone_private;
	struct buf *bp0 = brkp->bp0;
	int done;

	mutex_enter(&brkp->mutex);
	/* First segment error wins; later segments don't overwrite it. */
	if (bp->b_flags & B_ERROR && !(bp0->b_flags & B_ERROR)) {
		bp0->b_flags |= B_ERROR;
		bp0->b_error = bp->b_error;
	}
	/* Any short segment marks the whole parent transfer as residual. */
	if (bp->b_resid)
		bp0->b_resid = bp0->b_bcount;

	freerbuf(bp);
	brkp->nbufs--;

	/* off == -1 is set by ddi_xbuf_done() when the last xfer retires. */
	done = (brkp->off == -1 && brkp->nbufs == 0);
	mutex_exit(&brkp->mutex);

	/* All buf segments done */
	if (done) {
		mutex_destroy(&brkp->mutex);
		kmem_free(brkp, sizeof (struct xbuf_brk));
		biodone(bp0);
	}
	return (0);
}
3580Sstevel@tonic-gate
3590Sstevel@tonic-gate DDII void
ddi_xbuf_dispatch(ddi_xbuf_attr_t xap)3600Sstevel@tonic-gate ddi_xbuf_dispatch(ddi_xbuf_attr_t xap)
3610Sstevel@tonic-gate {
3620Sstevel@tonic-gate mutex_enter(&xap->xa_mutex);
3630Sstevel@tonic-gate if ((xap->xa_active_limit == 0) ||
3640Sstevel@tonic-gate (xap->xa_active_count <= xap->xa_active_lowater)) {
3650Sstevel@tonic-gate xbuf_dispatch(xap);
3660Sstevel@tonic-gate }
3670Sstevel@tonic-gate mutex_exit(&xap->xa_mutex);
3680Sstevel@tonic-gate }
3690Sstevel@tonic-gate
3700Sstevel@tonic-gate
3710Sstevel@tonic-gate /*
3720Sstevel@tonic-gate * ISSUE: in this prototype we cannot really implement ddi_xbuf_get()
3730Sstevel@tonic-gate * unless we explicitly hide the xbuf pointer somewhere in the buf
3740Sstevel@tonic-gate * during allocation, and then rely on the driver never changing it.
3750Sstevel@tonic-gate * We can probably get away with using b_private for this for now,
3760Sstevel@tonic-gate * tho it really is kinda gnarly.....
3770Sstevel@tonic-gate */
3780Sstevel@tonic-gate
3790Sstevel@tonic-gate /* ARGSUSED */
3800Sstevel@tonic-gate DDII ddi_xbuf_t
ddi_xbuf_get(struct buf * bp,ddi_xbuf_attr_t xap)3810Sstevel@tonic-gate ddi_xbuf_get(struct buf *bp, ddi_xbuf_attr_t xap)
3820Sstevel@tonic-gate {
3830Sstevel@tonic-gate return (bp->b_private);
3840Sstevel@tonic-gate }
3850Sstevel@tonic-gate
3860Sstevel@tonic-gate
/*
 * Initiate IOs for bufs on the queue. Called from kernel thread or taskq
 * thread context. May execute concurrently for the same ddi_xbuf_attr_t.
 *
 * For each queued buf: allocate an xbuf extension (from the reserve pool
 * when one is configured, else straight from kmem), stash it in
 * bp->b_private, and hand the buf to the driver's xa_strategy routine.
 * Bufs flagged for breakup (non-NULL b_private on entry) are cloned into
 * brksize segments; the parent stays at the queue head until its last
 * segment has been issued.  Always returns 0.
 */

static int
xbuf_iostart(ddi_xbuf_attr_t xap)
{
	struct buf *bp;
	ddi_xbuf_t xp;

	ASSERT(xap != NULL);
	ASSERT(!mutex_owned(&xap->xa_mutex));
	ASSERT(!mutex_owned(&xap->xa_reserve_mutex));

	/*
	 * For each request on the queue, attempt to allocate the specified
	 * xbuf extension area, and call the driver's iostart() routine.
	 * We process as many requests on the queue as we can, until either
	 * (1) we run out of requests; or
	 * (2) we run out of resources; or
	 * (3) we reach the maximum limit for the given ddi_xbuf_attr_t.
	 */
	for (;;) {
		mutex_enter(&xap->xa_mutex);

		/* Note: every break below exits with xa_mutex still held. */
		if ((bp = xap->xa_headp) == NULL) {
			break; /* queue empty */
		}

		if ((xap->xa_active_limit != 0) &&
		    (xap->xa_active_count >= xap->xa_active_limit)) {
			break; /* allocation limit reached */
		}

		/*
		 * If the reserve_limit is non-zero then work with the
		 * reserve else always allocate a new struct.
		 */
		if (xap->xa_reserve_limit != 0) {
			/*
			 * Don't penalize EVERY I/O by always allocating a new
			 * struct. for the sake of maintaining and not touching
			 * a reserve for a pathalogical condition that may never
			 * happen. Use the reserve entries first, this uses it
			 * like a local pool rather than a reserve that goes
			 * untouched. Make sure it's re-populated whenever it
			 * gets fully depleted just in case it really is needed.
			 * This is safe because under the pathalogical
			 * condition, when the system runs out of memory such
			 * that the below allocs fail, the reserve will still
			 * be available whether the entries are saved away on
			 * the queue unused or in-transport somewhere. Thus
			 * progress can still continue, however slowly.
			 */
			mutex_enter(&xap->xa_reserve_mutex);
			if (xap->xa_reserve_count != 0) {
				ASSERT(xap->xa_reserve_headp != NULL);
				/* Grab an xbuf from the reserve */
				xp = xap->xa_reserve_headp;
				xap->xa_reserve_headp = *((void **)xp);
				ASSERT(xap->xa_reserve_count > 0);
				xap->xa_reserve_count--;
			} else {
				/*
				 * Either this is the first time through,
				 * or the reserve has been totally depleted.
				 * Re-populate the reserve (pool). Excess
				 * structs. get released in the done path.
				 */
				while (xap->xa_reserve_count <
				    xap->xa_reserve_limit) {
					xp = kmem_alloc(xap->xa_allocsize,
					    KM_NOSLEEP);
					if (xp == NULL) {
						break;
					}
					*((void **)xp) = xap->xa_reserve_headp;
					xap->xa_reserve_headp = xp;
					xap->xa_reserve_count++;
				}
				/* And one more to use right now. */
				xp = kmem_alloc(xap->xa_allocsize, KM_NOSLEEP);
			}
			mutex_exit(&xap->xa_reserve_mutex);
		} else {
			/*
			 * Try to alloc a new xbuf struct. If this fails just
			 * exit for now. We'll get back here again either upon
			 * cmd completion or via the timer handler.
			 * Question: what if the allocation attempt for the very
			 * first cmd. fails? There are no outstanding cmds so
			 * how do we get back here?
			 * Should look at un_ncmds_in_transport, if it's zero
			 * then schedule xbuf_restart_callback via the timer.
			 * Athough that breaks the architecture by bringing
			 * softstate data into this code.
			 */
			xp = kmem_alloc(xap->xa_allocsize, KM_NOSLEEP);
		}
		if (xp == NULL) {
			break; /* Can't process a cmd. right now. */
		}

		/*
		 * Always run the counter. It's used/needed when xa_active_limit
		 * is non-zero which is the typical (and right now only) case.
		 */
		xap->xa_active_count++;

		if (bp->b_private) {
			/* Breakup path: b_private holds the xbuf_brk. */
			struct xbuf_brk *brkp = bp->b_private;
			struct buf *bp0 = bp;

			/* 'active' is covered by xa_mutex, not brkp->mutex. */
			brkp->active++;

			mutex_enter(&brkp->mutex);
			brkp->nbufs++;
			mutex_exit(&brkp->mutex);

			if (brkp->noff < bp0->b_bcount) {
				/* Full-size middle segment; bp0 stays queued. */
				bp = bioclone(bp0, brkp->off, brkp->brksize,
				    bp0->b_edev, brkp->blkno, xbuf_brk_done,
				    NULL, KM_SLEEP);

				/* update xfer position */
				brkp->off = brkp->noff;
				brkp->noff += brkp->brksize;
				brkp->blkno += brkp->brkblk;
			} else {
				/* Final (possibly short) segment. */
				bp = bioclone(bp0, brkp->off,
				    bp0->b_bcount - brkp->off, bp0->b_edev,
				    brkp->blkno, xbuf_brk_done, NULL, KM_SLEEP);

				/* unlink the buf from the list */
				xap->xa_headp = bp0->av_forw;
				bp0->av_forw = NULL;
			}
			/* b_clone_private (b_forw) carries brkp to done path. */
			bp->b_clone_private = (struct buf *)brkp;
		} else {
			/* unlink the buf from the list */
			xap->xa_headp = bp->av_forw;
			bp->av_forw = NULL;
		}

		/*
		 * Hack needed in the prototype so ddi_xbuf_get() will work.
		 * Here we can rely on the sd code not changing the value in
		 * b_private (in fact it wants it there). See ddi_get_xbuf()
		 */
		bp->b_private = xp;

		/* call the driver's iostart routine */
		mutex_exit(&xap->xa_mutex);
		(*(xap->xa_strategy))(bp, xp, xap->xa_attr_arg);
	}

	/* Reached via break: xa_mutex is held here.  Retire our "pending". */
	ASSERT(xap->xa_pending > 0);
	xap->xa_pending--;
	mutex_exit(&xap->xa_mutex);
	return (0);
}
5490Sstevel@tonic-gate
5500Sstevel@tonic-gate
/*
 * Re-start IO processing if there is anything on the queue, AND if the
 * restart function is not already running/pending for this ddi_xbuf_attr_t
 * (xa_pending counts scheduled/running xbuf_iostart() passes, xa_timeid a
 * scheduled timeout retry).  Caller must hold xa_mutex.
 */
static void
xbuf_dispatch(ddi_xbuf_attr_t xap)
{
	ASSERT(xap != NULL);
	ASSERT(xap->xa_tq != NULL);
	ASSERT(mutex_owned(&xap->xa_mutex));

	if ((xap->xa_headp != NULL) && (xap->xa_timeid == NULL) &&
	    (xap->xa_pending == 0)) {
		/*
		 * First try to see if we can dispatch the restart function
		 * immediately, in a taskq thread. If this fails, then
		 * schedule a timeout(9F) callback to try again later.
		 */
		if (taskq_dispatch(xap->xa_tq,
		    (void (*)(void *)) xbuf_iostart, xap, KM_NOSLEEP) == 0) {
			/*
			 * Unable to enqueue the request for the taskq thread,
			 * try again later. Note that this will keep re-trying
			 * until taskq_dispatch() succeeds.
			 */
			xap->xa_timeid = timeout(xbuf_restart_callback, xap,
			    XBUF_DISPATCH_DELAY);
		} else {
			/*
			 * This indicates that xbuf_iostart() will soon be
			 * run for this ddi_xbuf_attr_t, and we do not need to
			 * schedule another invocation via timeout/taskq
			 */
			xap->xa_pending++;
		}
	}
}
5880Sstevel@tonic-gate
5890Sstevel@tonic-gate /* timeout(9F) callback routine for xbuf restart mechanism. */
5900Sstevel@tonic-gate static void
xbuf_restart_callback(void * arg)5910Sstevel@tonic-gate xbuf_restart_callback(void *arg)
5920Sstevel@tonic-gate {
5930Sstevel@tonic-gate ddi_xbuf_attr_t xap = arg;
5940Sstevel@tonic-gate
5950Sstevel@tonic-gate ASSERT(xap != NULL);
5960Sstevel@tonic-gate ASSERT(xap->xa_tq != NULL);
5970Sstevel@tonic-gate ASSERT(!mutex_owned(&xap->xa_mutex));
5980Sstevel@tonic-gate
5990Sstevel@tonic-gate mutex_enter(&xap->xa_mutex);
6000Sstevel@tonic-gate xap->xa_timeid = NULL;
6010Sstevel@tonic-gate xbuf_dispatch(xap);
6020Sstevel@tonic-gate mutex_exit(&xap->xa_mutex);
6030Sstevel@tonic-gate }
6040Sstevel@tonic-gate
6050Sstevel@tonic-gate
/*
 * Flush (error out) queued bufs that have not yet been issued.
 *
 * funcp, if non-NULL, is a per-buf filter: a FALSE return leaves that buf
 * on the queue.  Every flushed buf is moved to a private flush list, then
 * completed with EIO and full b_resid via biodone(9F) with xa_mutex
 * dropped (biodone may re-enter the framework).
 */
DDII void
ddi_xbuf_flushq(ddi_xbuf_attr_t xap, int (*funcp)(struct buf *))
{
	struct buf *bp;
	struct buf *next_bp;
	struct buf *prev_bp = NULL;

	ASSERT(xap != NULL);
	ASSERT(xap->xa_tq != NULL);
	ASSERT(!mutex_owned(&xap->xa_mutex));

	mutex_enter(&xap->xa_mutex);

	/* Pass 1: unlink matching bufs onto the flush list. */
	for (bp = xap->xa_headp; bp != NULL; bp = next_bp) {

		next_bp = bp->av_forw; /* Save for next iteration */

		/*
		 * If the user-supplied function is non-NULL and returns
		 * FALSE, then just leave the current bp on the queue.
		 */
		if ((funcp != NULL) && (!(*funcp)(bp))) {
			prev_bp = bp;
			continue;
		}

		/* de-queue the bp */
		if (bp == xap->xa_headp) {
			/* Removing the head; prev_bp is unchanged/NULL. */
			xap->xa_headp = next_bp;
			if (xap->xa_headp == NULL) {
				xap->xa_tailp = NULL;
			}
		} else {
			ASSERT(xap->xa_headp != NULL);
			ASSERT(prev_bp != NULL);
			if (bp == xap->xa_tailp) {
				ASSERT(next_bp == NULL);
				xap->xa_tailp = prev_bp;
			}
			prev_bp->av_forw = next_bp;
		}
		bp->av_forw = NULL;

		/* Add the bp to the flush queue */
		if (xap->xa_flush_headp == NULL) {
			ASSERT(xap->xa_flush_tailp == NULL);
			xap->xa_flush_headp = xap->xa_flush_tailp = bp;
		} else {
			ASSERT(xap->xa_flush_tailp != NULL);
			xap->xa_flush_tailp->av_forw = bp;
			xap->xa_flush_tailp = bp;
		}
	}

	/* Pass 2: error out each flushed buf, dropping the lock per biodone. */
	while ((bp = xap->xa_flush_headp) != NULL) {
		xap->xa_flush_headp = bp->av_forw;
		if (xap->xa_flush_headp == NULL) {
			xap->xa_flush_tailp = NULL;
		}
		mutex_exit(&xap->xa_mutex);
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		mutex_enter(&xap->xa_mutex);
	}

	mutex_exit(&xap->xa_mutex);
}
674