xref: /onnv-gate/usr/src/uts/common/xen/io/xdf.h (revision 9917:4de88cdeac3d)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


285084Sjohnlev #ifndef _SYS_XDF_H
295084Sjohnlev #define	_SYS_XDF_H
305084Sjohnlev 
318863SEdward.Pilatowicz@Sun.COM #include <sys/ddi.h>
328863SEdward.Pilatowicz@Sun.COM #include <sys/sunddi.h>
338863SEdward.Pilatowicz@Sun.COM #include <sys/cmlb.h>
348863SEdward.Pilatowicz@Sun.COM #include <sys/dkio.h>
358863SEdward.Pilatowicz@Sun.COM 
368863SEdward.Pilatowicz@Sun.COM #include <sys/gnttab.h>
378863SEdward.Pilatowicz@Sun.COM #include <xen/sys/xendev.h>
388863SEdward.Pilatowicz@Sun.COM 
395084Sjohnlev #ifdef __cplusplus
405084Sjohnlev extern "C" {
415084Sjohnlev #endif
425084Sjohnlev 
435084Sjohnlev 
/*
 * VBDs have standard 512 byte blocks
 * A single blkif_request can transfer up to 11 pages of data, 1 page/segment
 */
#define	XB_BSIZE	DEV_BSIZE
#define	XB_BMASK	(XB_BSIZE - 1)
/* log2 of the block size; assumes XB_BSIZE (DEV_BSIZE) is 512 as stated above */
#define	XB_BSHIFT	9
/*
 * Convert block number `bn' to a byte offset by multiplying it by the
 * sector size recorded for the device (vdp->xdf_xdev_secsize).  The
 * multiplication is done in the promoted type of the two operands.
 */
#define	XB_DTOB(bn, vdp)	((bn) * (vdp)->xdf_xdev_secsize)

#define	XB_MAX_SEGLEN	(8 * XB_BSIZE)
#define	XB_SEGOFFSET	(XB_MAX_SEGLEN - 1)
#define	XB_MAX_XFER	(XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define	XB_MAXPHYS	(XB_MAX_XFER * BLKIF_RING_SIZE)

/* Number of sectors per segment */
#define	XB_NUM_SECTORS_PER_SEG	(PAGESIZE / XB_BSIZE)
/* sectors are numbered 0 through XB_NUM_SECTORS_PER_SEG - 1 */
#define	XB_LAST_SECTOR_IN_SEG	(XB_NUM_SECTORS_PER_SEG - 1)


/*
 * Slice for absolute disk transaction.
 *
 * Hack Alert.  XB_SLICE_NONE is a magic value that can be written into the
 * b_private field of buf structures passed to xdf_strategy().  When present
 * it indicates that the I/O is using an absolute offset.  (ie, the I/O is
 * not bound to any one partition.)  This magic value is currently used by
 * the pv_cmdk driver.  This hack is shamelessly stolen from the sun4v vdc
 * driver, another virtual disk device driver.  (Although in the case of
 * vdc the hack is less egregious since it is self contained within the
 * vdc driver, whereas here it is used as an interface between the pv_cmdk
 * driver and the xdf driver.)
 */
#define	XB_SLICE_NONE		0xFF

/*
 * blkif status
 *
 * The numeric values are spelled out explicitly; keep them stable.
 */
typedef enum xdf_state {
	/*
	 * initial state
	 */
	XD_UNKNOWN = 0,
	/*
	 * ring and evtchn alloced, xenbus state changed to
	 * XenbusStateInitialised, wait for backend to connect
	 */
	XD_INIT = 1,
	/*
	 * backend and frontend xenbus state has changed to
	 * XenbusStateConnected.  IO is now allowed, but we are still not
	 * fully initialized.
	 */
	XD_CONNECTED = 2,
	/*
	 * We're fully initialized and allowing regular IO.
	 */
	XD_READY = 3,
	/*
	 * vbd interface close request received from backend.  No more I/O
	 * requests are allowed to be put into the ring buffer, while the
	 * interrupt handler is allowed to run to finish any outstanding I/O
	 * request.  The disconnect process is kicked off by changing the
	 * xenbus state to XenbusStateClosed.
	 */
	XD_CLOSING = 4,
	/*
	 * disconnection process finished, both backend and frontend's
	 * xenbus state has been changed to XenbusStateClosed, can be detached
	 */
	XD_CLOSED = 5,
	/*
	 * We're either being suspended or resuming from a suspend.  If we're
	 * in the process of suspending, we block all new IO, but allow
	 * existing IO to drain.
	 */
	XD_SUSPEND = 6
} xdf_state_t;

/*
 * 16 partitions + fdisk
 *
 * Minor number layout: the low XDF_PSHIFT (6) bits select the partition
 * and the remaining high bits select the driver instance.
 *
 *	XDF_PMASK	mask covering the partition bits
 *	XDF_PEXT	number of partition values per instance (1 << 6)
 *	XDF_MINOR(i, m)	build a minor number from instance i and partition m
 *	XDF_INST(m)	extract the instance from minor number m
 *	XDF_PART(m)	extract the partition from minor number m
 */
#define	XDF_PSHIFT	6
#define	XDF_PMASK	((1 << XDF_PSHIFT) - 1)
#define	XDF_PEXT	(1 << XDF_PSHIFT)
#define	XDF_MINOR(i, m) (((i) << XDF_PSHIFT) | (m))
#define	XDF_INST(m)	((m) >> XDF_PSHIFT)
#define	XDF_PART(m)	((m) & XDF_PMASK)

1325084Sjohnlev /*
1335084Sjohnlev  * one blkif_request_t will have one corresponding ge_slot_t
1345084Sjohnlev  * where we save those grant table refs used in this blkif_request_t
1355084Sjohnlev  *
1365084Sjohnlev  * the id of this ge_slot_t will also be put into 'id' field in
1375084Sjohnlev  * each blkif_request_t when sent out to the ring buffer.
1385084Sjohnlev  */
1395084Sjohnlev typedef struct ge_slot {
1408863SEdward.Pilatowicz@Sun.COM 	list_node_t	gs_vreq_link;
1418863SEdward.Pilatowicz@Sun.COM 	struct v_req	*gs_vreq;
1428863SEdward.Pilatowicz@Sun.COM 	domid_t		gs_oeid;
1438863SEdward.Pilatowicz@Sun.COM 	int		gs_isread;
1448863SEdward.Pilatowicz@Sun.COM 	grant_ref_t	gs_ghead;
1458863SEdward.Pilatowicz@Sun.COM 	int		gs_ngrefs;
1468863SEdward.Pilatowicz@Sun.COM 	grant_ref_t	gs_ge[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1475084Sjohnlev } ge_slot_t;
1485084Sjohnlev 
1495084Sjohnlev /*
1505084Sjohnlev  * vbd I/O request
1515084Sjohnlev  *
1525084Sjohnlev  * An instance of this structure is bound to each buf passed to
1535084Sjohnlev  * the driver's strategy by setting the pointer into bp->av_back.
1545084Sjohnlev  * The id of this vreq will also be put into 'id' field in each
1555084Sjohnlev  * blkif_request_t when sent out to the ring buffer for one DMA
1565084Sjohnlev  * window of this buf.
1575084Sjohnlev  *
1585084Sjohnlev  * Vreq mainly contains DMA information for this buf. In one vreq/buf,
1595084Sjohnlev  * there could be more than one DMA window, each of which will be
1605084Sjohnlev  * mapped to one blkif_request_t/ge_slot_t. Ge_slot_t contains all grant
1615084Sjohnlev  * table entry information for this buf. The ge_slot_t for current DMA
1625084Sjohnlev  * window is pointed to by v_gs in vreq.
1635084Sjohnlev  *
1645084Sjohnlev  * So, grant table entries will only be alloc'ed when the DMA window is
1655084Sjohnlev  * about to be transferred via blkif_request_t to the ring buffer. And
1665084Sjohnlev  * they will be freed right after the blkif_response_t is seen. By this
1675084Sjohnlev  * means, we can make use of grant table entries more efficiently.
1685084Sjohnlev  */
1695084Sjohnlev typedef struct v_req {
1705084Sjohnlev 	list_node_t	v_link;
1718863SEdward.Pilatowicz@Sun.COM 	list_t		v_gs;
1725084Sjohnlev 	int		v_status;
1735084Sjohnlev 	buf_t		*v_buf;
1745084Sjohnlev 	uint_t		v_ndmacs;
1755084Sjohnlev 	uint_t		v_dmaw;
1765084Sjohnlev 	uint_t		v_ndmaws;
1775084Sjohnlev 	uint_t		v_nslots;
1785084Sjohnlev 	uint64_t	v_blkno;
1798863SEdward.Pilatowicz@Sun.COM 	ddi_dma_handle_t v_memdmahdl;
1805084Sjohnlev 	ddi_acc_handle_t v_align;
1818863SEdward.Pilatowicz@Sun.COM 	ddi_dma_handle_t v_dmahdl;
1828863SEdward.Pilatowicz@Sun.COM 	ddi_dma_cookie_t v_dmac;
1835084Sjohnlev 	caddr_t		v_abuf;
1845084Sjohnlev 	uint8_t		v_flush_diskcache;
1858863SEdward.Pilatowicz@Sun.COM 	boolean_t	v_runq;
1865084Sjohnlev } v_req_t;
1875084Sjohnlev 
/*
 * Status set and checked in vreq->v_status by vreq_setup()
 *
 * These values will help us to continue the vreq setup work from last failure
 * point, instead of starting from scratch after each failure.
 *
 * The values are consecutive integers (0 through 7), not independent bits:
 * e.g. VREQ_MEMDMAHDL_ALLOCED (0x3) equals
 * VREQ_INIT_DONE | VREQ_DMAHDL_ALLOCED, so they must be compared for
 * equality, never OR-ed together or tested with '&'.
 */
#define	VREQ_INIT		0x0
#define	VREQ_INIT_DONE		0x1
#define	VREQ_DMAHDL_ALLOCED	0x2
#define	VREQ_MEMDMAHDL_ALLOCED	0x3
#define	VREQ_DMAMEM_ALLOCED	0x4
#define	VREQ_DMABUF_BOUND	0x5
#define	VREQ_GS_ALLOCED		0x6
#define	VREQ_DMAWIN_DONE	0x7

2035084Sjohnlev /*
2045084Sjohnlev  * virtual block device per-instance softstate
2055084Sjohnlev  */
2065084Sjohnlev typedef struct xdf {
2075084Sjohnlev 	dev_info_t	*xdf_dip;
2088863SEdward.Pilatowicz@Sun.COM 	char		*xdf_addr;
2096318Sedp 	ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */
2105084Sjohnlev 	domid_t		xdf_peer; /* otherend's dom ID */
2115084Sjohnlev 	xendev_ring_t	*xdf_xb_ring; /* I/O ring buffer */
2125084Sjohnlev 	ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */
2135084Sjohnlev 	list_t		xdf_vreq_act; /* active vreq list */
2145084Sjohnlev 	buf_t		*xdf_f_act; /* active buf list head */
2155084Sjohnlev 	buf_t		*xdf_l_act; /* active buf list tail */
2168863SEdward.Pilatowicz@Sun.COM 	buf_t		*xdf_i_act; /* active buf list index */
2178863SEdward.Pilatowicz@Sun.COM 	xdf_state_t	xdf_state; /* status of this virtual disk */
2188863SEdward.Pilatowicz@Sun.COM 	boolean_t	xdf_suspending;
2195084Sjohnlev 	ulong_t		xdf_vd_open[OTYPCNT];
2205084Sjohnlev 	ulong_t		xdf_vd_lyropen[XDF_PEXT];
2218863SEdward.Pilatowicz@Sun.COM 	ulong_t		xdf_connect_req;
2229471SEdward.Pilatowicz@Sun.COM 	kthread_t	*xdf_connect_thread;
2235084Sjohnlev 	ulong_t		xdf_vd_exclopen;
2246318Sedp 	kmutex_t	xdf_iostat_lk; /* muxes lock for the iostat ptr */
2255084Sjohnlev 	kmutex_t	xdf_dev_lk; /* mutex lock for I/O path */
2265084Sjohnlev 	kmutex_t	xdf_cb_lk; /* mutex lock for event handling path */
2275084Sjohnlev 	kcondvar_t	xdf_dev_cv; /* cv used in I/O path */
2288863SEdward.Pilatowicz@Sun.COM 	uint_t		xdf_dinfo; /* disk info from backend xenstore */
2295084Sjohnlev 	diskaddr_t	xdf_xdev_nblocks; /* total size in block */
2309889SLarry.Liu@Sun.COM 	uint_t		xdf_xdev_secsize; /* disk blksize from backend */
2316318Sedp 	cmlb_geom_t	xdf_pgeom;
2328863SEdward.Pilatowicz@Sun.COM 	boolean_t	xdf_pgeom_set;
2338863SEdward.Pilatowicz@Sun.COM 	boolean_t	xdf_pgeom_fixed;
2345084Sjohnlev 	kstat_t		*xdf_xdev_iostat;
2355084Sjohnlev 	cmlb_handle_t	xdf_vd_lbl;
2365084Sjohnlev 	ddi_softintr_t	xdf_softintr_id;
2375084Sjohnlev 	timeout_id_t	xdf_timeout_id;
2385084Sjohnlev 	struct gnttab_free_callback xdf_gnt_callback;
2398863SEdward.Pilatowicz@Sun.COM 	boolean_t	xdf_feature_barrier;
2408863SEdward.Pilatowicz@Sun.COM 	boolean_t	xdf_flush_supported;
2418863SEdward.Pilatowicz@Sun.COM 	boolean_t	xdf_media_req_supported;
2428863SEdward.Pilatowicz@Sun.COM 	boolean_t	xdf_wce;
2438863SEdward.Pilatowicz@Sun.COM 	boolean_t	xdf_cmbl_reattach;
2445084Sjohnlev 	char		*xdf_flush_mem;
2455084Sjohnlev 	char		*xdf_cache_flush_block;
2465741Smrj 	int		xdf_evtchn;
2478863SEdward.Pilatowicz@Sun.COM 	enum dkio_state	xdf_mstate;
2488863SEdward.Pilatowicz@Sun.COM 	kcondvar_t	xdf_mstate_cv;
2498863SEdward.Pilatowicz@Sun.COM 	kcondvar_t	xdf_hp_status_cv;
2508863SEdward.Pilatowicz@Sun.COM 	struct buf	*xdf_ready_bp;
2518863SEdward.Pilatowicz@Sun.COM 	ddi_taskq_t	*xdf_ready_tq;
2528863SEdward.Pilatowicz@Sun.COM 	kthread_t	*xdf_ready_tq_thread;
2538863SEdward.Pilatowicz@Sun.COM 	struct buf	*xdf_ready_tq_bp;
2545084Sjohnlev #ifdef	DEBUG
2555084Sjohnlev 	int		xdf_dmacallback_num;
2568863SEdward.Pilatowicz@Sun.COM 	kthread_t	*xdf_oe_change_thread;
2575084Sjohnlev #endif
2585084Sjohnlev } xdf_t;
2595084Sjohnlev 
/*
 * VBD I/O requests must be aligned on a 512-byte boundary and specify
 * a transfer size which is a multiple of 512-bytes
 *
 * ALIGNED_XFER(bp) is non-zero when both the buffer address and the byte
 * count of buf `bp' have no bits set under XB_BMASK.
 *
 * U_INVAL(u) is non-zero when the uio offset or the length of the FIRST
 * iovec (u->uio_iov) has bits set under XB_BMASK; any further iovecs are
 * not examined by this macro.
 */
#define	ALIGNED_XFER(bp) \
	((((uintptr_t)((bp)->b_un.b_addr) & XB_BMASK) == 0) && \
	(((bp)->b_bcount & XB_BMASK) == 0))

#define	U_INVAL(u)	(((u)->uio_loffset & (offset_t)(XB_BMASK)) || \
	((u)->uio_iov->iov_len & (offset_t)(XB_BMASK)))

/*
 * wrap pa_to_ma() for xdf to run in dom0
 *
 * When DOMAIN_IS_INITDOMAIN(xen_info) is true the address is used as is,
 * otherwise it is translated with pa_to_ma().  Only one of the two arms
 * is evaluated, so `addr' is evaluated exactly once.
 */
#define	PATOMA(addr)	\
	(DOMAIN_IS_INITDOMAIN(xen_info) ? (addr) : pa_to_ma(addr))

/*
 * Boolean predicates.  XD_IS_RO/CD/RM test the VDISK_* bits in the
 * device's xdf_dinfo word; IS_READ/IS_ERROR test bits in a buf's b_flags.
 * Each result is passed through VOID2BOOLEAN().
 */
#define	XD_IS_RO(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_READONLY)
#define	XD_IS_CD(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_CDROM)
#define	XD_IS_RM(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_REMOVABLE)
#define	IS_READ(bp)	VOID2BOOLEAN((bp)->b_flags & B_READ)
#define	IS_ERROR(bp)	VOID2BOOLEAN((bp)->b_flags & B_ERROR)

/*
 * Account a finished buf in the device's I/O kstat: bump the read or write
 * operation count (chosen by B_READ in b_flags) and add the number of bytes
 * actually transferred, b_bcount - b_resid.
 *
 * (vdp)->xdf_xdev_iostat is dereferenced unconditionally, so the caller
 * must make sure it is non-NULL.
 *
 * The expansion is a bare brace block, not do { } while (0): do not use
 * it as the unbraced body of an `if' that has an `else'.  It also declares
 * the locals `kip' and `n_done', so arguments must not use those names.
 */
#define	XDF_UPDATE_IO_STAT(vdp, bp)					\
	{								\
		kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat);	\
		size_t n_done = (bp)->b_bcount - (bp)->b_resid;		\
		if ((bp)->b_flags & B_READ) {				\
			kip->reads++;					\
			kip->nread += n_done;				\
		} else {						\
			kip->writes++;					\
			kip->nwritten += n_done;			\
		}							\
	}

/*
 * Debug helpers.
 *
 * With DEBUG defined:
 *	DPRINTF(flag, args)	calls prom_printf with the parenthesized
 *				argument list `args' when (xdf_debug & flag)
 *				is non-zero
 *	SETDMACBON(vbd)		increments vbd->xdf_dmacallback_num
 *	SETDMACBOFF(vbd)	decrements vbd->xdf_dmacallback_num
 *	ISDMACBON(vbd)		true while that counter is positive
 *
 * Without DEBUG all four expand to nothing, so ISDMACBON() yields no
 * expression there and may only appear where an empty expansion is
 * acceptable.  The DEBUG forms of the first three are bare brace blocks;
 * avoid using them as the unbraced body of an `if' that has an `else'.
 */
#ifdef DEBUG
#define	DPRINTF(flag, args)	{if (xdf_debug & (flag)) prom_printf args; }
#define	SETDMACBON(vbd)		{(vbd)->xdf_dmacallback_num++; }
#define	SETDMACBOFF(vbd)	{(vbd)->xdf_dmacallback_num--; }
#define	ISDMACBON(vbd)		((vbd)->xdf_dmacallback_num > 0)
#else
#define	DPRINTF(flag, args)
#define	SETDMACBON(vbd)
#define	SETDMACBOFF(vbd)
#define	ISDMACBON(vbd)
#endif /* DEBUG */

/*
 * Debug category bits, tested against xdf_debug by DPRINTF().
 * Each value is a distinct single bit; 0x4 is not assigned.
 */
#define	DDI_DBG		0x1
#define	DMA_DBG		0x2
#define	INTR_DBG	0x8
#define	IO_DBG		0x10
#define	IOCTL_DBG	0x20
#define	SUSRES_DBG	0x40
#define	LBL_DBG		0x80

/*
 * Functions declared only when this header is compiled with
 * XPV_HVM_DRIVER defined.
 */
#if defined(XPV_HVM_DRIVER)
extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
extern void xdfmin(struct buf *bp);
extern dev_info_t *xdf_hvm_hold(const char *);
extern boolean_t xdf_hvm_connect(dev_info_t *);
extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *);
extern int xdf_kstat_create(dev_info_t *, char *, int);
extern void xdf_kstat_delete(dev_info_t *);
extern boolean_t xdf_is_cd(dev_info_t *);
extern boolean_t xdf_is_rm(dev_info_t *);
extern boolean_t xdf_media_req_supported(dev_info_t *);
#endif /* XPV_HVM_DRIVER */

3285084Sjohnlev #ifdef __cplusplus
3295084Sjohnlev }
3305084Sjohnlev #endif
3315084Sjohnlev 
3325084Sjohnlev #endif	/* _SYS_XDF_H */
333