15084Sjohnlev /* 25084Sjohnlev * CDDL HEADER START 35084Sjohnlev * 45084Sjohnlev * The contents of this file are subject to the terms of the 55084Sjohnlev * Common Development and Distribution License (the "License"). 65084Sjohnlev * You may not use this file except in compliance with the License. 75084Sjohnlev * 85084Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 95084Sjohnlev * or http://www.opensolaris.org/os/licensing. 105084Sjohnlev * See the License for the specific language governing permissions 115084Sjohnlev * and limitations under the License. 125084Sjohnlev * 135084Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each 145084Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 155084Sjohnlev * If applicable, add the following below this CDDL HEADER, with the 165084Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying 175084Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner] 185084Sjohnlev * 195084Sjohnlev * CDDL HEADER END 205084Sjohnlev */ 215084Sjohnlev 225084Sjohnlev /* 238863SEdward.Pilatowicz@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 245084Sjohnlev * Use is subject to license terms. 255084Sjohnlev */ 265084Sjohnlev 275084Sjohnlev 285084Sjohnlev #ifndef _SYS_XDF_H 295084Sjohnlev #define _SYS_XDF_H 305084Sjohnlev 318863SEdward.Pilatowicz@Sun.COM #include <sys/ddi.h> 328863SEdward.Pilatowicz@Sun.COM #include <sys/sunddi.h> 338863SEdward.Pilatowicz@Sun.COM #include <sys/cmlb.h> 348863SEdward.Pilatowicz@Sun.COM #include <sys/dkio.h> 358863SEdward.Pilatowicz@Sun.COM 368863SEdward.Pilatowicz@Sun.COM #include <sys/gnttab.h> 378863SEdward.Pilatowicz@Sun.COM #include <xen/sys/xendev.h> 388863SEdward.Pilatowicz@Sun.COM 395084Sjohnlev #ifdef __cplusplus 405084Sjohnlev extern "C" { 415084Sjohnlev #endif 425084Sjohnlev 435084Sjohnlev 445084Sjohnlev /* 455084Sjohnlev * VBDs have standard 512 byte blocks 465084Sjohnlev * A single blkif_request can transfer up to 11 pages of data, 1 page/segment 475084Sjohnlev */ 485084Sjohnlev #define XB_BSIZE DEV_BSIZE 495084Sjohnlev #define XB_BMASK (XB_BSIZE - 1) 505084Sjohnlev #define XB_BSHIFT 9 519889SLarry.Liu@Sun.COM #define XB_DTOB(bn, vdp) ((bn) * (vdp)->xdf_xdev_secsize) 525084Sjohnlev 535084Sjohnlev #define XB_MAX_SEGLEN (8 * XB_BSIZE) 545084Sjohnlev #define XB_SEGOFFSET (XB_MAX_SEGLEN - 1) 555084Sjohnlev #define XB_MAX_XFER (XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST) 565084Sjohnlev #define XB_MAXPHYS (XB_MAX_XFER * BLKIF_RING_SIZE) 575084Sjohnlev 58*9917SMark.Johnson@Sun.COM /* Number of sectors per segement */ 59*9917SMark.Johnson@Sun.COM #define XB_NUM_SECTORS_PER_SEG (PAGESIZE / XB_BSIZE) 60*9917SMark.Johnson@Sun.COM /* sectors are number 0 through XB_NUM_SECTORS_PER_SEG - 1 */ 61*9917SMark.Johnson@Sun.COM #define XB_LAST_SECTOR_IN_SEG (XB_NUM_SECTORS_PER_SEG - 1) 62*9917SMark.Johnson@Sun.COM 636318Sedp 646318Sedp /* 656318Sedp * Slice for absolute disk transaction. 666318Sedp * 676318Sedp * Hack Alert. XB_SLICE_NONE is a magic value that can be written into the 686318Sedp * b_private field of buf structures passed to xdf_strategy(). When present 696318Sedp * it indicates that the I/O is using an absolute offset. (ie, the I/O is 706318Sedp * not bound to any one partition.) This magic value is currently used by 716318Sedp * the pv_cmdk driver. This hack is shamelessly stolen from the sun4v vdc 726318Sedp * driver, another virtual disk device driver. (Although in the case of 736318Sedp * vdc the hack is less egregious since it is self contained within the 746318Sedp * vdc driver, where as here it is used as an interface between the pv_cmdk 756318Sedp * driver and the xdf driver.) 766318Sedp */ 778863SEdward.Pilatowicz@Sun.COM #define XB_SLICE_NONE 0xFF 786318Sedp 795084Sjohnlev /* 805084Sjohnlev * blkif status 815084Sjohnlev */ 828863SEdward.Pilatowicz@Sun.COM typedef enum xdf_state { 835084Sjohnlev /* 845084Sjohnlev * initial state 855084Sjohnlev */ 868863SEdward.Pilatowicz@Sun.COM XD_UNKNOWN = 0, 875084Sjohnlev /* 885084Sjohnlev * ring and evtchn alloced, xenbus state changed to 895084Sjohnlev * XenbusStateInitialised, wait for backend to connect 905084Sjohnlev */ 918863SEdward.Pilatowicz@Sun.COM XD_INIT = 1, 925084Sjohnlev /* 938863SEdward.Pilatowicz@Sun.COM * backend and frontend xenbus state has changed to 948863SEdward.Pilatowicz@Sun.COM * XenbusStateConnected. IO is now allowed, but we are not still 958863SEdward.Pilatowicz@Sun.COM * fully initialized. 965084Sjohnlev */ 978863SEdward.Pilatowicz@Sun.COM XD_CONNECTED = 2, 988863SEdward.Pilatowicz@Sun.COM /* 998863SEdward.Pilatowicz@Sun.COM * We're fully initialized and allowing regular IO. 1008863SEdward.Pilatowicz@Sun.COM */ 1018863SEdward.Pilatowicz@Sun.COM XD_READY = 3, 1025084Sjohnlev /* 1035084Sjohnlev * vbd interface close request received from backend, no more I/O 1045084Sjohnlev * requestis allowed to be put into ring buffer, while interrupt handler 1055084Sjohnlev * is allowed to run to finish any outstanding I/O request, disconnect 1065084Sjohnlev * process is kicked off by changing xenbus state to XenbusStateClosed 1075084Sjohnlev */ 1088863SEdward.Pilatowicz@Sun.COM XD_CLOSING = 4, 1095084Sjohnlev /* 1105084Sjohnlev * disconnection process finished, both backend and frontend's 1115084Sjohnlev * xenbus state has been changed to XenbusStateClosed, can be detached 1125084Sjohnlev */ 1138863SEdward.Pilatowicz@Sun.COM XD_CLOSED = 5, 1145084Sjohnlev /* 1158863SEdward.Pilatowicz@Sun.COM * We're either being suspended or resuming from a suspend. If we're 1168863SEdward.Pilatowicz@Sun.COM * in the process of suspending, we block all new IO, but but allow 1178863SEdward.Pilatowicz@Sun.COM * existing IO to drain. 1185084Sjohnlev */ 1198863SEdward.Pilatowicz@Sun.COM XD_SUSPEND = 6 1208863SEdward.Pilatowicz@Sun.COM } xdf_state_t; 1215084Sjohnlev 1225084Sjohnlev /* 1235741Smrj * 16 partitions + fdisk 1245084Sjohnlev */ 1255084Sjohnlev #define XDF_PSHIFT 6 1265084Sjohnlev #define XDF_PMASK ((1 << XDF_PSHIFT) - 1) 1275084Sjohnlev #define XDF_PEXT (1 << XDF_PSHIFT) 1285084Sjohnlev #define XDF_MINOR(i, m) (((i) << XDF_PSHIFT) | (m)) 1295084Sjohnlev #define XDF_INST(m) ((m) >> XDF_PSHIFT) 1305084Sjohnlev #define XDF_PART(m) ((m) & XDF_PMASK) 1315084Sjohnlev 1325084Sjohnlev /* 1335084Sjohnlev * one blkif_request_t will have one corresponding ge_slot_t 1345084Sjohnlev * where we save those grant table refs used in this blkif_request_t 1355084Sjohnlev * 1365084Sjohnlev * the id of this ge_slot_t will also be put into 'id' field in 1375084Sjohnlev * each blkif_request_t when sent out to the ring buffer. 1385084Sjohnlev */ 1395084Sjohnlev typedef struct ge_slot { 1408863SEdward.Pilatowicz@Sun.COM list_node_t gs_vreq_link; 1418863SEdward.Pilatowicz@Sun.COM struct v_req *gs_vreq; 1428863SEdward.Pilatowicz@Sun.COM domid_t gs_oeid; 1438863SEdward.Pilatowicz@Sun.COM int gs_isread; 1448863SEdward.Pilatowicz@Sun.COM grant_ref_t gs_ghead; 1458863SEdward.Pilatowicz@Sun.COM int gs_ngrefs; 1468863SEdward.Pilatowicz@Sun.COM grant_ref_t gs_ge[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 1475084Sjohnlev } ge_slot_t; 1485084Sjohnlev 1495084Sjohnlev /* 1505084Sjohnlev * vbd I/O request 1515084Sjohnlev * 1525084Sjohnlev * An instance of this structure is bound to each buf passed to 1535084Sjohnlev * the driver's strategy by setting the pointer into bp->av_back. 1545084Sjohnlev * The id of this vreq will also be put into 'id' field in each 1555084Sjohnlev * blkif_request_t when sent out to the ring buffer for one DMA 1565084Sjohnlev * window of this buf. 1575084Sjohnlev * 1585084Sjohnlev * Vreq mainly contains DMA information for this buf. In one vreq/buf, 1595084Sjohnlev * there could be more than one DMA window, each of which will be 1605084Sjohnlev * mapped to one blkif_request_t/ge_slot_t. Ge_slot_t contains all grant 1615084Sjohnlev * table entry information for this buf. The ge_slot_t for current DMA 1625084Sjohnlev * window is pointed to by v_gs in vreq. 1635084Sjohnlev * 1645084Sjohnlev * So, grant table entries will only be alloc'ed when the DMA window is 1655084Sjohnlev * about to be transferred via blkif_request_t to the ring buffer. And 1665084Sjohnlev * they will be freed right after the blkif_response_t is seen. By this 1675084Sjohnlev * means, we can make use of grant table entries more efficiently. 1685084Sjohnlev */ 1695084Sjohnlev typedef struct v_req { 1705084Sjohnlev list_node_t v_link; 1718863SEdward.Pilatowicz@Sun.COM list_t v_gs; 1725084Sjohnlev int v_status; 1735084Sjohnlev buf_t *v_buf; 1745084Sjohnlev uint_t v_ndmacs; 1755084Sjohnlev uint_t v_dmaw; 1765084Sjohnlev uint_t v_ndmaws; 1775084Sjohnlev uint_t v_nslots; 1785084Sjohnlev uint64_t v_blkno; 1798863SEdward.Pilatowicz@Sun.COM ddi_dma_handle_t v_memdmahdl; 1805084Sjohnlev ddi_acc_handle_t v_align; 1818863SEdward.Pilatowicz@Sun.COM ddi_dma_handle_t v_dmahdl; 1828863SEdward.Pilatowicz@Sun.COM ddi_dma_cookie_t v_dmac; 1835084Sjohnlev caddr_t v_abuf; 1845084Sjohnlev uint8_t v_flush_diskcache; 1858863SEdward.Pilatowicz@Sun.COM boolean_t v_runq; 1865084Sjohnlev } v_req_t; 1875084Sjohnlev 1885084Sjohnlev /* 1895084Sjohnlev * Status set and checked in vreq->v_status by vreq_setup() 1905084Sjohnlev * 1915084Sjohnlev * These flags will help us to continue the vreq setup work from last failure 1925741Smrj * point, instead of starting from scratch after each failure. 1935084Sjohnlev */ 1945084Sjohnlev #define VREQ_INIT 0x0 1955084Sjohnlev #define VREQ_INIT_DONE 0x1 1965084Sjohnlev #define VREQ_DMAHDL_ALLOCED 0x2 1975084Sjohnlev #define VREQ_MEMDMAHDL_ALLOCED 0x3 1985084Sjohnlev #define VREQ_DMAMEM_ALLOCED 0x4 1995084Sjohnlev #define VREQ_DMABUF_BOUND 0x5 2005084Sjohnlev #define VREQ_GS_ALLOCED 0x6 2015084Sjohnlev #define VREQ_DMAWIN_DONE 0x7 2025084Sjohnlev 2035084Sjohnlev /* 2045084Sjohnlev * virtual block device per-instance softstate 2055084Sjohnlev */ 2065084Sjohnlev typedef struct xdf { 2075084Sjohnlev dev_info_t *xdf_dip; 2088863SEdward.Pilatowicz@Sun.COM char *xdf_addr; 2096318Sedp ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */ 2105084Sjohnlev domid_t xdf_peer; /* otherend's dom ID */ 2115084Sjohnlev xendev_ring_t *xdf_xb_ring; /* I/O ring buffer */ 2125084Sjohnlev ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */ 2135084Sjohnlev list_t xdf_vreq_act; /* active vreq list */ 2145084Sjohnlev buf_t *xdf_f_act; /* active buf list head */ 2155084Sjohnlev buf_t *xdf_l_act; /* active buf list tail */ 2168863SEdward.Pilatowicz@Sun.COM buf_t *xdf_i_act; /* active buf list index */ 2178863SEdward.Pilatowicz@Sun.COM xdf_state_t xdf_state; /* status of this virtual disk */ 2188863SEdward.Pilatowicz@Sun.COM boolean_t xdf_suspending; 2195084Sjohnlev ulong_t xdf_vd_open[OTYPCNT]; 2205084Sjohnlev ulong_t xdf_vd_lyropen[XDF_PEXT]; 2218863SEdward.Pilatowicz@Sun.COM ulong_t xdf_connect_req; 2229471SEdward.Pilatowicz@Sun.COM kthread_t *xdf_connect_thread; 2235084Sjohnlev ulong_t xdf_vd_exclopen; 2246318Sedp kmutex_t xdf_iostat_lk; /* muxes lock for the iostat ptr */ 2255084Sjohnlev kmutex_t xdf_dev_lk; /* mutex lock for I/O path */ 2265084Sjohnlev kmutex_t xdf_cb_lk; /* mutex lock for event handling path */ 2275084Sjohnlev kcondvar_t xdf_dev_cv; /* cv used in I/O path */ 2288863SEdward.Pilatowicz@Sun.COM uint_t xdf_dinfo; /* disk info from backend xenstore */ 2295084Sjohnlev diskaddr_t xdf_xdev_nblocks; /* total size in block */ 2309889SLarry.Liu@Sun.COM uint_t xdf_xdev_secsize; /* disk blksize from backend */ 2316318Sedp cmlb_geom_t xdf_pgeom; 2328863SEdward.Pilatowicz@Sun.COM boolean_t xdf_pgeom_set; 2338863SEdward.Pilatowicz@Sun.COM boolean_t xdf_pgeom_fixed; 2345084Sjohnlev kstat_t *xdf_xdev_iostat; 2355084Sjohnlev cmlb_handle_t xdf_vd_lbl; 2365084Sjohnlev ddi_softintr_t xdf_softintr_id; 2375084Sjohnlev timeout_id_t xdf_timeout_id; 2385084Sjohnlev struct gnttab_free_callback xdf_gnt_callback; 2398863SEdward.Pilatowicz@Sun.COM boolean_t xdf_feature_barrier; 2408863SEdward.Pilatowicz@Sun.COM boolean_t xdf_flush_supported; 2418863SEdward.Pilatowicz@Sun.COM boolean_t xdf_media_req_supported; 2428863SEdward.Pilatowicz@Sun.COM boolean_t xdf_wce; 2438863SEdward.Pilatowicz@Sun.COM boolean_t xdf_cmbl_reattach; 2445084Sjohnlev char *xdf_flush_mem; 2455084Sjohnlev char *xdf_cache_flush_block; 2465741Smrj int xdf_evtchn; 2478863SEdward.Pilatowicz@Sun.COM enum dkio_state xdf_mstate; 2488863SEdward.Pilatowicz@Sun.COM kcondvar_t xdf_mstate_cv; 2498863SEdward.Pilatowicz@Sun.COM kcondvar_t xdf_hp_status_cv; 2508863SEdward.Pilatowicz@Sun.COM struct buf *xdf_ready_bp; 2518863SEdward.Pilatowicz@Sun.COM ddi_taskq_t *xdf_ready_tq; 2528863SEdward.Pilatowicz@Sun.COM kthread_t *xdf_ready_tq_thread; 2538863SEdward.Pilatowicz@Sun.COM struct buf *xdf_ready_tq_bp; 2545084Sjohnlev #ifdef DEBUG 2555084Sjohnlev int xdf_dmacallback_num; 2568863SEdward.Pilatowicz@Sun.COM kthread_t *xdf_oe_change_thread; 2575084Sjohnlev #endif 2585084Sjohnlev } xdf_t; 2595084Sjohnlev 2605084Sjohnlev /* 2615084Sjohnlev * VBD I/O requests must be aligned on a 512-byte boundary and specify 2625084Sjohnlev * a transfer size which is a mutiple of 512-bytes 2635084Sjohnlev */ 2645084Sjohnlev #define ALIGNED_XFER(bp) \ 2655084Sjohnlev ((((uintptr_t)((bp)->b_un.b_addr) & XB_BMASK) == 0) && \ 2665084Sjohnlev (((bp)->b_bcount & XB_BMASK) == 0)) 2675084Sjohnlev 2685084Sjohnlev #define U_INVAL(u) (((u)->uio_loffset & (offset_t)(XB_BMASK)) || \ 2695084Sjohnlev ((u)->uio_iov->iov_len & (offset_t)(XB_BMASK))) 2705084Sjohnlev 2715084Sjohnlev /* wrap pa_to_ma() for xdf to run in dom0 */ 2725084Sjohnlev #define PATOMA(addr) (DOMAIN_IS_INITDOMAIN(xen_info) ? addr : pa_to_ma(addr)) 2735084Sjohnlev 2748863SEdward.Pilatowicz@Sun.COM #define XD_IS_RO(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_READONLY) 2758863SEdward.Pilatowicz@Sun.COM #define XD_IS_CD(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_CDROM) 2768863SEdward.Pilatowicz@Sun.COM #define XD_IS_RM(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_REMOVABLE) 2778863SEdward.Pilatowicz@Sun.COM #define IS_READ(bp) VOID2BOOLEAN((bp)->b_flags & B_READ) 2788863SEdward.Pilatowicz@Sun.COM #define IS_ERROR(bp) VOID2BOOLEAN((bp)->b_flags & B_ERROR) 2795084Sjohnlev 2805084Sjohnlev #define XDF_UPDATE_IO_STAT(vdp, bp) \ 2818863SEdward.Pilatowicz@Sun.COM { \ 2825084Sjohnlev kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat); \ 2835084Sjohnlev size_t n_done = (bp)->b_bcount - (bp)->b_resid; \ 2845084Sjohnlev if ((bp)->b_flags & B_READ) { \ 2855084Sjohnlev kip->reads++; \ 2865084Sjohnlev kip->nread += n_done; \ 2875084Sjohnlev } else { \ 2885084Sjohnlev kip->writes++; \ 2895084Sjohnlev kip->nwritten += n_done; \ 2905084Sjohnlev } \ 2915084Sjohnlev } 2925084Sjohnlev 2935084Sjohnlev #ifdef DEBUG 2948863SEdward.Pilatowicz@Sun.COM #define DPRINTF(flag, args) {if (xdf_debug & (flag)) prom_printf args; } 2955084Sjohnlev #define SETDMACBON(vbd) {(vbd)->xdf_dmacallback_num++; } 2965084Sjohnlev #define SETDMACBOFF(vbd) {(vbd)->xdf_dmacallback_num--; } 2975084Sjohnlev #define ISDMACBON(vbd) ((vbd)->xdf_dmacallback_num > 0) 2985084Sjohnlev #else 2995084Sjohnlev #define DPRINTF(flag, args) 3005084Sjohnlev #define SETDMACBON(vbd) 3015084Sjohnlev #define SETDMACBOFF(vbd) 3025084Sjohnlev #define ISDMACBON(vbd) 3035084Sjohnlev #endif /* DEBUG */ 3045084Sjohnlev 3055084Sjohnlev #define DDI_DBG 0x1 3065084Sjohnlev #define DMA_DBG 0x2 3075084Sjohnlev #define INTR_DBG 0x8 3085084Sjohnlev #define IO_DBG 0x10 3095084Sjohnlev #define IOCTL_DBG 0x20 3105084Sjohnlev #define SUSRES_DBG 0x40 3115084Sjohnlev #define LBL_DBG 0x80 3125084Sjohnlev 3136318Sedp #if defined(XPV_HVM_DRIVER) 3148863SEdward.Pilatowicz@Sun.COM extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 3158863SEdward.Pilatowicz@Sun.COM extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, 3168863SEdward.Pilatowicz@Sun.COM void *); 3178863SEdward.Pilatowicz@Sun.COM extern void xdfmin(struct buf *bp); 3188863SEdward.Pilatowicz@Sun.COM extern dev_info_t *xdf_hvm_hold(const char *); 3198863SEdward.Pilatowicz@Sun.COM extern boolean_t xdf_hvm_connect(dev_info_t *); 3206318Sedp extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *); 3216318Sedp extern int xdf_kstat_create(dev_info_t *, char *, int); 3226318Sedp extern void xdf_kstat_delete(dev_info_t *); 3238863SEdward.Pilatowicz@Sun.COM extern boolean_t xdf_is_cd(dev_info_t *); 3248863SEdward.Pilatowicz@Sun.COM extern boolean_t xdf_is_rm(dev_info_t *); 3258863SEdward.Pilatowicz@Sun.COM extern boolean_t xdf_media_req_supported(dev_info_t *); 3266318Sedp #endif /* XPV_HVM_DRIVER */ 3276318Sedp 3285084Sjohnlev #ifdef __cplusplus 3295084Sjohnlev } 3305084Sjohnlev #endif 3315084Sjohnlev 3325084Sjohnlev #endif /* _SYS_XDF_H */ 333