11991Sheppo /* 21991Sheppo * CDDL HEADER START 31991Sheppo * 41991Sheppo * The contents of this file are subject to the terms of the 51991Sheppo * Common Development and Distribution License (the "License"). 61991Sheppo * You may not use this file except in compliance with the License. 71991Sheppo * 81991Sheppo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91991Sheppo * or http://www.opensolaris.org/os/licensing. 101991Sheppo * See the License for the specific language governing permissions 111991Sheppo * and limitations under the License. 121991Sheppo * 131991Sheppo * When distributing Covered Code, include this CDDL HEADER in each 141991Sheppo * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151991Sheppo * If applicable, add the following below this CDDL HEADER, with the 161991Sheppo * fields enclosed by brackets "[]" replaced with your own identifying 171991Sheppo * information: Portions Copyright [yyyy] [name of copyright owner] 181991Sheppo * 191991Sheppo * CDDL HEADER END 201991Sheppo */ 211991Sheppo 221991Sheppo /* 231991Sheppo * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 241991Sheppo * Use is subject to license terms. 251991Sheppo */ 261991Sheppo 271991Sheppo #pragma ident "%Z%%M% %I% %E% SMI" 281991Sheppo 291991Sheppo /* 301991Sheppo * Virtual disk server 311991Sheppo */ 321991Sheppo 331991Sheppo 341991Sheppo #include <sys/types.h> 351991Sheppo #include <sys/conf.h> 362531Snarayan #include <sys/crc32.h> 371991Sheppo #include <sys/ddi.h> 381991Sheppo #include <sys/dkio.h> 391991Sheppo #include <sys/file.h> 401991Sheppo #include <sys/mdeg.h> 411991Sheppo #include <sys/modhash.h> 421991Sheppo #include <sys/note.h> 431991Sheppo #include <sys/pathname.h> 441991Sheppo #include <sys/sunddi.h> 451991Sheppo #include <sys/sunldi.h> 461991Sheppo #include <sys/sysmacros.h> 471991Sheppo #include <sys/vio_common.h> 481991Sheppo #include <sys/vdsk_mailbox.h> 491991Sheppo #include <sys/vdsk_common.h> 501991Sheppo #include <sys/vtoc.h> 511991Sheppo 521991Sheppo 531991Sheppo /* Virtual disk server initialization flags */ 542336Snarayan #define VDS_LDI 0x01 552336Snarayan #define VDS_MDEG 0x02 561991Sheppo 571991Sheppo /* Virtual disk server tunable parameters */ 583166Ssg70180 #define VDS_LDC_RETRIES 5 592793Slm66018 #define VDS_LDC_DELAY 1000 /* usec */ 601991Sheppo #define VDS_NCHAINS 32 611991Sheppo 621991Sheppo /* Identification parameters for MD, synthetic dkio(7i) structures, etc. */ 631991Sheppo #define VDS_NAME "virtual-disk-server" 641991Sheppo 651991Sheppo #define VD_NAME "vd" 661991Sheppo #define VD_VOLUME_NAME "vdisk" 671991Sheppo #define VD_ASCIILABEL "Virtual Disk" 681991Sheppo 691991Sheppo #define VD_CHANNEL_ENDPOINT "channel-endpoint" 701991Sheppo #define VD_ID_PROP "id" 711991Sheppo #define VD_BLOCK_DEVICE_PROP "vds-block-device" 72*3297Ssb155480 #define VD_REG_PROP "reg" 731991Sheppo 741991Sheppo /* Virtual disk initialization flags */ 751991Sheppo #define VD_LOCKING 0x01 762336Snarayan #define VD_LDC 0x02 772336Snarayan #define VD_DRING 0x04 782336Snarayan #define VD_SID 0x08 792336Snarayan #define VD_SEQ_NUM 0x10 801991Sheppo 811991Sheppo /* Flags for opening/closing backing devices via LDI */ 821991Sheppo #define VD_OPEN_FLAGS (FEXCL | FREAD | FWRITE) 831991Sheppo 841991Sheppo /* 851991Sheppo * By Solaris convention, slice/partition 2 represents the entire disk; 861991Sheppo * unfortunately, this convention does not appear to be codified. 871991Sheppo */ 881991Sheppo #define VD_ENTIRE_DISK_SLICE 2 891991Sheppo 901991Sheppo /* Return a cpp token as a string */ 911991Sheppo #define STRINGIZE(token) #token 921991Sheppo 931991Sheppo /* 941991Sheppo * Print a message prefixed with the current function name to the message log 951991Sheppo * (and optionally to the console for verbose boots); these macros use cpp's 961991Sheppo * concatenation of string literals and C99 variable-length-argument-list 971991Sheppo * macros 981991Sheppo */ 991991Sheppo #define PRN(...) _PRN("?%s(): "__VA_ARGS__, "") 1001991Sheppo #define _PRN(format, ...) \ 1011991Sheppo cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__) 1021991Sheppo 1031991Sheppo /* Return a pointer to the "i"th vdisk dring element */ 1041991Sheppo #define VD_DRING_ELEM(i) ((vd_dring_entry_t *)(void *) \ 1051991Sheppo (vd->dring + (i)*vd->descriptor_size)) 1061991Sheppo 1071991Sheppo /* Return the virtual disk client's type as a string (for use in messages) */ 1081991Sheppo #define VD_CLIENT(vd) \ 1091991Sheppo (((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" : \ 1101991Sheppo (((vd)->xfer_mode == VIO_DRING_MODE) ? "dring client" : \ 1111991Sheppo (((vd)->xfer_mode == 0) ? "null client" : \ 1121991Sheppo "unsupported client"))) 1131991Sheppo 114*3297Ssb155480 /* 115*3297Ssb155480 * Specification of an MD node passed to the MDEG to filter any 116*3297Ssb155480 * 'vport' nodes that do not belong to the specified node. This 117*3297Ssb155480 * template is copied for each vds instance and filled in with 118*3297Ssb155480 * the appropriate 'cfg-handle' value before being passed to the MDEG. 119*3297Ssb155480 */ 120*3297Ssb155480 static mdeg_prop_spec_t vds_prop_template[] = { 121*3297Ssb155480 { MDET_PROP_STR, "name", VDS_NAME }, 122*3297Ssb155480 { MDET_PROP_VAL, "cfg-handle", NULL }, 123*3297Ssb155480 { MDET_LIST_END, NULL, NULL } 124*3297Ssb155480 }; 125*3297Ssb155480 126*3297Ssb155480 #define VDS_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 127*3297Ssb155480 128*3297Ssb155480 /* 129*3297Ssb155480 * Matching criteria passed to the MDEG to register interest 130*3297Ssb155480 * in changes to 'virtual-device-port' nodes identified by their 131*3297Ssb155480 * 'id' property. 132*3297Ssb155480 */ 133*3297Ssb155480 static md_prop_match_t vd_prop_match[] = { 134*3297Ssb155480 { MDET_PROP_VAL, VD_ID_PROP }, 135*3297Ssb155480 { MDET_LIST_END, NULL } 136*3297Ssb155480 }; 137*3297Ssb155480 138*3297Ssb155480 static mdeg_node_match_t vd_match = {"virtual-device-port", 139*3297Ssb155480 vd_prop_match}; 140*3297Ssb155480 1411991Sheppo /* Debugging macros */ 1421991Sheppo #ifdef DEBUG 1432793Slm66018 1442793Slm66018 static int vd_msglevel = 0; 1452793Slm66018 1462793Slm66018 1471991Sheppo #define PR0 if (vd_msglevel > 0) PRN 1481991Sheppo #define PR1 if (vd_msglevel > 1) PRN 1491991Sheppo #define PR2 if (vd_msglevel > 2) PRN 1501991Sheppo 1511991Sheppo #define VD_DUMP_DRING_ELEM(elem) \ 1521991Sheppo PRN("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n", \ 1531991Sheppo elem->hdr.dstate, \ 1541991Sheppo elem->payload.operation, \ 1551991Sheppo elem->payload.status, \ 1561991Sheppo elem->payload.nbytes, \ 1571991Sheppo elem->payload.addr, \ 1581991Sheppo elem->payload.ncookies); 1591991Sheppo 1602793Slm66018 char * 1612793Slm66018 vd_decode_state(int state) 1622793Slm66018 { 1632793Slm66018 char *str; 1642793Slm66018 1652793Slm66018 #define CASE_STATE(_s) case _s: str = #_s; break; 1662793Slm66018 1672793Slm66018 switch (state) { 1682793Slm66018 CASE_STATE(VD_STATE_INIT) 1692793Slm66018 CASE_STATE(VD_STATE_VER) 1702793Slm66018 CASE_STATE(VD_STATE_ATTR) 1712793Slm66018 CASE_STATE(VD_STATE_DRING) 1722793Slm66018 CASE_STATE(VD_STATE_RDX) 1732793Slm66018 CASE_STATE(VD_STATE_DATA) 1742793Slm66018 default: str = "unknown"; break; 1752793Slm66018 } 1762793Slm66018 1772793Slm66018 #undef CASE_STATE 1782793Slm66018 1792793Slm66018 return (str); 1802793Slm66018 } 1812793Slm66018 1822793Slm66018 void 1832793Slm66018 vd_decode_tag(vio_msg_t *msg) 1842793Slm66018 { 1852793Slm66018 char *tstr, *sstr, *estr; 1862793Slm66018 1872793Slm66018 #define CASE_TYPE(_s) case _s: tstr = #_s; break; 1882793Slm66018 1892793Slm66018 switch (msg->tag.vio_msgtype) { 1902793Slm66018 CASE_TYPE(VIO_TYPE_CTRL) 1912793Slm66018 CASE_TYPE(VIO_TYPE_DATA) 1922793Slm66018 CASE_TYPE(VIO_TYPE_ERR) 1932793Slm66018 default: tstr = "unknown"; break; 1942793Slm66018 } 1952793Slm66018 1962793Slm66018 #undef CASE_TYPE 1972793Slm66018 1982793Slm66018 #define CASE_SUBTYPE(_s) case _s: sstr = #_s; break; 1992793Slm66018 2002793Slm66018 switch (msg->tag.vio_subtype) { 2012793Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_INFO) 2022793Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_ACK) 2032793Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_NACK) 2042793Slm66018 default: sstr = "unknown"; break; 2052793Slm66018 } 2062793Slm66018 2072793Slm66018 #undef CASE_SUBTYPE 2082793Slm66018 2092793Slm66018 #define CASE_ENV(_s) case _s: estr = #_s; break; 2102793Slm66018 2112793Slm66018 switch (msg->tag.vio_subtype_env) { 2122793Slm66018 CASE_ENV(VIO_VER_INFO) 2132793Slm66018 CASE_ENV(VIO_ATTR_INFO) 2142793Slm66018 CASE_ENV(VIO_DRING_REG) 2152793Slm66018 CASE_ENV(VIO_DRING_UNREG) 2162793Slm66018 CASE_ENV(VIO_RDX) 2172793Slm66018 CASE_ENV(VIO_PKT_DATA) 2182793Slm66018 CASE_ENV(VIO_DESC_DATA) 2192793Slm66018 CASE_ENV(VIO_DRING_DATA) 2202793Slm66018 default: estr = "unknown"; break; 2212793Slm66018 } 2222793Slm66018 2232793Slm66018 #undef CASE_ENV 2242793Slm66018 2252793Slm66018 PR1("(%x/%x/%x) message : (%s/%s/%s)", 2262793Slm66018 msg->tag.vio_msgtype, msg->tag.vio_subtype, 2272793Slm66018 msg->tag.vio_subtype_env, tstr, sstr, estr); 2282793Slm66018 } 2292793Slm66018 2301991Sheppo #else /* !DEBUG */ 2312793Slm66018 2321991Sheppo #define PR0(...) 2331991Sheppo #define PR1(...) 2341991Sheppo #define PR2(...) 2351991Sheppo 2361991Sheppo #define VD_DUMP_DRING_ELEM(elem) 2371991Sheppo 2382793Slm66018 #define vd_decode_state(_s) (NULL) 2392793Slm66018 #define vd_decode_tag(_s) (NULL) 2402793Slm66018 2411991Sheppo #endif /* DEBUG */ 2421991Sheppo 2431991Sheppo 2442336Snarayan /* 2452336Snarayan * Soft state structure for a vds instance 2462336Snarayan */ 2471991Sheppo typedef struct vds { 2481991Sheppo uint_t initialized; /* driver inst initialization flags */ 2491991Sheppo dev_info_t *dip; /* driver inst devinfo pointer */ 2501991Sheppo ldi_ident_t ldi_ident; /* driver's identifier for LDI */ 2511991Sheppo mod_hash_t *vd_table; /* table of virtual disks served */ 252*3297Ssb155480 mdeg_node_spec_t *ispecp; /* mdeg node specification */ 2531991Sheppo mdeg_handle_t mdeg; /* handle for MDEG operations */ 2541991Sheppo } vds_t; 2551991Sheppo 2562336Snarayan /* 2572336Snarayan * Types of descriptor-processing tasks 2582336Snarayan */ 2592336Snarayan typedef enum vd_task_type { 2602336Snarayan VD_NONFINAL_RANGE_TASK, /* task for intermediate descriptor in range */ 2612336Snarayan VD_FINAL_RANGE_TASK, /* task for last in a range of descriptors */ 2622336Snarayan } vd_task_type_t; 2632336Snarayan 2642336Snarayan /* 2652336Snarayan * Structure describing the task for processing a descriptor 2662336Snarayan */ 2672336Snarayan typedef struct vd_task { 2682336Snarayan struct vd *vd; /* vd instance task is for */ 2692336Snarayan vd_task_type_t type; /* type of descriptor task */ 2702336Snarayan int index; /* dring elem index for task */ 2712336Snarayan vio_msg_t *msg; /* VIO message task is for */ 2722336Snarayan size_t msglen; /* length of message content */ 2732336Snarayan vd_dring_payload_t *request; /* request task will perform */ 2742336Snarayan struct buf buf; /* buf(9s) for I/O request */ 2752531Snarayan ldc_mem_handle_t mhdl; /* task memory handle */ 2762336Snarayan } vd_task_t; 2772336Snarayan 2782336Snarayan /* 2792336Snarayan * Soft state structure for a virtual disk instance 2802336Snarayan */ 2811991Sheppo typedef struct vd { 2821991Sheppo uint_t initialized; /* vdisk initialization flags */ 2831991Sheppo vds_t *vds; /* server for this vdisk */ 2842336Snarayan ddi_taskq_t *startq; /* queue for I/O start tasks */ 2852336Snarayan ddi_taskq_t *completionq; /* queue for completion tasks */ 2861991Sheppo ldi_handle_t ldi_handle[V_NUMPAR]; /* LDI slice handles */ 2871991Sheppo dev_t dev[V_NUMPAR]; /* dev numbers for slices */ 2882410Slm66018 uint_t nslices; /* number of slices */ 2891991Sheppo size_t vdisk_size; /* number of blocks in vdisk */ 2901991Sheppo vd_disk_type_t vdisk_type; /* slice or entire disk */ 2912531Snarayan vd_disk_label_t vdisk_label; /* EFI or VTOC label */ 2922410Slm66018 ushort_t max_xfer_sz; /* max xfer size in DEV_BSIZE */ 2931991Sheppo boolean_t pseudo; /* underlying pseudo dev */ 2942531Snarayan struct dk_efi dk_efi; /* synthetic for slice type */ 2951991Sheppo struct dk_geom dk_geom; /* synthetic for slice type */ 2961991Sheppo struct vtoc vtoc; /* synthetic for slice type */ 2971991Sheppo ldc_status_t ldc_state; /* LDC connection state */ 2981991Sheppo ldc_handle_t ldc_handle; /* handle for LDC comm */ 2991991Sheppo size_t max_msglen; /* largest LDC message len */ 3001991Sheppo vd_state_t state; /* client handshake state */ 3011991Sheppo uint8_t xfer_mode; /* transfer mode with client */ 3021991Sheppo uint32_t sid; /* client's session ID */ 3031991Sheppo uint64_t seq_num; /* message sequence number */ 3041991Sheppo uint64_t dring_ident; /* identifier of dring */ 3051991Sheppo ldc_dring_handle_t dring_handle; /* handle for dring ops */ 3061991Sheppo uint32_t descriptor_size; /* num bytes in desc */ 3071991Sheppo uint32_t dring_len; /* number of dring elements */ 3081991Sheppo caddr_t dring; /* address of dring */ 3092793Slm66018 caddr_t vio_msgp; /* vio msg staging buffer */ 3102336Snarayan vd_task_t inband_task; /* task for inband descriptor */ 3112336Snarayan vd_task_t *dring_task; /* tasks dring elements */ 3122336Snarayan 3132336Snarayan kmutex_t lock; /* protects variables below */ 3142336Snarayan boolean_t enabled; /* is vdisk enabled? */ 3152336Snarayan boolean_t reset_state; /* reset connection state? */ 3162336Snarayan boolean_t reset_ldc; /* reset LDC channel? */ 3171991Sheppo } vd_t; 3181991Sheppo 3191991Sheppo typedef struct vds_operation { 3202793Slm66018 char *namep; 3211991Sheppo uint8_t operation; 3222336Snarayan int (*start)(vd_task_t *task); 3232336Snarayan void (*complete)(void *arg); 3241991Sheppo } vds_operation_t; 3251991Sheppo 3262032Slm66018 typedef struct vd_ioctl { 3272032Slm66018 uint8_t operation; /* vdisk operation */ 3282032Slm66018 const char *operation_name; /* vdisk operation name */ 3292032Slm66018 size_t nbytes; /* size of operation buffer */ 3302032Slm66018 int cmd; /* corresponding ioctl cmd */ 3312032Slm66018 const char *cmd_name; /* ioctl cmd name */ 3322032Slm66018 void *arg; /* ioctl cmd argument */ 3332032Slm66018 /* convert input vd_buf to output ioctl_arg */ 3342032Slm66018 void (*copyin)(void *vd_buf, void *ioctl_arg); 3352032Slm66018 /* convert input ioctl_arg to output vd_buf */ 3362032Slm66018 void (*copyout)(void *ioctl_arg, void *vd_buf); 3372032Slm66018 } vd_ioctl_t; 3382032Slm66018 3392032Slm66018 /* Define trivial copyin/copyout conversion function flag */ 3402032Slm66018 #define VD_IDENTITY ((void (*)(void *, void *))-1) 3411991Sheppo 3421991Sheppo 3431991Sheppo static int vds_ldc_retries = VDS_LDC_RETRIES; 3442793Slm66018 static int vds_ldc_delay = VDS_LDC_DELAY; 3451991Sheppo static void *vds_state; 3461991Sheppo static uint64_t vds_operations; /* see vds_operation[] definition below */ 3471991Sheppo 3481991Sheppo static int vd_open_flags = VD_OPEN_FLAGS; 3491991Sheppo 3502032Slm66018 /* 3512032Slm66018 * Supported protocol version pairs, from highest (newest) to lowest (oldest) 3522032Slm66018 * 3532032Slm66018 * Each supported major version should appear only once, paired with (and only 3542032Slm66018 * with) its highest supported minor version number (as the protocol requires 3552032Slm66018 * supporting all lower minor version numbers as well) 3562032Slm66018 */ 3572032Slm66018 static const vio_ver_t vds_version[] = {{1, 0}}; 3582032Slm66018 static const size_t vds_num_versions = 3592032Slm66018 sizeof (vds_version)/sizeof (vds_version[0]); 3602032Slm66018 3612793Slm66018 static void vd_free_dring_task(vd_t *vdp); 3621991Sheppo 3631991Sheppo static int 3642336Snarayan vd_start_bio(vd_task_t *task) 3651991Sheppo { 3662531Snarayan int rv, status = 0; 3672336Snarayan vd_t *vd = task->vd; 3682336Snarayan vd_dring_payload_t *request = task->request; 3692336Snarayan struct buf *buf = &task->buf; 3702531Snarayan uint8_t mtype; 3712336Snarayan 3722336Snarayan 3732336Snarayan ASSERT(vd != NULL); 3742336Snarayan ASSERT(request != NULL); 3752336Snarayan ASSERT(request->slice < vd->nslices); 3762336Snarayan ASSERT((request->operation == VD_OP_BREAD) || 3772336Snarayan (request->operation == VD_OP_BWRITE)); 3782336Snarayan 3791991Sheppo if (request->nbytes == 0) 3801991Sheppo return (EINVAL); /* no service for trivial requests */ 3812336Snarayan 3822336Snarayan PR1("%s %lu bytes at block %lu", 3832336Snarayan (request->operation == VD_OP_BREAD) ? "Read" : "Write", 3842336Snarayan request->nbytes, request->addr); 3852336Snarayan 3862336Snarayan bioinit(buf); 3872336Snarayan buf->b_flags = B_BUSY; 3882336Snarayan buf->b_bcount = request->nbytes; 3892336Snarayan buf->b_lblkno = request->addr; 3902336Snarayan buf->b_edev = vd->dev[request->slice]; 3912336Snarayan 3922531Snarayan mtype = (&vd->inband_task == task) ? LDC_SHADOW_MAP : LDC_DIRECT_MAP; 3932531Snarayan 3942531Snarayan /* Map memory exported by client */ 3952531Snarayan status = ldc_mem_map(task->mhdl, request->cookie, request->ncookies, 3962531Snarayan mtype, (request->operation == VD_OP_BREAD) ? LDC_MEM_W : LDC_MEM_R, 3972531Snarayan &(buf->b_un.b_addr), NULL); 3982531Snarayan if (status != 0) { 3992793Slm66018 PR0("ldc_mem_map() returned err %d ", status); 4002531Snarayan biofini(buf); 4012531Snarayan return (status); 4022336Snarayan } 4032336Snarayan 4042531Snarayan status = ldc_mem_acquire(task->mhdl, 0, buf->b_bcount); 4052531Snarayan if (status != 0) { 4062531Snarayan (void) ldc_mem_unmap(task->mhdl); 4072793Slm66018 PR0("ldc_mem_acquire() returned err %d ", status); 4082531Snarayan biofini(buf); 4092531Snarayan return (status); 4102531Snarayan } 4112531Snarayan 4122531Snarayan buf->b_flags |= (request->operation == VD_OP_BREAD) ? B_READ : B_WRITE; 4132531Snarayan 4142336Snarayan /* Start the block I/O */ 4152531Snarayan if ((status = ldi_strategy(vd->ldi_handle[request->slice], buf)) == 0) 4162336Snarayan return (EINPROGRESS); /* will complete on completionq */ 4172336Snarayan 4182336Snarayan /* Clean up after error */ 4192531Snarayan rv = ldc_mem_release(task->mhdl, 0, buf->b_bcount); 4202531Snarayan if (rv) { 4212793Slm66018 PR0("ldc_mem_release() returned err %d ", rv); 4222531Snarayan } 4232531Snarayan rv = ldc_mem_unmap(task->mhdl); 4242531Snarayan if (rv) { 4252793Slm66018 PR0("ldc_mem_unmap() returned err %d ", status); 4262531Snarayan } 4272531Snarayan 4282336Snarayan biofini(buf); 4292336Snarayan return (status); 4302336Snarayan } 4312336Snarayan 4322336Snarayan static int 4332336Snarayan send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen) 4342336Snarayan { 4352793Slm66018 int status; 4362336Snarayan size_t nbytes; 4372336Snarayan 4382793Slm66018 do { 4392336Snarayan nbytes = msglen; 4402336Snarayan status = ldc_write(ldc_handle, msg, &nbytes); 4412793Slm66018 if (status != EWOULDBLOCK) 4422793Slm66018 break; 4432793Slm66018 drv_usecwait(vds_ldc_delay); 4442793Slm66018 } while (status == EWOULDBLOCK); 4452336Snarayan 4462336Snarayan if (status != 0) { 4472793Slm66018 if (status != ECONNRESET) 4482793Slm66018 PR0("ldc_write() returned errno %d", status); 4492336Snarayan return (status); 4502336Snarayan } else if (nbytes != msglen) { 4512793Slm66018 PR0("ldc_write() performed only partial write"); 4522336Snarayan return (EIO); 4532336Snarayan } 4542336Snarayan 4552336Snarayan PR1("SENT %lu bytes", msglen); 4562336Snarayan return (0); 4572336Snarayan } 4582336Snarayan 4592336Snarayan static void 4602336Snarayan vd_need_reset(vd_t *vd, boolean_t reset_ldc) 4612336Snarayan { 4622336Snarayan mutex_enter(&vd->lock); 4632336Snarayan vd->reset_state = B_TRUE; 4642336Snarayan vd->reset_ldc = reset_ldc; 4652336Snarayan mutex_exit(&vd->lock); 4662336Snarayan } 4672336Snarayan 4682336Snarayan /* 4692336Snarayan * Reset the state of the connection with a client, if needed; reset the LDC 4702336Snarayan * transport as well, if needed. This function should only be called from the 4712793Slm66018 * "vd_recv_msg", as it waits for tasks - otherwise a deadlock can occur. 4722336Snarayan */ 4732336Snarayan static void 4742336Snarayan vd_reset_if_needed(vd_t *vd) 4752336Snarayan { 4762793Slm66018 int status = 0; 4772336Snarayan 4782336Snarayan mutex_enter(&vd->lock); 4792336Snarayan if (!vd->reset_state) { 4802336Snarayan ASSERT(!vd->reset_ldc); 4812336Snarayan mutex_exit(&vd->lock); 4822336Snarayan return; 4832336Snarayan } 4842336Snarayan mutex_exit(&vd->lock); 4852336Snarayan 4862336Snarayan PR0("Resetting connection state with %s", VD_CLIENT(vd)); 4872336Snarayan 4882336Snarayan /* 4892336Snarayan * Let any asynchronous I/O complete before possibly pulling the rug 4902336Snarayan * out from under it; defer checking vd->reset_ldc, as one of the 4912336Snarayan * asynchronous tasks might set it 4922336Snarayan */ 4932336Snarayan ddi_taskq_wait(vd->completionq); 4942336Snarayan 4952336Snarayan if ((vd->initialized & VD_DRING) && 4962336Snarayan ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0)) 4972793Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 4982793Slm66018 4992793Slm66018 vd_free_dring_task(vd); 5002793Slm66018 5012793Slm66018 /* Free the staging buffer for msgs */ 5022793Slm66018 if (vd->vio_msgp != NULL) { 5032793Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 5042793Slm66018 vd->vio_msgp = NULL; 5052336Snarayan } 5062336Snarayan 5072793Slm66018 /* Free the inband message buffer */ 5082793Slm66018 if (vd->inband_task.msg != NULL) { 5092793Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 5102793Slm66018 vd->inband_task.msg = NULL; 5112793Slm66018 } 5122336Snarayan 5132336Snarayan mutex_enter(&vd->lock); 5142793Slm66018 5152793Slm66018 if (vd->reset_ldc) 5162793Slm66018 PR0("taking down LDC channel"); 5172410Slm66018 if (vd->reset_ldc && ((status = ldc_down(vd->ldc_handle)) != 0)) 5182793Slm66018 PR0("ldc_down() returned errno %d", status); 5192336Snarayan 5202336Snarayan vd->initialized &= ~(VD_SID | VD_SEQ_NUM | VD_DRING); 5212336Snarayan vd->state = VD_STATE_INIT; 5222336Snarayan vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 5232336Snarayan 5242793Slm66018 /* Allocate the staging buffer */ 5252793Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 5262793Slm66018 5273010Slm66018 PR0("calling ldc_up\n"); 5283010Slm66018 (void) ldc_up(vd->ldc_handle); 5292793Slm66018 5302336Snarayan vd->reset_state = B_FALSE; 5312336Snarayan vd->reset_ldc = B_FALSE; 5322793Slm66018 5332336Snarayan mutex_exit(&vd->lock); 5342336Snarayan } 5352336Snarayan 5362793Slm66018 static void vd_recv_msg(void *arg); 5372793Slm66018 5382793Slm66018 static void 5392793Slm66018 vd_mark_in_reset(vd_t *vd) 5402793Slm66018 { 5412793Slm66018 int status; 5422793Slm66018 5432793Slm66018 PR0("vd_mark_in_reset: marking vd in reset\n"); 5442793Slm66018 5452793Slm66018 vd_need_reset(vd, B_FALSE); 5462793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, DDI_SLEEP); 5472793Slm66018 if (status == DDI_FAILURE) { 5482793Slm66018 PR0("cannot schedule task to recv msg\n"); 5492793Slm66018 vd_need_reset(vd, B_TRUE); 5502793Slm66018 return; 5512793Slm66018 } 5522793Slm66018 } 5532793Slm66018 5542336Snarayan static int 5552336Snarayan vd_mark_elem_done(vd_t *vd, int idx, int elem_status) 5562336Snarayan { 5572336Snarayan boolean_t accepted; 5582336Snarayan int status; 5592336Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 5602336Snarayan 5612793Slm66018 if (vd->reset_state) 5622793Slm66018 return (0); 5632336Snarayan 5642336Snarayan /* Acquire the element */ 5652793Slm66018 if (!vd->reset_state && 5662793Slm66018 (status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 5672793Slm66018 if (status == ECONNRESET) { 5682793Slm66018 vd_mark_in_reset(vd); 5692793Slm66018 return (0); 5702793Slm66018 } else { 5712793Slm66018 PR0("ldc_mem_dring_acquire() returned errno %d", 5722793Slm66018 status); 5732793Slm66018 return (status); 5742793Slm66018 } 5752336Snarayan } 5762336Snarayan 5772336Snarayan /* Set the element's status and mark it done */ 5782336Snarayan accepted = (elem->hdr.dstate == VIO_DESC_ACCEPTED); 5792336Snarayan if (accepted) { 5802336Snarayan elem->payload.status = elem_status; 5812336Snarayan elem->hdr.dstate = VIO_DESC_DONE; 5822336Snarayan } else { 5832336Snarayan /* Perhaps client timed out waiting for I/O... */ 5842793Slm66018 PR0("element %u no longer \"accepted\"", idx); 5852336Snarayan VD_DUMP_DRING_ELEM(elem); 5862336Snarayan } 5872336Snarayan /* Release the element */ 5882793Slm66018 if (!vd->reset_state && 5892793Slm66018 (status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 5902793Slm66018 if (status == ECONNRESET) { 5912793Slm66018 vd_mark_in_reset(vd); 5922793Slm66018 return (0); 5932793Slm66018 } else { 5942793Slm66018 PR0("ldc_mem_dring_release() returned errno %d", 5952793Slm66018 status); 5962793Slm66018 return (status); 5972793Slm66018 } 5982336Snarayan } 5992336Snarayan 6002336Snarayan return (accepted ? 0 : EINVAL); 6012336Snarayan } 6022336Snarayan 6032336Snarayan static void 6042336Snarayan vd_complete_bio(void *arg) 6052336Snarayan { 6062336Snarayan int status = 0; 6072336Snarayan vd_task_t *task = (vd_task_t *)arg; 6082336Snarayan vd_t *vd = task->vd; 6092336Snarayan vd_dring_payload_t *request = task->request; 6102336Snarayan struct buf *buf = &task->buf; 6112336Snarayan 6122336Snarayan 6132336Snarayan ASSERT(vd != NULL); 6142336Snarayan ASSERT(request != NULL); 6152336Snarayan ASSERT(task->msg != NULL); 6162336Snarayan ASSERT(task->msglen >= sizeof (*task->msg)); 6172336Snarayan 6182336Snarayan /* Wait for the I/O to complete */ 6192336Snarayan request->status = biowait(buf); 6202336Snarayan 6212531Snarayan /* Release the buffer */ 6222793Slm66018 if (!vd->reset_state) 6232793Slm66018 status = ldc_mem_release(task->mhdl, 0, buf->b_bcount); 6242531Snarayan if (status) { 6252793Slm66018 PR0("ldc_mem_release() returned errno %d copying to " 6262793Slm66018 "client", status); 6272793Slm66018 if (status == ECONNRESET) { 6282793Slm66018 vd_mark_in_reset(vd); 6292793Slm66018 } 6301991Sheppo } 6312336Snarayan 6322793Slm66018 /* Unmap the memory, even if in reset */ 6332531Snarayan status = ldc_mem_unmap(task->mhdl); 6342531Snarayan if (status) { 6352793Slm66018 PR0("ldc_mem_unmap() returned errno %d copying to client", 6362531Snarayan status); 6372793Slm66018 if (status == ECONNRESET) { 6382793Slm66018 vd_mark_in_reset(vd); 6392793Slm66018 } 6402531Snarayan } 6412531Snarayan 6422336Snarayan biofini(buf); 6432336Snarayan 6442336Snarayan /* Update the dring element for a dring client */ 6452793Slm66018 if (!vd->reset_state && (status == 0) && 6462793Slm66018 (vd->xfer_mode == VIO_DRING_MODE)) { 6472336Snarayan status = vd_mark_elem_done(vd, task->index, request->status); 6482793Slm66018 if (status == ECONNRESET) 6492793Slm66018 vd_mark_in_reset(vd); 6502793Slm66018 } 6512336Snarayan 6522336Snarayan /* 6532336Snarayan * If a transport error occurred, arrange to "nack" the message when 6542336Snarayan * the final task in the descriptor element range completes 6552336Snarayan */ 6562336Snarayan if (status != 0) 6572336Snarayan task->msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 6582336Snarayan 6592336Snarayan /* 6602336Snarayan * Only the final task for a range of elements will respond to and 6612336Snarayan * free the message 6622336Snarayan */ 6632793Slm66018 if (task->type == VD_NONFINAL_RANGE_TASK) { 6642336Snarayan return; 6652793Slm66018 } 6662336Snarayan 6672336Snarayan /* 6682336Snarayan * Send the "ack" or "nack" back to the client; if sending the message 6692336Snarayan * via LDC fails, arrange to reset both the connection state and LDC 6702336Snarayan * itself 6712336Snarayan */ 6722336Snarayan PR1("Sending %s", 6732336Snarayan (task->msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK"); 6742793Slm66018 if (!vd->reset_state) { 6752793Slm66018 status = send_msg(vd->ldc_handle, task->msg, task->msglen); 6762793Slm66018 switch (status) { 6772793Slm66018 case 0: 6782793Slm66018 break; 6792793Slm66018 case ECONNRESET: 6802793Slm66018 vd_mark_in_reset(vd); 6812793Slm66018 break; 6822793Slm66018 default: 6832793Slm66018 PR0("initiating full reset"); 6842793Slm66018 vd_need_reset(vd, B_TRUE); 6852793Slm66018 break; 6862793Slm66018 } 6872793Slm66018 } 6881991Sheppo } 6891991Sheppo 6902032Slm66018 static void 6912032Slm66018 vd_geom2dk_geom(void *vd_buf, void *ioctl_arg) 6922032Slm66018 { 6932032Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg); 6942032Slm66018 } 6952032Slm66018 6962032Slm66018 static void 6972032Slm66018 vd_vtoc2vtoc(void *vd_buf, void *ioctl_arg) 6982032Slm66018 { 6992032Slm66018 VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg); 7002032Slm66018 } 7012032Slm66018 7022032Slm66018 static void 7032032Slm66018 dk_geom2vd_geom(void *ioctl_arg, void *vd_buf) 7042032Slm66018 { 7052032Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf); 7062032Slm66018 } 7072032Slm66018 7082032Slm66018 static void 7092032Slm66018 vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf) 7102032Slm66018 { 7112032Slm66018 VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf); 7122032Slm66018 } 7132032Slm66018 7142531Snarayan static void 7152531Snarayan vd_get_efi_in(void *vd_buf, void *ioctl_arg) 7162531Snarayan { 7172531Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 7182531Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 7192531Snarayan 7202531Snarayan dk_efi->dki_lba = vd_efi->lba; 7212531Snarayan dk_efi->dki_length = vd_efi->length; 7222531Snarayan dk_efi->dki_data = kmem_zalloc(vd_efi->length, KM_SLEEP); 7232531Snarayan } 7242531Snarayan 7252531Snarayan static void 7262531Snarayan vd_get_efi_out(void *ioctl_arg, void *vd_buf) 7272531Snarayan { 7282531Snarayan int len; 7292531Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 7302531Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 7312531Snarayan 7322531Snarayan len = vd_efi->length; 7332531Snarayan DK_EFI2VD_EFI(dk_efi, vd_efi); 7342531Snarayan kmem_free(dk_efi->dki_data, len); 7352531Snarayan } 7362531Snarayan 7372531Snarayan static void 7382531Snarayan vd_set_efi_in(void *vd_buf, void *ioctl_arg) 7392531Snarayan { 7402531Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 7412531Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 7422531Snarayan 7432531Snarayan dk_efi->dki_data = kmem_alloc(vd_efi->length, KM_SLEEP); 7442531Snarayan VD_EFI2DK_EFI(vd_efi, dk_efi); 7452531Snarayan } 7462531Snarayan 7472531Snarayan static void 7482531Snarayan vd_set_efi_out(void *ioctl_arg, void *vd_buf) 7492531Snarayan { 7502531Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 7512531Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 7522531Snarayan 7532531Snarayan kmem_free(dk_efi->dki_data, vd_efi->length); 7542531Snarayan } 7552531Snarayan 7562531Snarayan static int 7572531Snarayan vd_read_vtoc(ldi_handle_t handle, struct vtoc *vtoc, vd_disk_label_t *label) 7582531Snarayan { 7592531Snarayan int status, rval; 7602531Snarayan struct dk_gpt *efi; 7612531Snarayan size_t efi_len; 7622531Snarayan 7632531Snarayan *label = VD_DISK_LABEL_UNK; 7642531Snarayan 7652531Snarayan status = ldi_ioctl(handle, DKIOCGVTOC, (intptr_t)vtoc, 7662531Snarayan (vd_open_flags | FKIOCTL), kcred, &rval); 7672531Snarayan 7682531Snarayan if (status == 0) { 7692531Snarayan *label = VD_DISK_LABEL_VTOC; 7702531Snarayan return (0); 7712531Snarayan } else if (status != ENOTSUP) { 7722793Slm66018 PR0("ldi_ioctl(DKIOCGVTOC) returned error %d", status); 7732531Snarayan return (status); 7742531Snarayan } 7752531Snarayan 7762531Snarayan status = vds_efi_alloc_and_read(handle, &efi, &efi_len); 7772531Snarayan 7782531Snarayan if (status) { 7792793Slm66018 PR0("vds_efi_alloc_and_read returned error %d", status); 7802531Snarayan return (status); 7812531Snarayan } 7822531Snarayan 7832531Snarayan *label = VD_DISK_LABEL_EFI; 7842531Snarayan vd_efi_to_vtoc(efi, vtoc); 7852531Snarayan vd_efi_free(efi, efi_len); 7862531Snarayan 7872531Snarayan return (0); 7882531Snarayan } 7892531Snarayan 7901991Sheppo static int 7912032Slm66018 vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg) 7921991Sheppo { 7932531Snarayan dk_efi_t *dk_ioc; 7942531Snarayan 7952531Snarayan switch (vd->vdisk_label) { 7962531Snarayan 7972531Snarayan case VD_DISK_LABEL_VTOC: 7982531Snarayan 7992531Snarayan switch (cmd) { 8002531Snarayan case DKIOCGGEOM: 8012531Snarayan ASSERT(ioctl_arg != NULL); 8022531Snarayan bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom)); 8032531Snarayan return (0); 8042531Snarayan case DKIOCGVTOC: 8052531Snarayan ASSERT(ioctl_arg != NULL); 8062531Snarayan bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc)); 8072531Snarayan return (0); 8082531Snarayan default: 8092531Snarayan return (ENOTSUP); 8102531Snarayan } 8112531Snarayan 8122531Snarayan case VD_DISK_LABEL_EFI: 8132531Snarayan 8142531Snarayan switch (cmd) { 8152531Snarayan case DKIOCGETEFI: 8162531Snarayan ASSERT(ioctl_arg != NULL); 8172531Snarayan dk_ioc = (dk_efi_t *)ioctl_arg; 8182531Snarayan if (dk_ioc->dki_length < vd->dk_efi.dki_length) 8192531Snarayan return (EINVAL); 8202531Snarayan bcopy(vd->dk_efi.dki_data, dk_ioc->dki_data, 8212531Snarayan vd->dk_efi.dki_length); 8222531Snarayan return (0); 8232531Snarayan default: 8242531Snarayan return (ENOTSUP); 8252531Snarayan } 8262531Snarayan 8271991Sheppo default: 8281991Sheppo return (ENOTSUP); 8291991Sheppo } 8301991Sheppo } 8311991Sheppo 8321991Sheppo static int 8332032Slm66018 vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) 8341991Sheppo { 8351991Sheppo int rval = 0, status; 8361991Sheppo size_t nbytes = request->nbytes; /* modifiable copy */ 8371991Sheppo 8381991Sheppo 8391991Sheppo ASSERT(request->slice < vd->nslices); 8401991Sheppo PR0("Performing %s", ioctl->operation_name); 8411991Sheppo 8422032Slm66018 /* Get data from client and convert, if necessary */ 8432032Slm66018 if (ioctl->copyin != NULL) { 8441991Sheppo ASSERT(nbytes != 0 && buf != NULL); 8451991Sheppo PR1("Getting \"arg\" data from client"); 8461991Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 8471991Sheppo request->cookie, request->ncookies, 8481991Sheppo LDC_COPY_IN)) != 0) { 8492793Slm66018 PR0("ldc_mem_copy() returned errno %d " 8501991Sheppo "copying from client", status); 8511991Sheppo return (status); 8521991Sheppo } 8532032Slm66018 8542032Slm66018 /* Convert client's data, if necessary */ 8552032Slm66018 if (ioctl->copyin == VD_IDENTITY) /* use client buffer */ 8562032Slm66018 ioctl->arg = buf; 8572032Slm66018 else /* convert client vdisk operation data to ioctl data */ 8582032Slm66018 (ioctl->copyin)(buf, (void *)ioctl->arg); 8591991Sheppo } 8601991Sheppo 8611991Sheppo /* 8621991Sheppo * Handle single-slice block devices internally; otherwise, have the 8631991Sheppo * real driver perform the ioctl() 8641991Sheppo */ 8651991Sheppo if (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo) { 8662032Slm66018 if ((status = vd_do_slice_ioctl(vd, ioctl->cmd, 8672032Slm66018 (void *)ioctl->arg)) != 0) 8681991Sheppo return (status); 8691991Sheppo } else if ((status = ldi_ioctl(vd->ldi_handle[request->slice], 8702336Snarayan ioctl->cmd, (intptr_t)ioctl->arg, (vd_open_flags | FKIOCTL), 8712336Snarayan kcred, &rval)) != 0) { 8721991Sheppo PR0("ldi_ioctl(%s) = errno %d", ioctl->cmd_name, status); 8731991Sheppo return (status); 8741991Sheppo } 8751991Sheppo #ifdef DEBUG 8761991Sheppo if (rval != 0) { 8772793Slm66018 PR0("%s set rval = %d, which is not being returned to client", 8781991Sheppo ioctl->cmd_name, rval); 8791991Sheppo } 8801991Sheppo #endif /* DEBUG */ 8811991Sheppo 8822032Slm66018 /* Convert data and send to client, if necessary */ 8832032Slm66018 if (ioctl->copyout != NULL) { 8841991Sheppo ASSERT(nbytes != 0 && buf != NULL); 8851991Sheppo PR1("Sending \"arg\" data to client"); 8862032Slm66018 8872032Slm66018 /* Convert ioctl data to vdisk operation data, if necessary */ 8882032Slm66018 if (ioctl->copyout != VD_IDENTITY) 8892032Slm66018 (ioctl->copyout)((void *)ioctl->arg, buf); 8902032Slm66018 8911991Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 8921991Sheppo request->cookie, request->ncookies, 8931991Sheppo LDC_COPY_OUT)) != 0) { 8942793Slm66018 PR0("ldc_mem_copy() returned errno %d " 8951991Sheppo "copying to client", status); 8961991Sheppo return (status); 8971991Sheppo } 8981991Sheppo } 8991991Sheppo 9001991Sheppo return (status); 9011991Sheppo } 9021991Sheppo 9031991Sheppo #define RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t)) 9041991Sheppo static int 9052336Snarayan vd_ioctl(vd_task_t *task) 9061991Sheppo { 9073166Ssg70180 int i, status, rc; 9082336Snarayan void *buf = NULL; 9092336Snarayan struct dk_geom dk_geom = {0}; 9102336Snarayan struct vtoc vtoc = {0}; 9112531Snarayan struct dk_efi dk_efi = {0}; 9122336Snarayan vd_t *vd = task->vd; 9132336Snarayan vd_dring_payload_t *request = task->request; 9142336Snarayan vd_ioctl_t ioctl[] = { 9151991Sheppo /* Command (no-copy) operations */ 9162032Slm66018 {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), 0, 9172032Slm66018 DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE), 9182032Slm66018 NULL, NULL, NULL}, 9191991Sheppo 9201991Sheppo /* "Get" (copy-out) operations */ 9212032Slm66018 {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int), 9222032Slm66018 DKIOCGETWCE, STRINGIZE(DKIOCGETWCE), 9232531Snarayan NULL, VD_IDENTITY, VD_IDENTITY}, 9242032Slm66018 {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), 9252032Slm66018 RNDSIZE(vd_geom_t), 9262032Slm66018 DKIOCGGEOM, STRINGIZE(DKIOCGGEOM), 9272032Slm66018 &dk_geom, NULL, dk_geom2vd_geom}, 9282032Slm66018 {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t), 9292032Slm66018 DKIOCGVTOC, STRINGIZE(DKIOCGVTOC), 9302032Slm66018 &vtoc, NULL, vtoc2vd_vtoc}, 9312531Snarayan {VD_OP_GET_EFI, STRINGIZE(VD_OP_GET_EFI), RNDSIZE(vd_efi_t), 9322531Snarayan DKIOCGETEFI, STRINGIZE(DKIOCGETEFI), 9332531Snarayan &dk_efi, vd_get_efi_in, vd_get_efi_out}, 9341991Sheppo 9351991Sheppo /* "Set" (copy-in) operations */ 9362032Slm66018 {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int), 9372032Slm66018 DKIOCSETWCE, STRINGIZE(DKIOCSETWCE), 9382531Snarayan NULL, VD_IDENTITY, VD_IDENTITY}, 9392032Slm66018 {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), 9402032Slm66018 RNDSIZE(vd_geom_t), 9412032Slm66018 DKIOCSGEOM, STRINGIZE(DKIOCSGEOM), 9422032Slm66018 &dk_geom, vd_geom2dk_geom, NULL}, 9432032Slm66018 {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t), 9442032Slm66018 DKIOCSVTOC, STRINGIZE(DKIOCSVTOC), 9452032Slm66018 &vtoc, vd_vtoc2vtoc, NULL}, 9462531Snarayan {VD_OP_SET_EFI, STRINGIZE(VD_OP_SET_EFI), RNDSIZE(vd_efi_t), 9472531Snarayan DKIOCSETEFI, STRINGIZE(DKIOCSETEFI), 9482531Snarayan &dk_efi, vd_set_efi_in, vd_set_efi_out}, 9491991Sheppo }; 9501991Sheppo size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0])); 9511991Sheppo 9521991Sheppo 9532336Snarayan ASSERT(vd != NULL); 9542336Snarayan ASSERT(request != NULL); 9551991Sheppo ASSERT(request->slice < vd->nslices); 9561991Sheppo 9571991Sheppo /* 9581991Sheppo * Determine ioctl corresponding to caller's "operation" and 9591991Sheppo * validate caller's "nbytes" 9601991Sheppo */ 9611991Sheppo for (i = 0; i < nioctls; i++) { 9621991Sheppo if (request->operation == ioctl[i].operation) { 9632032Slm66018 /* LDC memory operations require 8-byte multiples */ 9642032Slm66018 ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0); 9652032Slm66018 9662531Snarayan if (request->operation == VD_OP_GET_EFI || 9672531Snarayan request->operation == VD_OP_SET_EFI) { 9682531Snarayan if (request->nbytes >= ioctl[i].nbytes) 9692531Snarayan break; 9702793Slm66018 PR0("%s: Expected at least nbytes = %lu, " 9712531Snarayan "got %lu", ioctl[i].operation_name, 9722531Snarayan ioctl[i].nbytes, request->nbytes); 9732531Snarayan return (EINVAL); 9742531Snarayan } 9752531Snarayan 9762032Slm66018 if (request->nbytes != ioctl[i].nbytes) { 9772793Slm66018 PR0("%s: Expected nbytes = %lu, got %lu", 9782032Slm66018 ioctl[i].operation_name, ioctl[i].nbytes, 9792032Slm66018 request->nbytes); 9801991Sheppo return (EINVAL); 9811991Sheppo } 9821991Sheppo 9831991Sheppo break; 9841991Sheppo } 9851991Sheppo } 9861991Sheppo ASSERT(i < nioctls); /* because "operation" already validated */ 9871991Sheppo 9881991Sheppo if (request->nbytes) 9891991Sheppo buf = kmem_zalloc(request->nbytes, KM_SLEEP); 9901991Sheppo status = vd_do_ioctl(vd, request, buf, &ioctl[i]); 9911991Sheppo if (request->nbytes) 9921991Sheppo kmem_free(buf, request->nbytes); 9932531Snarayan if (vd->vdisk_type == VD_DISK_TYPE_DISK && 9942531Snarayan (request->operation == VD_OP_SET_VTOC || 9953166Ssg70180 request->operation == VD_OP_SET_EFI)) { 9963166Ssg70180 /* update disk information */ 9973166Ssg70180 rc = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc, 9983166Ssg70180 &vd->vdisk_label); 9993166Ssg70180 if (rc != 0) 10003166Ssg70180 PR0("vd_read_vtoc return error %d", rc); 10013166Ssg70180 } 10022336Snarayan PR0("Returning %d", status); 10031991Sheppo return (status); 10041991Sheppo } 10051991Sheppo 10062531Snarayan static int 10072531Snarayan vd_get_devid(vd_task_t *task) 10082531Snarayan { 10092531Snarayan vd_t *vd = task->vd; 10102531Snarayan vd_dring_payload_t *request = task->request; 10112531Snarayan vd_devid_t *vd_devid; 10122531Snarayan impl_devid_t *devid; 10132531Snarayan int status, bufid_len, devid_len, len; 10142793Slm66018 int bufbytes; 10152793Slm66018 10162793Slm66018 PR1("Get Device ID, nbytes=%ld", request->nbytes); 10172531Snarayan 10182531Snarayan if (ddi_lyr_get_devid(vd->dev[request->slice], 10192531Snarayan (ddi_devid_t *)&devid) != DDI_SUCCESS) { 10202531Snarayan /* the most common failure is that no devid is available */ 10212793Slm66018 PR2("No Device ID"); 10222531Snarayan return (ENOENT); 10232531Snarayan } 10242531Snarayan 10252531Snarayan bufid_len = request->nbytes - sizeof (vd_devid_t) + 1; 10262531Snarayan devid_len = DEVID_GETLEN(devid); 10272531Snarayan 10282793Slm66018 /* 10292793Slm66018 * Save the buffer size here for use in deallocation. 10302793Slm66018 * The actual number of bytes copied is returned in 10312793Slm66018 * the 'nbytes' field of the request structure. 10322793Slm66018 */ 10332793Slm66018 bufbytes = request->nbytes; 10342793Slm66018 10352793Slm66018 vd_devid = kmem_zalloc(bufbytes, KM_SLEEP); 10362531Snarayan vd_devid->length = devid_len; 10372531Snarayan vd_devid->type = DEVID_GETTYPE(devid); 10382531Snarayan 10392531Snarayan len = (devid_len > bufid_len)? bufid_len : devid_len; 10402531Snarayan 10412531Snarayan bcopy(devid->did_id, vd_devid->id, len); 10422531Snarayan 10432531Snarayan /* LDC memory operations require 8-byte multiples */ 10442531Snarayan ASSERT(request->nbytes % sizeof (uint64_t) == 0); 10452531Snarayan 10462531Snarayan if ((status = ldc_mem_copy(vd->ldc_handle, (caddr_t)vd_devid, 0, 10472531Snarayan &request->nbytes, request->cookie, request->ncookies, 10482531Snarayan LDC_COPY_OUT)) != 0) { 10492793Slm66018 PR0("ldc_mem_copy() returned errno %d copying to client", 10502531Snarayan status); 10512531Snarayan } 10522793Slm66018 PR1("post mem_copy: nbytes=%ld", request->nbytes); 10532793Slm66018 10542793Slm66018 kmem_free(vd_devid, bufbytes); 10552531Snarayan ddi_devid_free((ddi_devid_t)devid); 10562531Snarayan 10572531Snarayan return (status); 10582531Snarayan } 10592531Snarayan 10601991Sheppo /* 10611991Sheppo * Define the supported operations once the functions for performing them have 10621991Sheppo * been defined 10631991Sheppo */ 10641991Sheppo static const vds_operation_t vds_operation[] = { 10652793Slm66018 #define X(_s) #_s, _s 10662793Slm66018 {X(VD_OP_BREAD), vd_start_bio, vd_complete_bio}, 10672793Slm66018 {X(VD_OP_BWRITE), vd_start_bio, vd_complete_bio}, 10682793Slm66018 {X(VD_OP_FLUSH), vd_ioctl, NULL}, 10692793Slm66018 {X(VD_OP_GET_WCE), vd_ioctl, NULL}, 10702793Slm66018 {X(VD_OP_SET_WCE), vd_ioctl, NULL}, 10712793Slm66018 {X(VD_OP_GET_VTOC), vd_ioctl, NULL}, 10722793Slm66018 {X(VD_OP_SET_VTOC), vd_ioctl, NULL}, 10732793Slm66018 {X(VD_OP_GET_DISKGEOM), vd_ioctl, NULL}, 10742793Slm66018 {X(VD_OP_SET_DISKGEOM), vd_ioctl, NULL}, 10752793Slm66018 {X(VD_OP_GET_EFI), vd_ioctl, NULL}, 10762793Slm66018 {X(VD_OP_SET_EFI), vd_ioctl, NULL}, 10772793Slm66018 {X(VD_OP_GET_DEVID), vd_get_devid, NULL}, 10782793Slm66018 #undef X 10791991Sheppo }; 10801991Sheppo 10811991Sheppo static const size_t vds_noperations = 10821991Sheppo (sizeof (vds_operation))/(sizeof (vds_operation[0])); 10831991Sheppo 10841991Sheppo /* 10852336Snarayan * Process a task specifying a client I/O request 10861991Sheppo */ 10871991Sheppo static int 10882336Snarayan vd_process_task(vd_task_t *task) 10891991Sheppo { 10902336Snarayan int i, status; 10912336Snarayan vd_t *vd = task->vd; 10922336Snarayan vd_dring_payload_t *request = task->request; 10932336Snarayan 10942336Snarayan 10952336Snarayan ASSERT(vd != NULL); 10962336Snarayan ASSERT(request != NULL); 10971991Sheppo 10982336Snarayan /* Find the requested operation */ 10991991Sheppo for (i = 0; i < vds_noperations; i++) 11001991Sheppo if (request->operation == vds_operation[i].operation) 11012336Snarayan break; 11022336Snarayan if (i == vds_noperations) { 11032793Slm66018 PR0("Unsupported operation %u", request->operation); 11042336Snarayan return (ENOTSUP); 11052336Snarayan } 11062336Snarayan 11072748Slm66018 /* Handle client using absolute disk offsets */ 11082748Slm66018 if ((vd->vdisk_type == VD_DISK_TYPE_DISK) && 11092748Slm66018 (request->slice == UINT8_MAX)) 11102748Slm66018 request->slice = VD_ENTIRE_DISK_SLICE; 11112748Slm66018 11122748Slm66018 /* Range-check slice */ 11132748Slm66018 if (request->slice >= vd->nslices) { 11142793Slm66018 PR0("Invalid \"slice\" %u (max %u) for virtual disk", 11152748Slm66018 request->slice, (vd->nslices - 1)); 11162748Slm66018 return (EINVAL); 11172748Slm66018 } 11182748Slm66018 11192793Slm66018 PR1("operation : %s", vds_operation[i].namep); 11202793Slm66018 11212336Snarayan /* Start the operation */ 11222336Snarayan if ((status = vds_operation[i].start(task)) != EINPROGRESS) { 11232793Slm66018 PR0("operation : %s returned status %d", 11242793Slm66018 vds_operation[i].namep, status); 11252336Snarayan request->status = status; /* op succeeded or failed */ 11262336Snarayan return (0); /* but request completed */ 11271991Sheppo } 11281991Sheppo 11292336Snarayan ASSERT(vds_operation[i].complete != NULL); /* debug case */ 11302336Snarayan if (vds_operation[i].complete == NULL) { /* non-debug case */ 11312793Slm66018 PR0("Unexpected return of EINPROGRESS " 11322336Snarayan "with no I/O completion handler"); 11332336Snarayan request->status = EIO; /* operation failed */ 11342336Snarayan return (0); /* but request completed */ 11351991Sheppo } 11361991Sheppo 11372793Slm66018 PR1("operation : kick off taskq entry for %s", vds_operation[i].namep); 11382793Slm66018 11392336Snarayan /* Queue a task to complete the operation */ 11402336Snarayan status = ddi_taskq_dispatch(vd->completionq, vds_operation[i].complete, 11412336Snarayan task, DDI_SLEEP); 11422336Snarayan /* ddi_taskq_dispatch(9f) guarantees success with DDI_SLEEP */ 11432336Snarayan ASSERT(status == DDI_SUCCESS); 11442336Snarayan 11452336Snarayan PR1("Operation in progress"); 11462336Snarayan return (EINPROGRESS); /* completion handler will finish request */ 11471991Sheppo } 11481991Sheppo 11491991Sheppo /* 11502032Slm66018 * Return true if the "type", "subtype", and "env" fields of the "tag" first 11512032Slm66018 * argument match the corresponding remaining arguments; otherwise, return false 11521991Sheppo */ 11532032Slm66018 boolean_t 11541991Sheppo vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env) 11551991Sheppo { 11561991Sheppo return ((tag->vio_msgtype == type) && 11571991Sheppo (tag->vio_subtype == subtype) && 11582032Slm66018 (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; 11591991Sheppo } 11601991Sheppo 11612032Slm66018 /* 11622032Slm66018 * Check whether the major/minor version specified in "ver_msg" is supported 11632032Slm66018 * by this server. 11642032Slm66018 */ 11652032Slm66018 static boolean_t 11662032Slm66018 vds_supported_version(vio_ver_msg_t *ver_msg) 11672032Slm66018 { 11682032Slm66018 for (int i = 0; i < vds_num_versions; i++) { 11692032Slm66018 ASSERT(vds_version[i].major > 0); 11702032Slm66018 ASSERT((i == 0) || 11712032Slm66018 (vds_version[i].major < vds_version[i-1].major)); 11722032Slm66018 11732032Slm66018 /* 11742032Slm66018 * If the major versions match, adjust the minor version, if 11752032Slm66018 * necessary, down to the highest value supported by this 11762032Slm66018 * server and return true so this message will get "ack"ed; 11772032Slm66018 * the client should also support all minor versions lower 11782032Slm66018 * than the value it sent 11792032Slm66018 */ 11802032Slm66018 if (ver_msg->ver_major == vds_version[i].major) { 11812032Slm66018 if (ver_msg->ver_minor > vds_version[i].minor) { 11822032Slm66018 PR0("Adjusting minor version from %u to %u", 11832032Slm66018 ver_msg->ver_minor, vds_version[i].minor); 11842032Slm66018 ver_msg->ver_minor = vds_version[i].minor; 11852032Slm66018 } 11862032Slm66018 return (B_TRUE); 11872032Slm66018 } 11882032Slm66018 11892032Slm66018 /* 11902032Slm66018 * If the message contains a higher major version number, set 11912032Slm66018 * the message's major/minor versions to the current values 11922032Slm66018 * and return false, so this message will get "nack"ed with 11932032Slm66018 * these values, and the client will potentially try again 11942032Slm66018 * with the same or a lower version 11952032Slm66018 */ 11962032Slm66018 if (ver_msg->ver_major > vds_version[i].major) { 11972032Slm66018 ver_msg->ver_major = vds_version[i].major; 11982032Slm66018 ver_msg->ver_minor = vds_version[i].minor; 11992032Slm66018 return (B_FALSE); 12002032Slm66018 } 12012032Slm66018 12022032Slm66018 /* 12032032Slm66018 * Otherwise, the message's major version is less than the 12042032Slm66018 * current major version, so continue the loop to the next 12052032Slm66018 * (lower) supported version 12062032Slm66018 */ 12072032Slm66018 } 12082032Slm66018 12092032Slm66018 /* 12102032Slm66018 * No common version was found; "ground" the version pair in the 12112032Slm66018 * message to terminate negotiation 12122032Slm66018 */ 12132032Slm66018 ver_msg->ver_major = 0; 12142032Slm66018 ver_msg->ver_minor = 0; 12152032Slm66018 return (B_FALSE); 12162032Slm66018 } 12172032Slm66018 12182032Slm66018 /* 12192032Slm66018 * Process a version message from a client. vds expects to receive version 12202032Slm66018 * messages from clients seeking service, but never issues version messages 12212032Slm66018 * itself; therefore, vds can ACK or NACK client version messages, but does 12222032Slm66018 * not expect to receive version-message ACKs or NACKs (and will treat such 12232032Slm66018 * messages as invalid). 12242032Slm66018 */ 12251991Sheppo static int 12262032Slm66018 vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 12271991Sheppo { 12281991Sheppo vio_ver_msg_t *ver_msg = (vio_ver_msg_t *)msg; 12291991Sheppo 12301991Sheppo 12311991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 12321991Sheppo 12331991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 12341991Sheppo VIO_VER_INFO)) { 12351991Sheppo return (ENOMSG); /* not a version message */ 12361991Sheppo } 12371991Sheppo 12381991Sheppo if (msglen != sizeof (*ver_msg)) { 12392793Slm66018 PR0("Expected %lu-byte version message; " 12401991Sheppo "received %lu bytes", sizeof (*ver_msg), msglen); 12411991Sheppo return (EBADMSG); 12421991Sheppo } 12431991Sheppo 12441991Sheppo if (ver_msg->dev_class != VDEV_DISK) { 12452793Slm66018 PR0("Expected device class %u (disk); received %u", 12461991Sheppo VDEV_DISK, ver_msg->dev_class); 12471991Sheppo return (EBADMSG); 12481991Sheppo } 12491991Sheppo 12502032Slm66018 /* 12512032Slm66018 * We're talking to the expected kind of client; set our device class 12522032Slm66018 * for "ack/nack" back to the client 12532032Slm66018 */ 12542032Slm66018 ver_msg->dev_class = VDEV_DISK_SERVER; 12552032Slm66018 12562032Slm66018 /* 12572032Slm66018 * Check whether the (valid) version message specifies a version 12582032Slm66018 * supported by this server. If the version is not supported, return 12592032Slm66018 * EBADMSG so the message will get "nack"ed; vds_supported_version() 12602032Slm66018 * will have updated the message with a supported version for the 12612032Slm66018 * client to consider 12622032Slm66018 */ 12632032Slm66018 if (!vds_supported_version(ver_msg)) 12641991Sheppo return (EBADMSG); 12652032Slm66018 12662032Slm66018 12672032Slm66018 /* 12682032Slm66018 * A version has been agreed upon; use the client's SID for 12692032Slm66018 * communication on this channel now 12702032Slm66018 */ 12712032Slm66018 ASSERT(!(vd->initialized & VD_SID)); 12722032Slm66018 vd->sid = ver_msg->tag.vio_sid; 12732032Slm66018 vd->initialized |= VD_SID; 12741991Sheppo 12752032Slm66018 /* 12762032Slm66018 * When multiple versions are supported, this function should store 12772032Slm66018 * the negotiated major and minor version values in the "vd" data 12782032Slm66018 * structure to govern further communication; in particular, note that 12792032Slm66018 * the client might have specified a lower minor version for the 12802032Slm66018 * agreed major version than specifed in the vds_version[] array. The 12812032Slm66018 * following assertions should help remind future maintainers to make 12822032Slm66018 * the appropriate changes to support multiple versions. 12832032Slm66018 */ 12842032Slm66018 ASSERT(vds_num_versions == 1); 12852032Slm66018 ASSERT(ver_msg->ver_major == vds_version[0].major); 12862032Slm66018 ASSERT(ver_msg->ver_minor == vds_version[0].minor); 12872032Slm66018 12882032Slm66018 PR0("Using major version %u, minor version %u", 12892032Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 12901991Sheppo return (0); 12911991Sheppo } 12921991Sheppo 12931991Sheppo static int 12941991Sheppo vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 12951991Sheppo { 12961991Sheppo vd_attr_msg_t *attr_msg = (vd_attr_msg_t *)msg; 12971991Sheppo 12981991Sheppo 12991991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 13001991Sheppo 13011991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 13021991Sheppo VIO_ATTR_INFO)) { 13032336Snarayan PR0("Message is not an attribute message"); 13042336Snarayan return (ENOMSG); 13051991Sheppo } 13061991Sheppo 13071991Sheppo if (msglen != sizeof (*attr_msg)) { 13082793Slm66018 PR0("Expected %lu-byte attribute message; " 13091991Sheppo "received %lu bytes", sizeof (*attr_msg), msglen); 13101991Sheppo return (EBADMSG); 13111991Sheppo } 13121991Sheppo 13131991Sheppo if (attr_msg->max_xfer_sz == 0) { 13142793Slm66018 PR0("Received maximum transfer size of 0 from client"); 13151991Sheppo return (EBADMSG); 13161991Sheppo } 13171991Sheppo 13181991Sheppo if ((attr_msg->xfer_mode != VIO_DESC_MODE) && 13191991Sheppo (attr_msg->xfer_mode != VIO_DRING_MODE)) { 13202793Slm66018 PR0("Client requested unsupported transfer mode"); 13211991Sheppo return (EBADMSG); 13221991Sheppo } 13231991Sheppo 13241991Sheppo /* Success: valid message and transfer mode */ 13251991Sheppo vd->xfer_mode = attr_msg->xfer_mode; 13262793Slm66018 13271991Sheppo if (vd->xfer_mode == VIO_DESC_MODE) { 13282793Slm66018 13291991Sheppo /* 13301991Sheppo * The vd_dring_inband_msg_t contains one cookie; need room 13311991Sheppo * for up to n-1 more cookies, where "n" is the number of full 13321991Sheppo * pages plus possibly one partial page required to cover 13331991Sheppo * "max_xfer_sz". Add room for one more cookie if 13341991Sheppo * "max_xfer_sz" isn't an integral multiple of the page size. 13351991Sheppo * Must first get the maximum transfer size in bytes. 13361991Sheppo */ 13371991Sheppo size_t max_xfer_bytes = attr_msg->vdisk_block_size ? 13381991Sheppo attr_msg->vdisk_block_size*attr_msg->max_xfer_sz : 13391991Sheppo attr_msg->max_xfer_sz; 13401991Sheppo size_t max_inband_msglen = 13411991Sheppo sizeof (vd_dring_inband_msg_t) + 13421991Sheppo ((max_xfer_bytes/PAGESIZE + 13431991Sheppo ((max_xfer_bytes % PAGESIZE) ? 1 : 0))* 13441991Sheppo (sizeof (ldc_mem_cookie_t))); 13451991Sheppo 13461991Sheppo /* 13471991Sheppo * Set the maximum expected message length to 13481991Sheppo * accommodate in-band-descriptor messages with all 13491991Sheppo * their cookies 13501991Sheppo */ 13511991Sheppo vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen); 13522336Snarayan 13532336Snarayan /* 13542336Snarayan * Initialize the data structure for processing in-band I/O 13552336Snarayan * request descriptors 13562336Snarayan */ 13572336Snarayan vd->inband_task.vd = vd; 13582793Slm66018 vd->inband_task.msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 13592336Snarayan vd->inband_task.index = 0; 13602336Snarayan vd->inband_task.type = VD_FINAL_RANGE_TASK; /* range == 1 */ 13611991Sheppo } 13621991Sheppo 13632410Slm66018 /* Return the device's block size and max transfer size to the client */ 13642410Slm66018 attr_msg->vdisk_block_size = DEV_BSIZE; 13652410Slm66018 attr_msg->max_xfer_sz = vd->max_xfer_sz; 13662410Slm66018 13671991Sheppo attr_msg->vdisk_size = vd->vdisk_size; 13681991Sheppo attr_msg->vdisk_type = vd->vdisk_type; 13691991Sheppo attr_msg->operations = vds_operations; 13701991Sheppo PR0("%s", VD_CLIENT(vd)); 13712793Slm66018 13722793Slm66018 ASSERT(vd->dring_task == NULL); 13732793Slm66018 13741991Sheppo return (0); 13751991Sheppo } 13761991Sheppo 13771991Sheppo static int 13781991Sheppo vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 13791991Sheppo { 13801991Sheppo int status; 13811991Sheppo size_t expected; 13821991Sheppo ldc_mem_info_t dring_minfo; 13831991Sheppo vio_dring_reg_msg_t *reg_msg = (vio_dring_reg_msg_t *)msg; 13841991Sheppo 13851991Sheppo 13861991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 13871991Sheppo 13881991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 13891991Sheppo VIO_DRING_REG)) { 13902336Snarayan PR0("Message is not a register-dring message"); 13912336Snarayan return (ENOMSG); 13921991Sheppo } 13931991Sheppo 13941991Sheppo if (msglen < sizeof (*reg_msg)) { 13952793Slm66018 PR0("Expected at least %lu-byte register-dring message; " 13961991Sheppo "received %lu bytes", sizeof (*reg_msg), msglen); 13971991Sheppo return (EBADMSG); 13981991Sheppo } 13991991Sheppo 14001991Sheppo expected = sizeof (*reg_msg) + 14011991Sheppo (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0])); 14021991Sheppo if (msglen != expected) { 14032793Slm66018 PR0("Expected %lu-byte register-dring message; " 14041991Sheppo "received %lu bytes", expected, msglen); 14051991Sheppo return (EBADMSG); 14061991Sheppo } 14071991Sheppo 14081991Sheppo if (vd->initialized & VD_DRING) { 14092793Slm66018 PR0("A dring was previously registered; only support one"); 14101991Sheppo return (EBADMSG); 14111991Sheppo } 14121991Sheppo 14132336Snarayan if (reg_msg->num_descriptors > INT32_MAX) { 14142793Slm66018 PR0("reg_msg->num_descriptors = %u; must be <= %u (%s)", 14152336Snarayan reg_msg->ncookies, INT32_MAX, STRINGIZE(INT32_MAX)); 14162336Snarayan return (EBADMSG); 14172336Snarayan } 14182336Snarayan 14191991Sheppo if (reg_msg->ncookies != 1) { 14201991Sheppo /* 14211991Sheppo * In addition to fixing the assertion in the success case 14221991Sheppo * below, supporting drings which require more than one 14231991Sheppo * "cookie" requires increasing the value of vd->max_msglen 14241991Sheppo * somewhere in the code path prior to receiving the message 14251991Sheppo * which results in calling this function. Note that without 14261991Sheppo * making this change, the larger message size required to 14271991Sheppo * accommodate multiple cookies cannot be successfully 14281991Sheppo * received, so this function will not even get called. 14291991Sheppo * Gracefully accommodating more dring cookies might 14301991Sheppo * reasonably demand exchanging an additional attribute or 14311991Sheppo * making a minor protocol adjustment 14321991Sheppo */ 14332793Slm66018 PR0("reg_msg->ncookies = %u != 1", reg_msg->ncookies); 14341991Sheppo return (EBADMSG); 14351991Sheppo } 14361991Sheppo 14371991Sheppo status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie, 14381991Sheppo reg_msg->ncookies, reg_msg->num_descriptors, 14392531Snarayan reg_msg->descriptor_size, LDC_DIRECT_MAP, &vd->dring_handle); 14401991Sheppo if (status != 0) { 14412793Slm66018 PR0("ldc_mem_dring_map() returned errno %d", status); 14421991Sheppo return (status); 14431991Sheppo } 14441991Sheppo 14451991Sheppo /* 14461991Sheppo * To remove the need for this assertion, must call 14471991Sheppo * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a 14481991Sheppo * successful call to ldc_mem_dring_map() 14491991Sheppo */ 14501991Sheppo ASSERT(reg_msg->ncookies == 1); 14511991Sheppo 14521991Sheppo if ((status = 14531991Sheppo ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { 14542793Slm66018 PR0("ldc_mem_dring_info() returned errno %d", status); 14551991Sheppo if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0) 14562793Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 14571991Sheppo return (status); 14581991Sheppo } 14591991Sheppo 14601991Sheppo if (dring_minfo.vaddr == NULL) { 14612793Slm66018 PR0("Descriptor ring virtual address is NULL"); 14622032Slm66018 return (ENXIO); 14631991Sheppo } 14641991Sheppo 14651991Sheppo 14662336Snarayan /* Initialize for valid message and mapped dring */ 14671991Sheppo PR1("descriptor size = %u, dring length = %u", 14681991Sheppo vd->descriptor_size, vd->dring_len); 14691991Sheppo vd->initialized |= VD_DRING; 14701991Sheppo vd->dring_ident = 1; /* "There Can Be Only One" */ 14711991Sheppo vd->dring = dring_minfo.vaddr; 14721991Sheppo vd->descriptor_size = reg_msg->descriptor_size; 14731991Sheppo vd->dring_len = reg_msg->num_descriptors; 14741991Sheppo reg_msg->dring_ident = vd->dring_ident; 14752336Snarayan 14762336Snarayan /* 14772336Snarayan * Allocate and initialize a "shadow" array of data structures for 14782336Snarayan * tasks to process I/O requests in dring elements 14792336Snarayan */ 14802336Snarayan vd->dring_task = 14812336Snarayan kmem_zalloc((sizeof (*vd->dring_task)) * vd->dring_len, KM_SLEEP); 14822336Snarayan for (int i = 0; i < vd->dring_len; i++) { 14832336Snarayan vd->dring_task[i].vd = vd; 14842336Snarayan vd->dring_task[i].index = i; 14852336Snarayan vd->dring_task[i].request = &VD_DRING_ELEM(i)->payload; 14862531Snarayan 14872531Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, 14882531Snarayan &(vd->dring_task[i].mhdl)); 14892531Snarayan if (status) { 14902793Slm66018 PR0("ldc_mem_alloc_handle() returned err %d ", status); 14912531Snarayan return (ENXIO); 14922531Snarayan } 14932793Slm66018 14942793Slm66018 vd->dring_task[i].msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 14952336Snarayan } 14962336Snarayan 14971991Sheppo return (0); 14981991Sheppo } 14991991Sheppo 15001991Sheppo static int 15011991Sheppo vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 15021991Sheppo { 15031991Sheppo vio_dring_unreg_msg_t *unreg_msg = (vio_dring_unreg_msg_t *)msg; 15041991Sheppo 15051991Sheppo 15061991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 15071991Sheppo 15081991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 15091991Sheppo VIO_DRING_UNREG)) { 15102336Snarayan PR0("Message is not an unregister-dring message"); 15112336Snarayan return (ENOMSG); 15121991Sheppo } 15131991Sheppo 15141991Sheppo if (msglen != sizeof (*unreg_msg)) { 15152793Slm66018 PR0("Expected %lu-byte unregister-dring message; " 15161991Sheppo "received %lu bytes", sizeof (*unreg_msg), msglen); 15171991Sheppo return (EBADMSG); 15181991Sheppo } 15191991Sheppo 15201991Sheppo if (unreg_msg->dring_ident != vd->dring_ident) { 15212793Slm66018 PR0("Expected dring ident %lu; received %lu", 15221991Sheppo vd->dring_ident, unreg_msg->dring_ident); 15231991Sheppo return (EBADMSG); 15241991Sheppo } 15251991Sheppo 15261991Sheppo return (0); 15271991Sheppo } 15281991Sheppo 15291991Sheppo static int 15301991Sheppo process_rdx_msg(vio_msg_t *msg, size_t msglen) 15311991Sheppo { 15321991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 15331991Sheppo 15342336Snarayan if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX)) { 15352336Snarayan PR0("Message is not an RDX message"); 15362336Snarayan return (ENOMSG); 15372336Snarayan } 15381991Sheppo 15391991Sheppo if (msglen != sizeof (vio_rdx_msg_t)) { 15402793Slm66018 PR0("Expected %lu-byte RDX message; received %lu bytes", 15411991Sheppo sizeof (vio_rdx_msg_t), msglen); 15421991Sheppo return (EBADMSG); 15431991Sheppo } 15441991Sheppo 15452336Snarayan PR0("Valid RDX message"); 15461991Sheppo return (0); 15471991Sheppo } 15481991Sheppo 15491991Sheppo static int 15501991Sheppo vd_check_seq_num(vd_t *vd, uint64_t seq_num) 15511991Sheppo { 15521991Sheppo if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) { 15532793Slm66018 PR0("Received seq_num %lu; expected %lu", 15541991Sheppo seq_num, (vd->seq_num + 1)); 15552793Slm66018 PR0("initiating soft reset"); 15562336Snarayan vd_need_reset(vd, B_FALSE); 15571991Sheppo return (1); 15581991Sheppo } 15591991Sheppo 15601991Sheppo vd->seq_num = seq_num; 15611991Sheppo vd->initialized |= VD_SEQ_NUM; /* superfluous after first time... */ 15621991Sheppo return (0); 15631991Sheppo } 15641991Sheppo 15651991Sheppo /* 15661991Sheppo * Return the expected size of an inband-descriptor message with all the 15671991Sheppo * cookies it claims to include 15681991Sheppo */ 15691991Sheppo static size_t 15701991Sheppo expected_inband_size(vd_dring_inband_msg_t *msg) 15711991Sheppo { 15721991Sheppo return ((sizeof (*msg)) + 15731991Sheppo (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0]))); 15741991Sheppo } 15751991Sheppo 15761991Sheppo /* 15771991Sheppo * Process an in-band descriptor message: used with clients like OBP, with 15781991Sheppo * which vds exchanges descriptors within VIO message payloads, rather than 15791991Sheppo * operating on them within a descriptor ring 15801991Sheppo */ 15811991Sheppo static int 15822793Slm66018 vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 15831991Sheppo { 15841991Sheppo size_t expected; 15851991Sheppo vd_dring_inband_msg_t *desc_msg = (vd_dring_inband_msg_t *)msg; 15861991Sheppo 15871991Sheppo 15881991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 15891991Sheppo 15901991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 15912336Snarayan VIO_DESC_DATA)) { 15922336Snarayan PR1("Message is not an in-band-descriptor message"); 15932336Snarayan return (ENOMSG); 15942336Snarayan } 15951991Sheppo 15961991Sheppo if (msglen < sizeof (*desc_msg)) { 15972793Slm66018 PR0("Expected at least %lu-byte descriptor message; " 15981991Sheppo "received %lu bytes", sizeof (*desc_msg), msglen); 15991991Sheppo return (EBADMSG); 16001991Sheppo } 16011991Sheppo 16021991Sheppo if (msglen != (expected = expected_inband_size(desc_msg))) { 16032793Slm66018 PR0("Expected %lu-byte descriptor message; " 16041991Sheppo "received %lu bytes", expected, msglen); 16051991Sheppo return (EBADMSG); 16061991Sheppo } 16071991Sheppo 16082336Snarayan if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) 16091991Sheppo return (EBADMSG); 16102336Snarayan 16112336Snarayan /* 16122336Snarayan * Valid message: Set up the in-band descriptor task and process the 16132336Snarayan * request. Arrange to acknowledge the client's message, unless an 16142336Snarayan * error processing the descriptor task results in setting 16152336Snarayan * VIO_SUBTYPE_NACK 16162336Snarayan */ 16172336Snarayan PR1("Valid in-band-descriptor message"); 16182336Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 16192793Slm66018 16202793Slm66018 ASSERT(vd->inband_task.msg != NULL); 16212793Slm66018 16222793Slm66018 bcopy(msg, vd->inband_task.msg, msglen); 16232336Snarayan vd->inband_task.msglen = msglen; 16242793Slm66018 16252793Slm66018 /* 16262793Slm66018 * The task request is now the payload of the message 16272793Slm66018 * that was just copied into the body of the task. 16282793Slm66018 */ 16292793Slm66018 desc_msg = (vd_dring_inband_msg_t *)vd->inband_task.msg; 16302336Snarayan vd->inband_task.request = &desc_msg->payload; 16312793Slm66018 16322336Snarayan return (vd_process_task(&vd->inband_task)); 16331991Sheppo } 16341991Sheppo 16351991Sheppo static int 16362336Snarayan vd_process_element(vd_t *vd, vd_task_type_t type, uint32_t idx, 16372793Slm66018 vio_msg_t *msg, size_t msglen) 16381991Sheppo { 16392336Snarayan int status; 16402336Snarayan boolean_t ready; 16412336Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 16422336Snarayan 16432336Snarayan 16442336Snarayan /* Accept the updated dring element */ 16452336Snarayan if ((status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 16462793Slm66018 PR0("ldc_mem_dring_acquire() returned errno %d", status); 16471991Sheppo return (status); 16481991Sheppo } 16492336Snarayan ready = (elem->hdr.dstate == VIO_DESC_READY); 16502336Snarayan if (ready) { 16512336Snarayan elem->hdr.dstate = VIO_DESC_ACCEPTED; 16522336Snarayan } else { 16532793Slm66018 PR0("descriptor %u not ready", idx); 16542336Snarayan VD_DUMP_DRING_ELEM(elem); 16552336Snarayan } 16562336Snarayan if ((status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 16572793Slm66018 PR0("ldc_mem_dring_release() returned errno %d", status); 16581991Sheppo return (status); 16591991Sheppo } 16602336Snarayan if (!ready) 16612336Snarayan return (EBUSY); 16622336Snarayan 16632336Snarayan 16642336Snarayan /* Initialize a task and process the accepted element */ 16652336Snarayan PR1("Processing dring element %u", idx); 16662336Snarayan vd->dring_task[idx].type = type; 16672793Slm66018 16682793Slm66018 /* duplicate msg buf for cookies etc. */ 16692793Slm66018 bcopy(msg, vd->dring_task[idx].msg, msglen); 16702793Slm66018 16712336Snarayan vd->dring_task[idx].msglen = msglen; 16722336Snarayan if ((status = vd_process_task(&vd->dring_task[idx])) != EINPROGRESS) 16732336Snarayan status = vd_mark_elem_done(vd, idx, elem->payload.status); 16742336Snarayan 16752336Snarayan return (status); 16761991Sheppo } 16771991Sheppo 16781991Sheppo static int 16792336Snarayan vd_process_element_range(vd_t *vd, int start, int end, 16802793Slm66018 vio_msg_t *msg, size_t msglen) 16812336Snarayan { 16822336Snarayan int i, n, nelem, status = 0; 16832336Snarayan boolean_t inprogress = B_FALSE; 16842336Snarayan vd_task_type_t type; 16852336Snarayan 16862336Snarayan 16872336Snarayan ASSERT(start >= 0); 16882336Snarayan ASSERT(end >= 0); 16892336Snarayan 16902336Snarayan /* 16912336Snarayan * Arrange to acknowledge the client's message, unless an error 16922336Snarayan * processing one of the dring elements results in setting 16932336Snarayan * VIO_SUBTYPE_NACK 16942336Snarayan */ 16952336Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 16962336Snarayan 16972336Snarayan /* 16982336Snarayan * Process the dring elements in the range 16992336Snarayan */ 17002336Snarayan nelem = ((end < start) ? end + vd->dring_len : end) - start + 1; 17012336Snarayan for (i = start, n = nelem; n > 0; i = (i + 1) % vd->dring_len, n--) { 17022336Snarayan ((vio_dring_msg_t *)msg)->end_idx = i; 17032336Snarayan type = (n == 1) ? VD_FINAL_RANGE_TASK : VD_NONFINAL_RANGE_TASK; 17042793Slm66018 status = vd_process_element(vd, type, i, msg, msglen); 17052336Snarayan if (status == EINPROGRESS) 17062336Snarayan inprogress = B_TRUE; 17072336Snarayan else if (status != 0) 17082336Snarayan break; 17092336Snarayan } 17102336Snarayan 17112336Snarayan /* 17122336Snarayan * If some, but not all, operations of a multi-element range are in 17132336Snarayan * progress, wait for other operations to complete before returning 17142336Snarayan * (which will result in "ack" or "nack" of the message). Note that 17152336Snarayan * all outstanding operations will need to complete, not just the ones 17162336Snarayan * corresponding to the current range of dring elements; howevever, as 17172336Snarayan * this situation is an error case, performance is less critical. 17182336Snarayan */ 17192336Snarayan if ((nelem > 1) && (status != EINPROGRESS) && inprogress) 17202336Snarayan ddi_taskq_wait(vd->completionq); 17212336Snarayan 17222336Snarayan return (status); 17232336Snarayan } 17242336Snarayan 17252336Snarayan static int 17262793Slm66018 vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 17271991Sheppo { 17281991Sheppo vio_dring_msg_t *dring_msg = (vio_dring_msg_t *)msg; 17291991Sheppo 17301991Sheppo 17311991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 17321991Sheppo 17331991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 17341991Sheppo VIO_DRING_DATA)) { 17352336Snarayan PR1("Message is not a dring-data message"); 17362336Snarayan return (ENOMSG); 17371991Sheppo } 17381991Sheppo 17391991Sheppo if (msglen != sizeof (*dring_msg)) { 17402793Slm66018 PR0("Expected %lu-byte dring message; received %lu bytes", 17411991Sheppo sizeof (*dring_msg), msglen); 17421991Sheppo return (EBADMSG); 17431991Sheppo } 17441991Sheppo 17452336Snarayan if (vd_check_seq_num(vd, dring_msg->seq_num) != 0) 17461991Sheppo return (EBADMSG); 17471991Sheppo 17481991Sheppo if (dring_msg->dring_ident != vd->dring_ident) { 17492793Slm66018 PR0("Expected dring ident %lu; received ident %lu", 17501991Sheppo vd->dring_ident, dring_msg->dring_ident); 17511991Sheppo return (EBADMSG); 17521991Sheppo } 17531991Sheppo 17542336Snarayan if (dring_msg->start_idx >= vd->dring_len) { 17552793Slm66018 PR0("\"start_idx\" = %u; must be less than %u", 17562336Snarayan dring_msg->start_idx, vd->dring_len); 17572336Snarayan return (EBADMSG); 17582336Snarayan } 17592336Snarayan 17602336Snarayan if ((dring_msg->end_idx < 0) || 17612336Snarayan (dring_msg->end_idx >= vd->dring_len)) { 17622793Slm66018 PR0("\"end_idx\" = %u; must be >= 0 and less than %u", 17632336Snarayan dring_msg->end_idx, vd->dring_len); 17642336Snarayan return (EBADMSG); 17652336Snarayan } 17662336Snarayan 17672336Snarayan /* Valid message; process range of updated dring elements */ 17682336Snarayan PR1("Processing descriptor range, start = %u, end = %u", 17692336Snarayan dring_msg->start_idx, dring_msg->end_idx); 17702336Snarayan return (vd_process_element_range(vd, dring_msg->start_idx, 17712793Slm66018 dring_msg->end_idx, msg, msglen)); 17721991Sheppo } 17731991Sheppo 17741991Sheppo static int 17751991Sheppo recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes) 17761991Sheppo { 17771991Sheppo int retry, status; 17781991Sheppo size_t size = *nbytes; 17791991Sheppo 17801991Sheppo 17811991Sheppo for (retry = 0, status = ETIMEDOUT; 17821991Sheppo retry < vds_ldc_retries && status == ETIMEDOUT; 17831991Sheppo retry++) { 17841991Sheppo PR1("ldc_read() attempt %d", (retry + 1)); 17851991Sheppo *nbytes = size; 17861991Sheppo status = ldc_read(ldc_handle, msg, nbytes); 17871991Sheppo } 17881991Sheppo 17892793Slm66018 if (status) { 17902793Slm66018 PR0("ldc_read() returned errno %d", status); 17912793Slm66018 if (status != ECONNRESET) 17922793Slm66018 return (ENOMSG); 17931991Sheppo return (status); 17941991Sheppo } else if (*nbytes == 0) { 17951991Sheppo PR1("ldc_read() returned 0 and no message read"); 17961991Sheppo return (ENOMSG); 17971991Sheppo } 17981991Sheppo 17991991Sheppo PR1("RCVD %lu-byte message", *nbytes); 18001991Sheppo return (0); 18011991Sheppo } 18021991Sheppo 18031991Sheppo static int 18042793Slm66018 vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 18051991Sheppo { 18061991Sheppo int status; 18071991Sheppo 18081991Sheppo 18091991Sheppo PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype, 18101991Sheppo msg->tag.vio_subtype, msg->tag.vio_subtype_env); 18112793Slm66018 #ifdef DEBUG 18122793Slm66018 vd_decode_tag(msg); 18132793Slm66018 #endif 18141991Sheppo 18151991Sheppo /* 18161991Sheppo * Validate session ID up front, since it applies to all messages 18171991Sheppo * once set 18181991Sheppo */ 18191991Sheppo if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) { 18202793Slm66018 PR0("Expected SID %u, received %u", vd->sid, 18211991Sheppo msg->tag.vio_sid); 18221991Sheppo return (EBADMSG); 18231991Sheppo } 18241991Sheppo 18252793Slm66018 PR1("\tWhile in state %d (%s)", vd->state, vd_decode_state(vd->state)); 18261991Sheppo 18271991Sheppo /* 18281991Sheppo * Process the received message based on connection state 18291991Sheppo */ 18301991Sheppo switch (vd->state) { 18311991Sheppo case VD_STATE_INIT: /* expect version message */ 18322032Slm66018 if ((status = vd_process_ver_msg(vd, msg, msglen)) != 0) 18331991Sheppo return (status); 18341991Sheppo 18351991Sheppo /* Version negotiated, move to that state */ 18361991Sheppo vd->state = VD_STATE_VER; 18371991Sheppo return (0); 18381991Sheppo 18391991Sheppo case VD_STATE_VER: /* expect attribute message */ 18401991Sheppo if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0) 18411991Sheppo return (status); 18421991Sheppo 18431991Sheppo /* Attributes exchanged, move to that state */ 18441991Sheppo vd->state = VD_STATE_ATTR; 18451991Sheppo return (0); 18461991Sheppo 18471991Sheppo case VD_STATE_ATTR: 18481991Sheppo switch (vd->xfer_mode) { 18491991Sheppo case VIO_DESC_MODE: /* expect RDX message */ 18501991Sheppo if ((status = process_rdx_msg(msg, msglen)) != 0) 18511991Sheppo return (status); 18521991Sheppo 18531991Sheppo /* Ready to receive in-band descriptors */ 18541991Sheppo vd->state = VD_STATE_DATA; 18551991Sheppo return (0); 18561991Sheppo 18571991Sheppo case VIO_DRING_MODE: /* expect register-dring message */ 18581991Sheppo if ((status = 18591991Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != 0) 18601991Sheppo return (status); 18611991Sheppo 18621991Sheppo /* One dring negotiated, move to that state */ 18631991Sheppo vd->state = VD_STATE_DRING; 18641991Sheppo return (0); 18651991Sheppo 18661991Sheppo default: 18671991Sheppo ASSERT("Unsupported transfer mode"); 18682793Slm66018 PR0("Unsupported transfer mode"); 18691991Sheppo return (ENOTSUP); 18701991Sheppo } 18711991Sheppo 18721991Sheppo case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */ 18731991Sheppo if ((status = process_rdx_msg(msg, msglen)) == 0) { 18741991Sheppo /* Ready to receive data */ 18751991Sheppo vd->state = VD_STATE_DATA; 18761991Sheppo return (0); 18771991Sheppo } else if (status != ENOMSG) { 18781991Sheppo return (status); 18791991Sheppo } 18801991Sheppo 18811991Sheppo 18821991Sheppo /* 18831991Sheppo * If another register-dring message is received, stay in 18841991Sheppo * dring state in case the client sends RDX; although the 18851991Sheppo * protocol allows multiple drings, this server does not 18861991Sheppo * support using more than one 18871991Sheppo */ 18881991Sheppo if ((status = 18891991Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG) 18901991Sheppo return (status); 18911991Sheppo 18921991Sheppo /* 18931991Sheppo * Acknowledge an unregister-dring message, but reset the 18941991Sheppo * connection anyway: Although the protocol allows 18951991Sheppo * unregistering drings, this server cannot serve a vdisk 18961991Sheppo * without its only dring 18971991Sheppo */ 18981991Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 18991991Sheppo return ((status == 0) ? ENOTSUP : status); 19001991Sheppo 19011991Sheppo case VD_STATE_DATA: 19021991Sheppo switch (vd->xfer_mode) { 19031991Sheppo case VIO_DESC_MODE: /* expect in-band-descriptor message */ 19042793Slm66018 return (vd_process_desc_msg(vd, msg, msglen)); 19051991Sheppo 19061991Sheppo case VIO_DRING_MODE: /* expect dring-data or unreg-dring */ 19071991Sheppo /* 19081991Sheppo * Typically expect dring-data messages, so handle 19091991Sheppo * them first 19101991Sheppo */ 19111991Sheppo if ((status = vd_process_dring_msg(vd, msg, 19122793Slm66018 msglen)) != ENOMSG) 19131991Sheppo return (status); 19141991Sheppo 19151991Sheppo /* 19161991Sheppo * Acknowledge an unregister-dring message, but reset 19171991Sheppo * the connection anyway: Although the protocol 19181991Sheppo * allows unregistering drings, this server cannot 19191991Sheppo * serve a vdisk without its only dring 19201991Sheppo */ 19211991Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 19221991Sheppo return ((status == 0) ? ENOTSUP : status); 19231991Sheppo 19241991Sheppo default: 19251991Sheppo ASSERT("Unsupported transfer mode"); 19262793Slm66018 PR0("Unsupported transfer mode"); 19271991Sheppo return (ENOTSUP); 19281991Sheppo } 19291991Sheppo 19301991Sheppo default: 19311991Sheppo ASSERT("Invalid client connection state"); 19322793Slm66018 PR0("Invalid client connection state"); 19331991Sheppo return (ENOTSUP); 19341991Sheppo } 19351991Sheppo } 19361991Sheppo 19372336Snarayan static int 19382793Slm66018 vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 19391991Sheppo { 19401991Sheppo int status; 19411991Sheppo boolean_t reset_ldc = B_FALSE; 19421991Sheppo 19431991Sheppo 19441991Sheppo /* 19451991Sheppo * Check that the message is at least big enough for a "tag", so that 19461991Sheppo * message processing can proceed based on tag-specified message type 19471991Sheppo */ 19481991Sheppo if (msglen < sizeof (vio_msg_tag_t)) { 19492793Slm66018 PR0("Received short (%lu-byte) message", msglen); 19501991Sheppo /* Can't "nack" short message, so drop the big hammer */ 19512793Slm66018 PR0("initiating full reset"); 19522336Snarayan vd_need_reset(vd, B_TRUE); 19532336Snarayan return (EBADMSG); 19541991Sheppo } 19551991Sheppo 19561991Sheppo /* 19571991Sheppo * Process the message 19581991Sheppo */ 19592793Slm66018 switch (status = vd_do_process_msg(vd, msg, msglen)) { 19601991Sheppo case 0: 19611991Sheppo /* "ack" valid, successfully-processed messages */ 19621991Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 19631991Sheppo break; 19641991Sheppo 19652336Snarayan case EINPROGRESS: 19662336Snarayan /* The completion handler will "ack" or "nack" the message */ 19672336Snarayan return (EINPROGRESS); 19681991Sheppo case ENOMSG: 19692793Slm66018 PR0("Received unexpected message"); 19701991Sheppo _NOTE(FALLTHROUGH); 19711991Sheppo case EBADMSG: 19721991Sheppo case ENOTSUP: 19731991Sheppo /* "nack" invalid messages */ 19741991Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 19751991Sheppo break; 19761991Sheppo 19771991Sheppo default: 19781991Sheppo /* "nack" failed messages */ 19791991Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 19801991Sheppo /* An LDC error probably occurred, so try resetting it */ 19811991Sheppo reset_ldc = B_TRUE; 19821991Sheppo break; 19831991Sheppo } 19841991Sheppo 19852793Slm66018 PR1("\tResulting in state %d (%s)", vd->state, 19862793Slm66018 vd_decode_state(vd->state)); 19872793Slm66018 19882336Snarayan /* Send the "ack" or "nack" to the client */ 19891991Sheppo PR1("Sending %s", 19901991Sheppo (msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK"); 19911991Sheppo if (send_msg(vd->ldc_handle, msg, msglen) != 0) 19921991Sheppo reset_ldc = B_TRUE; 19931991Sheppo 19942336Snarayan /* Arrange to reset the connection for nack'ed or failed messages */ 19952793Slm66018 if ((status != 0) || reset_ldc) { 19962793Slm66018 PR0("initiating %s reset", 19972793Slm66018 (reset_ldc) ? "full" : "soft"); 19982336Snarayan vd_need_reset(vd, reset_ldc); 19992793Slm66018 } 20002336Snarayan 20012336Snarayan return (status); 20022336Snarayan } 20032336Snarayan 20042336Snarayan static boolean_t 20052336Snarayan vd_enabled(vd_t *vd) 20062336Snarayan { 20072336Snarayan boolean_t enabled; 20082336Snarayan 20092336Snarayan 20102336Snarayan mutex_enter(&vd->lock); 20112336Snarayan enabled = vd->enabled; 20122336Snarayan mutex_exit(&vd->lock); 20132336Snarayan return (enabled); 20141991Sheppo } 20151991Sheppo 20161991Sheppo static void 20172032Slm66018 vd_recv_msg(void *arg) 20181991Sheppo { 20192032Slm66018 vd_t *vd = (vd_t *)arg; 20202793Slm66018 int rv = 0, status = 0; 20211991Sheppo 20221991Sheppo ASSERT(vd != NULL); 20232793Slm66018 20242336Snarayan PR2("New task to receive incoming message(s)"); 20252793Slm66018 20262793Slm66018 20272336Snarayan while (vd_enabled(vd) && status == 0) { 20282336Snarayan size_t msglen, msgsize; 20292793Slm66018 ldc_status_t lstatus; 20302336Snarayan 20312336Snarayan /* 20322336Snarayan * Receive and process a message 20332336Snarayan */ 20342336Snarayan vd_reset_if_needed(vd); /* can change vd->max_msglen */ 20352793Slm66018 20362793Slm66018 /* 20372793Slm66018 * check if channel is UP - else break out of loop 20382793Slm66018 */ 20392793Slm66018 status = ldc_status(vd->ldc_handle, &lstatus); 20402793Slm66018 if (lstatus != LDC_UP) { 20412793Slm66018 PR0("channel not up (status=%d), exiting recv loop\n", 20422793Slm66018 lstatus); 20432793Slm66018 break; 20442793Slm66018 } 20452793Slm66018 20462793Slm66018 ASSERT(vd->max_msglen != 0); 20472793Slm66018 20482793Slm66018 msgsize = vd->max_msglen; /* stable copy for alloc/free */ 20492793Slm66018 msglen = msgsize; /* actual len after recv_msg() */ 20502793Slm66018 20512793Slm66018 status = recv_msg(vd->ldc_handle, vd->vio_msgp, &msglen); 20522793Slm66018 switch (status) { 20532793Slm66018 case 0: 20542793Slm66018 rv = vd_process_msg(vd, (vio_msg_t *)vd->vio_msgp, 20552793Slm66018 msglen); 20562793Slm66018 /* check if max_msglen changed */ 20572793Slm66018 if (msgsize != vd->max_msglen) { 20582793Slm66018 PR0("max_msglen changed 0x%lx to 0x%lx bytes\n", 20592793Slm66018 msgsize, vd->max_msglen); 20602793Slm66018 kmem_free(vd->vio_msgp, msgsize); 20612793Slm66018 vd->vio_msgp = 20622793Slm66018 kmem_alloc(vd->max_msglen, KM_SLEEP); 20632793Slm66018 } 20642793Slm66018 if (rv == EINPROGRESS) 20652793Slm66018 continue; 20662793Slm66018 break; 20672793Slm66018 20682793Slm66018 case ENOMSG: 20692793Slm66018 break; 20702793Slm66018 20712793Slm66018 case ECONNRESET: 20722793Slm66018 PR0("initiating soft reset (ECONNRESET)\n"); 20732793Slm66018 vd_need_reset(vd, B_FALSE); 20742793Slm66018 status = 0; 20752793Slm66018 break; 20762793Slm66018 20772793Slm66018 default: 20782336Snarayan /* Probably an LDC failure; arrange to reset it */ 20792793Slm66018 PR0("initiating full reset (status=0x%x)", status); 20802336Snarayan vd_need_reset(vd, B_TRUE); 20812793Slm66018 break; 20822336Snarayan } 20832032Slm66018 } 20842793Slm66018 20852336Snarayan PR2("Task finished"); 20862032Slm66018 } 20872032Slm66018 20882032Slm66018 static uint_t 20891991Sheppo vd_handle_ldc_events(uint64_t event, caddr_t arg) 20901991Sheppo { 20911991Sheppo vd_t *vd = (vd_t *)(void *)arg; 20922793Slm66018 int status; 20931991Sheppo 20941991Sheppo ASSERT(vd != NULL); 20952336Snarayan 20962336Snarayan if (!vd_enabled(vd)) 20972336Snarayan return (LDC_SUCCESS); 20982336Snarayan 20992793Slm66018 if (event & LDC_EVT_DOWN) { 21003166Ssg70180 PR0("LDC_EVT_DOWN: LDC channel went down"); 21012793Slm66018 21022793Slm66018 vd_need_reset(vd, B_TRUE); 21032793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 21042793Slm66018 DDI_SLEEP); 21052793Slm66018 if (status == DDI_FAILURE) { 21062793Slm66018 PR0("cannot schedule task to recv msg\n"); 21072793Slm66018 vd_need_reset(vd, B_TRUE); 21082793Slm66018 } 21092793Slm66018 } 21102793Slm66018 21112336Snarayan if (event & LDC_EVT_RESET) { 21122793Slm66018 PR0("LDC_EVT_RESET: LDC channel was reset"); 21132793Slm66018 21142793Slm66018 if (vd->state != VD_STATE_INIT) { 21152793Slm66018 PR0("scheduling full reset"); 21162793Slm66018 vd_need_reset(vd, B_FALSE); 21172793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 21182793Slm66018 vd, DDI_SLEEP); 21192793Slm66018 if (status == DDI_FAILURE) { 21202793Slm66018 PR0("cannot schedule task to recv msg\n"); 21212793Slm66018 vd_need_reset(vd, B_TRUE); 21222793Slm66018 } 21232793Slm66018 21242793Slm66018 } else { 21252793Slm66018 PR0("channel already reset, ignoring...\n"); 21262793Slm66018 PR0("doing ldc up...\n"); 21272793Slm66018 (void) ldc_up(vd->ldc_handle); 21282793Slm66018 } 21292793Slm66018 21302336Snarayan return (LDC_SUCCESS); 21312336Snarayan } 21322336Snarayan 21332336Snarayan if (event & LDC_EVT_UP) { 21342793Slm66018 PR0("EVT_UP: LDC is up\nResetting client connection state"); 21352793Slm66018 PR0("initiating soft reset"); 21362336Snarayan vd_need_reset(vd, B_FALSE); 21372793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 21382793Slm66018 vd, DDI_SLEEP); 21392793Slm66018 if (status == DDI_FAILURE) { 21402793Slm66018 PR0("cannot schedule task to recv msg\n"); 21412793Slm66018 vd_need_reset(vd, B_TRUE); 21422793Slm66018 return (LDC_SUCCESS); 21432793Slm66018 } 21442336Snarayan } 21452336Snarayan 21462336Snarayan if (event & LDC_EVT_READ) { 21472336Snarayan int status; 21482336Snarayan 21492336Snarayan PR1("New data available"); 21502336Snarayan /* Queue a task to receive the new data */ 21512336Snarayan status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 21522336Snarayan DDI_SLEEP); 21532793Slm66018 21542793Slm66018 if (status == DDI_FAILURE) { 21552793Slm66018 PR0("cannot schedule task to recv msg\n"); 21562793Slm66018 vd_need_reset(vd, B_TRUE); 21572793Slm66018 } 21582336Snarayan } 21592336Snarayan 21602336Snarayan return (LDC_SUCCESS); 21611991Sheppo } 21621991Sheppo 21631991Sheppo static uint_t 21641991Sheppo vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 21651991Sheppo { 21661991Sheppo _NOTE(ARGUNUSED(key, val)) 21671991Sheppo (*((uint_t *)arg))++; 21681991Sheppo return (MH_WALK_TERMINATE); 21691991Sheppo } 21701991Sheppo 21711991Sheppo 21721991Sheppo static int 21731991Sheppo vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 21741991Sheppo { 21751991Sheppo uint_t vd_present = 0; 21761991Sheppo minor_t instance; 21771991Sheppo vds_t *vds; 21781991Sheppo 21791991Sheppo 21801991Sheppo switch (cmd) { 21811991Sheppo case DDI_DETACH: 21821991Sheppo /* the real work happens below */ 21831991Sheppo break; 21841991Sheppo case DDI_SUSPEND: 21852336Snarayan PR0("No action required for DDI_SUSPEND"); 21861991Sheppo return (DDI_SUCCESS); 21871991Sheppo default: 21882793Slm66018 PR0("Unrecognized \"cmd\""); 21891991Sheppo return (DDI_FAILURE); 21901991Sheppo } 21911991Sheppo 21921991Sheppo ASSERT(cmd == DDI_DETACH); 21931991Sheppo instance = ddi_get_instance(dip); 21941991Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 21952793Slm66018 PR0("Could not get state for instance %u", instance); 21961991Sheppo ddi_soft_state_free(vds_state, instance); 21971991Sheppo return (DDI_FAILURE); 21981991Sheppo } 21991991Sheppo 22001991Sheppo /* Do no detach when serving any vdisks */ 22011991Sheppo mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present); 22021991Sheppo if (vd_present) { 22031991Sheppo PR0("Not detaching because serving vdisks"); 22041991Sheppo return (DDI_FAILURE); 22051991Sheppo } 22061991Sheppo 22071991Sheppo PR0("Detaching"); 2208*3297Ssb155480 if (vds->initialized & VDS_MDEG) { 22091991Sheppo (void) mdeg_unregister(vds->mdeg); 2210*3297Ssb155480 kmem_free(vds->ispecp->specp, sizeof (vds_prop_template)); 2211*3297Ssb155480 kmem_free(vds->ispecp, sizeof (mdeg_node_spec_t)); 2212*3297Ssb155480 vds->ispecp = NULL; 2213*3297Ssb155480 vds->mdeg = NULL; 2214*3297Ssb155480 } 2215*3297Ssb155480 22161991Sheppo if (vds->initialized & VDS_LDI) 22171991Sheppo (void) ldi_ident_release(vds->ldi_ident); 22181991Sheppo mod_hash_destroy_hash(vds->vd_table); 22191991Sheppo ddi_soft_state_free(vds_state, instance); 22201991Sheppo return (DDI_SUCCESS); 22211991Sheppo } 22221991Sheppo 22231991Sheppo static boolean_t 22241991Sheppo is_pseudo_device(dev_info_t *dip) 22251991Sheppo { 22261991Sheppo dev_info_t *parent, *root = ddi_root_node(); 22271991Sheppo 22281991Sheppo 22291991Sheppo for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root); 22301991Sheppo parent = ddi_get_parent(parent)) { 22311991Sheppo if (strcmp(ddi_get_name(parent), DEVI_PSEUDO_NEXNAME) == 0) 22321991Sheppo return (B_TRUE); 22331991Sheppo } 22341991Sheppo 22351991Sheppo return (B_FALSE); 22361991Sheppo } 22371991Sheppo 22381991Sheppo static int 22392032Slm66018 vd_setup_full_disk(vd_t *vd) 22402032Slm66018 { 22412032Slm66018 int rval, status; 22422032Slm66018 major_t major = getmajor(vd->dev[0]); 22432032Slm66018 minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; 22442531Snarayan struct dk_minfo dk_minfo; 22452531Snarayan 22462531Snarayan /* 22472531Snarayan * At this point, vdisk_size is set to the size of partition 2 but 22482531Snarayan * this does not represent the size of the disk because partition 2 22492531Snarayan * may not cover the entire disk and its size does not include reserved 22502531Snarayan * blocks. So we update vdisk_size to be the size of the entire disk. 22512531Snarayan */ 22522531Snarayan if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, 22532531Snarayan (intptr_t)&dk_minfo, (vd_open_flags | FKIOCTL), 22542531Snarayan kcred, &rval)) != 0) { 22553166Ssg70180 PR0("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d", 22562531Snarayan status); 22572032Slm66018 return (status); 22582032Slm66018 } 22592531Snarayan vd->vdisk_size = dk_minfo.dki_capacity; 22602032Slm66018 22612032Slm66018 /* Set full-disk parameters */ 22622032Slm66018 vd->vdisk_type = VD_DISK_TYPE_DISK; 22632032Slm66018 vd->nslices = (sizeof (vd->dev))/(sizeof (vd->dev[0])); 22642032Slm66018 22652032Slm66018 /* Move dev number and LDI handle to entire-disk-slice array elements */ 22662032Slm66018 vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; 22672032Slm66018 vd->dev[0] = 0; 22682032Slm66018 vd->ldi_handle[VD_ENTIRE_DISK_SLICE] = vd->ldi_handle[0]; 22692032Slm66018 vd->ldi_handle[0] = NULL; 22702032Slm66018 22712032Slm66018 /* Initialize device numbers for remaining slices and open them */ 22722032Slm66018 for (int slice = 0; slice < vd->nslices; slice++) { 22732032Slm66018 /* 22742032Slm66018 * Skip the entire-disk slice, as it's already open and its 22752032Slm66018 * device known 22762032Slm66018 */ 22772032Slm66018 if (slice == VD_ENTIRE_DISK_SLICE) 22782032Slm66018 continue; 22792032Slm66018 ASSERT(vd->dev[slice] == 0); 22802032Slm66018 ASSERT(vd->ldi_handle[slice] == NULL); 22812032Slm66018 22822032Slm66018 /* 22832032Slm66018 * Construct the device number for the current slice 22842032Slm66018 */ 22852032Slm66018 vd->dev[slice] = makedevice(major, (minor + slice)); 22862032Slm66018 22872032Slm66018 /* 22883166Ssg70180 * Open all slices of the disk to serve them to the client. 22893166Ssg70180 * Slices are opened exclusively to prevent other threads or 22903166Ssg70180 * processes in the service domain from performing I/O to 22913166Ssg70180 * slices being accessed by a client. Failure to open a slice 22923166Ssg70180 * results in vds not serving this disk, as the client could 22933166Ssg70180 * attempt (and should be able) to access any slice immediately. 22943166Ssg70180 * Any slices successfully opened before a failure will get 22953166Ssg70180 * closed by vds_destroy_vd() as a result of the error returned 22963166Ssg70180 * by this function. 22973166Ssg70180 * 22983166Ssg70180 * We need to do the open with FNDELAY so that opening an empty 22993166Ssg70180 * slice does not fail. 23002032Slm66018 */ 23012032Slm66018 PR0("Opening device major %u, minor %u = slice %u", 23022032Slm66018 major, minor, slice); 23032032Slm66018 if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 23043166Ssg70180 vd_open_flags | FNDELAY, kcred, &vd->ldi_handle[slice], 23053166Ssg70180 vd->vds->ldi_ident)) != 0) { 23063166Ssg70180 PR0("ldi_open_by_dev() returned errno %d " 23072032Slm66018 "for slice %u", status, slice); 23082032Slm66018 /* vds_destroy_vd() will close any open slices */ 23092032Slm66018 return (status); 23102032Slm66018 } 23112032Slm66018 } 23122032Slm66018 23132032Slm66018 return (0); 23142032Slm66018 } 23152032Slm66018 23162032Slm66018 static int 23172531Snarayan vd_setup_partition_efi(vd_t *vd) 23182531Snarayan { 23192531Snarayan efi_gpt_t *gpt; 23202531Snarayan efi_gpe_t *gpe; 23212531Snarayan struct uuid uuid = EFI_RESERVED; 23222531Snarayan uint32_t crc; 23232531Snarayan int length; 23242531Snarayan 23252531Snarayan length = sizeof (efi_gpt_t) + sizeof (efi_gpe_t); 23262531Snarayan 23272531Snarayan gpt = kmem_zalloc(length, KM_SLEEP); 23282531Snarayan gpe = (efi_gpe_t *)(gpt + 1); 23292531Snarayan 23302531Snarayan gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 23312531Snarayan gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 23322531Snarayan gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t)); 23332531Snarayan gpt->efi_gpt_FirstUsableLBA = LE_64(0ULL); 23342531Snarayan gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1); 23352531Snarayan gpt->efi_gpt_NumberOfPartitionEntries = LE_32(1); 23362531Snarayan gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t)); 23372531Snarayan 23382531Snarayan UUID_LE_CONVERT(gpe->efi_gpe_PartitionTypeGUID, uuid); 23392531Snarayan gpe->efi_gpe_StartingLBA = gpt->efi_gpt_FirstUsableLBA; 23402531Snarayan gpe->efi_gpe_EndingLBA = gpt->efi_gpt_LastUsableLBA; 23412531Snarayan 23422531Snarayan CRC32(crc, gpe, sizeof (efi_gpe_t), -1U, crc32_table); 23432531Snarayan gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 23442531Snarayan 23452531Snarayan CRC32(crc, gpt, sizeof (efi_gpt_t), -1U, crc32_table); 23462531Snarayan gpt->efi_gpt_HeaderCRC32 = LE_32(~crc); 23472531Snarayan 23482531Snarayan vd->dk_efi.dki_lba = 0; 23492531Snarayan vd->dk_efi.dki_length = length; 23502531Snarayan vd->dk_efi.dki_data = gpt; 23512531Snarayan 23522531Snarayan return (0); 23532531Snarayan } 23542531Snarayan 23552531Snarayan static int 23562410Slm66018 vd_setup_vd(char *device_path, vd_t *vd) 23571991Sheppo { 23582410Slm66018 int rval, status; 23591991Sheppo dev_info_t *dip; 23601991Sheppo struct dk_cinfo dk_cinfo; 23611991Sheppo 23622531Snarayan /* 23632531Snarayan * We need to open with FNDELAY so that opening an empty partition 23642531Snarayan * does not fail. 23652531Snarayan */ 23662531Snarayan if ((status = ldi_open_by_name(device_path, vd_open_flags | FNDELAY, 23672531Snarayan kcred, &vd->ldi_handle[0], vd->vds->ldi_ident)) != 0) { 23682410Slm66018 PRN("ldi_open_by_name(%s) = errno %d", device_path, status); 23692032Slm66018 return (status); 23702032Slm66018 } 23712032Slm66018 23722531Snarayan /* 23732531Snarayan * nslices must be updated now so that vds_destroy_vd() will close 23742531Snarayan * the slice we have just opened in case of an error. 23752531Snarayan */ 23762531Snarayan vd->nslices = 1; 23772531Snarayan 23782410Slm66018 /* Get device number and size of backing device */ 23792032Slm66018 if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) { 23801991Sheppo PRN("ldi_get_dev() returned errno %d for %s", 23812410Slm66018 status, device_path); 23821991Sheppo return (status); 23831991Sheppo } 23842410Slm66018 if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) { 23852410Slm66018 PRN("ldi_get_size() failed for %s", device_path); 23862410Slm66018 return (EIO); 23872410Slm66018 } 23882410Slm66018 vd->vdisk_size = lbtodb(vd->vdisk_size); /* convert to blocks */ 23892410Slm66018 23902410Slm66018 /* Verify backing device supports dk_cinfo, dk_geom, and vtoc */ 23912410Slm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 23922410Slm66018 (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred, 23932410Slm66018 &rval)) != 0) { 23942410Slm66018 PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 23952410Slm66018 status, device_path); 23961991Sheppo return (status); 23971991Sheppo } 23982410Slm66018 if (dk_cinfo.dki_partition >= V_NUMPAR) { 23992410Slm66018 PRN("slice %u >= maximum slice %u for %s", 24002410Slm66018 dk_cinfo.dki_partition, V_NUMPAR, device_path); 24011991Sheppo return (EIO); 24021991Sheppo } 24032531Snarayan 24042531Snarayan status = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc, &vd->vdisk_label); 24052531Snarayan 24062531Snarayan if (status != 0) { 24072531Snarayan PRN("vd_read_vtoc returned errno %d for %s", 24082410Slm66018 status, device_path); 24092410Slm66018 return (status); 24102410Slm66018 } 24112531Snarayan 24122531Snarayan if (vd->vdisk_label == VD_DISK_LABEL_VTOC && 24132531Snarayan (status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, 24142531Snarayan (intptr_t)&vd->dk_geom, (vd_open_flags | FKIOCTL), 24152531Snarayan kcred, &rval)) != 0) { 24162531Snarayan PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", 24172531Snarayan status, device_path); 24182531Snarayan return (status); 24192410Slm66018 } 24202410Slm66018 24212410Slm66018 /* Store the device's max transfer size for return to the client */ 24222410Slm66018 vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 24232410Slm66018 24242410Slm66018 24252410Slm66018 /* Determine if backing device is a pseudo device */ 24261991Sheppo if ((dip = ddi_hold_devi_by_instance(getmajor(vd->dev[0]), 24271991Sheppo dev_to_instance(vd->dev[0]), 0)) == NULL) { 24282410Slm66018 PRN("%s is no longer accessible", device_path); 24291991Sheppo return (EIO); 24301991Sheppo } 24311991Sheppo vd->pseudo = is_pseudo_device(dip); 24321991Sheppo ddi_release_devi(dip); 24331991Sheppo if (vd->pseudo) { 24341991Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 24351991Sheppo vd->nslices = 1; 24361991Sheppo return (0); /* ...and we're done */ 24371991Sheppo } 24381991Sheppo 24391991Sheppo 24402032Slm66018 /* If slice is entire-disk slice, initialize for full disk */ 24412032Slm66018 if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) 24422032Slm66018 return (vd_setup_full_disk(vd)); 24431991Sheppo 24442032Slm66018 24452410Slm66018 /* Otherwise, we have a non-entire slice of a device */ 24461991Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 24471991Sheppo vd->nslices = 1; 24481991Sheppo 24492531Snarayan if (vd->vdisk_label == VD_DISK_LABEL_EFI) { 24502531Snarayan status = vd_setup_partition_efi(vd); 24512531Snarayan return (status); 24522531Snarayan } 24531991Sheppo 24542410Slm66018 /* Initialize dk_geom structure for single-slice device */ 24551991Sheppo if (vd->dk_geom.dkg_nsect == 0) { 24562793Slm66018 PR0("%s geometry claims 0 sectors per track", device_path); 24571991Sheppo return (EIO); 24581991Sheppo } 24591991Sheppo if (vd->dk_geom.dkg_nhead == 0) { 24602793Slm66018 PR0("%s geometry claims 0 heads", device_path); 24611991Sheppo return (EIO); 24621991Sheppo } 24631991Sheppo vd->dk_geom.dkg_ncyl = 24642410Slm66018 vd->vdisk_size/vd->dk_geom.dkg_nsect/vd->dk_geom.dkg_nhead; 24651991Sheppo vd->dk_geom.dkg_acyl = 0; 24661991Sheppo vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; 24671991Sheppo 24681991Sheppo 24692410Slm66018 /* Initialize vtoc structure for single-slice device */ 24701991Sheppo bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, 24711991Sheppo MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); 24721991Sheppo bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); 24731991Sheppo vd->vtoc.v_nparts = 1; 24741991Sheppo vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; 24751991Sheppo vd->vtoc.v_part[0].p_flag = 0; 24761991Sheppo vd->vtoc.v_part[0].p_start = 0; 24772410Slm66018 vd->vtoc.v_part[0].p_size = vd->vdisk_size; 24781991Sheppo bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, 24791991Sheppo MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel))); 24801991Sheppo 24811991Sheppo 24821991Sheppo return (0); 24831991Sheppo } 24841991Sheppo 24851991Sheppo static int 24862410Slm66018 vds_do_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id, 24871991Sheppo vd_t **vdp) 24881991Sheppo { 24891991Sheppo char tq_name[TASKQ_NAMELEN]; 24902032Slm66018 int status; 24911991Sheppo ddi_iblock_cookie_t iblock = NULL; 24921991Sheppo ldc_attr_t ldc_attr; 24931991Sheppo vd_t *vd; 24941991Sheppo 24951991Sheppo 24961991Sheppo ASSERT(vds != NULL); 24972410Slm66018 ASSERT(device_path != NULL); 24981991Sheppo ASSERT(vdp != NULL); 24992410Slm66018 PR0("Adding vdisk for %s", device_path); 25001991Sheppo 25011991Sheppo if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) { 25021991Sheppo PRN("No memory for virtual disk"); 25031991Sheppo return (EAGAIN); 25041991Sheppo } 25051991Sheppo *vdp = vd; /* assign here so vds_destroy_vd() can cleanup later */ 25061991Sheppo vd->vds = vds; 25071991Sheppo 25081991Sheppo 25092032Slm66018 /* Open vdisk and initialize parameters */ 25102410Slm66018 if ((status = vd_setup_vd(device_path, vd)) != 0) 25111991Sheppo return (status); 25121991Sheppo ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 25131991Sheppo PR0("vdisk_type = %s, pseudo = %s, nslices = %u", 25141991Sheppo ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 25151991Sheppo (vd->pseudo ? "yes" : "no"), vd->nslices); 25161991Sheppo 25171991Sheppo 25181991Sheppo /* Initialize locking */ 25191991Sheppo if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED, 25201991Sheppo &iblock) != DDI_SUCCESS) { 25211991Sheppo PRN("Could not get iblock cookie."); 25221991Sheppo return (EIO); 25231991Sheppo } 25241991Sheppo 25251991Sheppo mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock); 25261991Sheppo vd->initialized |= VD_LOCKING; 25271991Sheppo 25281991Sheppo 25292336Snarayan /* Create start and completion task queues for the vdisk */ 25302336Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_startq%lu", id); 25311991Sheppo PR1("tq_name = %s", tq_name); 25322336Snarayan if ((vd->startq = ddi_taskq_create(vds->dip, tq_name, 1, 25331991Sheppo TASKQ_DEFAULTPRI, 0)) == NULL) { 25341991Sheppo PRN("Could not create task queue"); 25351991Sheppo return (EIO); 25361991Sheppo } 25372336Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_completionq%lu", id); 25382336Snarayan PR1("tq_name = %s", tq_name); 25392336Snarayan if ((vd->completionq = ddi_taskq_create(vds->dip, tq_name, 1, 25402336Snarayan TASKQ_DEFAULTPRI, 0)) == NULL) { 25412336Snarayan PRN("Could not create task queue"); 25422336Snarayan return (EIO); 25432336Snarayan } 25442336Snarayan vd->enabled = 1; /* before callback can dispatch to startq */ 25451991Sheppo 25461991Sheppo 25471991Sheppo /* Bring up LDC */ 25481991Sheppo ldc_attr.devclass = LDC_DEV_BLK_SVC; 25491991Sheppo ldc_attr.instance = ddi_get_instance(vds->dip); 25501991Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; 25512410Slm66018 ldc_attr.mtu = VD_LDC_MTU; 25521991Sheppo if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) { 25532793Slm66018 PR0("ldc_init(%lu) = errno %d", ldc_id, status); 25541991Sheppo return (status); 25551991Sheppo } 25561991Sheppo vd->initialized |= VD_LDC; 25571991Sheppo 25581991Sheppo if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events, 25591991Sheppo (caddr_t)vd)) != 0) { 25602793Slm66018 PR0("ldc_reg_callback() returned errno %d", status); 25611991Sheppo return (status); 25621991Sheppo } 25631991Sheppo 25641991Sheppo if ((status = ldc_open(vd->ldc_handle)) != 0) { 25652793Slm66018 PR0("ldc_open() returned errno %d", status); 25661991Sheppo return (status); 25671991Sheppo } 25681991Sheppo 25692793Slm66018 if ((status = ldc_up(vd->ldc_handle)) != 0) { 25703166Ssg70180 PR0("ldc_up() returned errno %d", status); 25712793Slm66018 } 25722793Slm66018 25732531Snarayan /* Allocate the inband task memory handle */ 25742531Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, &(vd->inband_task.mhdl)); 25752531Snarayan if (status) { 25763166Ssg70180 PR0("ldc_mem_alloc_handle() returned err %d ", status); 25772531Snarayan return (ENXIO); 25782531Snarayan } 25791991Sheppo 25801991Sheppo /* Add the successfully-initialized vdisk to the server's table */ 25811991Sheppo if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) { 25821991Sheppo PRN("Error adding vdisk ID %lu to table", id); 25831991Sheppo return (EIO); 25841991Sheppo } 25851991Sheppo 25862793Slm66018 /* Allocate the staging buffer */ 25872793Slm66018 vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 25882793Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 25892793Slm66018 25902793Slm66018 /* store initial state */ 25912793Slm66018 vd->state = VD_STATE_INIT; 25922793Slm66018 25931991Sheppo return (0); 25941991Sheppo } 25951991Sheppo 25962793Slm66018 static void 25972793Slm66018 vd_free_dring_task(vd_t *vdp) 25982793Slm66018 { 25992793Slm66018 if (vdp->dring_task != NULL) { 26002793Slm66018 ASSERT(vdp->dring_len != 0); 26012793Slm66018 /* Free all dring_task memory handles */ 26022793Slm66018 for (int i = 0; i < vdp->dring_len; i++) { 26032793Slm66018 (void) ldc_mem_free_handle(vdp->dring_task[i].mhdl); 26042793Slm66018 kmem_free(vdp->dring_task[i].msg, vdp->max_msglen); 26052793Slm66018 vdp->dring_task[i].msg = NULL; 26062793Slm66018 } 26072793Slm66018 kmem_free(vdp->dring_task, 26082793Slm66018 (sizeof (*vdp->dring_task)) * vdp->dring_len); 26092793Slm66018 vdp->dring_task = NULL; 26102793Slm66018 } 26112793Slm66018 } 26122793Slm66018 26131991Sheppo /* 26141991Sheppo * Destroy the state associated with a virtual disk 26151991Sheppo */ 26161991Sheppo static void 26171991Sheppo vds_destroy_vd(void *arg) 26181991Sheppo { 26191991Sheppo vd_t *vd = (vd_t *)arg; 26203166Ssg70180 int retry = 0, rv; 26211991Sheppo 26221991Sheppo if (vd == NULL) 26231991Sheppo return; 26241991Sheppo 26252336Snarayan PR0("Destroying vdisk state"); 26262336Snarayan 26272531Snarayan if (vd->dk_efi.dki_data != NULL) 26282531Snarayan kmem_free(vd->dk_efi.dki_data, vd->dk_efi.dki_length); 26292531Snarayan 26301991Sheppo /* Disable queuing requests for the vdisk */ 26311991Sheppo if (vd->initialized & VD_LOCKING) { 26321991Sheppo mutex_enter(&vd->lock); 26331991Sheppo vd->enabled = 0; 26341991Sheppo mutex_exit(&vd->lock); 26351991Sheppo } 26361991Sheppo 26372336Snarayan /* Drain and destroy start queue (*before* destroying completionq) */ 26382336Snarayan if (vd->startq != NULL) 26392336Snarayan ddi_taskq_destroy(vd->startq); /* waits for queued tasks */ 26402336Snarayan 26412336Snarayan /* Drain and destroy completion queue (*before* shutting down LDC) */ 26422336Snarayan if (vd->completionq != NULL) 26432336Snarayan ddi_taskq_destroy(vd->completionq); /* waits for tasks */ 26442336Snarayan 26452793Slm66018 vd_free_dring_task(vd); 26462793Slm66018 26473166Ssg70180 /* Free the inband task memory handle */ 26483166Ssg70180 (void) ldc_mem_free_handle(vd->inband_task.mhdl); 26493166Ssg70180 26503166Ssg70180 /* Shut down LDC */ 26513166Ssg70180 if (vd->initialized & VD_LDC) { 26523166Ssg70180 /* unmap the dring */ 26533166Ssg70180 if (vd->initialized & VD_DRING) 26543166Ssg70180 (void) ldc_mem_dring_unmap(vd->dring_handle); 26553166Ssg70180 26563166Ssg70180 /* close LDC channel - retry on EAGAIN */ 26573166Ssg70180 while ((rv = ldc_close(vd->ldc_handle)) == EAGAIN) { 26583166Ssg70180 if (++retry > vds_ldc_retries) { 26593166Ssg70180 PR0("Timed out closing channel"); 26603166Ssg70180 break; 26613166Ssg70180 } 26623166Ssg70180 drv_usecwait(vds_ldc_delay); 26633166Ssg70180 } 26643166Ssg70180 if (rv == 0) { 26653166Ssg70180 (void) ldc_unreg_callback(vd->ldc_handle); 26663166Ssg70180 (void) ldc_fini(vd->ldc_handle); 26673166Ssg70180 } else { 26683166Ssg70180 /* 26693166Ssg70180 * Closing the LDC channel has failed. Ideally we should 26703166Ssg70180 * fail here but there is no Zeus level infrastructure 26713166Ssg70180 * to handle this. The MD has already been changed and 26723166Ssg70180 * we have to do the close. So we try to do as much 26733166Ssg70180 * clean up as we can. 26743166Ssg70180 */ 26753166Ssg70180 (void) ldc_set_cb_mode(vd->ldc_handle, LDC_CB_DISABLE); 26763166Ssg70180 while (ldc_unreg_callback(vd->ldc_handle) == EAGAIN) 26773166Ssg70180 drv_usecwait(vds_ldc_delay); 26783166Ssg70180 } 26793166Ssg70180 } 26803166Ssg70180 26812793Slm66018 /* Free the staging buffer for msgs */ 26822793Slm66018 if (vd->vio_msgp != NULL) { 26832793Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 26842793Slm66018 vd->vio_msgp = NULL; 26852793Slm66018 } 26862793Slm66018 26872793Slm66018 /* Free the inband message buffer */ 26882793Slm66018 if (vd->inband_task.msg != NULL) { 26892793Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 26902793Slm66018 vd->inband_task.msg = NULL; 26912336Snarayan } 26921991Sheppo 26931991Sheppo /* Close any open backing-device slices */ 26941991Sheppo for (uint_t slice = 0; slice < vd->nslices; slice++) { 26951991Sheppo if (vd->ldi_handle[slice] != NULL) { 26961991Sheppo PR0("Closing slice %u", slice); 26971991Sheppo (void) ldi_close(vd->ldi_handle[slice], 26982531Snarayan vd_open_flags | FNDELAY, kcred); 26991991Sheppo } 27001991Sheppo } 27011991Sheppo 27021991Sheppo /* Free lock */ 27031991Sheppo if (vd->initialized & VD_LOCKING) 27041991Sheppo mutex_destroy(&vd->lock); 27051991Sheppo 27061991Sheppo /* Finally, free the vdisk structure itself */ 27071991Sheppo kmem_free(vd, sizeof (*vd)); 27081991Sheppo } 27091991Sheppo 27101991Sheppo static int 27112410Slm66018 vds_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id) 27121991Sheppo { 27131991Sheppo int status; 27141991Sheppo vd_t *vd = NULL; 27151991Sheppo 27161991Sheppo 27172410Slm66018 if ((status = vds_do_init_vd(vds, id, device_path, ldc_id, &vd)) != 0) 27181991Sheppo vds_destroy_vd(vd); 27191991Sheppo 27201991Sheppo return (status); 27211991Sheppo } 27221991Sheppo 27231991Sheppo static int 27241991Sheppo vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel, 27251991Sheppo uint64_t *ldc_id) 27261991Sheppo { 27271991Sheppo int num_channels; 27281991Sheppo 27291991Sheppo 27301991Sheppo /* Look for channel endpoint child(ren) of the vdisk MD node */ 27311991Sheppo if ((num_channels = md_scan_dag(md, vd_node, 27321991Sheppo md_find_name(md, VD_CHANNEL_ENDPOINT), 27331991Sheppo md_find_name(md, "fwd"), channel)) <= 0) { 27341991Sheppo PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT); 27351991Sheppo return (-1); 27361991Sheppo } 27371991Sheppo 27381991Sheppo /* Get the "id" value for the first channel endpoint node */ 27391991Sheppo if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) { 27401991Sheppo PRN("No \"%s\" property found for \"%s\" of vdisk", 27411991Sheppo VD_ID_PROP, VD_CHANNEL_ENDPOINT); 27421991Sheppo return (-1); 27431991Sheppo } 27441991Sheppo 27451991Sheppo if (num_channels > 1) { 27461991Sheppo PRN("Using ID of first of multiple channels for this vdisk"); 27471991Sheppo } 27481991Sheppo 27491991Sheppo return (0); 27501991Sheppo } 27511991Sheppo 27521991Sheppo static int 27531991Sheppo vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id) 27541991Sheppo { 27551991Sheppo int num_nodes, status; 27561991Sheppo size_t size; 27571991Sheppo mde_cookie_t *channel; 27581991Sheppo 27591991Sheppo 27601991Sheppo if ((num_nodes = md_node_count(md)) <= 0) { 27611991Sheppo PRN("Invalid node count in Machine Description subtree"); 27621991Sheppo return (-1); 27631991Sheppo } 27641991Sheppo size = num_nodes*(sizeof (*channel)); 27651991Sheppo channel = kmem_zalloc(size, KM_SLEEP); 27661991Sheppo status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id); 27671991Sheppo kmem_free(channel, size); 27681991Sheppo 27691991Sheppo return (status); 27701991Sheppo } 27711991Sheppo 27721991Sheppo static void 27731991Sheppo vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 27741991Sheppo { 27752410Slm66018 char *device_path = NULL; 27761991Sheppo uint64_t id = 0, ldc_id = 0; 27771991Sheppo 27781991Sheppo 27791991Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 27801991Sheppo PRN("Error getting vdisk \"%s\"", VD_ID_PROP); 27811991Sheppo return; 27821991Sheppo } 27831991Sheppo PR0("Adding vdisk ID %lu", id); 27841991Sheppo if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP, 27852410Slm66018 &device_path) != 0) { 27861991Sheppo PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 27871991Sheppo return; 27881991Sheppo } 27891991Sheppo 27901991Sheppo if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) { 27911991Sheppo PRN("Error getting LDC ID for vdisk %lu", id); 27921991Sheppo return; 27931991Sheppo } 27941991Sheppo 27952410Slm66018 if (vds_init_vd(vds, id, device_path, ldc_id) != 0) { 27961991Sheppo PRN("Failed to add vdisk ID %lu", id); 27971991Sheppo return; 27981991Sheppo } 27991991Sheppo } 28001991Sheppo 28011991Sheppo static void 28021991Sheppo vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 28031991Sheppo { 28041991Sheppo uint64_t id = 0; 28051991Sheppo 28061991Sheppo 28071991Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 28081991Sheppo PRN("Unable to get \"%s\" property from vdisk's MD node", 28091991Sheppo VD_ID_PROP); 28101991Sheppo return; 28111991Sheppo } 28121991Sheppo PR0("Removing vdisk ID %lu", id); 28131991Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0) 28141991Sheppo PRN("No vdisk entry found for vdisk ID %lu", id); 28151991Sheppo } 28161991Sheppo 28171991Sheppo static void 28181991Sheppo vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node, 28191991Sheppo md_t *curr_md, mde_cookie_t curr_vd_node) 28201991Sheppo { 28211991Sheppo char *curr_dev, *prev_dev; 28221991Sheppo uint64_t curr_id = 0, curr_ldc_id = 0; 28231991Sheppo uint64_t prev_id = 0, prev_ldc_id = 0; 28241991Sheppo size_t len; 28251991Sheppo 28261991Sheppo 28271991Sheppo /* Validate that vdisk ID has not changed */ 28281991Sheppo if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) { 28291991Sheppo PRN("Error getting previous vdisk \"%s\" property", 28301991Sheppo VD_ID_PROP); 28311991Sheppo return; 28321991Sheppo } 28331991Sheppo if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) { 28341991Sheppo PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP); 28351991Sheppo return; 28361991Sheppo } 28371991Sheppo if (curr_id != prev_id) { 28381991Sheppo PRN("Not changing vdisk: ID changed from %lu to %lu", 28391991Sheppo prev_id, curr_id); 28401991Sheppo return; 28411991Sheppo } 28421991Sheppo 28431991Sheppo /* Validate that LDC ID has not changed */ 28441991Sheppo if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) { 28451991Sheppo PRN("Error getting LDC ID for vdisk %lu", prev_id); 28461991Sheppo return; 28471991Sheppo } 28481991Sheppo 28491991Sheppo if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) { 28501991Sheppo PRN("Error getting LDC ID for vdisk %lu", curr_id); 28511991Sheppo return; 28521991Sheppo } 28531991Sheppo if (curr_ldc_id != prev_ldc_id) { 28542032Slm66018 _NOTE(NOTREACHED); /* lint is confused */ 28551991Sheppo PRN("Not changing vdisk: " 28561991Sheppo "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id); 28571991Sheppo return; 28581991Sheppo } 28591991Sheppo 28601991Sheppo /* Determine whether device path has changed */ 28611991Sheppo if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP, 28621991Sheppo &prev_dev) != 0) { 28631991Sheppo PRN("Error getting previous vdisk \"%s\"", 28641991Sheppo VD_BLOCK_DEVICE_PROP); 28651991Sheppo return; 28661991Sheppo } 28671991Sheppo if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP, 28681991Sheppo &curr_dev) != 0) { 28691991Sheppo PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 28701991Sheppo return; 28711991Sheppo } 28721991Sheppo if (((len = strlen(curr_dev)) == strlen(prev_dev)) && 28731991Sheppo (strncmp(curr_dev, prev_dev, len) == 0)) 28741991Sheppo return; /* no relevant (supported) change */ 28751991Sheppo 28761991Sheppo PR0("Changing vdisk ID %lu", prev_id); 28772793Slm66018 28781991Sheppo /* Remove old state, which will close vdisk and reset */ 28791991Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0) 28801991Sheppo PRN("No entry found for vdisk ID %lu", prev_id); 28812793Slm66018 28821991Sheppo /* Re-initialize vdisk with new state */ 28831991Sheppo if (vds_init_vd(vds, curr_id, curr_dev, curr_ldc_id) != 0) { 28841991Sheppo PRN("Failed to change vdisk ID %lu", curr_id); 28851991Sheppo return; 28861991Sheppo } 28871991Sheppo } 28881991Sheppo 28891991Sheppo static int 28901991Sheppo vds_process_md(void *arg, mdeg_result_t *md) 28911991Sheppo { 28921991Sheppo int i; 28931991Sheppo vds_t *vds = arg; 28941991Sheppo 28951991Sheppo 28961991Sheppo if (md == NULL) 28971991Sheppo return (MDEG_FAILURE); 28981991Sheppo ASSERT(vds != NULL); 28991991Sheppo 29001991Sheppo for (i = 0; i < md->removed.nelem; i++) 29011991Sheppo vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]); 29021991Sheppo for (i = 0; i < md->match_curr.nelem; i++) 29031991Sheppo vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i], 29041991Sheppo md->match_curr.mdp, md->match_curr.mdep[i]); 29051991Sheppo for (i = 0; i < md->added.nelem; i++) 29061991Sheppo vds_add_vd(vds, md->added.mdp, md->added.mdep[i]); 29071991Sheppo 29081991Sheppo return (MDEG_SUCCESS); 29091991Sheppo } 29101991Sheppo 29111991Sheppo static int 29121991Sheppo vds_do_attach(dev_info_t *dip) 29131991Sheppo { 2914*3297Ssb155480 int status, sz; 2915*3297Ssb155480 int cfg_handle; 29161991Sheppo minor_t instance = ddi_get_instance(dip); 29171991Sheppo vds_t *vds; 2918*3297Ssb155480 mdeg_prop_spec_t *pspecp; 2919*3297Ssb155480 mdeg_node_spec_t *ispecp; 29201991Sheppo 29211991Sheppo /* 29221991Sheppo * The "cfg-handle" property of a vds node in an MD contains the MD's 29231991Sheppo * notion of "instance", or unique identifier, for that node; OBP 29241991Sheppo * stores the value of the "cfg-handle" MD property as the value of 29251991Sheppo * the "reg" property on the node in the device tree it builds from 29261991Sheppo * the MD and passes to Solaris. Thus, we look up the devinfo node's 29271991Sheppo * "reg" property value to uniquely identify this device instance when 29281991Sheppo * registering with the MD event-generation framework. If the "reg" 29291991Sheppo * property cannot be found, the device tree state is presumably so 29301991Sheppo * broken that there is no point in continuing. 29311991Sheppo */ 2932*3297Ssb155480 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 2933*3297Ssb155480 VD_REG_PROP)) { 2934*3297Ssb155480 PRN("vds \"%s\" property does not exist", VD_REG_PROP); 29351991Sheppo return (DDI_FAILURE); 29361991Sheppo } 29371991Sheppo 29381991Sheppo /* Get the MD instance for later MDEG registration */ 29391991Sheppo cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 2940*3297Ssb155480 VD_REG_PROP, -1); 29411991Sheppo 29421991Sheppo if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) { 29431991Sheppo PRN("Could not allocate state for instance %u", instance); 29441991Sheppo return (DDI_FAILURE); 29451991Sheppo } 29461991Sheppo 29471991Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 29481991Sheppo PRN("Could not get state for instance %u", instance); 29491991Sheppo ddi_soft_state_free(vds_state, instance); 29501991Sheppo return (DDI_FAILURE); 29511991Sheppo } 29521991Sheppo 29531991Sheppo vds->dip = dip; 29541991Sheppo vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS, 29551991Sheppo vds_destroy_vd, 29561991Sheppo sizeof (void *)); 29571991Sheppo ASSERT(vds->vd_table != NULL); 29581991Sheppo 29591991Sheppo if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) { 29601991Sheppo PRN("ldi_ident_from_dip() returned errno %d", status); 29611991Sheppo return (DDI_FAILURE); 29621991Sheppo } 29631991Sheppo vds->initialized |= VDS_LDI; 29641991Sheppo 29651991Sheppo /* Register for MD updates */ 2966*3297Ssb155480 sz = sizeof (vds_prop_template); 2967*3297Ssb155480 pspecp = kmem_alloc(sz, KM_SLEEP); 2968*3297Ssb155480 bcopy(vds_prop_template, pspecp, sz); 2969*3297Ssb155480 2970*3297Ssb155480 VDS_SET_MDEG_PROP_INST(pspecp, cfg_handle); 2971*3297Ssb155480 2972*3297Ssb155480 /* initialize the complete prop spec structure */ 2973*3297Ssb155480 ispecp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 2974*3297Ssb155480 ispecp->namep = "virtual-device"; 2975*3297Ssb155480 ispecp->specp = pspecp; 2976*3297Ssb155480 2977*3297Ssb155480 if (mdeg_register(ispecp, &vd_match, vds_process_md, vds, 29781991Sheppo &vds->mdeg) != MDEG_SUCCESS) { 29791991Sheppo PRN("Unable to register for MD updates"); 2980*3297Ssb155480 kmem_free(ispecp, sizeof (mdeg_node_spec_t)); 2981*3297Ssb155480 kmem_free(pspecp, sz); 29821991Sheppo return (DDI_FAILURE); 29831991Sheppo } 2984*3297Ssb155480 2985*3297Ssb155480 vds->ispecp = ispecp; 29861991Sheppo vds->initialized |= VDS_MDEG; 29871991Sheppo 29882032Slm66018 /* Prevent auto-detaching so driver is available whenever MD changes */ 29892032Slm66018 if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) != 29902032Slm66018 DDI_PROP_SUCCESS) { 29912032Slm66018 PRN("failed to set \"%s\" property for instance %u", 29922032Slm66018 DDI_NO_AUTODETACH, instance); 29932032Slm66018 } 29942032Slm66018 29951991Sheppo ddi_report_dev(dip); 29961991Sheppo return (DDI_SUCCESS); 29971991Sheppo } 29981991Sheppo 29991991Sheppo static int 30001991Sheppo vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 30011991Sheppo { 30021991Sheppo int status; 30031991Sheppo 30041991Sheppo switch (cmd) { 30051991Sheppo case DDI_ATTACH: 30062336Snarayan PR0("Attaching"); 30071991Sheppo if ((status = vds_do_attach(dip)) != DDI_SUCCESS) 30081991Sheppo (void) vds_detach(dip, DDI_DETACH); 30091991Sheppo return (status); 30101991Sheppo case DDI_RESUME: 30112336Snarayan PR0("No action required for DDI_RESUME"); 30121991Sheppo return (DDI_SUCCESS); 30131991Sheppo default: 30141991Sheppo return (DDI_FAILURE); 30151991Sheppo } 30161991Sheppo } 30171991Sheppo 30181991Sheppo static struct dev_ops vds_ops = { 30191991Sheppo DEVO_REV, /* devo_rev */ 30201991Sheppo 0, /* devo_refcnt */ 30211991Sheppo ddi_no_info, /* devo_getinfo */ 30221991Sheppo nulldev, /* devo_identify */ 30231991Sheppo nulldev, /* devo_probe */ 30241991Sheppo vds_attach, /* devo_attach */ 30251991Sheppo vds_detach, /* devo_detach */ 30261991Sheppo nodev, /* devo_reset */ 30271991Sheppo NULL, /* devo_cb_ops */ 30281991Sheppo NULL, /* devo_bus_ops */ 30291991Sheppo nulldev /* devo_power */ 30301991Sheppo }; 30311991Sheppo 30321991Sheppo static struct modldrv modldrv = { 30331991Sheppo &mod_driverops, 30341991Sheppo "virtual disk server v%I%", 30351991Sheppo &vds_ops, 30361991Sheppo }; 30371991Sheppo 30381991Sheppo static struct modlinkage modlinkage = { 30391991Sheppo MODREV_1, 30401991Sheppo &modldrv, 30411991Sheppo NULL 30421991Sheppo }; 30431991Sheppo 30441991Sheppo 30451991Sheppo int 30461991Sheppo _init(void) 30471991Sheppo { 30481991Sheppo int i, status; 30491991Sheppo 30502336Snarayan 30511991Sheppo if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0) 30521991Sheppo return (status); 30531991Sheppo if ((status = mod_install(&modlinkage)) != 0) { 30541991Sheppo ddi_soft_state_fini(&vds_state); 30551991Sheppo return (status); 30561991Sheppo } 30571991Sheppo 30581991Sheppo /* Fill in the bit-mask of server-supported operations */ 30591991Sheppo for (i = 0; i < vds_noperations; i++) 30601991Sheppo vds_operations |= 1 << (vds_operation[i].operation - 1); 30611991Sheppo 30621991Sheppo return (0); 30631991Sheppo } 30641991Sheppo 30651991Sheppo int 30661991Sheppo _info(struct modinfo *modinfop) 30671991Sheppo { 30681991Sheppo return (mod_info(&modlinkage, modinfop)); 30691991Sheppo } 30701991Sheppo 30711991Sheppo int 30721991Sheppo _fini(void) 30731991Sheppo { 30741991Sheppo int status; 30751991Sheppo 30762336Snarayan 30771991Sheppo if ((status = mod_remove(&modlinkage)) != 0) 30781991Sheppo return (status); 30791991Sheppo ddi_soft_state_fini(&vds_state); 30801991Sheppo return (0); 30811991Sheppo } 3082