/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Virtual disk server
 */


#include <sys/types.h>
#include <sys/conf.h>
#include <sys/crc32.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/file.h>
#include <sys/mdeg.h>
#include <sys/modhash.h>
#include <sys/note.h>
#include <sys/pathname.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
#include <sys/sysmacros.h>
#include <sys/vio_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vtoc.h>


/* Virtual disk server initialization flags */
#define	VDS_LDI			0x01
#define	VDS_MDEG		0x02

/* Virtual disk server tunable parameters */
#define	VDS_LDC_RETRIES		3
#define	VDS_LDC_DELAY		1000	/* usec */
#define	VDS_NCHAINS		32

/* Identification parameters for MD, synthetic dkio(7i) structures, etc. */
#define	VDS_NAME		"virtual-disk-server"

#define	VD_NAME			"vd"
#define	VD_VOLUME_NAME		"vdisk"
#define	VD_ASCIILABEL		"Virtual Disk"

#define	VD_CHANNEL_ENDPOINT	"channel-endpoint"
#define	VD_ID_PROP		"id"
#define	VD_BLOCK_DEVICE_PROP	"vds-block-device"

/* Virtual disk initialization flags */
#define	VD_LOCKING		0x01
#define	VD_LDC			0x02
#define	VD_DRING		0x04
#define	VD_SID			0x08
#define	VD_SEQ_NUM		0x10

/* Flags for opening/closing backing devices via LDI */
#define	VD_OPEN_FLAGS		(FEXCL | FREAD | FWRITE)

/*
 * By Solaris convention, slice/partition 2 represents the entire disk;
 * unfortunately, this convention does not appear to be codified.
 */
#define	VD_ENTIRE_DISK_SLICE	2

/* Return a cpp token as a string */
#define	STRINGIZE(token)	#token

/*
 * Print a message prefixed with the current function name to the message log
 * (and optionally to the console for verbose boots); these macros use cpp's
 * concatenation of string literals and C99 variable-length-argument-list
 * macros
 */
#define	PRN(...)	_PRN("?%s(): "__VA_ARGS__, "")
#define	_PRN(format, ...)					\
	cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__)

/* Return a pointer to the "i"th vdisk dring element */
#define	VD_DRING_ELEM(i)	((vd_dring_entry_t *)(void *)	\
	    (vd->dring + (i)*vd->descriptor_size))

/* Return the virtual disk client's type as a string (for use in messages) */
#define	VD_CLIENT(vd)							\
	(((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" :	\
	    (((vd)->xfer_mode == VIO_DRING_MODE) ? "dring client" :	\
		(((vd)->xfer_mode == 0) ? "null client" :		\
		    "unsupported client")))

/* Debugging macros */
#ifdef DEBUG

static int	vd_msglevel = 0;


#define	PR0 if (vd_msglevel > 0)	PRN
#define	PR1 if (vd_msglevel > 1)	PRN
#define	PR2 if (vd_msglevel > 2)	PRN

#define	VD_DUMP_DRING_ELEM(elem)					\
	PRN("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n",		\
	    elem->hdr.dstate,						\
	    elem->payload.operation,					\
	    elem->payload.status,					\
	    elem->payload.nbytes,					\
	    elem->payload.addr,						\
	    elem->payload.ncookies);

char *
vd_decode_state(int state)
{
	char *str;

#define	CASE_STATE(_s)	case _s: str = #_s; break;

	switch (state) {
	CASE_STATE(VD_STATE_INIT)
	CASE_STATE(VD_STATE_VER)
	CASE_STATE(VD_STATE_ATTR)
	CASE_STATE(VD_STATE_DRING)
	CASE_STATE(VD_STATE_RDX)
	CASE_STATE(VD_STATE_DATA)
	default: str = "unknown"; break;
	}

#undef CASE_STATE

	return (str);
}

void
vd_decode_tag(vio_msg_t *msg)
{
	char *tstr, *sstr, *estr;

#define	CASE_TYPE(_s)	case _s: tstr = #_s; break;

	switch (msg->tag.vio_msgtype) {
	CASE_TYPE(VIO_TYPE_CTRL)
	CASE_TYPE(VIO_TYPE_DATA)
	CASE_TYPE(VIO_TYPE_ERR)
	default: tstr = "unknown"; break;
	}

#undef CASE_TYPE

#define	CASE_SUBTYPE(_s) case _s: sstr = #_s; break;

	switch (msg->tag.vio_subtype) {
	CASE_SUBTYPE(VIO_SUBTYPE_INFO)
	CASE_SUBTYPE(VIO_SUBTYPE_ACK)
	CASE_SUBTYPE(VIO_SUBTYPE_NACK)
	default: sstr = "unknown"; break;
	}

#undef CASE_SUBTYPE

#define	CASE_ENV(_s)	case _s: estr = #_s; break;

	switch (msg->tag.vio_subtype_env) {
	CASE_ENV(VIO_VER_INFO)
	CASE_ENV(VIO_ATTR_INFO)
	CASE_ENV(VIO_DRING_REG)
	CASE_ENV(VIO_DRING_UNREG)
	CASE_ENV(VIO_RDX)
	CASE_ENV(VIO_PKT_DATA)
	CASE_ENV(VIO_DESC_DATA)
	CASE_ENV(VIO_DRING_DATA)
	default: estr = "unknown"; break;
	}

#undef CASE_ENV

	PR1("(%x/%x/%x) message : (%s/%s/%s)",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, tstr, sstr, estr);
}

#else	/* !DEBUG */

#define	PR0(...)
#define	PR1(...)
#define	PR2(...)

#define	VD_DUMP_DRING_ELEM(elem)

#define	vd_decode_state(_s)	(NULL)
#define	vd_decode_tag(_s)	(NULL)

#endif	/* DEBUG */


/*
 * Soft state structure for a vds instance
 */
typedef struct vds {
	uint_t		initialized;	/* driver inst initialization flags */
	dev_info_t	*dip;		/* driver inst devinfo pointer */
	ldi_ident_t	ldi_ident;	/* driver's identifier for LDI */
	mod_hash_t	*vd_table;	/* table of virtual disks served */
	mdeg_handle_t	mdeg;		/* handle for MDEG operations */
} vds_t;

/*
 * Types of descriptor-processing tasks
 */
typedef enum vd_task_type {
	VD_NONFINAL_RANGE_TASK,	/* task for intermediate descriptor in range */
	VD_FINAL_RANGE_TASK,	/* task for last in a range of descriptors */
} vd_task_type_t;

/*
 * Structure describing the task for processing a descriptor
 */
typedef struct vd_task {
	struct vd		*vd;		/* vd instance task is for */
	vd_task_type_t		type;		/* type of descriptor task */
	int			index;		/* dring elem index for task */
	vio_msg_t		*msg;		/* VIO message task is for */
	size_t			msglen;		/* length of message content */
	vd_dring_payload_t	*request;	/* request task will perform */
	struct buf		buf;		/* buf(9s) for I/O request */
	ldc_mem_handle_t	mhdl;		/* task memory handle */
} vd_task_t;

/*
 * Soft state structure for a virtual disk instance
 */
typedef struct vd {
	uint_t			initialized;	/* vdisk initialization flags */
	vds_t			*vds;		/* server for this vdisk */
	ddi_taskq_t		*startq;	/* queue for I/O start tasks */
	ddi_taskq_t		*completionq;	/* queue for completion tasks */
	ldi_handle_t		ldi_handle[V_NUMPAR];	/* LDI slice handles */
	dev_t			dev[V_NUMPAR];	/* dev numbers for slices */
	uint_t			nslices;	/* number of slices */
	size_t			vdisk_size;	/* number of blocks in vdisk */
	vd_disk_type_t		vdisk_type;	/* slice or entire disk */
	vd_disk_label_t		vdisk_label;	/* EFI or VTOC label */
	ushort_t		max_xfer_sz;	/* max xfer size in DEV_BSIZE */
	boolean_t		pseudo;		/* underlying pseudo dev */
	struct dk_efi		dk_efi;		/* synthetic for slice type */
	struct dk_geom		dk_geom;	/* synthetic for slice type */
	struct vtoc		vtoc;		/* synthetic for slice type */
	ldc_status_t		ldc_state;	/* LDC connection state */
	ldc_handle_t		ldc_handle;	/* handle for LDC comm */
	size_t			max_msglen;	/* largest LDC message len */
	vd_state_t		state;		/* client handshake state */
	uint8_t			xfer_mode;	/* transfer mode with client */
	uint32_t		sid;		/* client's session ID */
	uint64_t		seq_num;	/* message sequence number */
	uint64_t		dring_ident;	/* identifier of dring */
	ldc_dring_handle_t	dring_handle;	/* handle for dring ops */
	uint32_t		descriptor_size;	/* num bytes in desc */
	uint32_t		dring_len;	/* number of dring elements */
	caddr_t			dring;		/* address of dring */
	caddr_t			vio_msgp;	/* vio msg staging buffer */
	vd_task_t		inband_task;	/* task for inband descriptor */
	vd_task_t		*dring_task;	/* tasks for dring elements */

	kmutex_t		lock;		/* protects variables below */
	boolean_t		enabled;	/* is vdisk enabled? */
	boolean_t		reset_state;	/* reset connection state? */
	boolean_t		reset_ldc;	/* reset LDC channel? */
} vd_t;

typedef struct vds_operation {
	char	*namep;
	uint8_t	operation;
	int	(*start)(vd_task_t *task);
	void	(*complete)(void *arg);
} vds_operation_t;

typedef struct vd_ioctl {
	uint8_t		operation;		/* vdisk operation */
	const char	*operation_name;	/* vdisk operation name */
	size_t		nbytes;			/* size of operation buffer */
	int		cmd;			/* corresponding ioctl cmd */
	const char	*cmd_name;		/* ioctl cmd name */
	void		*arg;			/* ioctl cmd argument */
	/* convert input vd_buf to output ioctl_arg */
	void		(*copyin)(void *vd_buf, void *ioctl_arg);
	/* convert input ioctl_arg to output vd_buf */
	void		(*copyout)(void *ioctl_arg, void *vd_buf);
} vd_ioctl_t;

/* Define trivial copyin/copyout conversion function flag */
#define	VD_IDENTITY	((void (*)(void *, void *))-1)


static int	vds_ldc_retries = VDS_LDC_RETRIES;
static int	vds_ldc_delay = VDS_LDC_DELAY;
static void	*vds_state;
static uint64_t	vds_operations;	/* see vds_operation[] definition below */

static int	vd_open_flags = VD_OPEN_FLAGS;

/*
 * Supported protocol version pairs, from highest (newest) to lowest (oldest)
 *
 * Each supported major version should appear only once, paired with (and only
 * with) its highest supported minor version number (as the protocol requires
 * supporting all lower minor version numbers as well)
 */
static const vio_ver_t	vds_version[] = {{1, 0}};
static const size_t	vds_num_versions =
    sizeof (vds_version)/sizeof (vds_version[0]);

static void vd_free_dring_task(vd_t *vdp);

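/*
 * Start a block read or write for the client:  map the memory exported by
 * the client for this request, wire it up to a buf(9s), and issue the I/O
 * through the backing device's LDI handle.  Returns EINPROGRESS if the I/O
 * was issued (vd_complete_bio() then finishes the request on the completion
 * taskq), or an errno value on failure.
 */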
static int
vd_start_bio(vd_task_t *task)
{
	int			rv, status = 0;
	vd_t			*vd = task->vd;
	vd_dring_payload_t	*request = task->request;
	struct buf		*buf = &task->buf;
	uint8_t			mtype;


	ASSERT(vd != NULL);
	ASSERT(request != NULL);
	ASSERT(request->slice < vd->nslices);
	ASSERT((request->operation == VD_OP_BREAD) ||
	    (request->operation == VD_OP_BWRITE));

	if (request->nbytes == 0)
		return (EINVAL);	/* no service for trivial requests */

	PR1("%s %lu bytes at block %lu",
	    (request->operation == VD_OP_BREAD) ? "Read" : "Write",
	    request->nbytes, request->addr);

	bioinit(buf);
	buf->b_flags	= B_BUSY;
	buf->b_bcount	= request->nbytes;
	buf->b_lblkno	= request->addr;
	buf->b_edev	= vd->dev[request->slice];

	mtype = (&vd->inband_task == task) ? LDC_SHADOW_MAP : LDC_DIRECT_MAP;

	/* Map memory exported by client */
	status = ldc_mem_map(task->mhdl, request->cookie, request->ncookies,
	    mtype, (request->operation == VD_OP_BREAD) ? LDC_MEM_W : LDC_MEM_R,
	    &(buf->b_un.b_addr), NULL);
	if (status != 0) {
		PR0("ldc_mem_map() returned err %d ", status);
		biofini(buf);
		return (status);
	}

	status = ldc_mem_acquire(task->mhdl, 0, buf->b_bcount);
	if (status != 0) {
		(void) ldc_mem_unmap(task->mhdl);
		PR0("ldc_mem_acquire() returned err %d ", status);
		biofini(buf);
		return (status);
	}

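	/*
	 * Note:  once the I/O is issued below, vd_complete_bio() is
	 * responsible for releasing and unmapping this LDC mapping.
	 */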
	buf->b_flags |= (request->operation == VD_OP_BREAD) ? B_READ : B_WRITE;

	/* Start the block I/O */
	if ((status = ldi_strategy(vd->ldi_handle[request->slice], buf)) == 0)
		return (EINPROGRESS);	/* will complete on completionq */

	/* Clean up after error */
	rv = ldc_mem_release(task->mhdl, 0, buf->b_bcount);
	if (rv) {
		PR0("ldc_mem_release() returned err %d ", rv);
	}
	rv = ldc_mem_unmap(task->mhdl);
	if (rv) {
		PR0("ldc_mem_unmap() returned err %d ", rv);
	}

	biofini(buf);
	return (status);
}

static int
send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen)
{
	int	status;
	size_t	nbytes;

	do {
		nbytes = msglen;
		status = ldc_write(ldc_handle, msg, &nbytes);
		if (status != EWOULDBLOCK)
			break;
		drv_usecwait(vds_ldc_delay);
	} while (status == EWOULDBLOCK);

	if (status != 0) {
		if (status != ECONNRESET)
			PR0("ldc_write() returned errno %d", status);
		return (status);
	} else if (nbytes != msglen) {
		PR0("ldc_write() performed only partial write");
		return (EIO);
	}

	PR1("SENT %lu bytes", msglen);
	return (0);
}

static void
vd_need_reset(vd_t *vd, boolean_t reset_ldc)
{
	mutex_enter(&vd->lock);
	vd->reset_state	= B_TRUE;
	vd->reset_ldc	= reset_ldc;
	mutex_exit(&vd->lock);
}

/*
 * Reset the state of the connection with a client, if needed; reset the LDC
 * transport as well, if needed.  This function should only be called from
 * vd_recv_msg(), as it waits for tasks - otherwise a deadlock can occur.
 */
static void
vd_reset_if_needed(vd_t *vd)
{
	int	status = 0;

	mutex_enter(&vd->lock);
	if (!vd->reset_state) {
		ASSERT(!vd->reset_ldc);
		mutex_exit(&vd->lock);
		return;
	}
	mutex_exit(&vd->lock);

	PR0("Resetting connection state with %s", VD_CLIENT(vd));

	/*
	 * Let any asynchronous I/O complete before possibly pulling the rug
	 * out from under it; defer checking vd->reset_ldc, as one of the
	 * asynchronous tasks might set it
	 */
	ddi_taskq_wait(vd->completionq);

	if ((vd->initialized & VD_DRING) &&
	    ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0))
		PR0("ldc_mem_dring_unmap() returned errno %d", status);

	vd_free_dring_task(vd);

	/* Free the staging buffer for msgs */
	if (vd->vio_msgp != NULL) {
		kmem_free(vd->vio_msgp, vd->max_msglen);
		vd->vio_msgp = NULL;
	}

	/* Free the inband message buffer */
	if (vd->inband_task.msg != NULL) {
		kmem_free(vd->inband_task.msg, vd->max_msglen);
		vd->inband_task.msg = NULL;
	}

	mutex_enter(&vd->lock);

	if (vd->reset_ldc)
		PR0("taking down LDC channel");
	if (vd->reset_ldc && ((status = ldc_down(vd->ldc_handle)) != 0))
		PR0("ldc_down() returned errno %d", status);

	vd->initialized	&= ~(VD_SID | VD_SEQ_NUM | VD_DRING);
	vd->state	= VD_STATE_INIT;
	vd->max_msglen	= sizeof (vio_msg_t);	/* baseline vio message size */

	/* Allocate the staging buffer */
	vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP);

	status = ldc_status(vd->ldc_handle, &vd->ldc_state);
	if (vd->reset_ldc && vd->ldc_state != LDC_UP) {
		PR0("calling ldc_up\n");
		(void) ldc_up(vd->ldc_handle);
	}

	vd->reset_state	= B_FALSE;
	vd->reset_ldc	= B_FALSE;

	mutex_exit(&vd->lock);
}

static void vd_recv_msg(void *arg);

static void
vd_mark_in_reset(vd_t *vd)
{
	int status;

	PR0("vd_mark_in_reset: marking vd in reset\n");

	vd_need_reset(vd, B_FALSE);
	status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, DDI_SLEEP);
	if (status == DDI_FAILURE) {
		PR0("cannot schedule task to recv msg\n");
		vd_need_reset(vd, B_TRUE);
		return;
	}
}

static int
vd_mark_elem_done(vd_t *vd, int idx, int elem_status)
{
	boolean_t		accepted;
	int			status;
	vd_dring_entry_t	*elem = VD_DRING_ELEM(idx);

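	/* Skip the dring update if the connection is already being reset */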
	if (vd->reset_state)
		return (0);

	/* Acquire the element */
	if (!vd->reset_state &&
	    (status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) {
		if (status == ECONNRESET) {
			vd_mark_in_reset(vd);
			return (0);
		} else {
			PR0("ldc_mem_dring_acquire() returned errno %d",
			    status);
			return (status);
		}
	}

	/* Set the element's status and mark it done */
	accepted = (elem->hdr.dstate == VIO_DESC_ACCEPTED);
	if (accepted) {
		elem->payload.status	= elem_status;
		elem->hdr.dstate	= VIO_DESC_DONE;
	} else {
		/* Perhaps client timed out waiting for I/O... */
		PR0("element %u no longer \"accepted\"", idx);
		VD_DUMP_DRING_ELEM(elem);
	}
	/* Release the element */
	if (!vd->reset_state &&
	    (status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) {
		if (status == ECONNRESET) {
			vd_mark_in_reset(vd);
			return (0);
		} else {
			PR0("ldc_mem_dring_release() returned errno %d",
			    status);
			return (status);
		}
	}

	return (accepted ? 0 : EINVAL);
}

static void
vd_complete_bio(void *arg)
{
	int			status		= 0;
	vd_task_t		*task		= (vd_task_t *)arg;
	vd_t			*vd		= task->vd;
	vd_dring_payload_t	*request	= task->request;
	struct buf		*buf		= &task->buf;


	ASSERT(vd != NULL);
	ASSERT(request != NULL);
	ASSERT(task->msg != NULL);
	ASSERT(task->msglen >= sizeof (*task->msg));

	/* Wait for the I/O to complete */
	request->status = biowait(buf);

	/* Release the buffer */
	if (!vd->reset_state)
		status = ldc_mem_release(task->mhdl, 0, buf->b_bcount);
	if (status) {
		PR0("ldc_mem_release() returned errno %d copying to "
		    "client", status);
		if (status == ECONNRESET) {
			vd_mark_in_reset(vd);
		}
	}

	/* Unmap the memory, even if in reset */
	status = ldc_mem_unmap(task->mhdl);
	if (status) {
		PR0("ldc_mem_unmap() returned errno %d copying to client",
		    status);
		if (status == ECONNRESET) {
			vd_mark_in_reset(vd);
		}
	}

	biofini(buf);

	/* Update the dring element for a dring client */
	if (!vd->reset_state && (status == 0) &&
	    (vd->xfer_mode == VIO_DRING_MODE)) {
		status = vd_mark_elem_done(vd, task->index, request->status);
		if (status == ECONNRESET)
			vd_mark_in_reset(vd);
	}

	/*
	 * If a transport error occurred, arrange to "nack" the message when
	 * the final task in the descriptor element range completes
	 */
	if (status != 0)
		task->msg->tag.vio_subtype = VIO_SUBTYPE_NACK;

	/*
	 * Only the final task for a range of elements will respond to and
	 * free the message
	 */
	if (task->type == VD_NONFINAL_RANGE_TASK) {
		return;
	}

	/*
	 * Send the "ack" or "nack" back to the client; if sending the message
	 * via LDC fails, arrange to reset both the connection state and LDC
	 * itself
	 */
	PR1("Sending %s",
	    (task->msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK");
	if (!vd->reset_state) {
		status = send_msg(vd->ldc_handle, task->msg, task->msglen);
		switch (status) {
		case 0:
			break;
		case ECONNRESET:
			vd_mark_in_reset(vd);
			break;
		default:
			PR0("initiating full reset");
			vd_need_reset(vd, B_TRUE);
			break;
		}
	}
}

static void
vd_geom2dk_geom(void *vd_buf, void *ioctl_arg)
{
	VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg);
}

static void
vd_vtoc2vtoc(void *vd_buf, void *ioctl_arg)
{
	VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg);
}

static void
dk_geom2vd_geom(void *ioctl_arg, void *vd_buf)
{
	DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf);
}

static void
vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf)
{
	VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf);
}

static void
vd_get_efi_in(void *vd_buf, void *ioctl_arg)
{
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	dk_efi->dki_lba = vd_efi->lba;
	dk_efi->dki_length = vd_efi->length;
	dk_efi->dki_data = kmem_zalloc(vd_efi->length, KM_SLEEP);
}

static void
vd_get_efi_out(void *ioctl_arg, void *vd_buf)
{
	int len;
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	len = vd_efi->length;
	DK_EFI2VD_EFI(dk_efi, vd_efi);
	kmem_free(dk_efi->dki_data, len);
}

static void
vd_set_efi_in(void *vd_buf, void *ioctl_arg)
{
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	dk_efi->dki_data = kmem_alloc(vd_efi->length, KM_SLEEP);
	VD_EFI2DK_EFI(vd_efi, dk_efi);
}

static void
vd_set_efi_out(void *ioctl_arg, void *vd_buf)
{
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	kmem_free(dk_efi->dki_data, vd_efi->length);
}

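/*
 * Read the label of the backing device:  try a VTOC label first and fall
 * back to an EFI label, which is converted to a VTOC for the caller.  On
 * success, *label records which label type was found.
 */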
static int
vd_read_vtoc(ldi_handle_t handle, struct vtoc *vtoc, vd_disk_label_t *label)
{
	int status, rval;
	struct dk_gpt *efi;
	size_t efi_len;

	*label = VD_DISK_LABEL_UNK;

	status = ldi_ioctl(handle, DKIOCGVTOC, (intptr_t)vtoc,
	    (vd_open_flags | FKIOCTL), kcred, &rval);

	if (status == 0) {
		*label = VD_DISK_LABEL_VTOC;
		return (0);
	} else if (status != ENOTSUP) {
		PR0("ldi_ioctl(DKIOCGVTOC) returned error %d", status);
		return (status);
	}

	status = vds_efi_alloc_and_read(handle, &efi, &efi_len);

	if (status) {
		PR0("vds_efi_alloc_and_read returned error %d", status);
		return (status);
	}

	*label = VD_DISK_LABEL_EFI;
	vd_efi_to_vtoc(efi, vtoc);
	vd_efi_free(efi, efi_len);

	return (0);
}

static int
vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg)
{
	dk_efi_t *dk_ioc;

	switch (vd->vdisk_label) {

	case VD_DISK_LABEL_VTOC:

		switch (cmd) {
		case DKIOCGGEOM:
			ASSERT(ioctl_arg != NULL);
			bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom));
			return (0);
		case DKIOCGVTOC:
			ASSERT(ioctl_arg != NULL);
			bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc));
			return (0);
		default:
			return (ENOTSUP);
		}

	case VD_DISK_LABEL_EFI:

		switch (cmd) {
		case DKIOCGETEFI:
			ASSERT(ioctl_arg != NULL);
			dk_ioc = (dk_efi_t *)ioctl_arg;
			if (dk_ioc->dki_length < vd->dk_efi.dki_length)
				return (EINVAL);
			bcopy(vd->dk_efi.dki_data, dk_ioc->dki_data,
			    vd->dk_efi.dki_length);
			return (0);
		default:
			return (ENOTSUP);
		}

	default:
		return (ENOTSUP);
	}
}

static int
vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl)
{
	int	rval = 0, status;
	size_t	nbytes = request->nbytes;	/* modifiable copy */


	ASSERT(request->slice < vd->nslices);
	PR0("Performing %s", ioctl->operation_name);

	/* Get data from client and convert, if necessary */
	if (ioctl->copyin != NULL) {
		ASSERT(nbytes != 0 && buf != NULL);
		PR1("Getting \"arg\" data from client");
\"arg\" data from client"); 8201991Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 8211991Sheppo request->cookie, request->ncookies, 8221991Sheppo LDC_COPY_IN)) != 0) { 823*2793Slm66018 PR0("ldc_mem_copy() returned errno %d " 8241991Sheppo "copying from client", status); 8251991Sheppo return (status); 8261991Sheppo } 8272032Slm66018 8282032Slm66018 /* Convert client's data, if necessary */ 8292032Slm66018 if (ioctl->copyin == VD_IDENTITY) /* use client buffer */ 8302032Slm66018 ioctl->arg = buf; 8312032Slm66018 else /* convert client vdisk operation data to ioctl data */ 8322032Slm66018 (ioctl->copyin)(buf, (void *)ioctl->arg); 8331991Sheppo } 8341991Sheppo 8351991Sheppo /* 8361991Sheppo * Handle single-slice block devices internally; otherwise, have the 8371991Sheppo * real driver perform the ioctl() 8381991Sheppo */ 8391991Sheppo if (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo) { 8402032Slm66018 if ((status = vd_do_slice_ioctl(vd, ioctl->cmd, 8412032Slm66018 (void *)ioctl->arg)) != 0) 8421991Sheppo return (status); 8431991Sheppo } else if ((status = ldi_ioctl(vd->ldi_handle[request->slice], 8442336Snarayan ioctl->cmd, (intptr_t)ioctl->arg, (vd_open_flags | FKIOCTL), 8452336Snarayan kcred, &rval)) != 0) { 8461991Sheppo PR0("ldi_ioctl(%s) = errno %d", ioctl->cmd_name, status); 8471991Sheppo return (status); 8481991Sheppo } 8491991Sheppo #ifdef DEBUG 8501991Sheppo if (rval != 0) { 851*2793Slm66018 PR0("%s set rval = %d, which is not being returned to client", 8521991Sheppo ioctl->cmd_name, rval); 8531991Sheppo } 8541991Sheppo #endif /* DEBUG */ 8551991Sheppo 8562032Slm66018 /* Convert data and send to client, if necessary */ 8572032Slm66018 if (ioctl->copyout != NULL) { 8581991Sheppo ASSERT(nbytes != 0 && buf != NULL); 8591991Sheppo PR1("Sending \"arg\" data to client"); 8602032Slm66018 8612032Slm66018 /* Convert ioctl data to vdisk operation data, if necessary */ 8622032Slm66018 if (ioctl->copyout != VD_IDENTITY) 8632032Slm66018 (ioctl->copyout)((void *)ioctl->arg, buf); 8642032Slm66018 8651991Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 8661991Sheppo request->cookie, request->ncookies, 8671991Sheppo LDC_COPY_OUT)) != 0) { 868*2793Slm66018 PR0("ldc_mem_copy() returned errno %d " 8691991Sheppo "copying to client", status); 8701991Sheppo return (status); 8711991Sheppo } 8721991Sheppo } 8731991Sheppo 8741991Sheppo return (status); 8751991Sheppo } 8761991Sheppo 8772032Slm66018 /* 8782032Slm66018 * Open any slices which have become non-empty as a result of performing a 8792032Slm66018 * set-VTOC operation for the client. 8802032Slm66018 * 8812032Slm66018 * When serving a full disk, vds attempts to exclusively open all of the 8822032Slm66018 * disk's slices to prevent another thread or process in the service domain 8832032Slm66018 * from "stealing" a slice or from performing I/O to a slice while a vds 8842032Slm66018 * client is accessing it. Unfortunately, underlying drivers, such as sd(7d) 8852032Slm66018 * and cmdk(7d), return an error when attempting to open the device file for a 8862032Slm66018 * slice which is currently empty according to the VTOC. This driver behavior 8872032Slm66018 * means that vds must skip opening empty slices when initializing a vdisk for 8882032Slm66018 * full-disk service and try to open slices that become non-empty (via a 8892032Slm66018 * set-VTOC operation) during use of the full disk in order to begin serving 8902032Slm66018 * such slices to the client. 
 * This approach has an inherent (and therefore
 * unavoidable) race condition; it also means that failure to open a
 * newly-non-empty slice has different semantics than failure to open an
 * initially-non-empty slice:  Due to driver behavior, opening a
 * newly-non-empty slice is a necessary side effect of vds performing a
 * (successful) set-VTOC operation for a client on an in-service (and in-use)
 * disk in order to begin serving the slice; failure of this side-effect
 * operation does not mean that the client's set-VTOC operation failed or that
 * operations on other slices must fail.  Therefore, this function prints an
 * error message on failure to open a slice, but does not return an error to
 * its caller--unlike failure to open a slice initially, which results in an
 * error that prevents serving the vdisk (and thereby requires an
 * administrator to resolve the problem).  Note that, apart from another
 * thread or process opening a new slice during the race-condition window,
 * failure to open a slice in this function will likely indicate an underlying
 * drive problem, which will also likely become evident in errors returned by
 * operations on other slices, and which will require administrative
 * intervention and possibly servicing the drive.
 */
static void
vd_open_new_slices(vd_t *vd)
{
	int		status;
	struct vtoc	vtoc;

	/* Get the (new) partitions for updated slice sizes */
	if ((status = vd_read_vtoc(vd->ldi_handle[0], &vtoc,
	    &vd->vdisk_label)) != 0) {
		PR0("vd_read_vtoc returned error %d", status);
		return;
	}

	/* Open any newly-non-empty slices */
	for (int slice = 0; slice < vd->nslices; slice++) {
		/* Skip zero-length slices */
		if (vtoc.v_part[slice].p_size == 0) {
			if (vd->ldi_handle[slice] != NULL)
				PR0("Open slice %u now has zero length", slice);
			continue;
		}

		/* Skip already-open slices */
		if (vd->ldi_handle[slice] != NULL)
			continue;

		PR0("Opening newly-non-empty slice %u", slice);
		if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK,
		    vd_open_flags, kcred, &vd->ldi_handle[slice],
		    vd->vds->ldi_ident)) != 0) {
			PR0("ldi_open_by_dev() returned errno %d "
			    "for slice %u", status, slice);
		}
	}
}

#define	RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t))
static int
vd_ioctl(vd_task_t *task)
{
	int			i, status;
	void			*buf = NULL;
	struct dk_geom		dk_geom = {0};
	struct vtoc		vtoc = {0};
	struct dk_efi		dk_efi = {0};
	vd_t			*vd = task->vd;
	vd_dring_payload_t	*request = task->request;
	vd_ioctl_t		ioctl[] = {
		/* Command (no-copy) operations */
		{VD_OP_FLUSH,
		    STRINGIZE(VD_OP_FLUSH), 0,
		    DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE),
		    NULL, NULL, NULL},

		/* "Get" (copy-out) operations */
		{VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int),
		    DKIOCGETWCE, STRINGIZE(DKIOCGETWCE),
		    NULL, VD_IDENTITY, VD_IDENTITY},
		{VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM),
		    RNDSIZE(vd_geom_t),
		    DKIOCGGEOM, STRINGIZE(DKIOCGGEOM),
		    &dk_geom, NULL, dk_geom2vd_geom},
		{VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t),
		    DKIOCGVTOC, STRINGIZE(DKIOCGVTOC),
		    &vtoc, NULL, vtoc2vd_vtoc},
		{VD_OP_GET_EFI, STRINGIZE(VD_OP_GET_EFI), RNDSIZE(vd_efi_t),
		    DKIOCGETEFI, STRINGIZE(DKIOCGETEFI),
		    &dk_efi, vd_get_efi_in, vd_get_efi_out},

		/* "Set" (copy-in) operations */
		{VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int),
		    DKIOCSETWCE, STRINGIZE(DKIOCSETWCE),
		    NULL, VD_IDENTITY, VD_IDENTITY},
		{VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM),
		    RNDSIZE(vd_geom_t),
		    DKIOCSGEOM, STRINGIZE(DKIOCSGEOM),
		    &dk_geom, vd_geom2dk_geom, NULL},
		{VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t),
		    DKIOCSVTOC, STRINGIZE(DKIOCSVTOC),
		    &vtoc, vd_vtoc2vtoc, NULL},
		{VD_OP_SET_EFI, STRINGIZE(VD_OP_SET_EFI), RNDSIZE(vd_efi_t),
		    DKIOCSETEFI, STRINGIZE(DKIOCSETEFI),
		    &dk_efi, vd_set_efi_in, vd_set_efi_out},
	};
	size_t		nioctls = (sizeof (ioctl))/(sizeof (ioctl[0]));


	ASSERT(vd != NULL);
	ASSERT(request != NULL);
	ASSERT(request->slice < vd->nslices);

	/*
	 * Determine ioctl corresponding to caller's "operation" and
	 * validate caller's "nbytes"
	 */
	for (i = 0; i < nioctls; i++) {
		if (request->operation == ioctl[i].operation) {
			/* LDC memory operations require 8-byte multiples */
			ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0);

			if (request->operation == VD_OP_GET_EFI ||
			    request->operation == VD_OP_SET_EFI) {
				if (request->nbytes >= ioctl[i].nbytes)
					break;
				PR0("%s:  Expected at least nbytes = %lu, "
				    "got %lu", ioctl[i].operation_name,
				    ioctl[i].nbytes, request->nbytes);
				return (EINVAL);
			}

			if (request->nbytes != ioctl[i].nbytes) {
				PR0("%s:  Expected nbytes = %lu, got %lu",
				    ioctl[i].operation_name, ioctl[i].nbytes,
				    request->nbytes);
				return (EINVAL);
			}

			break;
		}
	}
	ASSERT(i < nioctls);	/* because "operation" already validated */

	if (request->nbytes)
		buf = kmem_zalloc(request->nbytes, KM_SLEEP);
	status = vd_do_ioctl(vd, request, buf, &ioctl[i]);
	if (request->nbytes)
		kmem_free(buf, request->nbytes);
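
	/*
	 * A set-VTOC or set-EFI operation on a full disk may have made
	 * previously empty slices non-empty; open them now.
	 */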
	if (vd->vdisk_type == VD_DISK_TYPE_DISK &&
	    (request->operation == VD_OP_SET_VTOC ||
	    request->operation == VD_OP_SET_EFI))
		vd_open_new_slices(vd);
	PR0("Returning %d", status);
	return (status);
}

static int
vd_get_devid(vd_task_t *task)
{
	vd_t *vd = task->vd;
	vd_dring_payload_t *request = task->request;
	vd_devid_t *vd_devid;
	impl_devid_t *devid;
	int status, bufid_len, devid_len, len;
	int bufbytes;

	PR1("Get Device ID, nbytes=%ld", request->nbytes);

	if (ddi_lyr_get_devid(vd->dev[request->slice],
	    (ddi_devid_t *)&devid) != DDI_SUCCESS) {
		/* the most common failure is that no devid is available */
		PR2("No Device ID");
		return (ENOENT);
	}

	bufid_len = request->nbytes - sizeof (vd_devid_t) + 1;
	devid_len = DEVID_GETLEN(devid);

	/*
	 * Save the buffer size here for use in deallocation.
	 * The actual number of bytes copied is returned in
	 * the 'nbytes' field of the request structure.
	 */
	bufbytes = request->nbytes;

	vd_devid = kmem_zalloc(bufbytes, KM_SLEEP);
	vd_devid->length = devid_len;
	vd_devid->type = DEVID_GETTYPE(devid);

	len = (devid_len > bufid_len)? bufid_len : devid_len;

	bcopy(devid->did_id, vd_devid->id, len);

	/* LDC memory operations require 8-byte multiples */
	ASSERT(request->nbytes % sizeof (uint64_t) == 0);

	if ((status = ldc_mem_copy(vd->ldc_handle, (caddr_t)vd_devid, 0,
	    &request->nbytes, request->cookie, request->ncookies,
	    LDC_COPY_OUT)) != 0) {
		PR0("ldc_mem_copy() returned errno %d copying to client",
		    status);
	}
	PR1("post mem_copy: nbytes=%ld", request->nbytes);

	kmem_free(vd_devid, bufbytes);
	ddi_devid_free((ddi_devid_t)devid);

	return (status);
}

/*
 * Define the supported operations once the functions for performing them have
 * been defined
 */
static const vds_operation_t	vds_operation[] = {
#define	X(_s)	#_s, _s
	{X(VD_OP_BREAD),	vd_start_bio,	vd_complete_bio},
	{X(VD_OP_BWRITE),	vd_start_bio,	vd_complete_bio},
	{X(VD_OP_FLUSH),	vd_ioctl,	NULL},
	{X(VD_OP_GET_WCE),	vd_ioctl,	NULL},
	{X(VD_OP_SET_WCE),	vd_ioctl,	NULL},
	{X(VD_OP_GET_VTOC),	vd_ioctl,	NULL},
	{X(VD_OP_SET_VTOC),	vd_ioctl,	NULL},
	{X(VD_OP_GET_DISKGEOM),	vd_ioctl,	NULL},
	{X(VD_OP_SET_DISKGEOM),	vd_ioctl,	NULL},
	{X(VD_OP_GET_EFI),	vd_ioctl,	NULL},
	{X(VD_OP_SET_EFI),	vd_ioctl,	NULL},
	{X(VD_OP_GET_DEVID),	vd_get_devid,	NULL},
#undef X
};

static const size_t	vds_noperations =
    (sizeof (vds_operation))/(sizeof (vds_operation[0]));

/*
 * Process a task specifying a client I/O request
 */
static int
vd_process_task(vd_task_t *task)
{
	int			i, status;
	vd_t			*vd = task->vd;
	vd_dring_payload_t	*request = task->request;


	ASSERT(vd != NULL);
	ASSERT(request != NULL);

	/* Find the requested operation */
	for (i = 0; i < vds_noperations; i++)
		if (request->operation == vds_operation[i].operation)
			break;
	if (i == vds_noperations) {
		PR0("Unsupported operation %u", request->operation);
		return (ENOTSUP);
	}

	/* Handle client using absolute disk offsets */
	if ((vd->vdisk_type == VD_DISK_TYPE_DISK) &&
	    (request->slice == UINT8_MAX))
		request->slice = VD_ENTIRE_DISK_SLICE;

	/* Range-check slice */
	if (request->slice >= vd->nslices) {
		PR0("Invalid \"slice\" %u (max %u) for virtual disk",
		    request->slice, (vd->nslices - 1));
		return (EINVAL);
	}

	PR1("operation : %s", vds_operation[i].namep);

	/* Start the operation */
	if ((status = vds_operation[i].start(task)) != EINPROGRESS) {
		PR0("operation : %s returned status %d",
		    vds_operation[i].namep, status);
		request->status = status;	/* op succeeded or failed */
		return (0);			/* but request completed */
	}

	ASSERT(vds_operation[i].complete != NULL);	/* debug case */
	if (vds_operation[i].complete == NULL) {	/* non-debug case */
		PR0("Unexpected return of EINPROGRESS "
		    "with no I/O completion handler");
		request->status = EIO;	/* operation failed */
		return (0);		/* but request completed */
	}

	PR1("operation : kick off taskq entry for %s", vds_operation[i].namep);

	/* Queue a task to complete the operation */
	status = ddi_taskq_dispatch(vd->completionq, vds_operation[i].complete,
	    task, DDI_SLEEP);
	/* ddi_taskq_dispatch(9f) guarantees success with DDI_SLEEP */
	ASSERT(status == DDI_SUCCESS);

	PR1("Operation in progress");
	return (EINPROGRESS);	/* completion handler will finish request */
}

/*
 * Return true if the "type", "subtype", and "env" fields of the "tag" first
 * argument match the corresponding remaining arguments; otherwise, return false
 */
boolean_t
vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env)
{
	return ((tag->vio_msgtype == type) &&
	    (tag->vio_subtype == subtype) &&
	    (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE;
}

/*
 * Check whether the major/minor version specified in "ver_msg" is supported
 * by this server.
 */
static boolean_t
vds_supported_version(vio_ver_msg_t *ver_msg)
{
	for (int i = 0; i < vds_num_versions; i++) {
		ASSERT(vds_version[i].major > 0);
		ASSERT((i == 0) ||
		    (vds_version[i].major < vds_version[i-1].major));

		/*
		 * If the major versions match, adjust the minor version, if
		 * necessary, down to the highest value supported by this
		 * server and return true so this message will get "ack"ed;
		 * the client should also support all minor versions lower
		 * than the value it sent
		 */
		if (ver_msg->ver_major == vds_version[i].major) {
			if (ver_msg->ver_minor > vds_version[i].minor) {
				PR0("Adjusting minor version from %u to %u",
				    ver_msg->ver_minor, vds_version[i].minor);
				ver_msg->ver_minor = vds_version[i].minor;
			}
			return (B_TRUE);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return false, so this message will get "nack"ed with
		 * these values, and the client will potentially try again
		 * with the same or a lower version
		 */
		if (ver_msg->ver_major > vds_version[i].major) {
			ver_msg->ver_major = vds_version[i].major;
			ver_msg->ver_minor = vds_version[i].minor;
			return (B_FALSE);
		}

		/*
		 * Otherwise, the message's major version is less than the
		 * current major version, so continue the loop to the next
		 * (lower) supported version
		 */
	}

	/*
	 * No common version was found; "ground" the version pair in the
	 * message to terminate negotiation
	 */
	ver_msg->ver_major = 0;
	ver_msg->ver_minor = 0;
	return (B_FALSE);
}

/*
 * Process a version message from a client.  vds expects to receive version
 * messages from clients seeking service, but never issues version messages
 * itself; therefore, vds can ACK or NACK client version messages, but does
 * not expect to receive version-message ACKs or NACKs (and will treat such
 * messages as invalid).
 */
static int
vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
{
	vio_ver_msg_t	*ver_msg = (vio_ver_msg_t *)msg;


	ASSERT(msglen >= sizeof (msg->tag));

	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
	    VIO_VER_INFO)) {
		return (ENOMSG);	/* not a version message */
	}

	if (msglen != sizeof (*ver_msg)) {
		PR0("Expected %lu-byte version message; "
		    "received %lu bytes", sizeof (*ver_msg), msglen);
		return (EBADMSG);
	}

	if (ver_msg->dev_class != VDEV_DISK) {
		PR0("Expected device class %u (disk); received %u",
		    VDEV_DISK, ver_msg->dev_class);
		return (EBADMSG);
	}

	/*
	 * We're talking to the expected kind of client; set our device class
	 * for "ack/nack" back to the client
	 */
	ver_msg->dev_class = VDEV_DISK_SERVER;

	/*
	 * Check whether the (valid) version message specifies a version
	 * supported by this server.  If the version is not supported, return
	 * EBADMSG so the message will get "nack"ed; vds_supported_version()
	 * will have updated the message with a supported version for the
	 * client to consider
	 */
	if (!vds_supported_version(ver_msg))
		return (EBADMSG);


	/*
	 * A version has been agreed upon; use the client's SID for
	 * communication on this channel now
	 */
	ASSERT(!(vd->initialized & VD_SID));
	vd->sid = ver_msg->tag.vio_sid;
	vd->initialized |= VD_SID;

	/*
	 * When multiple versions are supported, this function should store
	 * the negotiated major and minor version values in the "vd" data
	 * structure to govern further communication; in particular, note that
	 * the client might have specified a lower minor version for the
	 * agreed major version than specified in the vds_version[] array.  The
	 * following assertions should help remind future maintainers to make
	 * the appropriate changes to support multiple versions.
	 */
	ASSERT(vds_num_versions == 1);
	ASSERT(ver_msg->ver_major == vds_version[0].major);
	ASSERT(ver_msg->ver_minor == vds_version[0].minor);

	PR0("Using major version %u, minor version %u",
	    ver_msg->ver_major, ver_msg->ver_minor);
	return (0);
}

static int
vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
{
	vd_attr_msg_t	*attr_msg = (vd_attr_msg_t *)msg;


	ASSERT(msglen >= sizeof (msg->tag));

	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
	    VIO_ATTR_INFO)) {
		PR0("Message is not an attribute message");
		return (ENOMSG);
	}

	if (msglen != sizeof (*attr_msg)) {
		PR0("Expected %lu-byte attribute message; "
		    "received %lu bytes", sizeof (*attr_msg), msglen);
		return (EBADMSG);
	}

	if (attr_msg->max_xfer_sz == 0) {
		PR0("Received maximum transfer size of 0 from client");
		return (EBADMSG);
	}

	if ((attr_msg->xfer_mode != VIO_DESC_MODE) &&
	    (attr_msg->xfer_mode != VIO_DRING_MODE)) {
		PR0("Client requested unsupported transfer mode");
		return (EBADMSG);
	}

	/* Success:  valid message and transfer mode */
	vd->xfer_mode = attr_msg->xfer_mode;

	if (vd->xfer_mode == VIO_DESC_MODE) {

		/*
		 * The vd_dring_inband_msg_t contains one cookie; need room
		 * for up to n-1 more cookies, where "n" is the number of full
		 * pages plus possibly one partial page required to cover
		 * "max_xfer_sz".  Add room for one more cookie if
		 * "max_xfer_sz" isn't an integral multiple of the page size.
		 * Must first get the maximum transfer size in bytes.
		 */
		size_t max_xfer_bytes = attr_msg->vdisk_block_size ?
		    attr_msg->vdisk_block_size*attr_msg->max_xfer_sz :
		    attr_msg->max_xfer_sz;
		size_t max_inband_msglen =
		    sizeof (vd_dring_inband_msg_t) +
		    ((max_xfer_bytes/PAGESIZE +
		    ((max_xfer_bytes % PAGESIZE) ?
1 : 0))* 13811991Sheppo (sizeof (ldc_mem_cookie_t))); 13821991Sheppo 13831991Sheppo /* 13841991Sheppo * Set the maximum expected message length to 13851991Sheppo * accommodate in-band-descriptor messages with all 13861991Sheppo * their cookies 13871991Sheppo */ 13881991Sheppo vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen); 13892336Snarayan 13902336Snarayan /* 13912336Snarayan * Initialize the data structure for processing in-band I/O 13922336Snarayan * request descriptors 13932336Snarayan */ 13942336Snarayan vd->inband_task.vd = vd; 1395*2793Slm66018 vd->inband_task.msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 13962336Snarayan vd->inband_task.index = 0; 13972336Snarayan vd->inband_task.type = VD_FINAL_RANGE_TASK; /* range == 1 */ 13981991Sheppo } 13991991Sheppo 14002410Slm66018 /* Return the device's block size and max transfer size to the client */ 14012410Slm66018 attr_msg->vdisk_block_size = DEV_BSIZE; 14022410Slm66018 attr_msg->max_xfer_sz = vd->max_xfer_sz; 14032410Slm66018 14041991Sheppo attr_msg->vdisk_size = vd->vdisk_size; 14051991Sheppo attr_msg->vdisk_type = vd->vdisk_type; 14061991Sheppo attr_msg->operations = vds_operations; 14071991Sheppo PR0("%s", VD_CLIENT(vd)); 1408*2793Slm66018 1409*2793Slm66018 ASSERT(vd->dring_task == NULL); 1410*2793Slm66018 14111991Sheppo return (0); 14121991Sheppo } 14131991Sheppo 14141991Sheppo static int 14151991Sheppo vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 14161991Sheppo { 14171991Sheppo int status; 14181991Sheppo size_t expected; 14191991Sheppo ldc_mem_info_t dring_minfo; 14201991Sheppo vio_dring_reg_msg_t *reg_msg = (vio_dring_reg_msg_t *)msg; 14211991Sheppo 14221991Sheppo 14231991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 14241991Sheppo 14251991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 14261991Sheppo VIO_DRING_REG)) { 14272336Snarayan PR0("Message is not a register-dring message"); 14282336Snarayan return (ENOMSG); 14291991Sheppo } 14301991Sheppo 14311991Sheppo if (msglen < sizeof (*reg_msg)) { 1432*2793Slm66018 PR0("Expected at least %lu-byte register-dring message; " 14331991Sheppo "received %lu bytes", sizeof (*reg_msg), msglen); 14341991Sheppo return (EBADMSG); 14351991Sheppo } 14361991Sheppo 14371991Sheppo expected = sizeof (*reg_msg) + 14381991Sheppo (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0])); 14391991Sheppo if (msglen != expected) { 1440*2793Slm66018 PR0("Expected %lu-byte register-dring message; " 14411991Sheppo "received %lu bytes", expected, msglen); 14421991Sheppo return (EBADMSG); 14431991Sheppo } 14441991Sheppo 14451991Sheppo if (vd->initialized & VD_DRING) { 1446*2793Slm66018 PR0("A dring was previously registered; only support one"); 14471991Sheppo return (EBADMSG); 14481991Sheppo } 14491991Sheppo 14502336Snarayan if (reg_msg->num_descriptors > INT32_MAX) { 1451*2793Slm66018 PR0("reg_msg->num_descriptors = %u; must be <= %u (%s)", 14522336Snarayan reg_msg->ncookies, INT32_MAX, STRINGIZE(INT32_MAX)); 14532336Snarayan return (EBADMSG); 14542336Snarayan } 14552336Snarayan 14561991Sheppo if (reg_msg->ncookies != 1) { 14571991Sheppo /* 14581991Sheppo * In addition to fixing the assertion in the success case 14591991Sheppo * below, supporting drings which require more than one 14601991Sheppo * "cookie" requires increasing the value of vd->max_msglen 14611991Sheppo * somewhere in the code path prior to receiving the message 14621991Sheppo * which results in calling this function. 
Note that without 14631991Sheppo * making this change, the larger message size required to 14641991Sheppo * accommodate multiple cookies cannot be successfully 14651991Sheppo * received, so this function will not even get called. 14661991Sheppo * Gracefully accommodating more dring cookies might 14671991Sheppo * reasonably demand exchanging an additional attribute or 14681991Sheppo * making a minor protocol adjustment 14691991Sheppo */ 1470*2793Slm66018 PR0("reg_msg->ncookies = %u != 1", reg_msg->ncookies); 14711991Sheppo return (EBADMSG); 14721991Sheppo } 14731991Sheppo 14741991Sheppo status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie, 14751991Sheppo reg_msg->ncookies, reg_msg->num_descriptors, 14762531Snarayan reg_msg->descriptor_size, LDC_DIRECT_MAP, &vd->dring_handle); 14771991Sheppo if (status != 0) { 1478*2793Slm66018 PR0("ldc_mem_dring_map() returned errno %d", status); 14791991Sheppo return (status); 14801991Sheppo } 14811991Sheppo 14821991Sheppo /* 14831991Sheppo * To remove the need for this assertion, must call 14841991Sheppo * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a 14851991Sheppo * successful call to ldc_mem_dring_map() 14861991Sheppo */ 14871991Sheppo ASSERT(reg_msg->ncookies == 1); 14881991Sheppo 14891991Sheppo if ((status = 14901991Sheppo ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { 1491*2793Slm66018 PR0("ldc_mem_dring_info() returned errno %d", status); 14921991Sheppo if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0) 1493*2793Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 14941991Sheppo return (status); 14951991Sheppo } 14961991Sheppo 14971991Sheppo if (dring_minfo.vaddr == NULL) { 1498*2793Slm66018 PR0("Descriptor ring virtual address is NULL"); 14992032Slm66018 return (ENXIO); 15001991Sheppo } 15011991Sheppo 15021991Sheppo 15032336Snarayan /* Initialize for valid message and mapped dring */ 15041991Sheppo PR1("descriptor size = %u, dring length = %u", 15051991Sheppo vd->descriptor_size, vd->dring_len); 15061991Sheppo vd->initialized |= VD_DRING; 15071991Sheppo vd->dring_ident = 1; /* "There Can Be Only One" */ 15081991Sheppo vd->dring = dring_minfo.vaddr; 15091991Sheppo vd->descriptor_size = reg_msg->descriptor_size; 15101991Sheppo vd->dring_len = reg_msg->num_descriptors; 15111991Sheppo reg_msg->dring_ident = vd->dring_ident; 15122336Snarayan 15132336Snarayan /* 15142336Snarayan * Allocate and initialize a "shadow" array of data structures for 15152336Snarayan * tasks to process I/O requests in dring elements 15162336Snarayan */ 15172336Snarayan vd->dring_task = 15182336Snarayan kmem_zalloc((sizeof (*vd->dring_task)) * vd->dring_len, KM_SLEEP); 15192336Snarayan for (int i = 0; i < vd->dring_len; i++) { 15202336Snarayan vd->dring_task[i].vd = vd; 15212336Snarayan vd->dring_task[i].index = i; 15222336Snarayan vd->dring_task[i].request = &VD_DRING_ELEM(i)->payload; 15232531Snarayan 15242531Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, 15252531Snarayan &(vd->dring_task[i].mhdl)); 15262531Snarayan if (status) { 1527*2793Slm66018 PR0("ldc_mem_alloc_handle() returned err %d ", status); 15282531Snarayan return (ENXIO); 15292531Snarayan } 1530*2793Slm66018 1531*2793Slm66018 vd->dring_task[i].msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 15322336Snarayan } 15332336Snarayan 15341991Sheppo return (0); 15351991Sheppo } 15361991Sheppo 15371991Sheppo static int 15381991Sheppo vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 15391991Sheppo { 15401991Sheppo vio_dring_unreg_msg_t 
*unreg_msg = (vio_dring_unreg_msg_t *)msg; 15411991Sheppo 15421991Sheppo 15431991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 15441991Sheppo 15451991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 15461991Sheppo VIO_DRING_UNREG)) { 15472336Snarayan PR0("Message is not an unregister-dring message"); 15482336Snarayan return (ENOMSG); 15491991Sheppo } 15501991Sheppo 15511991Sheppo if (msglen != sizeof (*unreg_msg)) { 1552*2793Slm66018 PR0("Expected %lu-byte unregister-dring message; " 15531991Sheppo "received %lu bytes", sizeof (*unreg_msg), msglen); 15541991Sheppo return (EBADMSG); 15551991Sheppo } 15561991Sheppo 15571991Sheppo if (unreg_msg->dring_ident != vd->dring_ident) { 1558*2793Slm66018 PR0("Expected dring ident %lu; received %lu", 15591991Sheppo vd->dring_ident, unreg_msg->dring_ident); 15601991Sheppo return (EBADMSG); 15611991Sheppo } 15621991Sheppo 15631991Sheppo return (0); 15641991Sheppo } 15651991Sheppo 15661991Sheppo static int 15671991Sheppo process_rdx_msg(vio_msg_t *msg, size_t msglen) 15681991Sheppo { 15691991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 15701991Sheppo 15712336Snarayan if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX)) { 15722336Snarayan PR0("Message is not an RDX message"); 15732336Snarayan return (ENOMSG); 15742336Snarayan } 15751991Sheppo 15761991Sheppo if (msglen != sizeof (vio_rdx_msg_t)) { 1577*2793Slm66018 PR0("Expected %lu-byte RDX message; received %lu bytes", 15781991Sheppo sizeof (vio_rdx_msg_t), msglen); 15791991Sheppo return (EBADMSG); 15801991Sheppo } 15811991Sheppo 15822336Snarayan PR0("Valid RDX message"); 15831991Sheppo return (0); 15841991Sheppo } 15851991Sheppo 15861991Sheppo static int 15871991Sheppo vd_check_seq_num(vd_t *vd, uint64_t seq_num) 15881991Sheppo { 15891991Sheppo if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) { 1590*2793Slm66018 PR0("Received seq_num %lu; expected %lu", 15911991Sheppo seq_num, (vd->seq_num + 1)); 1592*2793Slm66018 PR0("initiating soft reset"); 15932336Snarayan vd_need_reset(vd, B_FALSE); 15941991Sheppo return (1); 15951991Sheppo } 15961991Sheppo 15971991Sheppo vd->seq_num = seq_num; 15981991Sheppo vd->initialized |= VD_SEQ_NUM; /* superfluous after first time... 
*/ 15991991Sheppo return (0); 16001991Sheppo } 16011991Sheppo 16021991Sheppo /* 16031991Sheppo * Return the expected size of an inband-descriptor message with all the 16041991Sheppo * cookies it claims to include 16051991Sheppo */ 16061991Sheppo static size_t 16071991Sheppo expected_inband_size(vd_dring_inband_msg_t *msg) 16081991Sheppo { 16091991Sheppo return ((sizeof (*msg)) + 16101991Sheppo (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0]))); 16111991Sheppo } 16121991Sheppo 16131991Sheppo /* 16141991Sheppo * Process an in-band descriptor message: used with clients like OBP, with 16151991Sheppo * which vds exchanges descriptors within VIO message payloads, rather than 16161991Sheppo * operating on them within a descriptor ring 16171991Sheppo */ 16181991Sheppo static int 1619*2793Slm66018 vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 16201991Sheppo { 16211991Sheppo size_t expected; 16221991Sheppo vd_dring_inband_msg_t *desc_msg = (vd_dring_inband_msg_t *)msg; 16231991Sheppo 16241991Sheppo 16251991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 16261991Sheppo 16271991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 16282336Snarayan VIO_DESC_DATA)) { 16292336Snarayan PR1("Message is not an in-band-descriptor message"); 16302336Snarayan return (ENOMSG); 16312336Snarayan } 16321991Sheppo 16331991Sheppo if (msglen < sizeof (*desc_msg)) { 1634*2793Slm66018 PR0("Expected at least %lu-byte descriptor message; " 16351991Sheppo "received %lu bytes", sizeof (*desc_msg), msglen); 16361991Sheppo return (EBADMSG); 16371991Sheppo } 16381991Sheppo 16391991Sheppo if (msglen != (expected = expected_inband_size(desc_msg))) { 1640*2793Slm66018 PR0("Expected %lu-byte descriptor message; " 16411991Sheppo "received %lu bytes", expected, msglen); 16421991Sheppo return (EBADMSG); 16431991Sheppo } 16441991Sheppo 16452336Snarayan if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) 16461991Sheppo return (EBADMSG); 16472336Snarayan 16482336Snarayan /* 16492336Snarayan * Valid message: Set up the in-band descriptor task and process the 16502336Snarayan * request. Arrange to acknowledge the client's message, unless an 16512336Snarayan * error processing the descriptor task results in setting 16522336Snarayan * VIO_SUBTYPE_NACK 16532336Snarayan */ 16542336Snarayan PR1("Valid in-band-descriptor message"); 16552336Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 1656*2793Slm66018 1657*2793Slm66018 ASSERT(vd->inband_task.msg != NULL); 1658*2793Slm66018 1659*2793Slm66018 bcopy(msg, vd->inband_task.msg, msglen); 16602336Snarayan vd->inband_task.msglen = msglen; 1661*2793Slm66018 1662*2793Slm66018 /* 1663*2793Slm66018 * The task request is now the payload of the message 1664*2793Slm66018 * that was just copied into the body of the task. 
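	 * (The payload pointer must therefore reference this copy rather
	 * than the original receive buffer, which is reused for subsequent
	 * messages.)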
1665*2793Slm66018 */ 1666*2793Slm66018 desc_msg = (vd_dring_inband_msg_t *)vd->inband_task.msg; 16672336Snarayan vd->inband_task.request = &desc_msg->payload; 1668*2793Slm66018 16692336Snarayan return (vd_process_task(&vd->inband_task)); 16701991Sheppo } 16711991Sheppo 16721991Sheppo static int 16732336Snarayan vd_process_element(vd_t *vd, vd_task_type_t type, uint32_t idx, 1674*2793Slm66018 vio_msg_t *msg, size_t msglen) 16751991Sheppo { 16762336Snarayan int status; 16772336Snarayan boolean_t ready; 16782336Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 16792336Snarayan 16802336Snarayan 16812336Snarayan /* Accept the updated dring element */ 16822336Snarayan if ((status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 1683*2793Slm66018 PR0("ldc_mem_dring_acquire() returned errno %d", status); 16841991Sheppo return (status); 16851991Sheppo } 16862336Snarayan ready = (elem->hdr.dstate == VIO_DESC_READY); 16872336Snarayan if (ready) { 16882336Snarayan elem->hdr.dstate = VIO_DESC_ACCEPTED; 16892336Snarayan } else { 1690*2793Slm66018 PR0("descriptor %u not ready", idx); 16912336Snarayan VD_DUMP_DRING_ELEM(elem); 16922336Snarayan } 16932336Snarayan if ((status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 1694*2793Slm66018 PR0("ldc_mem_dring_release() returned errno %d", status); 16951991Sheppo return (status); 16961991Sheppo } 16972336Snarayan if (!ready) 16982336Snarayan return (EBUSY); 16992336Snarayan 17002336Snarayan 17012336Snarayan /* Initialize a task and process the accepted element */ 17022336Snarayan PR1("Processing dring element %u", idx); 17032336Snarayan vd->dring_task[idx].type = type; 1704*2793Slm66018 1705*2793Slm66018 /* duplicate msg buf for cookies etc. */ 1706*2793Slm66018 bcopy(msg, vd->dring_task[idx].msg, msglen); 1707*2793Slm66018 17082336Snarayan vd->dring_task[idx].msglen = msglen; 17092336Snarayan if ((status = vd_process_task(&vd->dring_task[idx])) != EINPROGRESS) 17102336Snarayan status = vd_mark_elem_done(vd, idx, elem->payload.status); 17112336Snarayan 17122336Snarayan return (status); 17131991Sheppo } 17141991Sheppo 17151991Sheppo static int 17162336Snarayan vd_process_element_range(vd_t *vd, int start, int end, 1717*2793Slm66018 vio_msg_t *msg, size_t msglen) 17182336Snarayan { 17192336Snarayan int i, n, nelem, status = 0; 17202336Snarayan boolean_t inprogress = B_FALSE; 17212336Snarayan vd_task_type_t type; 17222336Snarayan 17232336Snarayan 17242336Snarayan ASSERT(start >= 0); 17252336Snarayan ASSERT(end >= 0); 17262336Snarayan 17272336Snarayan /* 17282336Snarayan * Arrange to acknowledge the client's message, unless an error 17292336Snarayan * processing one of the dring elements results in setting 17302336Snarayan * VIO_SUBTYPE_NACK 17312336Snarayan */ 17322336Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 17332336Snarayan 17342336Snarayan /* 17352336Snarayan * Process the dring elements in the range 17362336Snarayan */ 17372336Snarayan nelem = ((end < start) ? end + vd->dring_len : end) - start + 1; 17382336Snarayan for (i = start, n = nelem; n > 0; i = (i + 1) % vd->dring_len, n--) { 17392336Snarayan ((vio_dring_msg_t *)msg)->end_idx = i; 17402336Snarayan type = (n == 1) ? 
		    VD_FINAL_RANGE_TASK : VD_NONFINAL_RANGE_TASK;
1741*2793Slm66018 		status = vd_process_element(vd, type, i, msg, msglen);
17422336Snarayan 		if (status == EINPROGRESS)
17432336Snarayan 			inprogress = B_TRUE;
17442336Snarayan 		else if (status != 0)
17452336Snarayan 			break;
17462336Snarayan 	}
17472336Snarayan 
17482336Snarayan 	/*
17492336Snarayan 	 * If some, but not all, operations of a multi-element range are in
17502336Snarayan 	 * progress, wait for other operations to complete before returning
17512336Snarayan 	 * (which will result in "ack" or "nack" of the message). Note that
17522336Snarayan 	 * all outstanding operations will need to complete, not just the ones
17532336Snarayan 	 * corresponding to the current range of dring elements; however, as
17542336Snarayan 	 * this situation is an error case, performance is less critical.
17552336Snarayan 	 */
17562336Snarayan 	if ((nelem > 1) && (status != EINPROGRESS) && inprogress)
17572336Snarayan 		ddi_taskq_wait(vd->completionq);
17582336Snarayan 
17592336Snarayan 	return (status);
17602336Snarayan }
17612336Snarayan 
17622336Snarayan static int
1763*2793Slm66018 vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
17641991Sheppo {
17651991Sheppo 	vio_dring_msg_t	*dring_msg = (vio_dring_msg_t *)msg;
17661991Sheppo 
17671991Sheppo 
17681991Sheppo 	ASSERT(msglen >= sizeof (msg->tag));
17691991Sheppo 
17701991Sheppo 	if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO,
17711991Sheppo 	    VIO_DRING_DATA)) {
17722336Snarayan 		PR1("Message is not a dring-data message");
17732336Snarayan 		return (ENOMSG);
17741991Sheppo 	}
17751991Sheppo 
17761991Sheppo 	if (msglen != sizeof (*dring_msg)) {
1777*2793Slm66018 		PR0("Expected %lu-byte dring message; received %lu bytes",
17781991Sheppo 		    sizeof (*dring_msg), msglen);
17791991Sheppo 		return (EBADMSG);
17801991Sheppo 	}
17811991Sheppo 
17822336Snarayan 	if (vd_check_seq_num(vd, dring_msg->seq_num) != 0)
17831991Sheppo 		return (EBADMSG);
17841991Sheppo 
17851991Sheppo 	if (dring_msg->dring_ident != vd->dring_ident) {
1786*2793Slm66018 		PR0("Expected dring ident %lu; received ident %lu",
17871991Sheppo 		    vd->dring_ident, dring_msg->dring_ident);
17881991Sheppo 		return (EBADMSG);
17891991Sheppo 	}
17901991Sheppo 
17912336Snarayan 	if (dring_msg->start_idx >= vd->dring_len) {
1792*2793Slm66018 		PR0("\"start_idx\" = %u; must be less than %u",
17932336Snarayan 		    dring_msg->start_idx, vd->dring_len);
17942336Snarayan 		return (EBADMSG);
17952336Snarayan 	}
17962336Snarayan 
17972336Snarayan 	if ((dring_msg->end_idx < 0) ||
17982336Snarayan 	    (dring_msg->end_idx >= vd->dring_len)) {
1799*2793Slm66018 		PR0("\"end_idx\" = %u; must be >= 0 and less than %u",
18002336Snarayan 		    dring_msg->end_idx, vd->dring_len);
18012336Snarayan 		return (EBADMSG);
18022336Snarayan 	}
18032336Snarayan 
18042336Snarayan 	/* Valid message; process range of updated dring elements */
18052336Snarayan 	PR1("Processing descriptor range, start = %u, end = %u",
18062336Snarayan 	    dring_msg->start_idx, dring_msg->end_idx);
18072336Snarayan 	return (vd_process_element_range(vd, dring_msg->start_idx,
1808*2793Slm66018 	    dring_msg->end_idx, msg, msglen));
18091991Sheppo }
18101991Sheppo 
18111991Sheppo static int
18121991Sheppo recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes)
18131991Sheppo {
18141991Sheppo 	int	retry, status;
18151991Sheppo 	size_t	size = *nbytes;
18161991Sheppo 
18171991Sheppo 
18181991Sheppo 	for (retry = 0, status = ETIMEDOUT;
18191991Sheppo 	    retry < vds_ldc_retries && status == ETIMEDOUT;
18201991Sheppo 	    retry++) {
18211991Sheppo 		PR1("ldc_read() attempt %d", (retry + 1));
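		/*
		 * Reset *nbytes to the full buffer size before each attempt;
		 * ldc_read() updates it with the length of the message
		 * actually received.
		 */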
18221991Sheppo *nbytes = size; 18231991Sheppo status = ldc_read(ldc_handle, msg, nbytes); 18241991Sheppo } 18251991Sheppo 1826*2793Slm66018 if (status) { 1827*2793Slm66018 PR0("ldc_read() returned errno %d", status); 1828*2793Slm66018 if (status != ECONNRESET) 1829*2793Slm66018 return (ENOMSG); 18301991Sheppo return (status); 18311991Sheppo } else if (*nbytes == 0) { 18321991Sheppo PR1("ldc_read() returned 0 and no message read"); 18331991Sheppo return (ENOMSG); 18341991Sheppo } 18351991Sheppo 18361991Sheppo PR1("RCVD %lu-byte message", *nbytes); 18371991Sheppo return (0); 18381991Sheppo } 18391991Sheppo 18401991Sheppo static int 1841*2793Slm66018 vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 18421991Sheppo { 18431991Sheppo int status; 18441991Sheppo 18451991Sheppo 18461991Sheppo PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype, 18471991Sheppo msg->tag.vio_subtype, msg->tag.vio_subtype_env); 1848*2793Slm66018 #ifdef DEBUG 1849*2793Slm66018 vd_decode_tag(msg); 1850*2793Slm66018 #endif 18511991Sheppo 18521991Sheppo /* 18531991Sheppo * Validate session ID up front, since it applies to all messages 18541991Sheppo * once set 18551991Sheppo */ 18561991Sheppo if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) { 1857*2793Slm66018 PR0("Expected SID %u, received %u", vd->sid, 18581991Sheppo msg->tag.vio_sid); 18591991Sheppo return (EBADMSG); 18601991Sheppo } 18611991Sheppo 1862*2793Slm66018 PR1("\tWhile in state %d (%s)", vd->state, vd_decode_state(vd->state)); 18631991Sheppo 18641991Sheppo /* 18651991Sheppo * Process the received message based on connection state 18661991Sheppo */ 18671991Sheppo switch (vd->state) { 18681991Sheppo case VD_STATE_INIT: /* expect version message */ 18692032Slm66018 if ((status = vd_process_ver_msg(vd, msg, msglen)) != 0) 18701991Sheppo return (status); 18711991Sheppo 18721991Sheppo /* Version negotiated, move to that state */ 18731991Sheppo vd->state = VD_STATE_VER; 18741991Sheppo return (0); 18751991Sheppo 18761991Sheppo case VD_STATE_VER: /* expect attribute message */ 18771991Sheppo if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0) 18781991Sheppo return (status); 18791991Sheppo 18801991Sheppo /* Attributes exchanged, move to that state */ 18811991Sheppo vd->state = VD_STATE_ATTR; 18821991Sheppo return (0); 18831991Sheppo 18841991Sheppo case VD_STATE_ATTR: 18851991Sheppo switch (vd->xfer_mode) { 18861991Sheppo case VIO_DESC_MODE: /* expect RDX message */ 18871991Sheppo if ((status = process_rdx_msg(msg, msglen)) != 0) 18881991Sheppo return (status); 18891991Sheppo 18901991Sheppo /* Ready to receive in-band descriptors */ 18911991Sheppo vd->state = VD_STATE_DATA; 18921991Sheppo return (0); 18931991Sheppo 18941991Sheppo case VIO_DRING_MODE: /* expect register-dring message */ 18951991Sheppo if ((status = 18961991Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != 0) 18971991Sheppo return (status); 18981991Sheppo 18991991Sheppo /* One dring negotiated, move to that state */ 19001991Sheppo vd->state = VD_STATE_DRING; 19011991Sheppo return (0); 19021991Sheppo 19031991Sheppo default: 19041991Sheppo ASSERT("Unsupported transfer mode"); 1905*2793Slm66018 PR0("Unsupported transfer mode"); 19061991Sheppo return (ENOTSUP); 19071991Sheppo } 19081991Sheppo 19091991Sheppo case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */ 19101991Sheppo if ((status = process_rdx_msg(msg, msglen)) == 0) { 19111991Sheppo /* Ready to receive data */ 19121991Sheppo vd->state = VD_STATE_DATA; 19131991Sheppo return (0); 19141991Sheppo } 
else if (status != ENOMSG) { 19151991Sheppo return (status); 19161991Sheppo } 19171991Sheppo 19181991Sheppo 19191991Sheppo /* 19201991Sheppo * If another register-dring message is received, stay in 19211991Sheppo * dring state in case the client sends RDX; although the 19221991Sheppo * protocol allows multiple drings, this server does not 19231991Sheppo * support using more than one 19241991Sheppo */ 19251991Sheppo if ((status = 19261991Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG) 19271991Sheppo return (status); 19281991Sheppo 19291991Sheppo /* 19301991Sheppo * Acknowledge an unregister-dring message, but reset the 19311991Sheppo * connection anyway: Although the protocol allows 19321991Sheppo * unregistering drings, this server cannot serve a vdisk 19331991Sheppo * without its only dring 19341991Sheppo */ 19351991Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 19361991Sheppo return ((status == 0) ? ENOTSUP : status); 19371991Sheppo 19381991Sheppo case VD_STATE_DATA: 19391991Sheppo switch (vd->xfer_mode) { 19401991Sheppo case VIO_DESC_MODE: /* expect in-band-descriptor message */ 1941*2793Slm66018 return (vd_process_desc_msg(vd, msg, msglen)); 19421991Sheppo 19431991Sheppo case VIO_DRING_MODE: /* expect dring-data or unreg-dring */ 19441991Sheppo /* 19451991Sheppo * Typically expect dring-data messages, so handle 19461991Sheppo * them first 19471991Sheppo */ 19481991Sheppo if ((status = vd_process_dring_msg(vd, msg, 1949*2793Slm66018 msglen)) != ENOMSG) 19501991Sheppo return (status); 19511991Sheppo 19521991Sheppo /* 19531991Sheppo * Acknowledge an unregister-dring message, but reset 19541991Sheppo * the connection anyway: Although the protocol 19551991Sheppo * allows unregistering drings, this server cannot 19561991Sheppo * serve a vdisk without its only dring 19571991Sheppo */ 19581991Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 19591991Sheppo return ((status == 0) ? 
ENOTSUP : status); 19601991Sheppo 19611991Sheppo default: 19621991Sheppo ASSERT("Unsupported transfer mode"); 1963*2793Slm66018 PR0("Unsupported transfer mode"); 19641991Sheppo return (ENOTSUP); 19651991Sheppo } 19661991Sheppo 19671991Sheppo default: 19681991Sheppo ASSERT("Invalid client connection state"); 1969*2793Slm66018 PR0("Invalid client connection state"); 19701991Sheppo return (ENOTSUP); 19711991Sheppo } 19721991Sheppo } 19731991Sheppo 19742336Snarayan static int 1975*2793Slm66018 vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 19761991Sheppo { 19771991Sheppo int status; 19781991Sheppo boolean_t reset_ldc = B_FALSE; 19791991Sheppo 19801991Sheppo 19811991Sheppo /* 19821991Sheppo * Check that the message is at least big enough for a "tag", so that 19831991Sheppo * message processing can proceed based on tag-specified message type 19841991Sheppo */ 19851991Sheppo if (msglen < sizeof (vio_msg_tag_t)) { 1986*2793Slm66018 PR0("Received short (%lu-byte) message", msglen); 19871991Sheppo /* Can't "nack" short message, so drop the big hammer */ 1988*2793Slm66018 PR0("initiating full reset"); 19892336Snarayan vd_need_reset(vd, B_TRUE); 19902336Snarayan return (EBADMSG); 19911991Sheppo } 19921991Sheppo 19931991Sheppo /* 19941991Sheppo * Process the message 19951991Sheppo */ 1996*2793Slm66018 switch (status = vd_do_process_msg(vd, msg, msglen)) { 19971991Sheppo case 0: 19981991Sheppo /* "ack" valid, successfully-processed messages */ 19991991Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 20001991Sheppo break; 20011991Sheppo 20022336Snarayan case EINPROGRESS: 20032336Snarayan /* The completion handler will "ack" or "nack" the message */ 20042336Snarayan return (EINPROGRESS); 20051991Sheppo case ENOMSG: 2006*2793Slm66018 PR0("Received unexpected message"); 20071991Sheppo _NOTE(FALLTHROUGH); 20081991Sheppo case EBADMSG: 20091991Sheppo case ENOTSUP: 20101991Sheppo /* "nack" invalid messages */ 20111991Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 20121991Sheppo break; 20131991Sheppo 20141991Sheppo default: 20151991Sheppo /* "nack" failed messages */ 20161991Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 20171991Sheppo /* An LDC error probably occurred, so try resetting it */ 20181991Sheppo reset_ldc = B_TRUE; 20191991Sheppo break; 20201991Sheppo } 20211991Sheppo 2022*2793Slm66018 PR1("\tResulting in state %d (%s)", vd->state, 2023*2793Slm66018 vd_decode_state(vd->state)); 2024*2793Slm66018 20252336Snarayan /* Send the "ack" or "nack" to the client */ 20261991Sheppo PR1("Sending %s", 20271991Sheppo (msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK"); 20281991Sheppo if (send_msg(vd->ldc_handle, msg, msglen) != 0) 20291991Sheppo reset_ldc = B_TRUE; 20301991Sheppo 20312336Snarayan /* Arrange to reset the connection for nack'ed or failed messages */ 2032*2793Slm66018 if ((status != 0) || reset_ldc) { 2033*2793Slm66018 PR0("initiating %s reset", 2034*2793Slm66018 (reset_ldc) ? 
"full" : "soft"); 20352336Snarayan vd_need_reset(vd, reset_ldc); 2036*2793Slm66018 } 20372336Snarayan 20382336Snarayan return (status); 20392336Snarayan } 20402336Snarayan 20412336Snarayan static boolean_t 20422336Snarayan vd_enabled(vd_t *vd) 20432336Snarayan { 20442336Snarayan boolean_t enabled; 20452336Snarayan 20462336Snarayan 20472336Snarayan mutex_enter(&vd->lock); 20482336Snarayan enabled = vd->enabled; 20492336Snarayan mutex_exit(&vd->lock); 20502336Snarayan return (enabled); 20511991Sheppo } 20521991Sheppo 20531991Sheppo static void 20542032Slm66018 vd_recv_msg(void *arg) 20551991Sheppo { 20562032Slm66018 vd_t *vd = (vd_t *)arg; 2057*2793Slm66018 int rv = 0, status = 0; 20581991Sheppo 20591991Sheppo ASSERT(vd != NULL); 2060*2793Slm66018 20612336Snarayan PR2("New task to receive incoming message(s)"); 2062*2793Slm66018 2063*2793Slm66018 20642336Snarayan while (vd_enabled(vd) && status == 0) { 20652336Snarayan size_t msglen, msgsize; 2066*2793Slm66018 ldc_status_t lstatus; 20672336Snarayan 20682336Snarayan /* 20692336Snarayan * Receive and process a message 20702336Snarayan */ 20712336Snarayan vd_reset_if_needed(vd); /* can change vd->max_msglen */ 2072*2793Slm66018 2073*2793Slm66018 /* 2074*2793Slm66018 * check if channel is UP - else break out of loop 2075*2793Slm66018 */ 2076*2793Slm66018 status = ldc_status(vd->ldc_handle, &lstatus); 2077*2793Slm66018 if (lstatus != LDC_UP) { 2078*2793Slm66018 PR0("channel not up (status=%d), exiting recv loop\n", 2079*2793Slm66018 lstatus); 2080*2793Slm66018 break; 2081*2793Slm66018 } 2082*2793Slm66018 2083*2793Slm66018 ASSERT(vd->max_msglen != 0); 2084*2793Slm66018 2085*2793Slm66018 msgsize = vd->max_msglen; /* stable copy for alloc/free */ 2086*2793Slm66018 msglen = msgsize; /* actual len after recv_msg() */ 2087*2793Slm66018 2088*2793Slm66018 status = recv_msg(vd->ldc_handle, vd->vio_msgp, &msglen); 2089*2793Slm66018 switch (status) { 2090*2793Slm66018 case 0: 2091*2793Slm66018 rv = vd_process_msg(vd, (vio_msg_t *)vd->vio_msgp, 2092*2793Slm66018 msglen); 2093*2793Slm66018 /* check if max_msglen changed */ 2094*2793Slm66018 if (msgsize != vd->max_msglen) { 2095*2793Slm66018 PR0("max_msglen changed 0x%lx to 0x%lx bytes\n", 2096*2793Slm66018 msgsize, vd->max_msglen); 2097*2793Slm66018 kmem_free(vd->vio_msgp, msgsize); 2098*2793Slm66018 vd->vio_msgp = 2099*2793Slm66018 kmem_alloc(vd->max_msglen, KM_SLEEP); 2100*2793Slm66018 } 2101*2793Slm66018 if (rv == EINPROGRESS) 2102*2793Slm66018 continue; 2103*2793Slm66018 break; 2104*2793Slm66018 2105*2793Slm66018 case ENOMSG: 2106*2793Slm66018 break; 2107*2793Slm66018 2108*2793Slm66018 case ECONNRESET: 2109*2793Slm66018 PR0("initiating soft reset (ECONNRESET)\n"); 2110*2793Slm66018 vd_need_reset(vd, B_FALSE); 2111*2793Slm66018 status = 0; 2112*2793Slm66018 break; 2113*2793Slm66018 2114*2793Slm66018 default: 21152336Snarayan /* Probably an LDC failure; arrange to reset it */ 2116*2793Slm66018 PR0("initiating full reset (status=0x%x)", status); 21172336Snarayan vd_need_reset(vd, B_TRUE); 2118*2793Slm66018 break; 21192336Snarayan } 21202032Slm66018 } 2121*2793Slm66018 21222336Snarayan PR2("Task finished"); 21232032Slm66018 } 21242032Slm66018 21252032Slm66018 static uint_t 21261991Sheppo vd_handle_ldc_events(uint64_t event, caddr_t arg) 21271991Sheppo { 21281991Sheppo vd_t *vd = (vd_t *)(void *)arg; 2129*2793Slm66018 int status; 21301991Sheppo 21311991Sheppo 21321991Sheppo ASSERT(vd != NULL); 21332336Snarayan 21342336Snarayan if (!vd_enabled(vd)) 21352336Snarayan return (LDC_SUCCESS); 21362336Snarayan 
2137*2793Slm66018 if (event & LDC_EVT_DOWN) { 2138*2793Slm66018 PRN("LDC_EVT_DOWN: LDC channel went down"); 2139*2793Slm66018 2140*2793Slm66018 vd_need_reset(vd, B_TRUE); 2141*2793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 2142*2793Slm66018 DDI_SLEEP); 2143*2793Slm66018 if (status == DDI_FAILURE) { 2144*2793Slm66018 PR0("cannot schedule task to recv msg\n"); 2145*2793Slm66018 vd_need_reset(vd, B_TRUE); 2146*2793Slm66018 } 2147*2793Slm66018 } 2148*2793Slm66018 21492336Snarayan if (event & LDC_EVT_RESET) { 2150*2793Slm66018 PR0("LDC_EVT_RESET: LDC channel was reset"); 2151*2793Slm66018 2152*2793Slm66018 if (vd->state != VD_STATE_INIT) { 2153*2793Slm66018 PR0("scheduling full reset"); 2154*2793Slm66018 vd_need_reset(vd, B_FALSE); 2155*2793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 2156*2793Slm66018 vd, DDI_SLEEP); 2157*2793Slm66018 if (status == DDI_FAILURE) { 2158*2793Slm66018 PR0("cannot schedule task to recv msg\n"); 2159*2793Slm66018 vd_need_reset(vd, B_TRUE); 2160*2793Slm66018 } 2161*2793Slm66018 2162*2793Slm66018 } else { 2163*2793Slm66018 PR0("channel already reset, ignoring...\n"); 2164*2793Slm66018 PR0("doing ldc up...\n"); 2165*2793Slm66018 (void) ldc_up(vd->ldc_handle); 2166*2793Slm66018 } 2167*2793Slm66018 21682336Snarayan return (LDC_SUCCESS); 21692336Snarayan } 21702336Snarayan 21712336Snarayan if (event & LDC_EVT_UP) { 2172*2793Slm66018 PR0("EVT_UP: LDC is up\nResetting client connection state"); 2173*2793Slm66018 PR0("initiating soft reset"); 21742336Snarayan vd_need_reset(vd, B_FALSE); 2175*2793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 2176*2793Slm66018 vd, DDI_SLEEP); 2177*2793Slm66018 if (status == DDI_FAILURE) { 2178*2793Slm66018 PR0("cannot schedule task to recv msg\n"); 2179*2793Slm66018 vd_need_reset(vd, B_TRUE); 2180*2793Slm66018 return (LDC_SUCCESS); 2181*2793Slm66018 } 21822336Snarayan } 21832336Snarayan 21842336Snarayan if (event & LDC_EVT_READ) { 21852336Snarayan int status; 21862336Snarayan 21872336Snarayan PR1("New data available"); 21882336Snarayan /* Queue a task to receive the new data */ 21892336Snarayan status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 21902336Snarayan DDI_SLEEP); 2191*2793Slm66018 2192*2793Slm66018 if (status == DDI_FAILURE) { 2193*2793Slm66018 PR0("cannot schedule task to recv msg\n"); 2194*2793Slm66018 vd_need_reset(vd, B_TRUE); 2195*2793Slm66018 } 21962336Snarayan } 21972336Snarayan 21982336Snarayan return (LDC_SUCCESS); 21991991Sheppo } 22001991Sheppo 22011991Sheppo static uint_t 22021991Sheppo vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 22031991Sheppo { 22041991Sheppo _NOTE(ARGUNUSED(key, val)) 22051991Sheppo (*((uint_t *)arg))++; 22061991Sheppo return (MH_WALK_TERMINATE); 22071991Sheppo } 22081991Sheppo 22091991Sheppo 22101991Sheppo static int 22111991Sheppo vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 22121991Sheppo { 22131991Sheppo uint_t vd_present = 0; 22141991Sheppo minor_t instance; 22151991Sheppo vds_t *vds; 22161991Sheppo 22171991Sheppo 22181991Sheppo switch (cmd) { 22191991Sheppo case DDI_DETACH: 22201991Sheppo /* the real work happens below */ 22211991Sheppo break; 22221991Sheppo case DDI_SUSPEND: 22232336Snarayan PR0("No action required for DDI_SUSPEND"); 22241991Sheppo return (DDI_SUCCESS); 22251991Sheppo default: 2226*2793Slm66018 PR0("Unrecognized \"cmd\""); 22271991Sheppo return (DDI_FAILURE); 22281991Sheppo } 22291991Sheppo 22301991Sheppo ASSERT(cmd == DDI_DETACH); 22311991Sheppo instance = ddi_get_instance(dip); 
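	/*
	 * Look up the per-instance soft state created at attach time; fail
	 * the detach if it cannot be found
	 */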
22321991Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 2233*2793Slm66018 PR0("Could not get state for instance %u", instance); 22341991Sheppo ddi_soft_state_free(vds_state, instance); 22351991Sheppo return (DDI_FAILURE); 22361991Sheppo } 22371991Sheppo 22381991Sheppo /* Do no detach when serving any vdisks */ 22391991Sheppo mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present); 22401991Sheppo if (vd_present) { 22411991Sheppo PR0("Not detaching because serving vdisks"); 22421991Sheppo return (DDI_FAILURE); 22431991Sheppo } 22441991Sheppo 22451991Sheppo PR0("Detaching"); 22461991Sheppo if (vds->initialized & VDS_MDEG) 22471991Sheppo (void) mdeg_unregister(vds->mdeg); 22481991Sheppo if (vds->initialized & VDS_LDI) 22491991Sheppo (void) ldi_ident_release(vds->ldi_ident); 22501991Sheppo mod_hash_destroy_hash(vds->vd_table); 22511991Sheppo ddi_soft_state_free(vds_state, instance); 22521991Sheppo return (DDI_SUCCESS); 22531991Sheppo } 22541991Sheppo 22551991Sheppo static boolean_t 22561991Sheppo is_pseudo_device(dev_info_t *dip) 22571991Sheppo { 22581991Sheppo dev_info_t *parent, *root = ddi_root_node(); 22591991Sheppo 22601991Sheppo 22611991Sheppo for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root); 22621991Sheppo parent = ddi_get_parent(parent)) { 22631991Sheppo if (strcmp(ddi_get_name(parent), DEVI_PSEUDO_NEXNAME) == 0) 22641991Sheppo return (B_TRUE); 22651991Sheppo } 22661991Sheppo 22671991Sheppo return (B_FALSE); 22681991Sheppo } 22691991Sheppo 22701991Sheppo static int 22712032Slm66018 vd_setup_full_disk(vd_t *vd) 22722032Slm66018 { 22732032Slm66018 int rval, status; 22742032Slm66018 major_t major = getmajor(vd->dev[0]); 22752032Slm66018 minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; 22762531Snarayan struct dk_minfo dk_minfo; 22772531Snarayan 22782531Snarayan /* 22792531Snarayan * At this point, vdisk_size is set to the size of partition 2 but 22802531Snarayan * this does not represent the size of the disk because partition 2 22812531Snarayan * may not cover the entire disk and its size does not include reserved 22822531Snarayan * blocks. So we update vdisk_size to be the size of the entire disk. 
22832531Snarayan */ 22842531Snarayan if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, 22852531Snarayan (intptr_t)&dk_minfo, (vd_open_flags | FKIOCTL), 22862531Snarayan kcred, &rval)) != 0) { 22872531Snarayan PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d", 22882531Snarayan status); 22892032Slm66018 return (status); 22902032Slm66018 } 22912531Snarayan vd->vdisk_size = dk_minfo.dki_capacity; 22922032Slm66018 22932032Slm66018 /* Set full-disk parameters */ 22942032Slm66018 vd->vdisk_type = VD_DISK_TYPE_DISK; 22952032Slm66018 vd->nslices = (sizeof (vd->dev))/(sizeof (vd->dev[0])); 22962032Slm66018 22972032Slm66018 /* Move dev number and LDI handle to entire-disk-slice array elements */ 22982032Slm66018 vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; 22992032Slm66018 vd->dev[0] = 0; 23002032Slm66018 vd->ldi_handle[VD_ENTIRE_DISK_SLICE] = vd->ldi_handle[0]; 23012032Slm66018 vd->ldi_handle[0] = NULL; 23022032Slm66018 23032032Slm66018 /* Initialize device numbers for remaining slices and open them */ 23042032Slm66018 for (int slice = 0; slice < vd->nslices; slice++) { 23052032Slm66018 /* 23062032Slm66018 * Skip the entire-disk slice, as it's already open and its 23072032Slm66018 * device known 23082032Slm66018 */ 23092032Slm66018 if (slice == VD_ENTIRE_DISK_SLICE) 23102032Slm66018 continue; 23112032Slm66018 ASSERT(vd->dev[slice] == 0); 23122032Slm66018 ASSERT(vd->ldi_handle[slice] == NULL); 23132032Slm66018 23142032Slm66018 /* 23152032Slm66018 * Construct the device number for the current slice 23162032Slm66018 */ 23172032Slm66018 vd->dev[slice] = makedevice(major, (minor + slice)); 23182032Slm66018 23192032Slm66018 /* 23202032Slm66018 * At least some underlying drivers refuse to open 23212032Slm66018 * devices for (currently) zero-length slices, so skip 23222032Slm66018 * them for now 23232032Slm66018 */ 23242531Snarayan if (vd->vtoc.v_part[slice].p_size == 0) { 23252032Slm66018 PR0("Skipping zero-length slice %u", slice); 23262032Slm66018 continue; 23272032Slm66018 } 23282032Slm66018 23292032Slm66018 /* 23302032Slm66018 * Open all non-empty slices of the disk to serve them to the 23312032Slm66018 * client. Slices are opened exclusively to prevent other 23322032Slm66018 * threads or processes in the service domain from performing 23332032Slm66018 * I/O to slices being accessed by a client. Failure to open 23342032Slm66018 * a slice results in vds not serving this disk, as the client 23352032Slm66018 * could attempt (and should be able) to access any non-empty 23362032Slm66018 * slice immediately. Any slices successfully opened before a 23372032Slm66018 * failure will get closed by vds_destroy_vd() as a result of 23382032Slm66018 * the error returned by this function. 
23392032Slm66018 */ 23402032Slm66018 PR0("Opening device major %u, minor %u = slice %u", 23412032Slm66018 major, minor, slice); 23422032Slm66018 if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 23432032Slm66018 vd_open_flags, kcred, &vd->ldi_handle[slice], 23442032Slm66018 vd->vds->ldi_ident)) != 0) { 23452032Slm66018 PRN("ldi_open_by_dev() returned errno %d " 23462032Slm66018 "for slice %u", status, slice); 23472032Slm66018 /* vds_destroy_vd() will close any open slices */ 23482032Slm66018 return (status); 23492032Slm66018 } 23502032Slm66018 } 23512032Slm66018 23522032Slm66018 return (0); 23532032Slm66018 } 23542032Slm66018 23552032Slm66018 static int 23562531Snarayan vd_setup_partition_efi(vd_t *vd) 23572531Snarayan { 23582531Snarayan efi_gpt_t *gpt; 23592531Snarayan efi_gpe_t *gpe; 23602531Snarayan struct uuid uuid = EFI_RESERVED; 23612531Snarayan uint32_t crc; 23622531Snarayan int length; 23632531Snarayan 23642531Snarayan length = sizeof (efi_gpt_t) + sizeof (efi_gpe_t); 23652531Snarayan 23662531Snarayan gpt = kmem_zalloc(length, KM_SLEEP); 23672531Snarayan gpe = (efi_gpe_t *)(gpt + 1); 23682531Snarayan 23692531Snarayan gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 23702531Snarayan gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 23712531Snarayan gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t)); 23722531Snarayan gpt->efi_gpt_FirstUsableLBA = LE_64(0ULL); 23732531Snarayan gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1); 23742531Snarayan gpt->efi_gpt_NumberOfPartitionEntries = LE_32(1); 23752531Snarayan gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t)); 23762531Snarayan 23772531Snarayan UUID_LE_CONVERT(gpe->efi_gpe_PartitionTypeGUID, uuid); 23782531Snarayan gpe->efi_gpe_StartingLBA = gpt->efi_gpt_FirstUsableLBA; 23792531Snarayan gpe->efi_gpe_EndingLBA = gpt->efi_gpt_LastUsableLBA; 23802531Snarayan 23812531Snarayan CRC32(crc, gpe, sizeof (efi_gpe_t), -1U, crc32_table); 23822531Snarayan gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 23832531Snarayan 23842531Snarayan CRC32(crc, gpt, sizeof (efi_gpt_t), -1U, crc32_table); 23852531Snarayan gpt->efi_gpt_HeaderCRC32 = LE_32(~crc); 23862531Snarayan 23872531Snarayan vd->dk_efi.dki_lba = 0; 23882531Snarayan vd->dk_efi.dki_length = length; 23892531Snarayan vd->dk_efi.dki_data = gpt; 23902531Snarayan 23912531Snarayan return (0); 23922531Snarayan } 23932531Snarayan 23942531Snarayan static int 23952410Slm66018 vd_setup_vd(char *device_path, vd_t *vd) 23961991Sheppo { 23972410Slm66018 int rval, status; 23981991Sheppo dev_info_t *dip; 23991991Sheppo struct dk_cinfo dk_cinfo; 24001991Sheppo 24012531Snarayan /* 24022531Snarayan * We need to open with FNDELAY so that opening an empty partition 24032531Snarayan * does not fail. 24042531Snarayan */ 24052531Snarayan if ((status = ldi_open_by_name(device_path, vd_open_flags | FNDELAY, 24062531Snarayan kcred, &vd->ldi_handle[0], vd->vds->ldi_ident)) != 0) { 24072410Slm66018 PRN("ldi_open_by_name(%s) = errno %d", device_path, status); 24082032Slm66018 return (status); 24092032Slm66018 } 24102032Slm66018 24112531Snarayan /* 24122531Snarayan * nslices must be updated now so that vds_destroy_vd() will close 24132531Snarayan * the slice we have just opened in case of an error. 
24142531Snarayan */ 24152531Snarayan vd->nslices = 1; 24162531Snarayan 24172410Slm66018 /* Get device number and size of backing device */ 24182032Slm66018 if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) { 24191991Sheppo PRN("ldi_get_dev() returned errno %d for %s", 24202410Slm66018 status, device_path); 24211991Sheppo return (status); 24221991Sheppo } 24232410Slm66018 if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) { 24242410Slm66018 PRN("ldi_get_size() failed for %s", device_path); 24252410Slm66018 return (EIO); 24262410Slm66018 } 24272410Slm66018 vd->vdisk_size = lbtodb(vd->vdisk_size); /* convert to blocks */ 24282410Slm66018 24292410Slm66018 /* Verify backing device supports dk_cinfo, dk_geom, and vtoc */ 24302410Slm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 24312410Slm66018 (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred, 24322410Slm66018 &rval)) != 0) { 24332410Slm66018 PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 24342410Slm66018 status, device_path); 24351991Sheppo return (status); 24361991Sheppo } 24372410Slm66018 if (dk_cinfo.dki_partition >= V_NUMPAR) { 24382410Slm66018 PRN("slice %u >= maximum slice %u for %s", 24392410Slm66018 dk_cinfo.dki_partition, V_NUMPAR, device_path); 24401991Sheppo return (EIO); 24411991Sheppo } 24422531Snarayan 24432531Snarayan status = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc, &vd->vdisk_label); 24442531Snarayan 24452531Snarayan if (status != 0) { 24462531Snarayan PRN("vd_read_vtoc returned errno %d for %s", 24472410Slm66018 status, device_path); 24482410Slm66018 return (status); 24492410Slm66018 } 24502531Snarayan 24512531Snarayan if (vd->vdisk_label == VD_DISK_LABEL_VTOC && 24522531Snarayan (status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, 24532531Snarayan (intptr_t)&vd->dk_geom, (vd_open_flags | FKIOCTL), 24542531Snarayan kcred, &rval)) != 0) { 24552531Snarayan PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", 24562531Snarayan status, device_path); 24572531Snarayan return (status); 24582410Slm66018 } 24592410Slm66018 24602410Slm66018 /* Store the device's max transfer size for return to the client */ 24612410Slm66018 vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 24622410Slm66018 24632410Slm66018 24642410Slm66018 /* Determine if backing device is a pseudo device */ 24651991Sheppo if ((dip = ddi_hold_devi_by_instance(getmajor(vd->dev[0]), 24661991Sheppo dev_to_instance(vd->dev[0]), 0)) == NULL) { 24672410Slm66018 PRN("%s is no longer accessible", device_path); 24681991Sheppo return (EIO); 24691991Sheppo } 24701991Sheppo vd->pseudo = is_pseudo_device(dip); 24711991Sheppo ddi_release_devi(dip); 24721991Sheppo if (vd->pseudo) { 24731991Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 24741991Sheppo vd->nslices = 1; 24751991Sheppo return (0); /* ...and we're done */ 24761991Sheppo } 24771991Sheppo 24781991Sheppo 24792032Slm66018 /* If slice is entire-disk slice, initialize for full disk */ 24802032Slm66018 if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) 24812032Slm66018 return (vd_setup_full_disk(vd)); 24821991Sheppo 24832032Slm66018 24842410Slm66018 /* Otherwise, we have a non-entire slice of a device */ 24851991Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 24861991Sheppo vd->nslices = 1; 24871991Sheppo 24882531Snarayan if (vd->vdisk_label == VD_DISK_LABEL_EFI) { 24892531Snarayan status = vd_setup_partition_efi(vd); 24902531Snarayan return (status); 24912531Snarayan } 24921991Sheppo 24932410Slm66018 /* Initialize dk_geom structure for single-slice device */ 24941991Sheppo if 
(vd->dk_geom.dkg_nsect == 0) { 2495*2793Slm66018 PR0("%s geometry claims 0 sectors per track", device_path); 24961991Sheppo return (EIO); 24971991Sheppo } 24981991Sheppo if (vd->dk_geom.dkg_nhead == 0) { 2499*2793Slm66018 PR0("%s geometry claims 0 heads", device_path); 25001991Sheppo return (EIO); 25011991Sheppo } 25021991Sheppo vd->dk_geom.dkg_ncyl = 25032410Slm66018 vd->vdisk_size/vd->dk_geom.dkg_nsect/vd->dk_geom.dkg_nhead; 25041991Sheppo vd->dk_geom.dkg_acyl = 0; 25051991Sheppo vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; 25061991Sheppo 25071991Sheppo 25082410Slm66018 /* Initialize vtoc structure for single-slice device */ 25091991Sheppo bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, 25101991Sheppo MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); 25111991Sheppo bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); 25121991Sheppo vd->vtoc.v_nparts = 1; 25131991Sheppo vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; 25141991Sheppo vd->vtoc.v_part[0].p_flag = 0; 25151991Sheppo vd->vtoc.v_part[0].p_start = 0; 25162410Slm66018 vd->vtoc.v_part[0].p_size = vd->vdisk_size; 25171991Sheppo bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, 25181991Sheppo MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel))); 25191991Sheppo 25201991Sheppo 25211991Sheppo return (0); 25221991Sheppo } 25231991Sheppo 25241991Sheppo static int 25252410Slm66018 vds_do_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id, 25261991Sheppo vd_t **vdp) 25271991Sheppo { 25281991Sheppo char tq_name[TASKQ_NAMELEN]; 25292032Slm66018 int status; 25301991Sheppo ddi_iblock_cookie_t iblock = NULL; 25311991Sheppo ldc_attr_t ldc_attr; 25321991Sheppo vd_t *vd; 25331991Sheppo 25341991Sheppo 25351991Sheppo ASSERT(vds != NULL); 25362410Slm66018 ASSERT(device_path != NULL); 25371991Sheppo ASSERT(vdp != NULL); 25382410Slm66018 PR0("Adding vdisk for %s", device_path); 25391991Sheppo 25401991Sheppo if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) { 25411991Sheppo PRN("No memory for virtual disk"); 25421991Sheppo return (EAGAIN); 25431991Sheppo } 25441991Sheppo *vdp = vd; /* assign here so vds_destroy_vd() can cleanup later */ 25451991Sheppo vd->vds = vds; 25461991Sheppo 25471991Sheppo 25482032Slm66018 /* Open vdisk and initialize parameters */ 25492410Slm66018 if ((status = vd_setup_vd(device_path, vd)) != 0) 25501991Sheppo return (status); 25511991Sheppo ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 25521991Sheppo PR0("vdisk_type = %s, pseudo = %s, nslices = %u", 25531991Sheppo ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 25541991Sheppo (vd->pseudo ? 
"yes" : "no"), vd->nslices); 25551991Sheppo 25561991Sheppo 25571991Sheppo /* Initialize locking */ 25581991Sheppo if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED, 25591991Sheppo &iblock) != DDI_SUCCESS) { 25601991Sheppo PRN("Could not get iblock cookie."); 25611991Sheppo return (EIO); 25621991Sheppo } 25631991Sheppo 25641991Sheppo mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock); 25651991Sheppo vd->initialized |= VD_LOCKING; 25661991Sheppo 25671991Sheppo 25682336Snarayan /* Create start and completion task queues for the vdisk */ 25692336Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_startq%lu", id); 25701991Sheppo PR1("tq_name = %s", tq_name); 25712336Snarayan if ((vd->startq = ddi_taskq_create(vds->dip, tq_name, 1, 25721991Sheppo TASKQ_DEFAULTPRI, 0)) == NULL) { 25731991Sheppo PRN("Could not create task queue"); 25741991Sheppo return (EIO); 25751991Sheppo } 25762336Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_completionq%lu", id); 25772336Snarayan PR1("tq_name = %s", tq_name); 25782336Snarayan if ((vd->completionq = ddi_taskq_create(vds->dip, tq_name, 1, 25792336Snarayan TASKQ_DEFAULTPRI, 0)) == NULL) { 25802336Snarayan PRN("Could not create task queue"); 25812336Snarayan return (EIO); 25822336Snarayan } 25832336Snarayan vd->enabled = 1; /* before callback can dispatch to startq */ 25841991Sheppo 25851991Sheppo 25861991Sheppo /* Bring up LDC */ 25871991Sheppo ldc_attr.devclass = LDC_DEV_BLK_SVC; 25881991Sheppo ldc_attr.instance = ddi_get_instance(vds->dip); 25891991Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; 25902410Slm66018 ldc_attr.mtu = VD_LDC_MTU; 25911991Sheppo if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) { 2592*2793Slm66018 PR0("ldc_init(%lu) = errno %d", ldc_id, status); 25931991Sheppo return (status); 25941991Sheppo } 25951991Sheppo vd->initialized |= VD_LDC; 25961991Sheppo 25971991Sheppo if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events, 25981991Sheppo (caddr_t)vd)) != 0) { 2599*2793Slm66018 PR0("ldc_reg_callback() returned errno %d", status); 26001991Sheppo return (status); 26011991Sheppo } 26021991Sheppo 26031991Sheppo if ((status = ldc_open(vd->ldc_handle)) != 0) { 2604*2793Slm66018 PR0("ldc_open() returned errno %d", status); 26051991Sheppo return (status); 26061991Sheppo } 26071991Sheppo 2608*2793Slm66018 if ((status = ldc_up(vd->ldc_handle)) != 0) { 2609*2793Slm66018 PRN("ldc_up() returned errno %d", status); 2610*2793Slm66018 } 2611*2793Slm66018 26122531Snarayan /* Allocate the inband task memory handle */ 26132531Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, &(vd->inband_task.mhdl)); 26142531Snarayan if (status) { 26152531Snarayan PRN("ldc_mem_alloc_handle() returned err %d ", status); 26162531Snarayan return (ENXIO); 26172531Snarayan } 26181991Sheppo 26191991Sheppo /* Add the successfully-initialized vdisk to the server's table */ 26201991Sheppo if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) { 26211991Sheppo PRN("Error adding vdisk ID %lu to table", id); 26221991Sheppo return (EIO); 26231991Sheppo } 26241991Sheppo 2625*2793Slm66018 /* Allocate the staging buffer */ 2626*2793Slm66018 vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 2627*2793Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 2628*2793Slm66018 2629*2793Slm66018 /* store initial state */ 2630*2793Slm66018 vd->state = VD_STATE_INIT; 2631*2793Slm66018 26321991Sheppo return (0); 26331991Sheppo } 26341991Sheppo 2635*2793Slm66018 static void 2636*2793Slm66018 vd_free_dring_task(vd_t 
*vdp) 2637*2793Slm66018 { 2638*2793Slm66018 if (vdp->dring_task != NULL) { 2639*2793Slm66018 ASSERT(vdp->dring_len != 0); 2640*2793Slm66018 /* Free all dring_task memory handles */ 2641*2793Slm66018 for (int i = 0; i < vdp->dring_len; i++) { 2642*2793Slm66018 (void) ldc_mem_free_handle(vdp->dring_task[i].mhdl); 2643*2793Slm66018 kmem_free(vdp->dring_task[i].msg, vdp->max_msglen); 2644*2793Slm66018 vdp->dring_task[i].msg = NULL; 2645*2793Slm66018 } 2646*2793Slm66018 kmem_free(vdp->dring_task, 2647*2793Slm66018 (sizeof (*vdp->dring_task)) * vdp->dring_len); 2648*2793Slm66018 vdp->dring_task = NULL; 2649*2793Slm66018 } 2650*2793Slm66018 } 2651*2793Slm66018 26521991Sheppo /* 26531991Sheppo * Destroy the state associated with a virtual disk 26541991Sheppo */ 26551991Sheppo static void 26561991Sheppo vds_destroy_vd(void *arg) 26571991Sheppo { 26581991Sheppo vd_t *vd = (vd_t *)arg; 26591991Sheppo 26601991Sheppo 26611991Sheppo if (vd == NULL) 26621991Sheppo return; 26631991Sheppo 26642336Snarayan PR0("Destroying vdisk state"); 26652336Snarayan 26662531Snarayan if (vd->dk_efi.dki_data != NULL) 26672531Snarayan kmem_free(vd->dk_efi.dki_data, vd->dk_efi.dki_length); 26682531Snarayan 26691991Sheppo /* Disable queuing requests for the vdisk */ 26701991Sheppo if (vd->initialized & VD_LOCKING) { 26711991Sheppo mutex_enter(&vd->lock); 26721991Sheppo vd->enabled = 0; 26731991Sheppo mutex_exit(&vd->lock); 26741991Sheppo } 26751991Sheppo 26762336Snarayan /* Drain and destroy start queue (*before* destroying completionq) */ 26772336Snarayan if (vd->startq != NULL) 26782336Snarayan ddi_taskq_destroy(vd->startq); /* waits for queued tasks */ 26792336Snarayan 26802336Snarayan /* Drain and destroy completion queue (*before* shutting down LDC) */ 26812336Snarayan if (vd->completionq != NULL) 26822336Snarayan ddi_taskq_destroy(vd->completionq); /* waits for tasks */ 26832336Snarayan 2684*2793Slm66018 vd_free_dring_task(vd); 2685*2793Slm66018 2686*2793Slm66018 /* Free the staging buffer for msgs */ 2687*2793Slm66018 if (vd->vio_msgp != NULL) { 2688*2793Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 2689*2793Slm66018 vd->vio_msgp = NULL; 2690*2793Slm66018 } 2691*2793Slm66018 2692*2793Slm66018 /* Free the inband message buffer */ 2693*2793Slm66018 if (vd->inband_task.msg != NULL) { 2694*2793Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 2695*2793Slm66018 vd->inband_task.msg = NULL; 26962336Snarayan } 26971991Sheppo 26982531Snarayan /* Free the inband task memory handle */ 26992531Snarayan (void) ldc_mem_free_handle(vd->inband_task.mhdl); 27002531Snarayan 27011991Sheppo /* Shut down LDC */ 27021991Sheppo if (vd->initialized & VD_LDC) { 27031991Sheppo if (vd->initialized & VD_DRING) 27041991Sheppo (void) ldc_mem_dring_unmap(vd->dring_handle); 27051991Sheppo (void) ldc_unreg_callback(vd->ldc_handle); 27061991Sheppo (void) ldc_close(vd->ldc_handle); 27071991Sheppo (void) ldc_fini(vd->ldc_handle); 27081991Sheppo } 27091991Sheppo 27101991Sheppo /* Close any open backing-device slices */ 27111991Sheppo for (uint_t slice = 0; slice < vd->nslices; slice++) { 27121991Sheppo if (vd->ldi_handle[slice] != NULL) { 27131991Sheppo PR0("Closing slice %u", slice); 27141991Sheppo (void) ldi_close(vd->ldi_handle[slice], 27152531Snarayan vd_open_flags | FNDELAY, kcred); 27161991Sheppo } 27171991Sheppo } 27181991Sheppo 27191991Sheppo /* Free lock */ 27201991Sheppo if (vd->initialized & VD_LOCKING) 27211991Sheppo mutex_destroy(&vd->lock); 27221991Sheppo 27231991Sheppo /* Finally, free the vdisk structure itself */ 
27241991Sheppo kmem_free(vd, sizeof (*vd)); 27251991Sheppo } 27261991Sheppo 27271991Sheppo static int 27282410Slm66018 vds_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id) 27291991Sheppo { 27301991Sheppo int status; 27311991Sheppo vd_t *vd = NULL; 27321991Sheppo 27331991Sheppo 27342410Slm66018 if ((status = vds_do_init_vd(vds, id, device_path, ldc_id, &vd)) != 0) 27351991Sheppo vds_destroy_vd(vd); 27361991Sheppo 27371991Sheppo return (status); 27381991Sheppo } 27391991Sheppo 27401991Sheppo static int 27411991Sheppo vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel, 27421991Sheppo uint64_t *ldc_id) 27431991Sheppo { 27441991Sheppo int num_channels; 27451991Sheppo 27461991Sheppo 27471991Sheppo /* Look for channel endpoint child(ren) of the vdisk MD node */ 27481991Sheppo if ((num_channels = md_scan_dag(md, vd_node, 27491991Sheppo md_find_name(md, VD_CHANNEL_ENDPOINT), 27501991Sheppo md_find_name(md, "fwd"), channel)) <= 0) { 27511991Sheppo PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT); 27521991Sheppo return (-1); 27531991Sheppo } 27541991Sheppo 27551991Sheppo /* Get the "id" value for the first channel endpoint node */ 27561991Sheppo if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) { 27571991Sheppo PRN("No \"%s\" property found for \"%s\" of vdisk", 27581991Sheppo VD_ID_PROP, VD_CHANNEL_ENDPOINT); 27591991Sheppo return (-1); 27601991Sheppo } 27611991Sheppo 27621991Sheppo if (num_channels > 1) { 27631991Sheppo PRN("Using ID of first of multiple channels for this vdisk"); 27641991Sheppo } 27651991Sheppo 27661991Sheppo return (0); 27671991Sheppo } 27681991Sheppo 27691991Sheppo static int 27701991Sheppo vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id) 27711991Sheppo { 27721991Sheppo int num_nodes, status; 27731991Sheppo size_t size; 27741991Sheppo mde_cookie_t *channel; 27751991Sheppo 27761991Sheppo 27771991Sheppo if ((num_nodes = md_node_count(md)) <= 0) { 27781991Sheppo PRN("Invalid node count in Machine Description subtree"); 27791991Sheppo return (-1); 27801991Sheppo } 27811991Sheppo size = num_nodes*(sizeof (*channel)); 27821991Sheppo channel = kmem_zalloc(size, KM_SLEEP); 27831991Sheppo status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id); 27841991Sheppo kmem_free(channel, size); 27851991Sheppo 27861991Sheppo return (status); 27871991Sheppo } 27881991Sheppo 27891991Sheppo static void 27901991Sheppo vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 27911991Sheppo { 27922410Slm66018 char *device_path = NULL; 27931991Sheppo uint64_t id = 0, ldc_id = 0; 27941991Sheppo 27951991Sheppo 27961991Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 27971991Sheppo PRN("Error getting vdisk \"%s\"", VD_ID_PROP); 27981991Sheppo return; 27991991Sheppo } 28001991Sheppo PR0("Adding vdisk ID %lu", id); 28011991Sheppo if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP, 28022410Slm66018 &device_path) != 0) { 28031991Sheppo PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 28041991Sheppo return; 28051991Sheppo } 28061991Sheppo 28071991Sheppo if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) { 28081991Sheppo PRN("Error getting LDC ID for vdisk %lu", id); 28091991Sheppo return; 28101991Sheppo } 28111991Sheppo 28122410Slm66018 if (vds_init_vd(vds, id, device_path, ldc_id) != 0) { 28131991Sheppo PRN("Failed to add vdisk ID %lu", id); 28141991Sheppo return; 28151991Sheppo } 28161991Sheppo } 28171991Sheppo 28181991Sheppo static void 28191991Sheppo vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 
28181991Sheppo static void
28191991Sheppo vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node)
28201991Sheppo {
28211991Sheppo     uint64_t id = 0;
28221991Sheppo
28231991Sheppo
28241991Sheppo     if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) {
28251991Sheppo         PRN("Unable to get \"%s\" property from vdisk's MD node",
28261991Sheppo             VD_ID_PROP);
28271991Sheppo         return;
28281991Sheppo     }
28291991Sheppo     PR0("Removing vdisk ID %lu", id);
28301991Sheppo     if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0)
28311991Sheppo         PRN("No vdisk entry found for vdisk ID %lu", id);
28321991Sheppo }
28331991Sheppo
28341991Sheppo static void
28351991Sheppo vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node,
28361991Sheppo     md_t *curr_md, mde_cookie_t curr_vd_node)
28371991Sheppo {
28381991Sheppo     char *curr_dev, *prev_dev;
28391991Sheppo     uint64_t curr_id = 0, curr_ldc_id = 0;
28401991Sheppo     uint64_t prev_id = 0, prev_ldc_id = 0;
28411991Sheppo     size_t len;
28421991Sheppo
28431991Sheppo
28441991Sheppo     /* Validate that vdisk ID has not changed */
28451991Sheppo     if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) {
28461991Sheppo         PRN("Error getting previous vdisk \"%s\" property",
28471991Sheppo             VD_ID_PROP);
28481991Sheppo         return;
28491991Sheppo     }
28501991Sheppo     if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) {
28511991Sheppo         PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP);
28521991Sheppo         return;
28531991Sheppo     }
28541991Sheppo     if (curr_id != prev_id) {
28551991Sheppo         PRN("Not changing vdisk: ID changed from %lu to %lu",
28561991Sheppo             prev_id, curr_id);
28571991Sheppo         return;
28581991Sheppo     }
28591991Sheppo
28601991Sheppo     /* Validate that LDC ID has not changed */
28611991Sheppo     if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) {
28621991Sheppo         PRN("Error getting LDC ID for vdisk %lu", prev_id);
28631991Sheppo         return;
28641991Sheppo     }
28651991Sheppo
28661991Sheppo     if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) {
28671991Sheppo         PRN("Error getting LDC ID for vdisk %lu", curr_id);
28681991Sheppo         return;
28691991Sheppo     }
28701991Sheppo     if (curr_ldc_id != prev_ldc_id) {
28712032Slm66018         _NOTE(NOTREACHED);	/* lint is confused */
28721991Sheppo         PRN("Not changing vdisk: "
28731991Sheppo             "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id);
28741991Sheppo         return;
28751991Sheppo     }
28761991Sheppo
28771991Sheppo     /* Determine whether device path has changed */
28781991Sheppo     if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP,
28791991Sheppo         &prev_dev) != 0) {
28801991Sheppo         PRN("Error getting previous vdisk \"%s\"",
28811991Sheppo             VD_BLOCK_DEVICE_PROP);
28821991Sheppo         return;
28831991Sheppo     }
28841991Sheppo     if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP,
28851991Sheppo         &curr_dev) != 0) {
28861991Sheppo         PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP);
28871991Sheppo         return;
28881991Sheppo     }
28891991Sheppo     if (((len = strlen(curr_dev)) == strlen(prev_dev)) &&
28901991Sheppo         (strncmp(curr_dev, prev_dev, len) == 0))
28911991Sheppo         return;	/* no relevant (supported) change */
28921991Sheppo
28931991Sheppo     PR0("Changing vdisk ID %lu", prev_id);
2894*2793Slm66018
28951991Sheppo     /* Remove old state, which will close vdisk and reset */
28961991Sheppo     if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0)
28971991Sheppo         PRN("No entry found for vdisk ID %lu", prev_id);
2898*2793Slm66018
28991991Sheppo     /* Re-initialize vdisk with new state */
29001991Sheppo     if (vds_init_vd(vds, curr_id, curr_dev, curr_ldc_id) != 0) {
29011991Sheppo         PRN("Failed to change vdisk ID %lu", curr_id);
29021991Sheppo         return;
29031991Sheppo     }
29041991Sheppo }
29051991Sheppo
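/*
 * Editor's note on the MDEG callback that follows: it reconciles this vds
 * instance with the latest machine description.  Ports reported as removed
 * are torn down via vds_remove_vd(), ports present in both the previous and
 * current MDs go to vds_change_vd() (which, per the code above, only
 * supports a change of backing device path and handles it by destroying and
 * re-creating the vdisk), and newly added ports are instantiated via
 * vds_add_vd().
 */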
%lu", curr_id); 29021991Sheppo return; 29031991Sheppo } 29041991Sheppo } 29051991Sheppo 29061991Sheppo static int 29071991Sheppo vds_process_md(void *arg, mdeg_result_t *md) 29081991Sheppo { 29091991Sheppo int i; 29101991Sheppo vds_t *vds = arg; 29111991Sheppo 29121991Sheppo 29131991Sheppo if (md == NULL) 29141991Sheppo return (MDEG_FAILURE); 29151991Sheppo ASSERT(vds != NULL); 29161991Sheppo 29171991Sheppo for (i = 0; i < md->removed.nelem; i++) 29181991Sheppo vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]); 29191991Sheppo for (i = 0; i < md->match_curr.nelem; i++) 29201991Sheppo vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i], 29211991Sheppo md->match_curr.mdp, md->match_curr.mdep[i]); 29221991Sheppo for (i = 0; i < md->added.nelem; i++) 29231991Sheppo vds_add_vd(vds, md->added.mdp, md->added.mdep[i]); 29241991Sheppo 29251991Sheppo return (MDEG_SUCCESS); 29261991Sheppo } 29271991Sheppo 29281991Sheppo static int 29291991Sheppo vds_do_attach(dev_info_t *dip) 29301991Sheppo { 29311991Sheppo static char reg_prop[] = "reg"; /* devinfo ID prop */ 29321991Sheppo 29331991Sheppo /* MDEG specification for a (particular) vds node */ 29341991Sheppo static mdeg_prop_spec_t vds_prop_spec[] = { 29351991Sheppo {MDET_PROP_STR, "name", {VDS_NAME}}, 29361991Sheppo {MDET_PROP_VAL, "cfg-handle", {0}}, 29371991Sheppo {MDET_LIST_END, NULL, {0}}}; 29381991Sheppo static mdeg_node_spec_t vds_spec = {"virtual-device", vds_prop_spec}; 29391991Sheppo 29401991Sheppo /* MDEG specification for matching a vd node */ 29411991Sheppo static md_prop_match_t vd_prop_spec[] = { 29421991Sheppo {MDET_PROP_VAL, VD_ID_PROP}, 29431991Sheppo {MDET_LIST_END, NULL}}; 29441991Sheppo static mdeg_node_match_t vd_spec = {"virtual-device-port", 29451991Sheppo vd_prop_spec}; 29461991Sheppo 29471991Sheppo int status; 29481991Sheppo uint64_t cfg_handle; 29491991Sheppo minor_t instance = ddi_get_instance(dip); 29501991Sheppo vds_t *vds; 29511991Sheppo 29521991Sheppo 29531991Sheppo /* 29541991Sheppo * The "cfg-handle" property of a vds node in an MD contains the MD's 29551991Sheppo * notion of "instance", or unique identifier, for that node; OBP 29561991Sheppo * stores the value of the "cfg-handle" MD property as the value of 29571991Sheppo * the "reg" property on the node in the device tree it builds from 29581991Sheppo * the MD and passes to Solaris. Thus, we look up the devinfo node's 29591991Sheppo * "reg" property value to uniquely identify this device instance when 29601991Sheppo * registering with the MD event-generation framework. If the "reg" 29611991Sheppo * property cannot be found, the device tree state is presumably so 29621991Sheppo * broken that there is no point in continuing. 
29281991Sheppo static int
29291991Sheppo vds_do_attach(dev_info_t *dip)
29301991Sheppo {
29311991Sheppo     static char reg_prop[] = "reg";	/* devinfo ID prop */
29321991Sheppo
29331991Sheppo     /* MDEG specification for a (particular) vds node */
29341991Sheppo     static mdeg_prop_spec_t vds_prop_spec[] = {
29351991Sheppo         {MDET_PROP_STR, "name", {VDS_NAME}},
29361991Sheppo         {MDET_PROP_VAL, "cfg-handle", {0}},
29371991Sheppo         {MDET_LIST_END, NULL, {0}}};
29381991Sheppo     static mdeg_node_spec_t vds_spec = {"virtual-device", vds_prop_spec};
29391991Sheppo
29401991Sheppo     /* MDEG specification for matching a vd node */
29411991Sheppo     static md_prop_match_t vd_prop_spec[] = {
29421991Sheppo         {MDET_PROP_VAL, VD_ID_PROP},
29431991Sheppo         {MDET_LIST_END, NULL}};
29441991Sheppo     static mdeg_node_match_t vd_spec = {"virtual-device-port",
29451991Sheppo         vd_prop_spec};
29461991Sheppo
29471991Sheppo     int status;
29481991Sheppo     uint64_t cfg_handle;
29491991Sheppo     minor_t instance = ddi_get_instance(dip);
29501991Sheppo     vds_t *vds;
29511991Sheppo
29521991Sheppo
29531991Sheppo     /*
29541991Sheppo      * The "cfg-handle" property of a vds node in an MD contains the MD's
29551991Sheppo      * notion of "instance", or unique identifier, for that node; OBP
29561991Sheppo      * stores the value of the "cfg-handle" MD property as the value of
29571991Sheppo      * the "reg" property on the node in the device tree it builds from
29581991Sheppo      * the MD and passes to Solaris.  Thus, we look up the devinfo node's
29591991Sheppo      * "reg" property value to uniquely identify this device instance when
29601991Sheppo      * registering with the MD event-generation framework.  If the "reg"
29611991Sheppo      * property cannot be found, the device tree state is presumably so
29621991Sheppo      * broken that there is no point in continuing.
29631991Sheppo      */
29641991Sheppo     if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, reg_prop)) {
29651991Sheppo         PRN("vds \"%s\" property does not exist", reg_prop);
29661991Sheppo         return (DDI_FAILURE);
29671991Sheppo     }
29681991Sheppo
29691991Sheppo     /* Get the MD instance for later MDEG registration */
29701991Sheppo     cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
29711991Sheppo         reg_prop, -1);
29721991Sheppo
29731991Sheppo     if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) {
29741991Sheppo         PRN("Could not allocate state for instance %u", instance);
29751991Sheppo         return (DDI_FAILURE);
29761991Sheppo     }
29771991Sheppo
29781991Sheppo     if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) {
29791991Sheppo         PRN("Could not get state for instance %u", instance);
29801991Sheppo         ddi_soft_state_free(vds_state, instance);
29811991Sheppo         return (DDI_FAILURE);
29821991Sheppo     }
29831991Sheppo
29841991Sheppo
29851991Sheppo     vds->dip = dip;
29861991Sheppo     vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS,
29871991Sheppo         vds_destroy_vd,
29881991Sheppo         sizeof (void *));
29891991Sheppo     ASSERT(vds->vd_table != NULL);
29901991Sheppo
29911991Sheppo     if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) {
29921991Sheppo         PRN("ldi_ident_from_dip() returned errno %d", status);
29931991Sheppo         return (DDI_FAILURE);
29941991Sheppo     }
29951991Sheppo     vds->initialized |= VDS_LDI;
29961991Sheppo
29971991Sheppo     /* Register for MD updates */
29981991Sheppo     vds_prop_spec[1].ps_val = cfg_handle;
29991991Sheppo     if (mdeg_register(&vds_spec, &vd_spec, vds_process_md, vds,
30001991Sheppo         &vds->mdeg) != MDEG_SUCCESS) {
30011991Sheppo         PRN("Unable to register for MD updates");
30021991Sheppo         return (DDI_FAILURE);
30031991Sheppo     }
30041991Sheppo     vds->initialized |= VDS_MDEG;
30051991Sheppo
30062032Slm66018     /* Prevent auto-detaching so driver is available whenever MD changes */
30072032Slm66018     if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) !=
30082032Slm66018         DDI_PROP_SUCCESS) {
30092032Slm66018         PRN("failed to set \"%s\" property for instance %u",
30102032Slm66018             DDI_NO_AUTODETACH, instance);
30112032Slm66018     }
30122032Slm66018
30131991Sheppo     ddi_report_dev(dip);
30141991Sheppo     return (DDI_SUCCESS);
30151991Sheppo }
30161991Sheppo
30171991Sheppo static int
30181991Sheppo vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
30191991Sheppo {
30201991Sheppo     int status;
30211991Sheppo
30221991Sheppo     switch (cmd) {
30231991Sheppo     case DDI_ATTACH:
30242336Snarayan         PR0("Attaching");
30251991Sheppo         if ((status = vds_do_attach(dip)) != DDI_SUCCESS)
30261991Sheppo             (void) vds_detach(dip, DDI_DETACH);
30271991Sheppo         return (status);
30281991Sheppo     case DDI_RESUME:
30292336Snarayan         PR0("No action required for DDI_RESUME");
30301991Sheppo         return (DDI_SUCCESS);
30311991Sheppo     default:
30321991Sheppo         return (DDI_FAILURE);
30331991Sheppo     }
30341991Sheppo }
30351991Sheppo
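/*
 * Editor's note on the loadable-module plumbing below: vds exports no
 * character or block entry points of its own (devo_cb_ops is NULL); it
 * exists to attach, register with MDEG, and serve vdisk clients over LDC,
 * so dev_ops wires up little more than attach/detach, and modldrv/modlinkage
 * simply expose the driver to mod_install() in _init().
 */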
&mod_driverops, 30521991Sheppo "virtual disk server v%I%", 30531991Sheppo &vds_ops, 30541991Sheppo }; 30551991Sheppo 30561991Sheppo static struct modlinkage modlinkage = { 30571991Sheppo MODREV_1, 30581991Sheppo &modldrv, 30591991Sheppo NULL 30601991Sheppo }; 30611991Sheppo 30621991Sheppo 30631991Sheppo int 30641991Sheppo _init(void) 30651991Sheppo { 30661991Sheppo int i, status; 30671991Sheppo 30682336Snarayan 30691991Sheppo if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0) 30701991Sheppo return (status); 30711991Sheppo if ((status = mod_install(&modlinkage)) != 0) { 30721991Sheppo ddi_soft_state_fini(&vds_state); 30731991Sheppo return (status); 30741991Sheppo } 30751991Sheppo 30761991Sheppo /* Fill in the bit-mask of server-supported operations */ 30771991Sheppo for (i = 0; i < vds_noperations; i++) 30781991Sheppo vds_operations |= 1 << (vds_operation[i].operation - 1); 30791991Sheppo 30801991Sheppo return (0); 30811991Sheppo } 30821991Sheppo 30831991Sheppo int 30841991Sheppo _info(struct modinfo *modinfop) 30851991Sheppo { 30861991Sheppo return (mod_info(&modlinkage, modinfop)); 30871991Sheppo } 30881991Sheppo 30891991Sheppo int 30901991Sheppo _fini(void) 30911991Sheppo { 30921991Sheppo int status; 30931991Sheppo 30942336Snarayan 30951991Sheppo if ((status = mod_remove(&modlinkage)) != 0) 30961991Sheppo return (status); 30971991Sheppo ddi_soft_state_fini(&vds_state); 30981991Sheppo return (0); 30991991Sheppo } 3100