/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
251991Sheppo */ 261991Sheppo 271991Sheppo #pragma ident "%Z%%M% %I% %E% SMI" 281991Sheppo 291991Sheppo /* 301991Sheppo * Virtual disk server 311991Sheppo */ 321991Sheppo 331991Sheppo 341991Sheppo #include <sys/types.h> 351991Sheppo #include <sys/conf.h> 362531Snarayan #include <sys/crc32.h> 371991Sheppo #include <sys/ddi.h> 381991Sheppo #include <sys/dkio.h> 391991Sheppo #include <sys/file.h> 401991Sheppo #include <sys/mdeg.h> 411991Sheppo #include <sys/modhash.h> 421991Sheppo #include <sys/note.h> 431991Sheppo #include <sys/pathname.h> 444838Slm66018 #include <sys/sdt.h> 451991Sheppo #include <sys/sunddi.h> 461991Sheppo #include <sys/sunldi.h> 471991Sheppo #include <sys/sysmacros.h> 481991Sheppo #include <sys/vio_common.h> 491991Sheppo #include <sys/vdsk_mailbox.h> 501991Sheppo #include <sys/vdsk_common.h> 511991Sheppo #include <sys/vtoc.h> 523401Snarayan #include <sys/vfs.h> 533401Snarayan #include <sys/stat.h> 544696Sachartre #include <sys/scsi/impl/uscsi.h> 553782Sachartre #include <vm/seg_map.h> 561991Sheppo 571991Sheppo /* Virtual disk server initialization flags */ 582336Snarayan #define VDS_LDI 0x01 592336Snarayan #define VDS_MDEG 0x02 601991Sheppo 611991Sheppo /* Virtual disk server tunable parameters */ 623401Snarayan #define VDS_RETRIES 5 633401Snarayan #define VDS_LDC_DELAY 1000 /* 1 msecs */ 643401Snarayan #define VDS_DEV_DELAY 10000000 /* 10 secs */ 651991Sheppo #define VDS_NCHAINS 32 661991Sheppo 671991Sheppo /* Identification parameters for MD, synthetic dkio(7i) structures, etc. 
*/ 681991Sheppo #define VDS_NAME "virtual-disk-server" 691991Sheppo 701991Sheppo #define VD_NAME "vd" 711991Sheppo #define VD_VOLUME_NAME "vdisk" 721991Sheppo #define VD_ASCIILABEL "Virtual Disk" 731991Sheppo 741991Sheppo #define VD_CHANNEL_ENDPOINT "channel-endpoint" 751991Sheppo #define VD_ID_PROP "id" 761991Sheppo #define VD_BLOCK_DEVICE_PROP "vds-block-device" 775081Sachartre #define VD_BLOCK_DEVICE_OPTS "vds-block-device-opts" 783297Ssb155480 #define VD_REG_PROP "reg" 791991Sheppo 801991Sheppo /* Virtual disk initialization flags */ 813401Snarayan #define VD_DISK_READY 0x01 823401Snarayan #define VD_LOCKING 0x02 833401Snarayan #define VD_LDC 0x04 843401Snarayan #define VD_DRING 0x08 853401Snarayan #define VD_SID 0x10 863401Snarayan #define VD_SEQ_NUM 0x20 875081Sachartre #define VD_SETUP_ERROR 0x40 881991Sheppo 894076Sachartre /* Flags for writing to a vdisk which is a file */ 904076Sachartre #define VD_FILE_WRITE_FLAGS SM_ASYNC 914076Sachartre 924696Sachartre /* Number of backup labels */ 934696Sachartre #define VD_FILE_NUM_BACKUP 5 944696Sachartre 954696Sachartre /* Timeout for SCSI I/O */ 964696Sachartre #define VD_SCSI_RDWR_TIMEOUT 30 /* 30 secs */ 974696Sachartre 981991Sheppo /* 991991Sheppo * By Solaris convention, slice/partition 2 represents the entire disk; 1001991Sheppo * unfortunately, this convention does not appear to be codified. 1011991Sheppo */ 1021991Sheppo #define VD_ENTIRE_DISK_SLICE 2 1031991Sheppo 1041991Sheppo /* Return a cpp token as a string */ 1051991Sheppo #define STRINGIZE(token) #token 1061991Sheppo 1071991Sheppo /* 1081991Sheppo * Print a message prefixed with the current function name to the message log 1091991Sheppo * (and optionally to the console for verbose boots); these macros use cpp's 1101991Sheppo * concatenation of string literals and C99 variable-length-argument-list 1111991Sheppo * macros 1121991Sheppo */ 1131991Sheppo #define PRN(...) _PRN("?%s(): "__VA_ARGS__, "") 1141991Sheppo #define _PRN(format, ...) 
\ 1151991Sheppo cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__) 1161991Sheppo 1171991Sheppo /* Return a pointer to the "i"th vdisk dring element */ 1181991Sheppo #define VD_DRING_ELEM(i) ((vd_dring_entry_t *)(void *) \ 1191991Sheppo (vd->dring + (i)*vd->descriptor_size)) 1201991Sheppo 1211991Sheppo /* Return the virtual disk client's type as a string (for use in messages) */ 1221991Sheppo #define VD_CLIENT(vd) \ 1231991Sheppo (((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" : \ 1241991Sheppo (((vd)->xfer_mode == VIO_DRING_MODE) ? "dring client" : \ 1251991Sheppo (((vd)->xfer_mode == 0) ? "null client" : \ 1261991Sheppo "unsupported client"))) 1271991Sheppo 1283782Sachartre /* Read disk label from a disk on file */ 1293782Sachartre #define VD_FILE_LABEL_READ(vd, labelp) \ 1304696Sachartre vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)labelp, \ 1313782Sachartre 0, sizeof (struct dk_label)) 1323782Sachartre 1333782Sachartre /* Write disk label to a disk on file */ 1343782Sachartre #define VD_FILE_LABEL_WRITE(vd, labelp) \ 1354696Sachartre vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)labelp, \ 1363782Sachartre 0, sizeof (struct dk_label)) 1373782Sachartre 1383297Ssb155480 /* 1393297Ssb155480 * Specification of an MD node passed to the MDEG to filter any 1403297Ssb155480 * 'vport' nodes that do not belong to the specified node. This 1413297Ssb155480 * template is copied for each vds instance and filled in with 1423297Ssb155480 * the appropriate 'cfg-handle' value before being passed to the MDEG. 
1433297Ssb155480 */ 1443297Ssb155480 static mdeg_prop_spec_t vds_prop_template[] = { 1453297Ssb155480 { MDET_PROP_STR, "name", VDS_NAME }, 1463297Ssb155480 { MDET_PROP_VAL, "cfg-handle", NULL }, 1473297Ssb155480 { MDET_LIST_END, NULL, NULL } 1483297Ssb155480 }; 1493297Ssb155480 1503297Ssb155480 #define VDS_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 1513297Ssb155480 1523297Ssb155480 /* 1533297Ssb155480 * Matching criteria passed to the MDEG to register interest 1543297Ssb155480 * in changes to 'virtual-device-port' nodes identified by their 1553297Ssb155480 * 'id' property. 1563297Ssb155480 */ 1573297Ssb155480 static md_prop_match_t vd_prop_match[] = { 1583297Ssb155480 { MDET_PROP_VAL, VD_ID_PROP }, 1593297Ssb155480 { MDET_LIST_END, NULL } 1603297Ssb155480 }; 1613297Ssb155480 1623297Ssb155480 static mdeg_node_match_t vd_match = {"virtual-device-port", 1633297Ssb155480 vd_prop_match}; 1643297Ssb155480 1655081Sachartre /* 1665081Sachartre * Options for the VD_BLOCK_DEVICE_OPTS property. 
1675081Sachartre */ 1685081Sachartre #define VD_OPT_RDONLY 0x1 /* read-only */ 1695081Sachartre #define VD_OPT_SLICE 0x2 /* single slice */ 1705081Sachartre #define VD_OPT_EXCLUSIVE 0x4 /* exclusive access */ 1715081Sachartre 1725081Sachartre #define VD_OPTION_NLEN 128 1735081Sachartre 1745081Sachartre typedef struct vd_option { 1755081Sachartre char vdo_name[VD_OPTION_NLEN]; 1765081Sachartre uint64_t vdo_value; 1775081Sachartre } vd_option_t; 1785081Sachartre 1795081Sachartre vd_option_t vd_bdev_options[] = { 1805081Sachartre { "ro", VD_OPT_RDONLY }, 1815081Sachartre { "slice", VD_OPT_SLICE }, 1825081Sachartre { "excl", VD_OPT_EXCLUSIVE } 1835081Sachartre }; 1845081Sachartre 1851991Sheppo /* Debugging macros */ 1861991Sheppo #ifdef DEBUG 1872793Slm66018 1882793Slm66018 static int vd_msglevel = 0; 1892793Slm66018 1901991Sheppo #define PR0 if (vd_msglevel > 0) PRN 1911991Sheppo #define PR1 if (vd_msglevel > 1) PRN 1921991Sheppo #define PR2 if (vd_msglevel > 2) PRN 1931991Sheppo 1941991Sheppo #define VD_DUMP_DRING_ELEM(elem) \ 1953401Snarayan PR0("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n", \ 1961991Sheppo elem->hdr.dstate, \ 1971991Sheppo elem->payload.operation, \ 1981991Sheppo elem->payload.status, \ 1991991Sheppo elem->payload.nbytes, \ 2001991Sheppo elem->payload.addr, \ 2011991Sheppo elem->payload.ncookies); 2021991Sheppo 2032793Slm66018 char * 2042793Slm66018 vd_decode_state(int state) 2052793Slm66018 { 2062793Slm66018 char *str; 2072793Slm66018 2082793Slm66018 #define CASE_STATE(_s) case _s: str = #_s; break; 2092793Slm66018 2102793Slm66018 switch (state) { 2112793Slm66018 CASE_STATE(VD_STATE_INIT) 2122793Slm66018 CASE_STATE(VD_STATE_VER) 2132793Slm66018 CASE_STATE(VD_STATE_ATTR) 2142793Slm66018 CASE_STATE(VD_STATE_DRING) 2152793Slm66018 CASE_STATE(VD_STATE_RDX) 2162793Slm66018 CASE_STATE(VD_STATE_DATA) 2172793Slm66018 default: str = "unknown"; break; 2182793Slm66018 } 2192793Slm66018 2202793Slm66018 #undef CASE_STATE 2212793Slm66018 2222793Slm66018 
return (str); 2232793Slm66018 } 2242793Slm66018 2252793Slm66018 void 2262793Slm66018 vd_decode_tag(vio_msg_t *msg) 2272793Slm66018 { 2282793Slm66018 char *tstr, *sstr, *estr; 2292793Slm66018 2302793Slm66018 #define CASE_TYPE(_s) case _s: tstr = #_s; break; 2312793Slm66018 2322793Slm66018 switch (msg->tag.vio_msgtype) { 2332793Slm66018 CASE_TYPE(VIO_TYPE_CTRL) 2342793Slm66018 CASE_TYPE(VIO_TYPE_DATA) 2352793Slm66018 CASE_TYPE(VIO_TYPE_ERR) 2362793Slm66018 default: tstr = "unknown"; break; 2372793Slm66018 } 2382793Slm66018 2392793Slm66018 #undef CASE_TYPE 2402793Slm66018 2412793Slm66018 #define CASE_SUBTYPE(_s) case _s: sstr = #_s; break; 2422793Slm66018 2432793Slm66018 switch (msg->tag.vio_subtype) { 2442793Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_INFO) 2452793Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_ACK) 2462793Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_NACK) 2472793Slm66018 default: sstr = "unknown"; break; 2482793Slm66018 } 2492793Slm66018 2502793Slm66018 #undef CASE_SUBTYPE 2512793Slm66018 2522793Slm66018 #define CASE_ENV(_s) case _s: estr = #_s; break; 2532793Slm66018 2542793Slm66018 switch (msg->tag.vio_subtype_env) { 2552793Slm66018 CASE_ENV(VIO_VER_INFO) 2562793Slm66018 CASE_ENV(VIO_ATTR_INFO) 2572793Slm66018 CASE_ENV(VIO_DRING_REG) 2582793Slm66018 CASE_ENV(VIO_DRING_UNREG) 2592793Slm66018 CASE_ENV(VIO_RDX) 2602793Slm66018 CASE_ENV(VIO_PKT_DATA) 2612793Slm66018 CASE_ENV(VIO_DESC_DATA) 2622793Slm66018 CASE_ENV(VIO_DRING_DATA) 2632793Slm66018 default: estr = "unknown"; break; 2642793Slm66018 } 2652793Slm66018 2662793Slm66018 #undef CASE_ENV 2672793Slm66018 2682793Slm66018 PR1("(%x/%x/%x) message : (%s/%s/%s)", 2692793Slm66018 msg->tag.vio_msgtype, msg->tag.vio_subtype, 2702793Slm66018 msg->tag.vio_subtype_env, tstr, sstr, estr); 2712793Slm66018 } 2722793Slm66018 2731991Sheppo #else /* !DEBUG */ 2742793Slm66018 2751991Sheppo #define PR0(...) 2761991Sheppo #define PR1(...) 2771991Sheppo #define PR2(...) 
2781991Sheppo 2791991Sheppo #define VD_DUMP_DRING_ELEM(elem) 2801991Sheppo 2812793Slm66018 #define vd_decode_state(_s) (NULL) 2822793Slm66018 #define vd_decode_tag(_s) (NULL) 2832793Slm66018 2841991Sheppo #endif /* DEBUG */ 2851991Sheppo 2861991Sheppo 2872336Snarayan /* 2882336Snarayan * Soft state structure for a vds instance 2892336Snarayan */ 2901991Sheppo typedef struct vds { 2911991Sheppo uint_t initialized; /* driver inst initialization flags */ 2921991Sheppo dev_info_t *dip; /* driver inst devinfo pointer */ 2931991Sheppo ldi_ident_t ldi_ident; /* driver's identifier for LDI */ 2941991Sheppo mod_hash_t *vd_table; /* table of virtual disks served */ 2953297Ssb155480 mdeg_node_spec_t *ispecp; /* mdeg node specification */ 2961991Sheppo mdeg_handle_t mdeg; /* handle for MDEG operations */ 2971991Sheppo } vds_t; 2981991Sheppo 2992336Snarayan /* 3002336Snarayan * Types of descriptor-processing tasks 3012336Snarayan */ 3022336Snarayan typedef enum vd_task_type { 3032336Snarayan VD_NONFINAL_RANGE_TASK, /* task for intermediate descriptor in range */ 3042336Snarayan VD_FINAL_RANGE_TASK, /* task for last in a range of descriptors */ 3052336Snarayan } vd_task_type_t; 3062336Snarayan 3072336Snarayan /* 3082336Snarayan * Structure describing the task for processing a descriptor 3092336Snarayan */ 3102336Snarayan typedef struct vd_task { 3112336Snarayan struct vd *vd; /* vd instance task is for */ 3122336Snarayan vd_task_type_t type; /* type of descriptor task */ 3132336Snarayan int index; /* dring elem index for task */ 3142336Snarayan vio_msg_t *msg; /* VIO message task is for */ 3152336Snarayan size_t msglen; /* length of message content */ 3162336Snarayan vd_dring_payload_t *request; /* request task will perform */ 3172336Snarayan struct buf buf; /* buf(9s) for I/O request */ 3182531Snarayan ldc_mem_handle_t mhdl; /* task memory handle */ 3194838Slm66018 int status; /* status of processing task */ 3204838Slm66018 int (*completef)(struct vd_task *task); /* completion 
func ptr */ 3212336Snarayan } vd_task_t; 3222336Snarayan 3232336Snarayan /* 3242336Snarayan * Soft state structure for a virtual disk instance 3252336Snarayan */ 3261991Sheppo typedef struct vd { 3271991Sheppo uint_t initialized; /* vdisk initialization flags */ 3281991Sheppo vds_t *vds; /* server for this vdisk */ 3292336Snarayan ddi_taskq_t *startq; /* queue for I/O start tasks */ 3302336Snarayan ddi_taskq_t *completionq; /* queue for completion tasks */ 3311991Sheppo ldi_handle_t ldi_handle[V_NUMPAR]; /* LDI slice handles */ 3323401Snarayan char device_path[MAXPATHLEN + 1]; /* vdisk device */ 3331991Sheppo dev_t dev[V_NUMPAR]; /* dev numbers for slices */ 3345081Sachartre int open_flags; /* open flags */ 3352410Slm66018 uint_t nslices; /* number of slices */ 3361991Sheppo size_t vdisk_size; /* number of blocks in vdisk */ 3371991Sheppo vd_disk_type_t vdisk_type; /* slice or entire disk */ 3382531Snarayan vd_disk_label_t vdisk_label; /* EFI or VTOC label */ 3392410Slm66018 ushort_t max_xfer_sz; /* max xfer size in DEV_BSIZE */ 3401991Sheppo boolean_t pseudo; /* underlying pseudo dev */ 3413401Snarayan boolean_t file; /* underlying file */ 3423401Snarayan vnode_t *file_vnode; /* file vnode */ 3433401Snarayan size_t file_size; /* file size */ 3444696Sachartre ddi_devid_t file_devid; /* devid for disk image */ 3452531Snarayan struct dk_efi dk_efi; /* synthetic for slice type */ 3461991Sheppo struct dk_geom dk_geom; /* synthetic for slice type */ 3471991Sheppo struct vtoc vtoc; /* synthetic for slice type */ 3481991Sheppo ldc_status_t ldc_state; /* LDC connection state */ 3491991Sheppo ldc_handle_t ldc_handle; /* handle for LDC comm */ 3501991Sheppo size_t max_msglen; /* largest LDC message len */ 3511991Sheppo vd_state_t state; /* client handshake state */ 3521991Sheppo uint8_t xfer_mode; /* transfer mode with client */ 3531991Sheppo uint32_t sid; /* client's session ID */ 3541991Sheppo uint64_t seq_num; /* message sequence number */ 3551991Sheppo uint64_t 
dring_ident; /* identifier of dring */ 3561991Sheppo ldc_dring_handle_t dring_handle; /* handle for dring ops */ 3571991Sheppo uint32_t descriptor_size; /* num bytes in desc */ 3581991Sheppo uint32_t dring_len; /* number of dring elements */ 3591991Sheppo caddr_t dring; /* address of dring */ 3602793Slm66018 caddr_t vio_msgp; /* vio msg staging buffer */ 3612336Snarayan vd_task_t inband_task; /* task for inband descriptor */ 3622336Snarayan vd_task_t *dring_task; /* tasks dring elements */ 3632336Snarayan 3642336Snarayan kmutex_t lock; /* protects variables below */ 3652336Snarayan boolean_t enabled; /* is vdisk enabled? */ 3662336Snarayan boolean_t reset_state; /* reset connection state? */ 3672336Snarayan boolean_t reset_ldc; /* reset LDC channel? */ 3681991Sheppo } vd_t; 3691991Sheppo 3701991Sheppo typedef struct vds_operation { 3712793Slm66018 char *namep; 3721991Sheppo uint8_t operation; 3732336Snarayan int (*start)(vd_task_t *task); 3744838Slm66018 int (*complete)(vd_task_t *task); 3751991Sheppo } vds_operation_t; 3761991Sheppo 3772032Slm66018 typedef struct vd_ioctl { 3782032Slm66018 uint8_t operation; /* vdisk operation */ 3792032Slm66018 const char *operation_name; /* vdisk operation name */ 3802032Slm66018 size_t nbytes; /* size of operation buffer */ 3812032Slm66018 int cmd; /* corresponding ioctl cmd */ 3822032Slm66018 const char *cmd_name; /* ioctl cmd name */ 3832032Slm66018 void *arg; /* ioctl cmd argument */ 3842032Slm66018 /* convert input vd_buf to output ioctl_arg */ 3852032Slm66018 void (*copyin)(void *vd_buf, void *ioctl_arg); 3862032Slm66018 /* convert input ioctl_arg to output vd_buf */ 3872032Slm66018 void (*copyout)(void *ioctl_arg, void *vd_buf); 3885081Sachartre /* write is true if the operation writes any data to the backend */ 3895081Sachartre boolean_t write; 3902032Slm66018 } vd_ioctl_t; 3912032Slm66018 3922032Slm66018 /* Define trivial copyin/copyout conversion function flag */ 3932032Slm66018 #define VD_IDENTITY ((void (*)(void *, 
void *))-1) 3941991Sheppo 3951991Sheppo 3963401Snarayan static int vds_ldc_retries = VDS_RETRIES; 3972793Slm66018 static int vds_ldc_delay = VDS_LDC_DELAY; 3983401Snarayan static int vds_dev_retries = VDS_RETRIES; 3993401Snarayan static int vds_dev_delay = VDS_DEV_DELAY; 4001991Sheppo static void *vds_state; 4011991Sheppo static uint64_t vds_operations; /* see vds_operation[] definition below */ 4021991Sheppo 4034076Sachartre static uint_t vd_file_write_flags = VD_FILE_WRITE_FLAGS; 4044076Sachartre 4054696Sachartre static short vd_scsi_rdwr_timeout = VD_SCSI_RDWR_TIMEOUT; 4064696Sachartre 4072032Slm66018 /* 4082032Slm66018 * Supported protocol version pairs, from highest (newest) to lowest (oldest) 4092032Slm66018 * 4102032Slm66018 * Each supported major version should appear only once, paired with (and only 4112032Slm66018 * with) its highest supported minor version number (as the protocol requires 4122032Slm66018 * supporting all lower minor version numbers as well) 4132032Slm66018 */ 4142032Slm66018 static const vio_ver_t vds_version[] = {{1, 0}}; 4152032Slm66018 static const size_t vds_num_versions = 4162032Slm66018 sizeof (vds_version)/sizeof (vds_version[0]); 4172032Slm66018 4182793Slm66018 static void vd_free_dring_task(vd_t *vdp); 4193401Snarayan static int vd_setup_vd(vd_t *vd); 4205081Sachartre static int vd_setup_single_slice_disk(vd_t *vd); 4213401Snarayan static boolean_t vd_enabled(vd_t *vd); 4224963Sachartre static ushort_t vd_lbl2cksum(struct dk_label *label); 4234963Sachartre static int vd_file_validate_geometry(vd_t *vd); 4245081Sachartre 4253782Sachartre /* 4263782Sachartre * Function: 4273782Sachartre * vd_file_rw 4283782Sachartre * 4293782Sachartre * Description: 4303782Sachartre * Read or write to a disk on file. 4313782Sachartre * 4323782Sachartre * Parameters: 4333782Sachartre * vd - disk on which the operation is performed. 
4343782Sachartre * slice - slice on which the operation is performed, 4354696Sachartre * VD_SLICE_NONE indicates that the operation 4364696Sachartre * is done using an absolute disk offset. 4373782Sachartre * operation - operation to execute: read (VD_OP_BREAD) or 4383782Sachartre * write (VD_OP_BWRITE). 4393782Sachartre * data - buffer where data are read to or written from. 4403782Sachartre * blk - starting block for the operation. 4413782Sachartre * len - number of bytes to read or write. 4423782Sachartre * 4433782Sachartre * Return Code: 4443782Sachartre * n >= 0 - success, n indicates the number of bytes read 4453782Sachartre * or written. 4463782Sachartre * -1 - error. 4473782Sachartre */ 4483782Sachartre static ssize_t 4493782Sachartre vd_file_rw(vd_t *vd, int slice, int operation, caddr_t data, size_t blk, 4503782Sachartre size_t len) 4513782Sachartre { 4523782Sachartre caddr_t maddr; 4533782Sachartre size_t offset, maxlen, moffset, mlen, n; 4543782Sachartre uint_t smflags; 4553782Sachartre enum seg_rw srw; 4563782Sachartre 4573782Sachartre ASSERT(vd->file); 4583782Sachartre ASSERT(len > 0); 4593782Sachartre 4605081Sachartre /* 4615081Sachartre * If a file is exported as a slice then we don't care about the vtoc. 4625081Sachartre * In that case, the vtoc is a fake mainly to make newfs happy and we 4635081Sachartre * handle any I/O as a raw disk access so that we can have access to the 4645081Sachartre * entire backend. 
4655081Sachartre */ 4665081Sachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE || slice == VD_SLICE_NONE) { 4673782Sachartre /* raw disk access */ 4683782Sachartre offset = blk * DEV_BSIZE; 4693782Sachartre } else { 4703782Sachartre ASSERT(slice >= 0 && slice < V_NUMPAR); 4714963Sachartre 4724963Sachartre if (vd->vdisk_label == VD_DISK_LABEL_UNK && 4734963Sachartre vd_file_validate_geometry(vd) != 0) { 4744963Sachartre PR0("Unknown disk label, can't do I/O from slice %d", 4754963Sachartre slice); 4764963Sachartre return (-1); 4774963Sachartre } 4784963Sachartre 4793782Sachartre if (blk >= vd->vtoc.v_part[slice].p_size) { 4803782Sachartre /* address past the end of the slice */ 4813782Sachartre PR0("req_addr (0x%lx) > psize (0x%lx)", 4823782Sachartre blk, vd->vtoc.v_part[slice].p_size); 4833782Sachartre return (0); 4843782Sachartre } 4853782Sachartre 4863782Sachartre offset = (vd->vtoc.v_part[slice].p_start + blk) * DEV_BSIZE; 4873782Sachartre 4883782Sachartre /* 4893782Sachartre * If the requested size is greater than the size 4903782Sachartre * of the partition, truncate the read/write. 4913782Sachartre */ 4923782Sachartre maxlen = (vd->vtoc.v_part[slice].p_size - blk) * DEV_BSIZE; 4933782Sachartre 4943782Sachartre if (len > maxlen) { 4953782Sachartre PR0("I/O size truncated to %lu bytes from %lu bytes", 4963782Sachartre maxlen, len); 4973782Sachartre len = maxlen; 4983782Sachartre } 4993782Sachartre } 5003782Sachartre 5013782Sachartre /* 5023782Sachartre * We have to ensure that we are reading/writing into the mmap 5033782Sachartre * range. If we have a partial disk image (e.g. an image of 5043782Sachartre * s0 instead s2) the system can try to access slices that 5053782Sachartre * are not included into the disk image. 
5063782Sachartre */ 5073782Sachartre if ((offset + len) >= vd->file_size) { 5083782Sachartre PR0("offset + nbytes (0x%lx + 0x%lx) >= " 5093782Sachartre "file_size (0x%lx)", offset, len, vd->file_size); 5103782Sachartre return (-1); 5113782Sachartre } 5123782Sachartre 5133782Sachartre srw = (operation == VD_OP_BREAD)? S_READ : S_WRITE; 5144076Sachartre smflags = (operation == VD_OP_BREAD)? 0 : 5154076Sachartre (SM_WRITE | vd_file_write_flags); 5163782Sachartre n = len; 5173782Sachartre 5183782Sachartre do { 5193782Sachartre /* 5203782Sachartre * segmap_getmapflt() returns a MAXBSIZE chunk which is 5213782Sachartre * MAXBSIZE aligned. 5223782Sachartre */ 5233782Sachartre moffset = offset & MAXBOFFSET; 5243782Sachartre mlen = MIN(MAXBSIZE - moffset, n); 5253782Sachartre maddr = segmap_getmapflt(segkmap, vd->file_vnode, offset, 5263782Sachartre mlen, 1, srw); 5273782Sachartre /* 5283782Sachartre * Fault in the pages so we can check for error and ensure 5293782Sachartre * that we can safely used the mapped address. 
5303782Sachartre */ 5313782Sachartre if (segmap_fault(kas.a_hat, segkmap, maddr, mlen, 5323782Sachartre F_SOFTLOCK, srw) != 0) { 5333782Sachartre (void) segmap_release(segkmap, maddr, 0); 5343782Sachartre return (-1); 5353782Sachartre } 5363782Sachartre 5373782Sachartre if (operation == VD_OP_BREAD) 5383782Sachartre bcopy(maddr + moffset, data, mlen); 5393782Sachartre else 5403782Sachartre bcopy(data, maddr + moffset, mlen); 5413782Sachartre 5423782Sachartre if (segmap_fault(kas.a_hat, segkmap, maddr, mlen, 5433782Sachartre F_SOFTUNLOCK, srw) != 0) { 5443782Sachartre (void) segmap_release(segkmap, maddr, 0); 5453782Sachartre return (-1); 5463782Sachartre } 5473782Sachartre if (segmap_release(segkmap, maddr, smflags) != 0) 5483782Sachartre return (-1); 5493782Sachartre n -= mlen; 5503782Sachartre offset += mlen; 5513782Sachartre data += mlen; 5523782Sachartre 5533782Sachartre } while (n > 0); 5543782Sachartre 5553782Sachartre return (len); 5563782Sachartre } 5573782Sachartre 5584696Sachartre /* 5594696Sachartre * Function: 5604963Sachartre * vd_file_build_default_label 5614963Sachartre * 5624963Sachartre * Description: 5634963Sachartre * Return a default label for the given disk. This is used when the disk 5644963Sachartre * does not have a valid VTOC so that the user can get a valid default 5654963Sachartre * configuration. The default label have all slices size set to 0 (except 5664963Sachartre * slice 2 which is the entire disk) to force the user to write a valid 5674963Sachartre * label onto the disk image. 5684963Sachartre * 5694963Sachartre * Parameters: 5704963Sachartre * vd - disk on which the operation is performed. 5714963Sachartre * label - the returned default label. 5724963Sachartre * 5734963Sachartre * Return Code: 5744963Sachartre * none. 
5754963Sachartre */ 5764963Sachartre static void 5774963Sachartre vd_file_build_default_label(vd_t *vd, struct dk_label *label) 5784963Sachartre { 5794963Sachartre size_t size; 5804963Sachartre char prefix; 5815081Sachartre int slice, nparts; 5825081Sachartre uint16_t tag; 5834963Sachartre 5844963Sachartre ASSERT(vd->file); 5854963Sachartre 5864963Sachartre /* 5874963Sachartre * We must have a resonable number of cylinders and sectors so 5884963Sachartre * that newfs can run using default values. 5894963Sachartre * 5904963Sachartre * if (disk_size < 2MB) 5914963Sachartre * phys_cylinders = disk_size / 100K 5924963Sachartre * else 5934963Sachartre * phys_cylinders = disk_size / 300K 5944963Sachartre * 5954963Sachartre * phys_cylinders = (phys_cylinders == 0) ? 1 : phys_cylinders 5964963Sachartre * alt_cylinders = (phys_cylinders > 2) ? 2 : 0; 5974963Sachartre * data_cylinders = phys_cylinders - alt_cylinders 5984963Sachartre * 5994963Sachartre * sectors = disk_size / (phys_cylinders * blk_size) 6004963Sachartre * 6014963Sachartre * The file size test is an attempt to not have too few cylinders 6024963Sachartre * for a small file, or so many on a big file that you waste space 6034963Sachartre * for backup superblocks or cylinder group structures. 
6044963Sachartre */ 6054963Sachartre if (vd->file_size < (2 * 1024 * 1024)) 6064963Sachartre label->dkl_pcyl = vd->file_size / (100 * 1024); 6074963Sachartre else 6084963Sachartre label->dkl_pcyl = vd->file_size / (300 * 1024); 6094963Sachartre 6104963Sachartre if (label->dkl_pcyl == 0) 6114963Sachartre label->dkl_pcyl = 1; 6124963Sachartre 6135081Sachartre label->dkl_acyl = 0; 6145081Sachartre 6155081Sachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { 6165081Sachartre nparts = 1; 6175081Sachartre slice = 0; 6185081Sachartre tag = V_UNASSIGNED; 6195081Sachartre } else { 6205081Sachartre if (label->dkl_pcyl > 2) 6215081Sachartre label->dkl_acyl = 2; 6225081Sachartre nparts = V_NUMPAR; 6235081Sachartre slice = VD_ENTIRE_DISK_SLICE; 6245081Sachartre tag = V_BACKUP; 6255081Sachartre } 6264963Sachartre 6274963Sachartre label->dkl_nsect = vd->file_size / 6284963Sachartre (DEV_BSIZE * label->dkl_pcyl); 6294963Sachartre label->dkl_ncyl = label->dkl_pcyl - label->dkl_acyl; 6304963Sachartre label->dkl_nhead = 1; 6314963Sachartre label->dkl_write_reinstruct = 0; 6324963Sachartre label->dkl_read_reinstruct = 0; 6334963Sachartre label->dkl_rpm = 7200; 6344963Sachartre label->dkl_apc = 0; 6354963Sachartre label->dkl_intrlv = 0; 6364963Sachartre 6374963Sachartre PR0("requested disk size: %ld bytes\n", vd->file_size); 6384963Sachartre PR0("setup: ncyl=%d nhead=%d nsec=%d\n", label->dkl_pcyl, 6394963Sachartre label->dkl_nhead, label->dkl_nsect); 6404963Sachartre PR0("provided disk size: %ld bytes\n", (uint64_t) 6414963Sachartre (label->dkl_pcyl * label->dkl_nhead * 6424963Sachartre label->dkl_nsect * DEV_BSIZE)); 6434963Sachartre 6444963Sachartre if (vd->file_size < (1ULL << 20)) { 6454963Sachartre size = vd->file_size >> 10; 6464963Sachartre prefix = 'K'; /* Kilobyte */ 6474963Sachartre } else if (vd->file_size < (1ULL << 30)) { 6484963Sachartre size = vd->file_size >> 20; 6494963Sachartre prefix = 'M'; /* Megabyte */ 6504963Sachartre } else if (vd->file_size < (1ULL << 40)) { 
6514963Sachartre size = vd->file_size >> 30; 6524963Sachartre prefix = 'G'; /* Gigabyte */ 6534963Sachartre } else { 6544963Sachartre size = vd->file_size >> 40; 6554963Sachartre prefix = 'T'; /* Terabyte */ 6564963Sachartre } 6574963Sachartre 6584963Sachartre /* 6594963Sachartre * We must have a correct label name otherwise format(1m) will 6604963Sachartre * not recognized the disk as labeled. 6614963Sachartre */ 6624963Sachartre (void) snprintf(label->dkl_asciilabel, LEN_DKL_ASCII, 6634963Sachartre "SUN-DiskImage-%ld%cB cyl %d alt %d hd %d sec %d", 6644963Sachartre size, prefix, 6654963Sachartre label->dkl_ncyl, label->dkl_acyl, label->dkl_nhead, 6664963Sachartre label->dkl_nsect); 6674963Sachartre 6684963Sachartre /* default VTOC */ 6694963Sachartre label->dkl_vtoc.v_version = V_VERSION; 6705081Sachartre label->dkl_vtoc.v_nparts = nparts; 6714963Sachartre label->dkl_vtoc.v_sanity = VTOC_SANE; 6725081Sachartre label->dkl_vtoc.v_part[slice].p_tag = tag; 6735081Sachartre label->dkl_map[slice].dkl_cylno = 0; 6745081Sachartre label->dkl_map[slice].dkl_nblk = label->dkl_ncyl * 6754963Sachartre label->dkl_nhead * label->dkl_nsect; 6764963Sachartre label->dkl_cksum = vd_lbl2cksum(label); 6774963Sachartre } 6784963Sachartre 6794963Sachartre /* 6804963Sachartre * Function: 6814696Sachartre * vd_file_set_vtoc 6824696Sachartre * 6834696Sachartre * Description: 6844696Sachartre * Set the vtoc of a disk image by writing the label and backup 6854696Sachartre * labels into the disk image backend. 6864696Sachartre * 6874696Sachartre * Parameters: 6884696Sachartre * vd - disk on which the operation is performed. 6894696Sachartre * label - the data to be written. 6904696Sachartre * 6914696Sachartre * Return Code: 6924696Sachartre * 0 - success. 6934696Sachartre * n > 0 - error, n indicates the errno code. 
6944696Sachartre */ 6954696Sachartre static int 6964696Sachartre vd_file_set_vtoc(vd_t *vd, struct dk_label *label) 6974696Sachartre { 6984696Sachartre int blk, sec, cyl, head, cnt; 6994696Sachartre 7004696Sachartre ASSERT(vd->file); 7014696Sachartre 7024696Sachartre if (VD_FILE_LABEL_WRITE(vd, label) < 0) { 7034696Sachartre PR0("fail to write disk label"); 7044696Sachartre return (EIO); 7054696Sachartre } 7064696Sachartre 7074696Sachartre /* 7084696Sachartre * Backup labels are on the last alternate cylinder's 7094696Sachartre * first five odd sectors. 7104696Sachartre */ 7114696Sachartre if (label->dkl_acyl == 0) { 7124696Sachartre PR0("no alternate cylinder, can not store backup labels"); 7134696Sachartre return (0); 7144696Sachartre } 7154696Sachartre 7164696Sachartre cyl = label->dkl_ncyl + label->dkl_acyl - 1; 7174696Sachartre head = label->dkl_nhead - 1; 7184696Sachartre 7194696Sachartre blk = (cyl * ((label->dkl_nhead * label->dkl_nsect) - label->dkl_apc)) + 7204696Sachartre (head * label->dkl_nsect); 7214696Sachartre 7224696Sachartre /* 7234696Sachartre * Write the backup labels. Make sure we don't try to write past 7244696Sachartre * the last cylinder. 
7254696Sachartre */ 7264696Sachartre sec = 1; 7274696Sachartre 7284696Sachartre for (cnt = 0; cnt < VD_FILE_NUM_BACKUP; cnt++) { 7294696Sachartre 7304696Sachartre if (sec >= label->dkl_nsect) { 7314696Sachartre PR0("not enough sector to store all backup labels"); 7324696Sachartre return (0); 7334696Sachartre } 7344696Sachartre 7354696Sachartre if (vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)label, 7364696Sachartre blk + sec, sizeof (struct dk_label)) < 0) { 7374696Sachartre PR0("error writing backup label at block %d\n", 7384696Sachartre blk + sec); 7394696Sachartre return (EIO); 7404696Sachartre } 7414696Sachartre 7424696Sachartre PR1("wrote backup label at block %d\n", blk + sec); 7434696Sachartre 7444696Sachartre sec += 2; 7454696Sachartre } 7464696Sachartre 7474696Sachartre return (0); 7484696Sachartre } 7494696Sachartre 7504696Sachartre /* 7514696Sachartre * Function: 7524696Sachartre * vd_file_get_devid_block 7534696Sachartre * 7544696Sachartre * Description: 7554696Sachartre * Return the block number where the device id is stored. 7564696Sachartre * 7574696Sachartre * Parameters: 7584696Sachartre * vd - disk on which the operation is performed. 
7594696Sachartre * blkp - pointer to the block number 7604696Sachartre * 7614696Sachartre * Return Code: 7624696Sachartre * 0 - success 7634696Sachartre * ENOSPC - disk has no space to store a device id 7644696Sachartre */ 7654696Sachartre static int 7664696Sachartre vd_file_get_devid_block(vd_t *vd, size_t *blkp) 7674696Sachartre { 7684696Sachartre diskaddr_t spc, head, cyl; 7694696Sachartre 7704696Sachartre ASSERT(vd->file); 7714696Sachartre ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); 7724696Sachartre 7734696Sachartre /* this geometry doesn't allow us to have a devid */ 7744696Sachartre if (vd->dk_geom.dkg_acyl < 2) { 7754696Sachartre PR0("not enough alternate cylinder available for devid " 7764696Sachartre "(acyl=%u)", vd->dk_geom.dkg_acyl); 7774696Sachartre return (ENOSPC); 7784696Sachartre } 7794696Sachartre 7804696Sachartre /* the devid is in on the track next to the last cylinder */ 7814696Sachartre cyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl - 2; 7824696Sachartre spc = vd->dk_geom.dkg_nhead * vd->dk_geom.dkg_nsect; 7834696Sachartre head = vd->dk_geom.dkg_nhead - 1; 7844696Sachartre 7854696Sachartre *blkp = (cyl * (spc - vd->dk_geom.dkg_apc)) + 7864696Sachartre (head * vd->dk_geom.dkg_nsect) + 1; 7874696Sachartre 7884696Sachartre return (0); 7894696Sachartre } 7904696Sachartre 7914696Sachartre /* 7924696Sachartre * Return the checksum of a disk block containing an on-disk devid. 
7934696Sachartre */ 7944696Sachartre static uint_t 7954696Sachartre vd_dkdevid2cksum(struct dk_devid *dkdevid) 7964696Sachartre { 7974696Sachartre uint_t chksum, *ip; 7984696Sachartre int i; 7994696Sachartre 8004696Sachartre chksum = 0; 8014696Sachartre ip = (uint_t *)dkdevid; 8024696Sachartre for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int)); i++) 8034696Sachartre chksum ^= ip[i]; 8044696Sachartre 8054696Sachartre return (chksum); 8064696Sachartre } 8074696Sachartre 8084696Sachartre /* 8094696Sachartre * Function: 8104696Sachartre * vd_file_read_devid 8114696Sachartre * 8124696Sachartre * Description: 8134696Sachartre * Read the device id stored on a disk image. 8144696Sachartre * 8154696Sachartre * Parameters: 8164696Sachartre * vd - disk on which the operation is performed. 8174696Sachartre * devid - the return address of the device ID. 8184696Sachartre * 8194696Sachartre * Return Code: 8204696Sachartre * 0 - success 8214696Sachartre * EIO - I/O error while trying to access the disk image 8224696Sachartre * EINVAL - no valid device id was found 8234696Sachartre * ENOSPC - disk has no space to store a device id 8244696Sachartre */ 8254696Sachartre static int 8264696Sachartre vd_file_read_devid(vd_t *vd, ddi_devid_t *devid) 8274696Sachartre { 8284696Sachartre struct dk_devid *dkdevid; 8294696Sachartre size_t blk; 8304696Sachartre uint_t chksum; 8314696Sachartre int status, sz; 8324696Sachartre 8334696Sachartre if ((status = vd_file_get_devid_block(vd, &blk)) != 0) 8344696Sachartre return (status); 8354696Sachartre 8364696Sachartre dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP); 8374696Sachartre 8384696Sachartre /* get the devid */ 8394696Sachartre if ((vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)dkdevid, blk, 8404696Sachartre DEV_BSIZE)) < 0) { 8414696Sachartre PR0("error reading devid block at %lu", blk); 8424696Sachartre status = EIO; 8434696Sachartre goto done; 8444696Sachartre } 8454696Sachartre 8464696Sachartre /* validate the revision */ 
8474696Sachartre if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) || 8484696Sachartre (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) { 8494696Sachartre PR0("invalid devid found at block %lu (bad revision)", blk); 8504696Sachartre status = EINVAL; 8514696Sachartre goto done; 8524696Sachartre } 8534696Sachartre 8544696Sachartre /* compute checksum */ 8554696Sachartre chksum = vd_dkdevid2cksum(dkdevid); 8564696Sachartre 8574696Sachartre /* compare the checksums */ 8584696Sachartre if (DKD_GETCHKSUM(dkdevid) != chksum) { 8594696Sachartre PR0("invalid devid found at block %lu (bad checksum)", blk); 8604696Sachartre status = EINVAL; 8614696Sachartre goto done; 8624696Sachartre } 8634696Sachartre 8644696Sachartre /* validate the device id */ 8654696Sachartre if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) { 8664696Sachartre PR0("invalid devid found at block %lu", blk); 8674696Sachartre status = EINVAL; 8684696Sachartre goto done; 8694696Sachartre } 8704696Sachartre 8714696Sachartre PR1("devid read at block %lu", blk); 8724696Sachartre 8734696Sachartre sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid); 8744696Sachartre *devid = kmem_alloc(sz, KM_SLEEP); 8754696Sachartre bcopy(&dkdevid->dkd_devid, *devid, sz); 8764696Sachartre 8774696Sachartre done: 8784696Sachartre kmem_free(dkdevid, DEV_BSIZE); 8794696Sachartre return (status); 8804696Sachartre 8814696Sachartre } 8824696Sachartre 8834696Sachartre /* 8844696Sachartre * Function: 8854696Sachartre * vd_file_write_devid 8864696Sachartre * 8874696Sachartre * Description: 8884696Sachartre * Write a device id into disk image. 8894696Sachartre * 8904696Sachartre * Parameters: 8914696Sachartre * vd - disk on which the operation is performed. 8924696Sachartre * devid - the device ID to store. 
8934696Sachartre * 8944696Sachartre * Return Code: 8954696Sachartre * 0 - success 8964696Sachartre * EIO - I/O error while trying to access the disk image 8974696Sachartre * ENOSPC - disk has no space to store a device id 8984696Sachartre */ 8994696Sachartre static int 9004696Sachartre vd_file_write_devid(vd_t *vd, ddi_devid_t devid) 9014696Sachartre { 9024696Sachartre struct dk_devid *dkdevid; 9034696Sachartre uint_t chksum; 9044696Sachartre size_t blk; 9054696Sachartre int status; 9064696Sachartre 9074696Sachartre if ((status = vd_file_get_devid_block(vd, &blk)) != 0) 9084696Sachartre return (status); 9094696Sachartre 9104696Sachartre dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP); 9114696Sachartre 9124696Sachartre /* set revision */ 9134696Sachartre dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB; 9144696Sachartre dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB; 9154696Sachartre 9164696Sachartre /* copy devid */ 9174696Sachartre bcopy(devid, &dkdevid->dkd_devid, ddi_devid_sizeof(devid)); 9184696Sachartre 9194696Sachartre /* compute checksum */ 9204696Sachartre chksum = vd_dkdevid2cksum(dkdevid); 9214696Sachartre 9224696Sachartre /* set checksum */ 9234696Sachartre DKD_FORMCHKSUM(chksum, dkdevid); 9244696Sachartre 9254696Sachartre /* store the devid */ 9264696Sachartre if ((status = vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, 9274696Sachartre (caddr_t)dkdevid, blk, DEV_BSIZE)) < 0) { 9284696Sachartre PR0("Error writing devid block at %lu", blk); 9294696Sachartre status = EIO; 9304696Sachartre } else { 9314696Sachartre PR1("devid written at block %lu", blk); 9324696Sachartre status = 0; 9334696Sachartre } 9344696Sachartre 9354696Sachartre kmem_free(dkdevid, DEV_BSIZE); 9364696Sachartre return (status); 9374696Sachartre } 9384696Sachartre 9394696Sachartre /* 9404696Sachartre * Function: 9414696Sachartre * vd_scsi_rdwr 9424696Sachartre * 9434696Sachartre * Description: 9444696Sachartre * Read or write to a SCSI disk using an absolute disk offset. 
9454696Sachartre * 9464696Sachartre * Parameters: 9474696Sachartre * vd - disk on which the operation is performed. 9484696Sachartre * operation - operation to execute: read (VD_OP_BREAD) or 9494696Sachartre * write (VD_OP_BWRITE). 9504696Sachartre * data - buffer where data are read to or written from. 9514696Sachartre * blk - starting block for the operation. 9524696Sachartre * len - number of bytes to read or write. 9534696Sachartre * 9544696Sachartre * Return Code: 9554696Sachartre * 0 - success 9564696Sachartre * n != 0 - error. 9574696Sachartre */ 9584696Sachartre static int 9594696Sachartre vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len) 9604696Sachartre { 9614696Sachartre struct uscsi_cmd ucmd; 9624696Sachartre union scsi_cdb cdb; 9634696Sachartre int nsectors, nblk; 9644696Sachartre int max_sectors; 9654696Sachartre int status, rval; 9664696Sachartre 9674696Sachartre ASSERT(!vd->file); 9684696Sachartre 9694696Sachartre max_sectors = vd->max_xfer_sz; 9704696Sachartre nblk = (len / DEV_BSIZE); 9714696Sachartre 9724696Sachartre if (len % DEV_BSIZE != 0) 9734696Sachartre return (EINVAL); 9744696Sachartre 9754696Sachartre /* 9764696Sachartre * Build and execute the uscsi ioctl. We build a group0, group1 9774696Sachartre * or group4 command as necessary, since some targets 9784696Sachartre * do not support group1 commands. 9794696Sachartre */ 9804696Sachartre while (nblk) { 9814696Sachartre 9824696Sachartre bzero(&ucmd, sizeof (ucmd)); 9834696Sachartre bzero(&cdb, sizeof (cdb)); 9844696Sachartre 9854696Sachartre nsectors = (max_sectors < nblk) ? 
max_sectors : nblk; 9864696Sachartre 9874696Sachartre if (blk < (2 << 20) && nsectors <= 0xff) { 9884696Sachartre FORMG0ADDR(&cdb, blk); 9894696Sachartre FORMG0COUNT(&cdb, nsectors); 9904696Sachartre ucmd.uscsi_cdblen = CDB_GROUP0; 9914696Sachartre } else if (blk > 0xffffffff) { 9924696Sachartre FORMG4LONGADDR(&cdb, blk); 9934696Sachartre FORMG4COUNT(&cdb, nsectors); 9944696Sachartre ucmd.uscsi_cdblen = CDB_GROUP4; 9954696Sachartre cdb.scc_cmd |= SCMD_GROUP4; 9964696Sachartre } else { 9974696Sachartre FORMG1ADDR(&cdb, blk); 9984696Sachartre FORMG1COUNT(&cdb, nsectors); 9994696Sachartre ucmd.uscsi_cdblen = CDB_GROUP1; 10004696Sachartre cdb.scc_cmd |= SCMD_GROUP1; 10014696Sachartre } 10024696Sachartre 10034696Sachartre ucmd.uscsi_cdb = (caddr_t)&cdb; 10044696Sachartre ucmd.uscsi_bufaddr = data; 10054696Sachartre ucmd.uscsi_buflen = nsectors * DEV_BSIZE; 10064696Sachartre ucmd.uscsi_timeout = vd_scsi_rdwr_timeout; 10074696Sachartre /* 10084696Sachartre * Set flags so that the command is isolated from normal 10094696Sachartre * commands and no error message is printed. 10104696Sachartre */ 10114696Sachartre ucmd.uscsi_flags = USCSI_ISOLATE | USCSI_SILENT; 10124696Sachartre 10134696Sachartre if (operation == VD_OP_BREAD) { 10144696Sachartre cdb.scc_cmd |= SCMD_READ; 10154696Sachartre ucmd.uscsi_flags |= USCSI_READ; 10164696Sachartre } else { 10174696Sachartre cdb.scc_cmd |= SCMD_WRITE; 10184696Sachartre } 10194696Sachartre 10204696Sachartre status = ldi_ioctl(vd->ldi_handle[VD_ENTIRE_DISK_SLICE], 10215081Sachartre USCSICMD, (intptr_t)&ucmd, (vd->open_flags | FKIOCTL), 10224696Sachartre kcred, &rval); 10234696Sachartre 10244696Sachartre if (status == 0) 10254696Sachartre status = ucmd.uscsi_status; 10264696Sachartre 10274696Sachartre if (status != 0) 10284696Sachartre break; 10294696Sachartre 10304696Sachartre /* 10314696Sachartre * Check if partial DMA breakup is required. If so, reduce 10324696Sachartre * the request size by half and retry the last request. 
10334696Sachartre */ 10344696Sachartre if (ucmd.uscsi_resid == ucmd.uscsi_buflen) { 10354696Sachartre max_sectors >>= 1; 10364696Sachartre if (max_sectors <= 0) { 10374696Sachartre status = EIO; 10384696Sachartre break; 10394696Sachartre } 10404696Sachartre continue; 10414696Sachartre } 10424696Sachartre 10434696Sachartre if (ucmd.uscsi_resid != 0) { 10444696Sachartre status = EIO; 10454696Sachartre break; 10464696Sachartre } 10474696Sachartre 10484696Sachartre blk += nsectors; 10494696Sachartre nblk -= nsectors; 10504696Sachartre data += nsectors * DEV_BSIZE; /* SECSIZE */ 10514696Sachartre } 10524696Sachartre 10534696Sachartre return (status); 10544696Sachartre } 10554696Sachartre 10564838Slm66018 /* 10574838Slm66018 * Return Values 10584838Slm66018 * EINPROGRESS - operation was successfully started 10594838Slm66018 * EIO - encountered LDC (aka. task error) 10604838Slm66018 * 0 - operation completed successfully 10614838Slm66018 * 10624838Slm66018 * Side Effect 10634838Slm66018 * sets request->status = <disk operation status> 10644838Slm66018 */ 10651991Sheppo static int 10662336Snarayan vd_start_bio(vd_task_t *task) 10671991Sheppo { 10682531Snarayan int rv, status = 0; 10692336Snarayan vd_t *vd = task->vd; 10702336Snarayan vd_dring_payload_t *request = task->request; 10712336Snarayan struct buf *buf = &task->buf; 10722531Snarayan uint8_t mtype; 10733401Snarayan int slice; 10745081Sachartre char *bufaddr = 0; 10755081Sachartre size_t buflen; 10762336Snarayan 10772336Snarayan ASSERT(vd != NULL); 10782336Snarayan ASSERT(request != NULL); 10793401Snarayan 10803401Snarayan slice = request->slice; 10813401Snarayan 10824696Sachartre ASSERT(slice == VD_SLICE_NONE || slice < vd->nslices); 10832336Snarayan ASSERT((request->operation == VD_OP_BREAD) || 10842336Snarayan (request->operation == VD_OP_BWRITE)); 10852336Snarayan 10864838Slm66018 if (request->nbytes == 0) { 10874838Slm66018 /* no service for trivial requests */ 10884838Slm66018 request->status = EINVAL; 
10894838Slm66018 return (0); 10904838Slm66018 } 10912336Snarayan 10922336Snarayan PR1("%s %lu bytes at block %lu", 10932336Snarayan (request->operation == VD_OP_BREAD) ? "Read" : "Write", 10942336Snarayan request->nbytes, request->addr); 10952336Snarayan 10965081Sachartre /* 10975081Sachartre * We have to check the open flags because the functions processing 10985081Sachartre * the read/write request will not do it. 10995081Sachartre */ 11005081Sachartre if (request->operation == VD_OP_BWRITE && !(vd->open_flags & FWRITE)) { 11015081Sachartre PR0("write fails because backend is opened read-only"); 11025081Sachartre request->nbytes = 0; 11035081Sachartre request->status = EROFS; 11045081Sachartre return (0); 11055081Sachartre } 11062336Snarayan 11072531Snarayan mtype = (&vd->inband_task == task) ? LDC_SHADOW_MAP : LDC_DIRECT_MAP; 11082531Snarayan 11092531Snarayan /* Map memory exported by client */ 11102531Snarayan status = ldc_mem_map(task->mhdl, request->cookie, request->ncookies, 11112531Snarayan mtype, (request->operation == VD_OP_BREAD) ? 
LDC_MEM_W : LDC_MEM_R, 11125081Sachartre &bufaddr, NULL); 11132531Snarayan if (status != 0) { 11142793Slm66018 PR0("ldc_mem_map() returned err %d ", status); 11154838Slm66018 return (EIO); 11162336Snarayan } 11172336Snarayan 11185081Sachartre buflen = request->nbytes; 11195081Sachartre 11205081Sachartre status = ldc_mem_acquire(task->mhdl, 0, buflen); 11212531Snarayan if (status != 0) { 11222531Snarayan (void) ldc_mem_unmap(task->mhdl); 11232793Slm66018 PR0("ldc_mem_acquire() returned err %d ", status); 11244838Slm66018 return (EIO); 11252531Snarayan } 11262531Snarayan 11272336Snarayan /* Start the block I/O */ 11283401Snarayan if (vd->file) { 11295081Sachartre rv = vd_file_rw(vd, slice, request->operation, bufaddr, 11303782Sachartre request->addr, request->nbytes); 11313782Sachartre if (rv < 0) { 11323401Snarayan request->nbytes = 0; 11334838Slm66018 request->status = EIO; 11343782Sachartre } else { 11353782Sachartre request->nbytes = rv; 11364838Slm66018 request->status = 0; 11373401Snarayan } 11383401Snarayan } else { 11394696Sachartre if (slice == VD_SLICE_NONE) { 11404696Sachartre /* 11414696Sachartre * This is not a disk image so it is a real disk. We 11424696Sachartre * assume that the underlying device driver supports 11434696Sachartre * USCSICMD ioctls. This is the case of all SCSI devices 11444696Sachartre * (sd, ssd...). 11454696Sachartre * 11464696Sachartre * In the future if we have non-SCSI disks we would need 11474696Sachartre * to invoke the appropriate function to do I/O using an 11484696Sachartre * absolute disk offset (for example using DKIOCTL_RWCMD 11494696Sachartre * for IDE disks). 
11504696Sachartre */ 11515081Sachartre rv = vd_scsi_rdwr(vd, request->operation, bufaddr, 11525081Sachartre request->addr, request->nbytes); 11534696Sachartre if (rv != 0) { 11544696Sachartre request->nbytes = 0; 11554838Slm66018 request->status = EIO; 11564696Sachartre } else { 11574838Slm66018 request->status = 0; 11584696Sachartre } 11594696Sachartre } else { 11605081Sachartre bioinit(buf); 11615081Sachartre buf->b_flags = B_BUSY; 11625081Sachartre buf->b_bcount = request->nbytes; 11635081Sachartre buf->b_lblkno = request->addr; 11645081Sachartre buf->b_edev = vd->dev[slice]; 11655081Sachartre buf->b_un.b_addr = bufaddr; 11665081Sachartre buf->b_flags |= (request->operation == VD_OP_BREAD)? 11675081Sachartre B_READ : B_WRITE; 11685081Sachartre 11694838Slm66018 request->status = 11704838Slm66018 ldi_strategy(vd->ldi_handle[slice], buf); 11714838Slm66018 11724838Slm66018 /* 11734838Slm66018 * This is to indicate to the caller that the request 11744838Slm66018 * needs to be finished by vd_complete_bio() by calling 11754838Slm66018 * biowait() there and waiting for that to return before 11764838Slm66018 * triggering the notification of the vDisk client. 11774838Slm66018 * 11784838Slm66018 * This is necessary when writing to real disks as 11794838Slm66018 * otherwise calls to ldi_strategy() would be serialized 11804838Slm66018 * behind the calls to biowait() and performance would 11814838Slm66018 * suffer. 
11824838Slm66018 */ 11834838Slm66018 if (request->status == 0) 11844696Sachartre return (EINPROGRESS); 11855081Sachartre 11865081Sachartre biofini(buf); 11874696Sachartre } 11883401Snarayan } 11893401Snarayan 11902336Snarayan /* Clean up after error */ 11915081Sachartre rv = ldc_mem_release(task->mhdl, 0, buflen); 11922531Snarayan if (rv) { 11932793Slm66018 PR0("ldc_mem_release() returned err %d ", rv); 11944838Slm66018 status = EIO; 11952531Snarayan } 11962531Snarayan rv = ldc_mem_unmap(task->mhdl); 11972531Snarayan if (rv) { 11984838Slm66018 PR0("ldc_mem_unmap() returned err %d ", rv); 11994838Slm66018 status = EIO; 12002531Snarayan } 12012531Snarayan 12022336Snarayan return (status); 12032336Snarayan } 12042336Snarayan 12054838Slm66018 /* 12064838Slm66018 * This function should only be called from vd_notify to ensure that requests 12074838Slm66018 * are responded to in the order that they are received. 12084838Slm66018 */ 12092336Snarayan static int 12102336Snarayan send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen) 12112336Snarayan { 12122793Slm66018 int status; 12132336Snarayan size_t nbytes; 12142336Snarayan 12152793Slm66018 do { 12162336Snarayan nbytes = msglen; 12172336Snarayan status = ldc_write(ldc_handle, msg, &nbytes); 12182793Slm66018 if (status != EWOULDBLOCK) 12192793Slm66018 break; 12202793Slm66018 drv_usecwait(vds_ldc_delay); 12212793Slm66018 } while (status == EWOULDBLOCK); 12222336Snarayan 12232336Snarayan if (status != 0) { 12242793Slm66018 if (status != ECONNRESET) 12252793Slm66018 PR0("ldc_write() returned errno %d", status); 12262336Snarayan return (status); 12272336Snarayan } else if (nbytes != msglen) { 12282793Slm66018 PR0("ldc_write() performed only partial write"); 12292336Snarayan return (EIO); 12302336Snarayan } 12312336Snarayan 12322336Snarayan PR1("SENT %lu bytes", msglen); 12332336Snarayan return (0); 12342336Snarayan } 12352336Snarayan 12362336Snarayan static void 12372336Snarayan vd_need_reset(vd_t *vd, boolean_t 
reset_ldc) 12382336Snarayan { 12392336Snarayan mutex_enter(&vd->lock); 12402336Snarayan vd->reset_state = B_TRUE; 12412336Snarayan vd->reset_ldc = reset_ldc; 12422336Snarayan mutex_exit(&vd->lock); 12432336Snarayan } 12442336Snarayan 12452336Snarayan /* 12462336Snarayan * Reset the state of the connection with a client, if needed; reset the LDC 12472336Snarayan * transport as well, if needed. This function should only be called from the 12482793Slm66018 * "vd_recv_msg", as it waits for tasks - otherwise a deadlock can occur. 12492336Snarayan */ 12502336Snarayan static void 12512336Snarayan vd_reset_if_needed(vd_t *vd) 12522336Snarayan { 12532793Slm66018 int status = 0; 12542336Snarayan 12552336Snarayan mutex_enter(&vd->lock); 12562336Snarayan if (!vd->reset_state) { 12572336Snarayan ASSERT(!vd->reset_ldc); 12582336Snarayan mutex_exit(&vd->lock); 12592336Snarayan return; 12602336Snarayan } 12612336Snarayan mutex_exit(&vd->lock); 12622336Snarayan 12632336Snarayan PR0("Resetting connection state with %s", VD_CLIENT(vd)); 12642336Snarayan 12652336Snarayan /* 12662336Snarayan * Let any asynchronous I/O complete before possibly pulling the rug 12672336Snarayan * out from under it; defer checking vd->reset_ldc, as one of the 12682336Snarayan * asynchronous tasks might set it 12692336Snarayan */ 12702336Snarayan ddi_taskq_wait(vd->completionq); 12712336Snarayan 12723401Snarayan if (vd->file) { 12733401Snarayan status = VOP_FSYNC(vd->file_vnode, FSYNC, kcred); 12743401Snarayan if (status) { 12753401Snarayan PR0("VOP_FSYNC returned errno %d", status); 12763401Snarayan } 12773401Snarayan } 12783401Snarayan 12792336Snarayan if ((vd->initialized & VD_DRING) && 12802336Snarayan ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0)) 12812793Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 12822793Slm66018 12832793Slm66018 vd_free_dring_task(vd); 12842793Slm66018 12852793Slm66018 /* Free the staging buffer for msgs */ 12862793Slm66018 if (vd->vio_msgp != NULL) { 
12872793Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 12882793Slm66018 vd->vio_msgp = NULL; 12892336Snarayan } 12902336Snarayan 12912793Slm66018 /* Free the inband message buffer */ 12922793Slm66018 if (vd->inband_task.msg != NULL) { 12932793Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 12942793Slm66018 vd->inband_task.msg = NULL; 12952793Slm66018 } 12962336Snarayan 12972336Snarayan mutex_enter(&vd->lock); 12982793Slm66018 12992793Slm66018 if (vd->reset_ldc) 13002793Slm66018 PR0("taking down LDC channel"); 13012410Slm66018 if (vd->reset_ldc && ((status = ldc_down(vd->ldc_handle)) != 0)) 13022793Slm66018 PR0("ldc_down() returned errno %d", status); 13032336Snarayan 13042336Snarayan vd->initialized &= ~(VD_SID | VD_SEQ_NUM | VD_DRING); 13052336Snarayan vd->state = VD_STATE_INIT; 13062336Snarayan vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 13072336Snarayan 13082793Slm66018 /* Allocate the staging buffer */ 13092793Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 13102793Slm66018 13113010Slm66018 PR0("calling ldc_up\n"); 13123010Slm66018 (void) ldc_up(vd->ldc_handle); 13132793Slm66018 13142336Snarayan vd->reset_state = B_FALSE; 13152336Snarayan vd->reset_ldc = B_FALSE; 13162793Slm66018 13172336Snarayan mutex_exit(&vd->lock); 13182336Snarayan } 13192336Snarayan 13202793Slm66018 static void vd_recv_msg(void *arg); 13212793Slm66018 13222793Slm66018 static void 13232793Slm66018 vd_mark_in_reset(vd_t *vd) 13242793Slm66018 { 13252793Slm66018 int status; 13262793Slm66018 13272793Slm66018 PR0("vd_mark_in_reset: marking vd in reset\n"); 13282793Slm66018 13292793Slm66018 vd_need_reset(vd, B_FALSE); 13302793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, DDI_SLEEP); 13312793Slm66018 if (status == DDI_FAILURE) { 13322793Slm66018 PR0("cannot schedule task to recv msg\n"); 13332793Slm66018 vd_need_reset(vd, B_TRUE); 13342793Slm66018 return; 13352793Slm66018 } 13362793Slm66018 } 13372793Slm66018 13382336Snarayan 
static int 13393401Snarayan vd_mark_elem_done(vd_t *vd, int idx, int elem_status, int elem_nbytes) 13402336Snarayan { 13412336Snarayan boolean_t accepted; 13422336Snarayan int status; 13432336Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 13442336Snarayan 13452793Slm66018 if (vd->reset_state) 13462793Slm66018 return (0); 13472336Snarayan 13482336Snarayan /* Acquire the element */ 13492793Slm66018 if (!vd->reset_state && 13502793Slm66018 (status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 13512793Slm66018 if (status == ECONNRESET) { 13522793Slm66018 vd_mark_in_reset(vd); 13532793Slm66018 return (0); 13542793Slm66018 } else { 13552793Slm66018 PR0("ldc_mem_dring_acquire() returned errno %d", 13562793Slm66018 status); 13572793Slm66018 return (status); 13582793Slm66018 } 13592336Snarayan } 13602336Snarayan 13612336Snarayan /* Set the element's status and mark it done */ 13622336Snarayan accepted = (elem->hdr.dstate == VIO_DESC_ACCEPTED); 13632336Snarayan if (accepted) { 13643401Snarayan elem->payload.nbytes = elem_nbytes; 13652336Snarayan elem->payload.status = elem_status; 13662336Snarayan elem->hdr.dstate = VIO_DESC_DONE; 13672336Snarayan } else { 13682336Snarayan /* Perhaps client timed out waiting for I/O... */ 13692793Slm66018 PR0("element %u no longer \"accepted\"", idx); 13702336Snarayan VD_DUMP_DRING_ELEM(elem); 13712336Snarayan } 13722336Snarayan /* Release the element */ 13732793Slm66018 if (!vd->reset_state && 13742793Slm66018 (status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 13752793Slm66018 if (status == ECONNRESET) { 13762793Slm66018 vd_mark_in_reset(vd); 13772793Slm66018 return (0); 13782793Slm66018 } else { 13792793Slm66018 PR0("ldc_mem_dring_release() returned errno %d", 13802793Slm66018 status); 13812793Slm66018 return (status); 13822793Slm66018 } 13832336Snarayan } 13842336Snarayan 13852336Snarayan return (accepted ? 
0 : EINVAL); 13862336Snarayan } 13872336Snarayan 13884838Slm66018 /* 13894838Slm66018 * Return Values 13904838Slm66018 * 0 - operation completed successfully 13914838Slm66018 * EIO - encountered LDC / task error 13924838Slm66018 * 13934838Slm66018 * Side Effect 13944838Slm66018 * sets request->status = <disk operation status> 13954838Slm66018 */ 13964838Slm66018 static int 13974838Slm66018 vd_complete_bio(vd_task_t *task) 13982336Snarayan { 13992336Snarayan int status = 0; 14004838Slm66018 int rv = 0; 14012336Snarayan vd_t *vd = task->vd; 14022336Snarayan vd_dring_payload_t *request = task->request; 14032336Snarayan struct buf *buf = &task->buf; 14042336Snarayan 14052336Snarayan 14062336Snarayan ASSERT(vd != NULL); 14072336Snarayan ASSERT(request != NULL); 14082336Snarayan ASSERT(task->msg != NULL); 14092336Snarayan ASSERT(task->msglen >= sizeof (*task->msg)); 14103401Snarayan ASSERT(!vd->file); 14114838Slm66018 ASSERT(request->slice != VD_SLICE_NONE); 14124838Slm66018 14134838Slm66018 /* Wait for the I/O to complete [ call to ldi_strategy(9f) ] */ 14142336Snarayan request->status = biowait(buf); 14152336Snarayan 14163401Snarayan /* return back the number of bytes read/written */ 14173401Snarayan request->nbytes = buf->b_bcount - buf->b_resid; 14183401Snarayan 14192531Snarayan /* Release the buffer */ 14202793Slm66018 if (!vd->reset_state) 14212793Slm66018 status = ldc_mem_release(task->mhdl, 0, buf->b_bcount); 14222531Snarayan if (status) { 14232793Slm66018 PR0("ldc_mem_release() returned errno %d copying to " 14242793Slm66018 "client", status); 14252793Slm66018 if (status == ECONNRESET) { 14262793Slm66018 vd_mark_in_reset(vd); 14272793Slm66018 } 14284838Slm66018 rv = EIO; 14291991Sheppo } 14302336Snarayan 14312793Slm66018 /* Unmap the memory, even if in reset */ 14322531Snarayan status = ldc_mem_unmap(task->mhdl); 14332531Snarayan if (status) { 14342793Slm66018 PR0("ldc_mem_unmap() returned errno %d copying to client", 14352531Snarayan status); 14362793Slm66018 
if (status == ECONNRESET) { 14372793Slm66018 vd_mark_in_reset(vd); 14382793Slm66018 } 14394838Slm66018 rv = EIO; 14402531Snarayan } 14412531Snarayan 14422336Snarayan biofini(buf); 14432336Snarayan 14444838Slm66018 return (rv); 14454838Slm66018 } 14464838Slm66018 14474838Slm66018 /* 14484838Slm66018 * Description: 14494838Slm66018 * This function is called by the two functions called by a taskq 14504838Slm66018 * [ vd_complete_notify() and vd_serial_notify()) ] to send the 14514838Slm66018 * message to the client. 14524838Slm66018 * 14534838Slm66018 * Parameters: 14544838Slm66018 * arg - opaque pointer to structure containing task to be completed 14554838Slm66018 * 14564838Slm66018 * Return Values 14574838Slm66018 * None 14584838Slm66018 */ 14594838Slm66018 static void 14604838Slm66018 vd_notify(vd_task_t *task) 14614838Slm66018 { 14624838Slm66018 int status; 14634838Slm66018 14644838Slm66018 ASSERT(task != NULL); 14654838Slm66018 ASSERT(task->vd != NULL); 14664838Slm66018 14674838Slm66018 if (task->vd->reset_state) 14684838Slm66018 return; 14694838Slm66018 14704838Slm66018 /* 14714838Slm66018 * Send the "ack" or "nack" back to the client; if sending the message 14724838Slm66018 * via LDC fails, arrange to reset both the connection state and LDC 14734838Slm66018 * itself 14744838Slm66018 */ 14754838Slm66018 PR2("Sending %s", 14764838Slm66018 (task->msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? 
"ACK" : "NACK");

	/*
	 * Send the response; a connection reset only marks the vdisk as being
	 * in reset, any other failure requests a full reset.
	 */
	status = send_msg(task->vd->ldc_handle, task->msg, task->msglen);
	switch (status) {
	case 0:
		break;
	case ECONNRESET:
		vd_mark_in_reset(task->vd);
		break;
	default:
		PR0("initiating full reset");
		vd_need_reset(task->vd, B_TRUE);
		break;
	}

	DTRACE_PROBE1(task__end, vd_task_t *, task);
}

/*
 * Description:
 *	Mark the Dring entry as Done and (if necessary) send an ACK/NACK to
 *	the vDisk client
 *
 *	The dring element is only updated when the vdisk is not in reset and
 *	the transfer mode is VIO_DRING_MODE.  Non-final tasks of a descriptor
 *	range return without calling vd_notify().
 *
 * Parameters:
 *	task 		- structure containing the request sent from client
 *
 * Return Values
 *	None
 */
static void
vd_complete_notify(vd_task_t *task)
{
	int			status		= 0;
	vd_t			*vd		= task->vd;
	vd_dring_payload_t	*request	= task->request;

	/* Update the dring element for a dring client */
	if (!vd->reset_state && (vd->xfer_mode == VIO_DRING_MODE)) {
		status = vd_mark_elem_done(vd, task->index,
		    request->status, request->nbytes);
		if (status == ECONNRESET)
			vd_mark_in_reset(vd);
	}

	/*
	 * If a transport error occurred while marking the element done or
	 * previously while executing the task, arrange to "nack" the message
	 * when the final task in the descriptor element range completes
	 */
	if ((status != 0) || (task->status != 0))
		task->msg->tag.vio_subtype = VIO_SUBTYPE_NACK;

	/*
	 * Only the final task for a range of elements will respond to and
	 * free the message
	 */
	if (task->type == VD_NONFINAL_RANGE_TASK) {
		return;
	}

	vd_notify(task);
}

/*
 * Description:
 *	This is the basic completion function called to handle inband data
 *	requests and handshake messages. All it needs to do is trigger a
 *	message to the client that the request is completed.
 *
 * Parameters:
 *	arg 	- opaque pointer to structure containing task to be completed
 *
 * Return Values
 *	None
 */
static void
vd_serial_notify(void *arg)
{
	vd_task_t	*task = (vd_task_t *)arg;

	ASSERT(task != NULL);
	vd_notify(task);
}

/*
 * Copy-in converter: translate the client's vd_geom_t (vd_buf) into the
 * struct dk_geom ioctl argument using VD_GEOM2DK_GEOM.
 */
static void
vd_geom2dk_geom(void *vd_buf, void *ioctl_arg)
{
	VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg);
}

/*
 * Copy-in converter: translate the client's vd_vtoc_t (vd_buf) into the
 * struct vtoc ioctl argument using VD_VTOC2VTOC.
 */
static void
vd_vtoc2vtoc(void *vd_buf, void *ioctl_arg)
{
	VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg);
}

/*
 * Copy-out converter: translate the struct dk_geom ioctl argument into the
 * vd_geom_t reply buffer (vd_buf) using DK_GEOM2VD_GEOM.
 */
static void
dk_geom2vd_geom(void *ioctl_arg, void *vd_buf)
{
	DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf);
}

/*
 * Copy-out converter: translate the struct vtoc ioctl argument into the
 * vd_vtoc_t reply buffer (vd_buf) using VTOC2VD_VTOC.
 */
static void
vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf)
{
	VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf);
}

/*
 * Copy-in converter for VD_OP_GET_EFI: set up the dk_efi_t ioctl argument
 * from the client's vd_efi_t and allocate a zeroed data buffer of
 * vd_efi->length bytes.  The buffer is released by vd_get_efi_out().
 *
 * NOTE(review): vd_efi->length is taken from the client buffer and is not
 * validated here - confirm that callers bound it.
 */
static void
vd_get_efi_in(void *vd_buf, void *ioctl_arg)
{
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	dk_efi->dki_lba = vd_efi->lba;
	dk_efi->dki_length = vd_efi->length;
	dk_efi->dki_data = kmem_zalloc(vd_efi->length, KM_SLEEP);
}

/*
 * Copy-out converter for VD_OP_GET_EFI: convert the dk_efi_t result into the
 * client's vd_efi_t with DK_EFI2VD_EFI, then free the data buffer allocated
 * by vd_get_efi_in().
 */
static void
vd_get_efi_out(void *ioctl_arg, void *vd_buf)
{
	int len;
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	/* remember the allocation size before the conversion touches vd_efi */
	len = vd_efi->length;
	DK_EFI2VD_EFI(dk_efi, vd_efi);
	kmem_free(dk_efi->dki_data, len);
}

/*
 * Copy-in converter for VD_OP_SET_EFI: allocate a data buffer of
 * vd_efi->length bytes and fill the dk_efi_t ioctl argument from the
 * client's vd_efi_t with VD_EFI2DK_EFI.  The buffer is released by
 * vd_set_efi_out().
 */
static void
vd_set_efi_in(void *vd_buf, void *ioctl_arg)
{
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	dk_efi->dki_data = kmem_alloc(vd_efi->length, KM_SLEEP);
	VD_EFI2DK_EFI(vd_efi, dk_efi);
}

/*
 * Copy-out converter for VD_OP_SET_EFI: nothing is returned to the client;
 * this only frees the data buffer allocated by vd_set_efi_in().
 */
static void
vd_set_efi_out(void *ioctl_arg, void *vd_buf)
{
	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;

	kmem_free(dk_efi->dki_data, vd_efi->length);
}

/*
 * Read the label of the device behind vd->ldi_handle[0] into "vtoc".
 *
 * DKIOCGVTOC is tried first; if it fails with ENOTSUP the EFI label is read
 * and converted to a vtoc with vd_efi_to_vtoc().
 *
 * Return Values
 *	VD_DISK_LABEL_VTOC	- DKIOCGVTOC succeeded
 *	VD_DISK_LABEL_EFI	- the vtoc was built from an EFI label
 *	VD_DISK_LABEL_UNK	- neither label could be read
 */
static vd_disk_label_t
vd_read_vtoc(vd_t *vd, struct vtoc *vtoc)
{
	int status, rval;
	struct dk_gpt *efi;
	size_t efi_len;

	ASSERT(vd->ldi_handle[0] != NULL);

	status = ldi_ioctl(vd->ldi_handle[0], DKIOCGVTOC, (intptr_t)vtoc,
	    (vd->open_flags | FKIOCTL), kcred, &rval);

	if (status == 0) {
		return (VD_DISK_LABEL_VTOC);
	} else if (status != ENOTSUP) {
		PR0("ldi_ioctl(DKIOCGVTOC) returned error %d", status);
		return (VD_DISK_LABEL_UNK);
	}

	/* DKIOCGVTOC is not supported: fall back to the EFI label */
	status = vds_efi_alloc_and_read(vd->ldi_handle[0], &efi, &efi_len);

	if (status) {
		PR0("vds_efi_alloc_and_read returned error %d", status);
		return (VD_DISK_LABEL_UNK);
	}

	vd_efi_to_vtoc(efi, vtoc);
	vd_efi_free(efi, efi_len);

	return (VD_DISK_LABEL_EFI);
}

/*
 * Compute the checksum of a disk label: the XOR of every 16-bit word of the
 * struct dk_label except the last one.
 */
static ushort_t
vd_lbl2cksum(struct dk_label *label)
{
	int	count;
	ushort_t sum, *sp;

	/* number of 16-bit words, excluding the final word */
	count =	(sizeof (struct dk_label)) / (sizeof (short)) - 1;
	sp = (ushort_t *)label;
	sum = 0;
	while (count--) {
		sum ^= *sp++;
	}

	return (sum);
}

/*
 * Handle ioctls to a disk slice.
16764838Slm66018 * 16774838Slm66018 * Return Values 16784838Slm66018 * 0 - Indicates that there are no errors in disk operations 16794838Slm66018 * ENOTSUP - Unknown disk label type or unsupported DKIO ioctl 16804838Slm66018 * EINVAL - Not enough room to copy the EFI label 16814838Slm66018 * 16824696Sachartre */ 16831991Sheppo static int 16842032Slm66018 vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg) 16851991Sheppo { 16862531Snarayan dk_efi_t *dk_ioc; 16872531Snarayan 16882531Snarayan switch (vd->vdisk_label) { 16892531Snarayan 16904696Sachartre /* ioctls for a slice from a disk with a VTOC label */ 16912531Snarayan case VD_DISK_LABEL_VTOC: 16922531Snarayan 16932531Snarayan switch (cmd) { 16942531Snarayan case DKIOCGGEOM: 16952531Snarayan ASSERT(ioctl_arg != NULL); 16962531Snarayan bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom)); 16972531Snarayan return (0); 16982531Snarayan case DKIOCGVTOC: 16992531Snarayan ASSERT(ioctl_arg != NULL); 17002531Snarayan bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc)); 17012531Snarayan return (0); 17022531Snarayan default: 17032531Snarayan return (ENOTSUP); 17042531Snarayan } 17052531Snarayan 17064696Sachartre /* ioctls for a slice from a disk with an EFI label */ 17072531Snarayan case VD_DISK_LABEL_EFI: 17082531Snarayan 17092531Snarayan switch (cmd) { 17102531Snarayan case DKIOCGETEFI: 17112531Snarayan ASSERT(ioctl_arg != NULL); 17122531Snarayan dk_ioc = (dk_efi_t *)ioctl_arg; 17132531Snarayan if (dk_ioc->dki_length < vd->dk_efi.dki_length) 17142531Snarayan return (EINVAL); 17152531Snarayan bcopy(vd->dk_efi.dki_data, dk_ioc->dki_data, 17162531Snarayan vd->dk_efi.dki_length); 17172531Snarayan return (0); 17182531Snarayan default: 17192531Snarayan return (ENOTSUP); 17202531Snarayan } 17212531Snarayan 17221991Sheppo default: 17234838Slm66018 /* Unknown disk label type */ 17241991Sheppo return (ENOTSUP); 17251991Sheppo } 17261991Sheppo } 17271991Sheppo 17284696Sachartre /* 17294963Sachartre * Function: 17304963Sachartre * 
vd_file_validate_geometry 17314963Sachartre * 17324963Sachartre * Description: 17334963Sachartre * Read the label and validate the geometry of a disk image. The driver 17344963Sachartre * label, vtoc and geometry information are updated according to the 17354963Sachartre * label read from the disk image. 17364963Sachartre * 17374963Sachartre * If no valid label is found, the label is set to unknown and the 17384963Sachartre * function returns EINVAL, but a default vtoc and geometry are provided 17394963Sachartre * to the driver. 17404963Sachartre * 17414963Sachartre * Parameters: 17424963Sachartre * vd - disk on which the operation is performed. 17434963Sachartre * 17444963Sachartre * Return Code: 17454963Sachartre * 0 - success. 17464963Sachartre * EIO - error reading the label from the disk image. 17474963Sachartre * EINVAL - unknown disk label. 17484963Sachartre */ 17494963Sachartre static int 17504963Sachartre vd_file_validate_geometry(vd_t *vd) 17514963Sachartre { 17524963Sachartre struct dk_label label; 17534963Sachartre struct dk_geom *geom = &vd->dk_geom; 17544963Sachartre struct vtoc *vtoc = &vd->vtoc; 17554963Sachartre int i; 17564963Sachartre int status = 0; 17574963Sachartre 17584963Sachartre ASSERT(vd->file); 17594963Sachartre 17605081Sachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { 17615081Sachartre /* 17625081Sachartre * For single slice disk we always fake the geometry, and we 17635081Sachartre * only need to do it once because the geometry will never 17645081Sachartre * change. 
17655081Sachartre */ 17665081Sachartre if (vd->vdisk_label == VD_DISK_LABEL_VTOC) 17675081Sachartre /* geometry was already validated */ 17685081Sachartre return (0); 17695081Sachartre 17705081Sachartre ASSERT(vd->vdisk_label == VD_DISK_LABEL_UNK); 17714963Sachartre vd_file_build_default_label(vd, &label); 17725081Sachartre vd->vdisk_label = VD_DISK_LABEL_VTOC; 17734963Sachartre } else { 17745081Sachartre if (VD_FILE_LABEL_READ(vd, &label) < 0) 17755081Sachartre return (EIO); 17765081Sachartre 17775081Sachartre if (label.dkl_magic != DKL_MAGIC || 17785081Sachartre label.dkl_cksum != vd_lbl2cksum(&label) || 17795081Sachartre label.dkl_vtoc.v_sanity != VTOC_SANE || 17805081Sachartre label.dkl_vtoc.v_nparts != V_NUMPAR) { 17815081Sachartre vd->vdisk_label = VD_DISK_LABEL_UNK; 17825081Sachartre vd_file_build_default_label(vd, &label); 17835081Sachartre status = EINVAL; 17845081Sachartre } else { 17855081Sachartre vd->vdisk_label = VD_DISK_LABEL_VTOC; 17865081Sachartre } 17874963Sachartre } 17884963Sachartre 17894963Sachartre /* Update the driver geometry */ 17904963Sachartre bzero(geom, sizeof (struct dk_geom)); 17914963Sachartre 17924963Sachartre geom->dkg_ncyl = label.dkl_ncyl; 17934963Sachartre geom->dkg_acyl = label.dkl_acyl; 17944963Sachartre geom->dkg_nhead = label.dkl_nhead; 17954963Sachartre geom->dkg_nsect = label.dkl_nsect; 17964963Sachartre geom->dkg_intrlv = label.dkl_intrlv; 17974963Sachartre geom->dkg_apc = label.dkl_apc; 17984963Sachartre geom->dkg_rpm = label.dkl_rpm; 17994963Sachartre geom->dkg_pcyl = label.dkl_pcyl; 18004963Sachartre geom->dkg_write_reinstruct = label.dkl_write_reinstruct; 18014963Sachartre geom->dkg_read_reinstruct = label.dkl_read_reinstruct; 18024963Sachartre 18034963Sachartre /* Update the driver vtoc */ 18044963Sachartre bzero(vtoc, sizeof (struct vtoc)); 18054963Sachartre 18064963Sachartre vtoc->v_sanity = label.dkl_vtoc.v_sanity; 18074963Sachartre vtoc->v_version = label.dkl_vtoc.v_version; 18084963Sachartre vtoc->v_sectorsz = 
DEV_BSIZE; 18094963Sachartre vtoc->v_nparts = label.dkl_vtoc.v_nparts; 18104963Sachartre 18114963Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 18124963Sachartre vtoc->v_part[i].p_tag = 18134963Sachartre label.dkl_vtoc.v_part[i].p_tag; 18144963Sachartre vtoc->v_part[i].p_flag = 18154963Sachartre label.dkl_vtoc.v_part[i].p_flag; 18164963Sachartre vtoc->v_part[i].p_start = 18174963Sachartre label.dkl_map[i].dkl_cylno * 18184963Sachartre (label.dkl_nhead * label.dkl_nsect); 18194963Sachartre vtoc->v_part[i].p_size = label.dkl_map[i].dkl_nblk; 18204963Sachartre vtoc->timestamp[i] = 18214963Sachartre label.dkl_vtoc.v_timestamp[i]; 18224963Sachartre } 18234963Sachartre /* 18244963Sachartre * The bootinfo array can not be copied with bcopy() because 18254963Sachartre * elements are of type long in vtoc (so 64-bit) and of type 18264963Sachartre * int in dk_vtoc (so 32-bit). 18274963Sachartre */ 18284963Sachartre vtoc->v_bootinfo[0] = label.dkl_vtoc.v_bootinfo[0]; 18294963Sachartre vtoc->v_bootinfo[1] = label.dkl_vtoc.v_bootinfo[1]; 18304963Sachartre vtoc->v_bootinfo[2] = label.dkl_vtoc.v_bootinfo[2]; 18314963Sachartre bcopy(label.dkl_asciilabel, vtoc->v_asciilabel, 18324963Sachartre LEN_DKL_ASCII); 18334963Sachartre bcopy(label.dkl_vtoc.v_volume, vtoc->v_volume, 18344963Sachartre LEN_DKL_VVOL); 18354963Sachartre 18364963Sachartre return (status); 18374963Sachartre } 18384963Sachartre 18394963Sachartre /* 18404838Slm66018 * Handle ioctls to a disk image (file-based). 
18414838Slm66018 * 18424838Slm66018 * Return Values 18434838Slm66018 * 0 - Indicates that there are no errors 18444838Slm66018 * != 0 - Disk operation returned an error 18454696Sachartre */ 18464696Sachartre static int 18474696Sachartre vd_do_file_ioctl(vd_t *vd, int cmd, void *ioctl_arg) 18484696Sachartre { 18494696Sachartre struct dk_label label; 18504696Sachartre struct dk_geom *geom; 18514696Sachartre struct vtoc *vtoc; 18524696Sachartre int i, rc; 18534696Sachartre 18544696Sachartre ASSERT(vd->file); 18554696Sachartre 18564696Sachartre switch (cmd) { 18574696Sachartre 18584696Sachartre case DKIOCGGEOM: 18594696Sachartre ASSERT(ioctl_arg != NULL); 18604696Sachartre geom = (struct dk_geom *)ioctl_arg; 18614696Sachartre 18624963Sachartre rc = vd_file_validate_geometry(vd); 18635081Sachartre if (rc != 0 && rc != EINVAL) { 18645081Sachartre ASSERT(vd->vdisk_type != VD_DISK_TYPE_SLICE); 18654963Sachartre return (rc); 18665081Sachartre } 18674963Sachartre 18684963Sachartre bcopy(&vd->dk_geom, geom, sizeof (struct dk_geom)); 18694696Sachartre return (0); 18704696Sachartre 18714696Sachartre case DKIOCGVTOC: 18724696Sachartre ASSERT(ioctl_arg != NULL); 18734696Sachartre vtoc = (struct vtoc *)ioctl_arg; 18744696Sachartre 18754963Sachartre rc = vd_file_validate_geometry(vd); 18765081Sachartre if (rc != 0 && rc != EINVAL) { 18775081Sachartre ASSERT(vd->vdisk_type != VD_DISK_TYPE_SLICE); 18784963Sachartre return (rc); 18795081Sachartre } 18804963Sachartre 18814963Sachartre bcopy(&vd->vtoc, vtoc, sizeof (struct vtoc)); 18824696Sachartre return (0); 18834696Sachartre 18844696Sachartre case DKIOCSGEOM: 18854696Sachartre ASSERT(ioctl_arg != NULL); 18864696Sachartre geom = (struct dk_geom *)ioctl_arg; 18874696Sachartre 18885081Sachartre /* geometry can only be changed for full disk */ 18895081Sachartre if (vd->vdisk_type != VD_DISK_TYPE_DISK) 18905081Sachartre return (ENOTSUP); 18915081Sachartre 18924696Sachartre if (geom->dkg_nhead == 0 || geom->dkg_nsect == 0) 
18934696Sachartre return (EINVAL); 18944696Sachartre 18954696Sachartre /* 18964696Sachartre * The current device geometry is not updated, just the driver 18974696Sachartre * "notion" of it. The device geometry will be effectively 18984696Sachartre * updated when a label is written to the device during a next 18994696Sachartre * DKIOCSVTOC. 19004696Sachartre */ 19014696Sachartre bcopy(ioctl_arg, &vd->dk_geom, sizeof (vd->dk_geom)); 19024696Sachartre return (0); 19034696Sachartre 19044696Sachartre case DKIOCSVTOC: 19054696Sachartre ASSERT(ioctl_arg != NULL); 19064696Sachartre ASSERT(vd->dk_geom.dkg_nhead != 0 && 19074696Sachartre vd->dk_geom.dkg_nsect != 0); 19084696Sachartre vtoc = (struct vtoc *)ioctl_arg; 19094696Sachartre 19105081Sachartre /* vtoc can only be changed for full disk */ 19115081Sachartre if (vd->vdisk_type != VD_DISK_TYPE_DISK) 19125081Sachartre return (ENOTSUP); 19135081Sachartre 19144696Sachartre if (vtoc->v_sanity != VTOC_SANE || 19154696Sachartre vtoc->v_sectorsz != DEV_BSIZE || 19164696Sachartre vtoc->v_nparts != V_NUMPAR) 19174696Sachartre return (EINVAL); 19184696Sachartre 19194696Sachartre bzero(&label, sizeof (label)); 19204696Sachartre label.dkl_ncyl = vd->dk_geom.dkg_ncyl; 19214696Sachartre label.dkl_acyl = vd->dk_geom.dkg_acyl; 19224696Sachartre label.dkl_pcyl = vd->dk_geom.dkg_pcyl; 19234696Sachartre label.dkl_nhead = vd->dk_geom.dkg_nhead; 19244696Sachartre label.dkl_nsect = vd->dk_geom.dkg_nsect; 19254696Sachartre label.dkl_intrlv = vd->dk_geom.dkg_intrlv; 19264696Sachartre label.dkl_apc = vd->dk_geom.dkg_apc; 19274696Sachartre label.dkl_rpm = vd->dk_geom.dkg_rpm; 19284696Sachartre label.dkl_write_reinstruct = vd->dk_geom.dkg_write_reinstruct; 19294696Sachartre label.dkl_read_reinstruct = vd->dk_geom.dkg_read_reinstruct; 19304696Sachartre 19314696Sachartre label.dkl_vtoc.v_nparts = V_NUMPAR; 19324696Sachartre label.dkl_vtoc.v_sanity = VTOC_SANE; 19334696Sachartre label.dkl_vtoc.v_version = vtoc->v_version; 19344696Sachartre for (i = 
0; i < V_NUMPAR; i++) { 19354696Sachartre label.dkl_vtoc.v_timestamp[i] = 19364696Sachartre vtoc->timestamp[i]; 19374696Sachartre label.dkl_vtoc.v_part[i].p_tag = 19384696Sachartre vtoc->v_part[i].p_tag; 19394696Sachartre label.dkl_vtoc.v_part[i].p_flag = 19404696Sachartre vtoc->v_part[i].p_flag; 19414696Sachartre label.dkl_map[i].dkl_cylno = 19424696Sachartre vtoc->v_part[i].p_start / 19434696Sachartre (label.dkl_nhead * label.dkl_nsect); 19444696Sachartre label.dkl_map[i].dkl_nblk = 19454696Sachartre vtoc->v_part[i].p_size; 19464696Sachartre } 19474696Sachartre /* 19484696Sachartre * The bootinfo array can not be copied with bcopy() because 19494696Sachartre * elements are of type long in vtoc (so 64-bit) and of type 19504696Sachartre * int in dk_vtoc (so 32-bit). 19514696Sachartre */ 19524696Sachartre label.dkl_vtoc.v_bootinfo[0] = vtoc->v_bootinfo[0]; 19534696Sachartre label.dkl_vtoc.v_bootinfo[1] = vtoc->v_bootinfo[1]; 19544696Sachartre label.dkl_vtoc.v_bootinfo[2] = vtoc->v_bootinfo[2]; 19554696Sachartre bcopy(vtoc->v_asciilabel, label.dkl_asciilabel, 19564696Sachartre LEN_DKL_ASCII); 19574696Sachartre bcopy(vtoc->v_volume, label.dkl_vtoc.v_volume, 19584696Sachartre LEN_DKL_VVOL); 19594696Sachartre 19604696Sachartre /* re-compute checksum */ 19614696Sachartre label.dkl_magic = DKL_MAGIC; 19624696Sachartre label.dkl_cksum = vd_lbl2cksum(&label); 19634696Sachartre 19644696Sachartre /* write label to the disk image */ 19654696Sachartre if ((rc = vd_file_set_vtoc(vd, &label)) != 0) 19664696Sachartre return (rc); 19674696Sachartre 19684963Sachartre /* check the geometry and update the driver info */ 19694963Sachartre if ((rc = vd_file_validate_geometry(vd)) != 0) 19704963Sachartre return (rc); 19714696Sachartre 19724696Sachartre /* 19734696Sachartre * The disk geometry may have changed, so we need to write 19744696Sachartre * the devid (if there is one) so that it is stored at the 19754696Sachartre * right location. 
19764696Sachartre */ 19774696Sachartre if (vd->file_devid != NULL && 19784696Sachartre vd_file_write_devid(vd, vd->file_devid) != 0) { 19794696Sachartre PR0("Fail to write devid"); 19804696Sachartre } 19814696Sachartre 19824696Sachartre return (0); 19834696Sachartre 1984*5188Szk194757 case DKIOCFLUSHWRITECACHE: 1985*5188Szk194757 return (VOP_FSYNC(vd->file_vnode, FSYNC, kcred)); 1986*5188Szk194757 19874696Sachartre default: 19884696Sachartre return (ENOTSUP); 19894696Sachartre } 19904696Sachartre } 19914696Sachartre 19924838Slm66018 /* 19934838Slm66018 * Description: 19944838Slm66018 * This is the function that processes the ioctl requests (farming it 19954838Slm66018 * out to functions that handle slices, files or whole disks) 19964838Slm66018 * 19974838Slm66018 * Return Values 19984838Slm66018 * 0 - ioctl operation completed successfully 19994838Slm66018 * != 0 - The LDC error value encountered 20004838Slm66018 * (propagated back up the call stack as a task error) 20014838Slm66018 * 20024838Slm66018 * Side Effect 20034838Slm66018 * sets request->status to the return value of the ioctl function. 
20044838Slm66018 */ 20051991Sheppo static int 20062032Slm66018 vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) 20071991Sheppo { 20084838Slm66018 int rval = 0, status = 0; 20091991Sheppo size_t nbytes = request->nbytes; /* modifiable copy */ 20101991Sheppo 20111991Sheppo 20121991Sheppo ASSERT(request->slice < vd->nslices); 20131991Sheppo PR0("Performing %s", ioctl->operation_name); 20141991Sheppo 20152032Slm66018 /* Get data from client and convert, if necessary */ 20162032Slm66018 if (ioctl->copyin != NULL) { 20171991Sheppo ASSERT(nbytes != 0 && buf != NULL); 20181991Sheppo PR1("Getting \"arg\" data from client"); 20191991Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 20204696Sachartre request->cookie, request->ncookies, 20214696Sachartre LDC_COPY_IN)) != 0) { 20222793Slm66018 PR0("ldc_mem_copy() returned errno %d " 20231991Sheppo "copying from client", status); 20241991Sheppo return (status); 20251991Sheppo } 20262032Slm66018 20272032Slm66018 /* Convert client's data, if necessary */ 20282032Slm66018 if (ioctl->copyin == VD_IDENTITY) /* use client buffer */ 20292032Slm66018 ioctl->arg = buf; 20302032Slm66018 else /* convert client vdisk operation data to ioctl data */ 20312032Slm66018 (ioctl->copyin)(buf, (void *)ioctl->arg); 20321991Sheppo } 20331991Sheppo 20341991Sheppo /* 20351991Sheppo * Handle single-slice block devices internally; otherwise, have the 20361991Sheppo * real driver perform the ioctl() 20371991Sheppo */ 20384696Sachartre if (vd->file) { 20394838Slm66018 request->status = 20404838Slm66018 vd_do_file_ioctl(vd, ioctl->cmd, (void *)ioctl->arg); 20414838Slm66018 20424696Sachartre } else if (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo) { 20434838Slm66018 request->status = 20444838Slm66018 vd_do_slice_ioctl(vd, ioctl->cmd, (void *)ioctl->arg); 20454838Slm66018 20464838Slm66018 } else { 20474838Slm66018 request->status = ldi_ioctl(vd->ldi_handle[request->slice], 20485081Sachartre ioctl->cmd, 
(intptr_t)ioctl->arg, vd->open_flags | FKIOCTL, 20494838Slm66018 kcred, &rval); 20504838Slm66018 20514838Slm66018 #ifdef DEBUG 20524838Slm66018 if (rval != 0) { 20534838Slm66018 PR0("%s set rval = %d, which is not being returned to" 20544838Slm66018 " client", ioctl->cmd_name, rval); 20554838Slm66018 } 20564838Slm66018 #endif /* DEBUG */ 20571991Sheppo } 20584838Slm66018 20594838Slm66018 if (request->status != 0) { 20604838Slm66018 PR0("ioctl(%s) = errno %d", ioctl->cmd_name, request->status); 20614838Slm66018 return (0); 20621991Sheppo } 20631991Sheppo 20642032Slm66018 /* Convert data and send to client, if necessary */ 20652032Slm66018 if (ioctl->copyout != NULL) { 20661991Sheppo ASSERT(nbytes != 0 && buf != NULL); 20671991Sheppo PR1("Sending \"arg\" data to client"); 20682032Slm66018 20692032Slm66018 /* Convert ioctl data to vdisk operation data, if necessary */ 20702032Slm66018 if (ioctl->copyout != VD_IDENTITY) 20712032Slm66018 (ioctl->copyout)((void *)ioctl->arg, buf); 20722032Slm66018 20731991Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 20744696Sachartre request->cookie, request->ncookies, 20754696Sachartre LDC_COPY_OUT)) != 0) { 20762793Slm66018 PR0("ldc_mem_copy() returned errno %d " 20771991Sheppo "copying to client", status); 20781991Sheppo return (status); 20791991Sheppo } 20801991Sheppo } 20811991Sheppo 20821991Sheppo return (status); 20831991Sheppo } 20841991Sheppo 20851991Sheppo #define RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t)) 20864838Slm66018 20874838Slm66018 /* 20884838Slm66018 * Description: 20894838Slm66018 * This generic function is called by the task queue to complete 20904838Slm66018 * the processing of the tasks. The specific completion function 20914838Slm66018 * is passed in as a field in the task pointer. 
20924838Slm66018 * 20934838Slm66018 * Parameters: 20944838Slm66018 * arg - opaque pointer to structure containing task to be completed 20954838Slm66018 * 20964838Slm66018 * Return Values 20974838Slm66018 * None 20984838Slm66018 */ 20994838Slm66018 static void 21004838Slm66018 vd_complete(void *arg) 21014838Slm66018 { 21024838Slm66018 vd_task_t *task = (vd_task_t *)arg; 21034838Slm66018 21044838Slm66018 ASSERT(task != NULL); 21054838Slm66018 ASSERT(task->status == EINPROGRESS); 21064838Slm66018 ASSERT(task->completef != NULL); 21074838Slm66018 21084838Slm66018 task->status = task->completef(task); 21094838Slm66018 if (task->status) 21104838Slm66018 PR0("%s: Error %d completing task", __func__, task->status); 21114838Slm66018 21124838Slm66018 /* Now notify the vDisk client */ 21134838Slm66018 vd_complete_notify(task); 21144838Slm66018 } 21154838Slm66018 21161991Sheppo static int 21172336Snarayan vd_ioctl(vd_task_t *task) 21181991Sheppo { 21194696Sachartre int i, status; 21202336Snarayan void *buf = NULL; 21212336Snarayan struct dk_geom dk_geom = {0}; 21222336Snarayan struct vtoc vtoc = {0}; 21232531Snarayan struct dk_efi dk_efi = {0}; 21242336Snarayan vd_t *vd = task->vd; 21252336Snarayan vd_dring_payload_t *request = task->request; 21262336Snarayan vd_ioctl_t ioctl[] = { 21271991Sheppo /* Command (no-copy) operations */ 21282032Slm66018 {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), 0, 21292032Slm66018 DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE), 21305081Sachartre NULL, NULL, NULL, B_TRUE}, 21311991Sheppo 21321991Sheppo /* "Get" (copy-out) operations */ 21332032Slm66018 {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int), 21342032Slm66018 DKIOCGETWCE, STRINGIZE(DKIOCGETWCE), 21355081Sachartre NULL, VD_IDENTITY, VD_IDENTITY, B_FALSE}, 21362032Slm66018 {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), 21372032Slm66018 RNDSIZE(vd_geom_t), 21382032Slm66018 DKIOCGGEOM, STRINGIZE(DKIOCGGEOM), 21395081Sachartre &dk_geom, NULL, dk_geom2vd_geom, B_FALSE}, 
21402032Slm66018 {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t), 21412032Slm66018 DKIOCGVTOC, STRINGIZE(DKIOCGVTOC), 21425081Sachartre &vtoc, NULL, vtoc2vd_vtoc, B_FALSE}, 21432531Snarayan {VD_OP_GET_EFI, STRINGIZE(VD_OP_GET_EFI), RNDSIZE(vd_efi_t), 21442531Snarayan DKIOCGETEFI, STRINGIZE(DKIOCGETEFI), 21455081Sachartre &dk_efi, vd_get_efi_in, vd_get_efi_out, B_FALSE}, 21461991Sheppo 21471991Sheppo /* "Set" (copy-in) operations */ 21482032Slm66018 {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int), 21492032Slm66018 DKIOCSETWCE, STRINGIZE(DKIOCSETWCE), 21505081Sachartre NULL, VD_IDENTITY, VD_IDENTITY, B_TRUE}, 21512032Slm66018 {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), 21522032Slm66018 RNDSIZE(vd_geom_t), 21532032Slm66018 DKIOCSGEOM, STRINGIZE(DKIOCSGEOM), 21545081Sachartre &dk_geom, vd_geom2dk_geom, NULL, B_TRUE}, 21552032Slm66018 {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t), 21562032Slm66018 DKIOCSVTOC, STRINGIZE(DKIOCSVTOC), 21575081Sachartre &vtoc, vd_vtoc2vtoc, NULL, B_TRUE}, 21582531Snarayan {VD_OP_SET_EFI, STRINGIZE(VD_OP_SET_EFI), RNDSIZE(vd_efi_t), 21592531Snarayan DKIOCSETEFI, STRINGIZE(DKIOCSETEFI), 21605081Sachartre &dk_efi, vd_set_efi_in, vd_set_efi_out, B_TRUE}, 21611991Sheppo }; 21621991Sheppo size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0])); 21631991Sheppo 21641991Sheppo 21652336Snarayan ASSERT(vd != NULL); 21662336Snarayan ASSERT(request != NULL); 21671991Sheppo ASSERT(request->slice < vd->nslices); 21681991Sheppo 21691991Sheppo /* 21701991Sheppo * Determine ioctl corresponding to caller's "operation" and 21711991Sheppo * validate caller's "nbytes" 21721991Sheppo */ 21731991Sheppo for (i = 0; i < nioctls; i++) { 21741991Sheppo if (request->operation == ioctl[i].operation) { 21752032Slm66018 /* LDC memory operations require 8-byte multiples */ 21762032Slm66018 ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0); 21772032Slm66018 21782531Snarayan if (request->operation == VD_OP_GET_EFI || 
21792531Snarayan request->operation == VD_OP_SET_EFI) { 21802531Snarayan if (request->nbytes >= ioctl[i].nbytes) 21812531Snarayan break; 21822793Slm66018 PR0("%s: Expected at least nbytes = %lu, " 21832531Snarayan "got %lu", ioctl[i].operation_name, 21842531Snarayan ioctl[i].nbytes, request->nbytes); 21852531Snarayan return (EINVAL); 21862531Snarayan } 21872531Snarayan 21882032Slm66018 if (request->nbytes != ioctl[i].nbytes) { 21892793Slm66018 PR0("%s: Expected nbytes = %lu, got %lu", 21902032Slm66018 ioctl[i].operation_name, ioctl[i].nbytes, 21912032Slm66018 request->nbytes); 21921991Sheppo return (EINVAL); 21931991Sheppo } 21941991Sheppo 21951991Sheppo break; 21961991Sheppo } 21971991Sheppo } 21981991Sheppo ASSERT(i < nioctls); /* because "operation" already validated */ 21991991Sheppo 22005081Sachartre if (!(vd->open_flags & FWRITE) && ioctl[i].write) { 22015081Sachartre PR0("%s fails because backend is opened read-only", 22025081Sachartre ioctl[i].operation_name); 22035081Sachartre request->status = EROFS; 22045081Sachartre return (0); 22055081Sachartre } 22065081Sachartre 22071991Sheppo if (request->nbytes) 22081991Sheppo buf = kmem_zalloc(request->nbytes, KM_SLEEP); 22091991Sheppo status = vd_do_ioctl(vd, request, buf, &ioctl[i]); 22101991Sheppo if (request->nbytes) 22111991Sheppo kmem_free(buf, request->nbytes); 22124696Sachartre 22131991Sheppo return (status); 22141991Sheppo } 22151991Sheppo 22162531Snarayan static int 22172531Snarayan vd_get_devid(vd_task_t *task) 22182531Snarayan { 22192531Snarayan vd_t *vd = task->vd; 22202531Snarayan vd_dring_payload_t *request = task->request; 22212531Snarayan vd_devid_t *vd_devid; 22222531Snarayan impl_devid_t *devid; 22234696Sachartre int status, bufid_len, devid_len, len, sz; 22242793Slm66018 int bufbytes; 22252793Slm66018 22262793Slm66018 PR1("Get Device ID, nbytes=%ld", request->nbytes); 22272531Snarayan 22283401Snarayan if (vd->file) { 22294696Sachartre if (vd->file_devid == NULL) { 22304696Sachartre PR2("No 
Device ID"); 22314838Slm66018 request->status = ENOENT; 22324838Slm66018 return (0); 22334696Sachartre } else { 22344696Sachartre sz = ddi_devid_sizeof(vd->file_devid); 22354696Sachartre devid = kmem_alloc(sz, KM_SLEEP); 22364696Sachartre bcopy(vd->file_devid, devid, sz); 22374696Sachartre } 22384696Sachartre } else { 22394696Sachartre if (ddi_lyr_get_devid(vd->dev[request->slice], 22404696Sachartre (ddi_devid_t *)&devid) != DDI_SUCCESS) { 22414696Sachartre PR2("No Device ID"); 22424838Slm66018 request->status = ENOENT; 22434838Slm66018 return (0); 22444696Sachartre } 22452531Snarayan } 22462531Snarayan 22472531Snarayan bufid_len = request->nbytes - sizeof (vd_devid_t) + 1; 22482531Snarayan devid_len = DEVID_GETLEN(devid); 22492531Snarayan 22502793Slm66018 /* 22512793Slm66018 * Save the buffer size here for use in deallocation. 22522793Slm66018 * The actual number of bytes copied is returned in 22532793Slm66018 * the 'nbytes' field of the request structure. 22542793Slm66018 */ 22552793Slm66018 bufbytes = request->nbytes; 22562793Slm66018 22572793Slm66018 vd_devid = kmem_zalloc(bufbytes, KM_SLEEP); 22582531Snarayan vd_devid->length = devid_len; 22592531Snarayan vd_devid->type = DEVID_GETTYPE(devid); 22602531Snarayan 22612531Snarayan len = (devid_len > bufid_len)? 
bufid_len : devid_len; 22622531Snarayan 22632531Snarayan bcopy(devid->did_id, vd_devid->id, len); 22642531Snarayan 22654963Sachartre request->status = 0; 22664963Sachartre 22672531Snarayan /* LDC memory operations require 8-byte multiples */ 22682531Snarayan ASSERT(request->nbytes % sizeof (uint64_t) == 0); 22692531Snarayan 22702531Snarayan if ((status = ldc_mem_copy(vd->ldc_handle, (caddr_t)vd_devid, 0, 22712531Snarayan &request->nbytes, request->cookie, request->ncookies, 22722531Snarayan LDC_COPY_OUT)) != 0) { 22732793Slm66018 PR0("ldc_mem_copy() returned errno %d copying to client", 22742531Snarayan status); 22752531Snarayan } 22762793Slm66018 PR1("post mem_copy: nbytes=%ld", request->nbytes); 22772793Slm66018 22782793Slm66018 kmem_free(vd_devid, bufbytes); 22792531Snarayan ddi_devid_free((ddi_devid_t)devid); 22802531Snarayan 22812531Snarayan return (status); 22822531Snarayan } 22832531Snarayan 22841991Sheppo /* 22851991Sheppo * Define the supported operations once the functions for performing them have 22861991Sheppo * been defined 22871991Sheppo */ 22881991Sheppo static const vds_operation_t vds_operation[] = { 22892793Slm66018 #define X(_s) #_s, _s 22902793Slm66018 {X(VD_OP_BREAD), vd_start_bio, vd_complete_bio}, 22912793Slm66018 {X(VD_OP_BWRITE), vd_start_bio, vd_complete_bio}, 22922793Slm66018 {X(VD_OP_FLUSH), vd_ioctl, NULL}, 22932793Slm66018 {X(VD_OP_GET_WCE), vd_ioctl, NULL}, 22942793Slm66018 {X(VD_OP_SET_WCE), vd_ioctl, NULL}, 22952793Slm66018 {X(VD_OP_GET_VTOC), vd_ioctl, NULL}, 22962793Slm66018 {X(VD_OP_SET_VTOC), vd_ioctl, NULL}, 22972793Slm66018 {X(VD_OP_GET_DISKGEOM), vd_ioctl, NULL}, 22982793Slm66018 {X(VD_OP_SET_DISKGEOM), vd_ioctl, NULL}, 22992793Slm66018 {X(VD_OP_GET_EFI), vd_ioctl, NULL}, 23002793Slm66018 {X(VD_OP_SET_EFI), vd_ioctl, NULL}, 23012793Slm66018 {X(VD_OP_GET_DEVID), vd_get_devid, NULL}, 23022793Slm66018 #undef X 23031991Sheppo }; 23041991Sheppo 23051991Sheppo static const size_t vds_noperations = 23061991Sheppo (sizeof 
(vds_operation))/(sizeof (vds_operation[0])); 23071991Sheppo 23081991Sheppo /* 23092336Snarayan * Process a task specifying a client I/O request 23104838Slm66018 * 23114838Slm66018 * Parameters: 23124838Slm66018 * task - structure containing the request sent from client 23134838Slm66018 * 23144838Slm66018 * Return Value 23154838Slm66018 * 0 - success 23164838Slm66018 * ENOTSUP - Unknown/Unsupported VD_OP_XXX operation 23174838Slm66018 * EINVAL - Invalid disk slice 23184838Slm66018 * != 0 - some other non-zero return value from start function 23191991Sheppo */ 23201991Sheppo static int 23214838Slm66018 vd_do_process_task(vd_task_t *task) 23221991Sheppo { 23234838Slm66018 int i; 23242336Snarayan vd_t *vd = task->vd; 23252336Snarayan vd_dring_payload_t *request = task->request; 23262336Snarayan 23272336Snarayan ASSERT(vd != NULL); 23282336Snarayan ASSERT(request != NULL); 23291991Sheppo 23302336Snarayan /* Find the requested operation */ 23314838Slm66018 for (i = 0; i < vds_noperations; i++) { 23324838Slm66018 if (request->operation == vds_operation[i].operation) { 23334838Slm66018 /* all operations should have a start func */ 23344838Slm66018 ASSERT(vds_operation[i].start != NULL); 23354838Slm66018 23364838Slm66018 task->completef = vds_operation[i].complete; 23372336Snarayan break; 23384838Slm66018 } 23394838Slm66018 } 23402336Snarayan if (i == vds_noperations) { 23412793Slm66018 PR0("Unsupported operation %u", request->operation); 23422336Snarayan return (ENOTSUP); 23432336Snarayan } 23442336Snarayan 23452748Slm66018 /* Range-check slice */ 23464696Sachartre if (request->slice >= vd->nslices && 23474696Sachartre (vd->vdisk_type != VD_DISK_TYPE_DISK || 23484696Sachartre request->slice != VD_SLICE_NONE)) { 23492793Slm66018 PR0("Invalid \"slice\" %u (max %u) for virtual disk", 23502748Slm66018 request->slice, (vd->nslices - 1)); 23512748Slm66018 return (EINVAL); 23522748Slm66018 } 23532748Slm66018 23544838Slm66018 /* 23554838Slm66018 * Call the function pointer that 
starts the operation. 23564838Slm66018 */ 23574838Slm66018 return (vds_operation[i].start(task)); 23584838Slm66018 } 23594838Slm66018 23604838Slm66018 /* 23614838Slm66018 * Description: 23624838Slm66018 * This function is called by both the in-band and descriptor ring 23634838Slm66018 * message processing functions paths to actually execute the task 23644838Slm66018 * requested by the vDisk client. It in turn calls its worker 23654838Slm66018 * function, vd_do_process_task(), to carry our the request. 23664838Slm66018 * 23674838Slm66018 * Any transport errors (e.g. LDC errors, vDisk protocol errors) are 23684838Slm66018 * saved in the 'status' field of the task and are propagated back 23694838Slm66018 * up the call stack to trigger a NACK 23704838Slm66018 * 23714838Slm66018 * Any request errors (e.g. ENOTTY from an ioctl) are saved in 23724838Slm66018 * the 'status' field of the request and result in an ACK being sent 23734838Slm66018 * by the completion handler. 23744838Slm66018 * 23754838Slm66018 * Parameters: 23764838Slm66018 * task - structure containing the request sent from client 23774838Slm66018 * 23784838Slm66018 * Return Value 23794838Slm66018 * 0 - successful synchronous request. 23804838Slm66018 * != 0 - transport error (e.g. LDC errors, vDisk protocol) 23814838Slm66018 * EINPROGRESS - task will be finished in a completion handler 23824838Slm66018 */ 23834838Slm66018 static int 23844838Slm66018 vd_process_task(vd_task_t *task) 23854838Slm66018 { 23864838Slm66018 vd_t *vd = task->vd; 23874838Slm66018 int status; 23884838Slm66018 23894838Slm66018 DTRACE_PROBE1(task__start, vd_task_t *, task); 23904838Slm66018 23914838Slm66018 task->status = vd_do_process_task(task); 23924838Slm66018 23934838Slm66018 /* 23944838Slm66018 * If the task processing function returned EINPROGRESS indicating 23954838Slm66018 * that the task needs completing then schedule a taskq entry to 23964838Slm66018 * finish it now. 
23974838Slm66018 * 23984838Slm66018 * Otherwise the task processing function returned either zero 23994838Slm66018 * indicating that the task was finished in the start function (and we 24004838Slm66018 * don't need to wait in a completion function) or the start function 24014838Slm66018 * returned an error - in both cases all that needs to happen is the 24024838Slm66018 * notification to the vDisk client higher up the call stack. 24034838Slm66018 * If the task was using a Descriptor Ring, we need to mark it as done 24044838Slm66018 * at this stage. 24054838Slm66018 */ 24064838Slm66018 if (task->status == EINPROGRESS) { 24074838Slm66018 /* Queue a task to complete the operation */ 24084838Slm66018 (void) ddi_taskq_dispatch(vd->completionq, vd_complete, 24094838Slm66018 task, DDI_SLEEP); 24104838Slm66018 24114838Slm66018 } else if (!vd->reset_state && (vd->xfer_mode == VIO_DRING_MODE)) { 24124838Slm66018 /* Update the dring element if it's a dring client */ 24134838Slm66018 status = vd_mark_elem_done(vd, task->index, 24144838Slm66018 task->request->status, task->request->nbytes); 24154838Slm66018 if (status == ECONNRESET) 24164838Slm66018 vd_mark_in_reset(vd); 24171991Sheppo } 24181991Sheppo 24194838Slm66018 return (task->status); 24201991Sheppo } 24211991Sheppo 24221991Sheppo /* 24232032Slm66018 * Return true if the "type", "subtype", and "env" fields of the "tag" first 24242032Slm66018 * argument match the corresponding remaining arguments; otherwise, return false 24251991Sheppo */ 24262032Slm66018 boolean_t 24271991Sheppo vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env) 24281991Sheppo { 24291991Sheppo return ((tag->vio_msgtype == type) && 24304696Sachartre (tag->vio_subtype == subtype) && 24314696Sachartre (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; 24321991Sheppo } 24331991Sheppo 24342032Slm66018 /* 24352032Slm66018 * Check whether the major/minor version specified in "ver_msg" is supported 24362032Slm66018 * by this server. 
24372032Slm66018 */ 24382032Slm66018 static boolean_t 24392032Slm66018 vds_supported_version(vio_ver_msg_t *ver_msg) 24402032Slm66018 { 24412032Slm66018 for (int i = 0; i < vds_num_versions; i++) { 24422032Slm66018 ASSERT(vds_version[i].major > 0); 24432032Slm66018 ASSERT((i == 0) || 24442032Slm66018 (vds_version[i].major < vds_version[i-1].major)); 24452032Slm66018 24462032Slm66018 /* 24472032Slm66018 * If the major versions match, adjust the minor version, if 24482032Slm66018 * necessary, down to the highest value supported by this 24492032Slm66018 * server and return true so this message will get "ack"ed; 24502032Slm66018 * the client should also support all minor versions lower 24512032Slm66018 * than the value it sent 24522032Slm66018 */ 24532032Slm66018 if (ver_msg->ver_major == vds_version[i].major) { 24542032Slm66018 if (ver_msg->ver_minor > vds_version[i].minor) { 24552032Slm66018 PR0("Adjusting minor version from %u to %u", 24562032Slm66018 ver_msg->ver_minor, vds_version[i].minor); 24572032Slm66018 ver_msg->ver_minor = vds_version[i].minor; 24582032Slm66018 } 24592032Slm66018 return (B_TRUE); 24602032Slm66018 } 24612032Slm66018 24622032Slm66018 /* 24632032Slm66018 * If the message contains a higher major version number, set 24642032Slm66018 * the message's major/minor versions to the current values 24652032Slm66018 * and return false, so this message will get "nack"ed with 24662032Slm66018 * these values, and the client will potentially try again 24672032Slm66018 * with the same or a lower version 24682032Slm66018 */ 24692032Slm66018 if (ver_msg->ver_major > vds_version[i].major) { 24702032Slm66018 ver_msg->ver_major = vds_version[i].major; 24712032Slm66018 ver_msg->ver_minor = vds_version[i].minor; 24722032Slm66018 return (B_FALSE); 24732032Slm66018 } 24742032Slm66018 24752032Slm66018 /* 24762032Slm66018 * Otherwise, the message's major version is less than the 24772032Slm66018 * current major version, so continue the loop to the next 24782032Slm66018 * 
(lower) supported version 24792032Slm66018 */ 24802032Slm66018 } 24812032Slm66018 24822032Slm66018 /* 24832032Slm66018 * No common version was found; "ground" the version pair in the 24842032Slm66018 * message to terminate negotiation 24852032Slm66018 */ 24862032Slm66018 ver_msg->ver_major = 0; 24872032Slm66018 ver_msg->ver_minor = 0; 24882032Slm66018 return (B_FALSE); 24892032Slm66018 } 24902032Slm66018 24912032Slm66018 /* 24922032Slm66018 * Process a version message from a client. vds expects to receive version 24932032Slm66018 * messages from clients seeking service, but never issues version messages 24942032Slm66018 * itself; therefore, vds can ACK or NACK client version messages, but does 24952032Slm66018 * not expect to receive version-message ACKs or NACKs (and will treat such 24962032Slm66018 * messages as invalid). 24972032Slm66018 */ 24981991Sheppo static int 24992032Slm66018 vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 25001991Sheppo { 25011991Sheppo vio_ver_msg_t *ver_msg = (vio_ver_msg_t *)msg; 25021991Sheppo 25031991Sheppo 25041991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 25051991Sheppo 25061991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 25074696Sachartre VIO_VER_INFO)) { 25081991Sheppo return (ENOMSG); /* not a version message */ 25091991Sheppo } 25101991Sheppo 25111991Sheppo if (msglen != sizeof (*ver_msg)) { 25122793Slm66018 PR0("Expected %lu-byte version message; " 25131991Sheppo "received %lu bytes", sizeof (*ver_msg), msglen); 25141991Sheppo return (EBADMSG); 25151991Sheppo } 25161991Sheppo 25171991Sheppo if (ver_msg->dev_class != VDEV_DISK) { 25182793Slm66018 PR0("Expected device class %u (disk); received %u", 25191991Sheppo VDEV_DISK, ver_msg->dev_class); 25201991Sheppo return (EBADMSG); 25211991Sheppo } 25221991Sheppo 25232032Slm66018 /* 25242032Slm66018 * We're talking to the expected kind of client; set our device class 25252032Slm66018 * for "ack/nack" back to the client 25262032Slm66018 */ 
25272032Slm66018 ver_msg->dev_class = VDEV_DISK_SERVER; 25282032Slm66018 25292032Slm66018 /* 25302032Slm66018 * Check whether the (valid) version message specifies a version 25312032Slm66018 * supported by this server. If the version is not supported, return 25322032Slm66018 * EBADMSG so the message will get "nack"ed; vds_supported_version() 25332032Slm66018 * will have updated the message with a supported version for the 25342032Slm66018 * client to consider 25352032Slm66018 */ 25362032Slm66018 if (!vds_supported_version(ver_msg)) 25371991Sheppo return (EBADMSG); 25382032Slm66018 25392032Slm66018 25402032Slm66018 /* 25412032Slm66018 * A version has been agreed upon; use the client's SID for 25422032Slm66018 * communication on this channel now 25432032Slm66018 */ 25442032Slm66018 ASSERT(!(vd->initialized & VD_SID)); 25452032Slm66018 vd->sid = ver_msg->tag.vio_sid; 25462032Slm66018 vd->initialized |= VD_SID; 25471991Sheppo 25482032Slm66018 /* 25492032Slm66018 * When multiple versions are supported, this function should store 25502032Slm66018 * the negotiated major and minor version values in the "vd" data 25512032Slm66018 * structure to govern further communication; in particular, note that 25522032Slm66018 * the client might have specified a lower minor version for the 25532032Slm66018 * agreed major version than specifed in the vds_version[] array. The 25542032Slm66018 * following assertions should help remind future maintainers to make 25552032Slm66018 * the appropriate changes to support multiple versions. 
25562032Slm66018 */ 25572032Slm66018 ASSERT(vds_num_versions == 1); 25582032Slm66018 ASSERT(ver_msg->ver_major == vds_version[0].major); 25592032Slm66018 ASSERT(ver_msg->ver_minor == vds_version[0].minor); 25602032Slm66018 25612032Slm66018 PR0("Using major version %u, minor version %u", 25622032Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 25631991Sheppo return (0); 25641991Sheppo } 25651991Sheppo 25661991Sheppo static int 25671991Sheppo vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 25681991Sheppo { 25691991Sheppo vd_attr_msg_t *attr_msg = (vd_attr_msg_t *)msg; 25703401Snarayan int status, retry = 0; 25711991Sheppo 25721991Sheppo 25731991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 25741991Sheppo 25751991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 25764696Sachartre VIO_ATTR_INFO)) { 25772336Snarayan PR0("Message is not an attribute message"); 25782336Snarayan return (ENOMSG); 25791991Sheppo } 25801991Sheppo 25811991Sheppo if (msglen != sizeof (*attr_msg)) { 25822793Slm66018 PR0("Expected %lu-byte attribute message; " 25831991Sheppo "received %lu bytes", sizeof (*attr_msg), msglen); 25841991Sheppo return (EBADMSG); 25851991Sheppo } 25861991Sheppo 25871991Sheppo if (attr_msg->max_xfer_sz == 0) { 25882793Slm66018 PR0("Received maximum transfer size of 0 from client"); 25891991Sheppo return (EBADMSG); 25901991Sheppo } 25911991Sheppo 25921991Sheppo if ((attr_msg->xfer_mode != VIO_DESC_MODE) && 25931991Sheppo (attr_msg->xfer_mode != VIO_DRING_MODE)) { 25942793Slm66018 PR0("Client requested unsupported transfer mode"); 25951991Sheppo return (EBADMSG); 25961991Sheppo } 25971991Sheppo 25983401Snarayan /* 25993401Snarayan * check if the underlying disk is ready, if not try accessing 26003401Snarayan * the device again. 
Open the vdisk device and extract info 26013401Snarayan * about it, as this is needed to respond to the attr info msg 26023401Snarayan */ 26033401Snarayan if ((vd->initialized & VD_DISK_READY) == 0) { 26043401Snarayan PR0("Retry setting up disk (%s)", vd->device_path); 26053401Snarayan do { 26063401Snarayan status = vd_setup_vd(vd); 26073401Snarayan if (status != EAGAIN || ++retry > vds_dev_retries) 26083401Snarayan break; 26093401Snarayan 26103401Snarayan /* incremental delay */ 26113401Snarayan delay(drv_usectohz(vds_dev_delay)); 26123401Snarayan 26133401Snarayan /* if vdisk is no longer enabled - return error */ 26143401Snarayan if (!vd_enabled(vd)) 26153401Snarayan return (ENXIO); 26163401Snarayan 26173401Snarayan } while (status == EAGAIN); 26183401Snarayan 26193401Snarayan if (status) 26203401Snarayan return (ENXIO); 26213401Snarayan 26223401Snarayan vd->initialized |= VD_DISK_READY; 26233401Snarayan ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 26243401Snarayan PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u", 26253401Snarayan ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 26263401Snarayan (vd->pseudo ? "yes" : "no"), 26273401Snarayan (vd->file ? "yes" : "no"), 26283401Snarayan vd->nslices); 26293401Snarayan } 26303401Snarayan 26311991Sheppo /* Success: valid message and transfer mode */ 26321991Sheppo vd->xfer_mode = attr_msg->xfer_mode; 26332793Slm66018 26341991Sheppo if (vd->xfer_mode == VIO_DESC_MODE) { 26352793Slm66018 26361991Sheppo /* 26371991Sheppo * The vd_dring_inband_msg_t contains one cookie; need room 26381991Sheppo * for up to n-1 more cookies, where "n" is the number of full 26391991Sheppo * pages plus possibly one partial page required to cover 26401991Sheppo * "max_xfer_sz". Add room for one more cookie if 26411991Sheppo * "max_xfer_sz" isn't an integral multiple of the page size. 26421991Sheppo * Must first get the maximum transfer size in bytes. 
26431991Sheppo */ 26441991Sheppo size_t max_xfer_bytes = attr_msg->vdisk_block_size ? 26451991Sheppo attr_msg->vdisk_block_size*attr_msg->max_xfer_sz : 26461991Sheppo attr_msg->max_xfer_sz; 26471991Sheppo size_t max_inband_msglen = 26481991Sheppo sizeof (vd_dring_inband_msg_t) + 26491991Sheppo ((max_xfer_bytes/PAGESIZE + 26504696Sachartre ((max_xfer_bytes % PAGESIZE) ? 1 : 0))* 26514696Sachartre (sizeof (ldc_mem_cookie_t))); 26521991Sheppo 26531991Sheppo /* 26541991Sheppo * Set the maximum expected message length to 26551991Sheppo * accommodate in-band-descriptor messages with all 26561991Sheppo * their cookies 26571991Sheppo */ 26581991Sheppo vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen); 26592336Snarayan 26602336Snarayan /* 26612336Snarayan * Initialize the data structure for processing in-band I/O 26622336Snarayan * request descriptors 26632336Snarayan */ 26642336Snarayan vd->inband_task.vd = vd; 26652793Slm66018 vd->inband_task.msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 26662336Snarayan vd->inband_task.index = 0; 26672336Snarayan vd->inband_task.type = VD_FINAL_RANGE_TASK; /* range == 1 */ 26681991Sheppo } 26691991Sheppo 26702410Slm66018 /* Return the device's block size and max transfer size to the client */ 26712410Slm66018 attr_msg->vdisk_block_size = DEV_BSIZE; 26722410Slm66018 attr_msg->max_xfer_sz = vd->max_xfer_sz; 26732410Slm66018 26741991Sheppo attr_msg->vdisk_size = vd->vdisk_size; 26751991Sheppo attr_msg->vdisk_type = vd->vdisk_type; 26761991Sheppo attr_msg->operations = vds_operations; 26771991Sheppo PR0("%s", VD_CLIENT(vd)); 26782793Slm66018 26792793Slm66018 ASSERT(vd->dring_task == NULL); 26802793Slm66018 26811991Sheppo return (0); 26821991Sheppo } 26831991Sheppo 26841991Sheppo static int 26851991Sheppo vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 26861991Sheppo { 26871991Sheppo int status; 26881991Sheppo size_t expected; 26891991Sheppo ldc_mem_info_t dring_minfo; 26901991Sheppo vio_dring_reg_msg_t *reg_msg = 
(vio_dring_reg_msg_t *)msg; 26911991Sheppo 26921991Sheppo 26931991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 26941991Sheppo 26951991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 26964696Sachartre VIO_DRING_REG)) { 26972336Snarayan PR0("Message is not a register-dring message"); 26982336Snarayan return (ENOMSG); 26991991Sheppo } 27001991Sheppo 27011991Sheppo if (msglen < sizeof (*reg_msg)) { 27022793Slm66018 PR0("Expected at least %lu-byte register-dring message; " 27031991Sheppo "received %lu bytes", sizeof (*reg_msg), msglen); 27041991Sheppo return (EBADMSG); 27051991Sheppo } 27061991Sheppo 27071991Sheppo expected = sizeof (*reg_msg) + 27081991Sheppo (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0])); 27091991Sheppo if (msglen != expected) { 27102793Slm66018 PR0("Expected %lu-byte register-dring message; " 27111991Sheppo "received %lu bytes", expected, msglen); 27121991Sheppo return (EBADMSG); 27131991Sheppo } 27141991Sheppo 27151991Sheppo if (vd->initialized & VD_DRING) { 27162793Slm66018 PR0("A dring was previously registered; only support one"); 27171991Sheppo return (EBADMSG); 27181991Sheppo } 27191991Sheppo 27202336Snarayan if (reg_msg->num_descriptors > INT32_MAX) { 27212793Slm66018 PR0("reg_msg->num_descriptors = %u; must be <= %u (%s)", 27222336Snarayan reg_msg->ncookies, INT32_MAX, STRINGIZE(INT32_MAX)); 27232336Snarayan return (EBADMSG); 27242336Snarayan } 27252336Snarayan 27261991Sheppo if (reg_msg->ncookies != 1) { 27271991Sheppo /* 27281991Sheppo * In addition to fixing the assertion in the success case 27291991Sheppo * below, supporting drings which require more than one 27301991Sheppo * "cookie" requires increasing the value of vd->max_msglen 27311991Sheppo * somewhere in the code path prior to receiving the message 27321991Sheppo * which results in calling this function. 
Note that without 27331991Sheppo * making this change, the larger message size required to 27341991Sheppo * accommodate multiple cookies cannot be successfully 27351991Sheppo * received, so this function will not even get called. 27361991Sheppo * Gracefully accommodating more dring cookies might 27371991Sheppo * reasonably demand exchanging an additional attribute or 27381991Sheppo * making a minor protocol adjustment 27391991Sheppo */ 27402793Slm66018 PR0("reg_msg->ncookies = %u != 1", reg_msg->ncookies); 27411991Sheppo return (EBADMSG); 27421991Sheppo } 27431991Sheppo 27441991Sheppo status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie, 27451991Sheppo reg_msg->ncookies, reg_msg->num_descriptors, 27462531Snarayan reg_msg->descriptor_size, LDC_DIRECT_MAP, &vd->dring_handle); 27471991Sheppo if (status != 0) { 27482793Slm66018 PR0("ldc_mem_dring_map() returned errno %d", status); 27491991Sheppo return (status); 27501991Sheppo } 27511991Sheppo 27521991Sheppo /* 27531991Sheppo * To remove the need for this assertion, must call 27541991Sheppo * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a 27551991Sheppo * successful call to ldc_mem_dring_map() 27561991Sheppo */ 27571991Sheppo ASSERT(reg_msg->ncookies == 1); 27581991Sheppo 27591991Sheppo if ((status = 27604696Sachartre ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { 27612793Slm66018 PR0("ldc_mem_dring_info() returned errno %d", status); 27621991Sheppo if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0) 27632793Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 27641991Sheppo return (status); 27651991Sheppo } 27661991Sheppo 27671991Sheppo if (dring_minfo.vaddr == NULL) { 27682793Slm66018 PR0("Descriptor ring virtual address is NULL"); 27692032Slm66018 return (ENXIO); 27701991Sheppo } 27711991Sheppo 27721991Sheppo 27732336Snarayan /* Initialize for valid message and mapped dring */ 27741991Sheppo PR1("descriptor size = %u, dring length = %u", 27751991Sheppo 
vd->descriptor_size, vd->dring_len); 27761991Sheppo vd->initialized |= VD_DRING; 27771991Sheppo vd->dring_ident = 1; /* "There Can Be Only One" */ 27781991Sheppo vd->dring = dring_minfo.vaddr; 27791991Sheppo vd->descriptor_size = reg_msg->descriptor_size; 27801991Sheppo vd->dring_len = reg_msg->num_descriptors; 27811991Sheppo reg_msg->dring_ident = vd->dring_ident; 27822336Snarayan 27832336Snarayan /* 27842336Snarayan * Allocate and initialize a "shadow" array of data structures for 27852336Snarayan * tasks to process I/O requests in dring elements 27862336Snarayan */ 27872336Snarayan vd->dring_task = 27882336Snarayan kmem_zalloc((sizeof (*vd->dring_task)) * vd->dring_len, KM_SLEEP); 27892336Snarayan for (int i = 0; i < vd->dring_len; i++) { 27902336Snarayan vd->dring_task[i].vd = vd; 27912336Snarayan vd->dring_task[i].index = i; 27922336Snarayan vd->dring_task[i].request = &VD_DRING_ELEM(i)->payload; 27932531Snarayan 27942531Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, 27952531Snarayan &(vd->dring_task[i].mhdl)); 27962531Snarayan if (status) { 27972793Slm66018 PR0("ldc_mem_alloc_handle() returned err %d ", status); 27982531Snarayan return (ENXIO); 27992531Snarayan } 28002793Slm66018 28012793Slm66018 vd->dring_task[i].msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 28022336Snarayan } 28032336Snarayan 28041991Sheppo return (0); 28051991Sheppo } 28061991Sheppo 28071991Sheppo static int 28081991Sheppo vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 28091991Sheppo { 28101991Sheppo vio_dring_unreg_msg_t *unreg_msg = (vio_dring_unreg_msg_t *)msg; 28111991Sheppo 28121991Sheppo 28131991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 28141991Sheppo 28151991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 28164696Sachartre VIO_DRING_UNREG)) { 28172336Snarayan PR0("Message is not an unregister-dring message"); 28182336Snarayan return (ENOMSG); 28191991Sheppo } 28201991Sheppo 28211991Sheppo if (msglen != sizeof (*unreg_msg)) { 
28222793Slm66018 PR0("Expected %lu-byte unregister-dring message; " 28231991Sheppo "received %lu bytes", sizeof (*unreg_msg), msglen); 28241991Sheppo return (EBADMSG); 28251991Sheppo } 28261991Sheppo 28271991Sheppo if (unreg_msg->dring_ident != vd->dring_ident) { 28282793Slm66018 PR0("Expected dring ident %lu; received %lu", 28291991Sheppo vd->dring_ident, unreg_msg->dring_ident); 28301991Sheppo return (EBADMSG); 28311991Sheppo } 28321991Sheppo 28331991Sheppo return (0); 28341991Sheppo } 28351991Sheppo 28361991Sheppo static int 28371991Sheppo process_rdx_msg(vio_msg_t *msg, size_t msglen) 28381991Sheppo { 28391991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 28401991Sheppo 28412336Snarayan if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX)) { 28422336Snarayan PR0("Message is not an RDX message"); 28432336Snarayan return (ENOMSG); 28442336Snarayan } 28451991Sheppo 28461991Sheppo if (msglen != sizeof (vio_rdx_msg_t)) { 28472793Slm66018 PR0("Expected %lu-byte RDX message; received %lu bytes", 28481991Sheppo sizeof (vio_rdx_msg_t), msglen); 28491991Sheppo return (EBADMSG); 28501991Sheppo } 28511991Sheppo 28522336Snarayan PR0("Valid RDX message"); 28531991Sheppo return (0); 28541991Sheppo } 28551991Sheppo 28561991Sheppo static int 28571991Sheppo vd_check_seq_num(vd_t *vd, uint64_t seq_num) 28581991Sheppo { 28591991Sheppo if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) { 28602793Slm66018 PR0("Received seq_num %lu; expected %lu", 28611991Sheppo seq_num, (vd->seq_num + 1)); 28622793Slm66018 PR0("initiating soft reset"); 28632336Snarayan vd_need_reset(vd, B_FALSE); 28641991Sheppo return (1); 28651991Sheppo } 28661991Sheppo 28671991Sheppo vd->seq_num = seq_num; 28681991Sheppo vd->initialized |= VD_SEQ_NUM; /* superfluous after first time... 
*/ 28691991Sheppo return (0); 28701991Sheppo } 28711991Sheppo 28721991Sheppo /* 28731991Sheppo * Return the expected size of an inband-descriptor message with all the 28741991Sheppo * cookies it claims to include 28751991Sheppo */ 28761991Sheppo static size_t 28771991Sheppo expected_inband_size(vd_dring_inband_msg_t *msg) 28781991Sheppo { 28791991Sheppo return ((sizeof (*msg)) + 28801991Sheppo (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0]))); 28811991Sheppo } 28821991Sheppo 28831991Sheppo /* 28841991Sheppo * Process an in-band descriptor message: used with clients like OBP, with 28851991Sheppo * which vds exchanges descriptors within VIO message payloads, rather than 28861991Sheppo * operating on them within a descriptor ring 28871991Sheppo */ 28881991Sheppo static int 28892793Slm66018 vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 28901991Sheppo { 28911991Sheppo size_t expected; 28921991Sheppo vd_dring_inband_msg_t *desc_msg = (vd_dring_inband_msg_t *)msg; 28931991Sheppo 28941991Sheppo 28951991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 28961991Sheppo 28971991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 28984696Sachartre VIO_DESC_DATA)) { 28992336Snarayan PR1("Message is not an in-band-descriptor message"); 29002336Snarayan return (ENOMSG); 29012336Snarayan } 29021991Sheppo 29031991Sheppo if (msglen < sizeof (*desc_msg)) { 29042793Slm66018 PR0("Expected at least %lu-byte descriptor message; " 29051991Sheppo "received %lu bytes", sizeof (*desc_msg), msglen); 29061991Sheppo return (EBADMSG); 29071991Sheppo } 29081991Sheppo 29091991Sheppo if (msglen != (expected = expected_inband_size(desc_msg))) { 29102793Slm66018 PR0("Expected %lu-byte descriptor message; " 29111991Sheppo "received %lu bytes", expected, msglen); 29121991Sheppo return (EBADMSG); 29131991Sheppo } 29141991Sheppo 29152336Snarayan if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) 29161991Sheppo return (EBADMSG); 29172336Snarayan 29182336Snarayan /* 
29192336Snarayan * Valid message: Set up the in-band descriptor task and process the 29202336Snarayan * request. Arrange to acknowledge the client's message, unless an 29212336Snarayan * error processing the descriptor task results in setting 29222336Snarayan * VIO_SUBTYPE_NACK 29232336Snarayan */ 29242336Snarayan PR1("Valid in-band-descriptor message"); 29252336Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 29262793Slm66018 29272793Slm66018 ASSERT(vd->inband_task.msg != NULL); 29282793Slm66018 29292793Slm66018 bcopy(msg, vd->inband_task.msg, msglen); 29302336Snarayan vd->inband_task.msglen = msglen; 29312793Slm66018 29322793Slm66018 /* 29332793Slm66018 * The task request is now the payload of the message 29342793Slm66018 * that was just copied into the body of the task. 29352793Slm66018 */ 29362793Slm66018 desc_msg = (vd_dring_inband_msg_t *)vd->inband_task.msg; 29372336Snarayan vd->inband_task.request = &desc_msg->payload; 29382793Slm66018 29392336Snarayan return (vd_process_task(&vd->inband_task)); 29401991Sheppo } 29411991Sheppo 29421991Sheppo static int 29432336Snarayan vd_process_element(vd_t *vd, vd_task_type_t type, uint32_t idx, 29442793Slm66018 vio_msg_t *msg, size_t msglen) 29451991Sheppo { 29462336Snarayan int status; 29472336Snarayan boolean_t ready; 29482336Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 29492336Snarayan 29502336Snarayan 29512336Snarayan /* Accept the updated dring element */ 29522336Snarayan if ((status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 29532793Slm66018 PR0("ldc_mem_dring_acquire() returned errno %d", status); 29541991Sheppo return (status); 29551991Sheppo } 29562336Snarayan ready = (elem->hdr.dstate == VIO_DESC_READY); 29572336Snarayan if (ready) { 29582336Snarayan elem->hdr.dstate = VIO_DESC_ACCEPTED; 29592336Snarayan } else { 29602793Slm66018 PR0("descriptor %u not ready", idx); 29612336Snarayan VD_DUMP_DRING_ELEM(elem); 29622336Snarayan } 29632336Snarayan if ((status = 
ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 29642793Slm66018 PR0("ldc_mem_dring_release() returned errno %d", status); 29651991Sheppo return (status); 29661991Sheppo } 29672336Snarayan if (!ready) 29682336Snarayan return (EBUSY); 29692336Snarayan 29702336Snarayan 29712336Snarayan /* Initialize a task and process the accepted element */ 29722336Snarayan PR1("Processing dring element %u", idx); 29732336Snarayan vd->dring_task[idx].type = type; 29742793Slm66018 29752793Slm66018 /* duplicate msg buf for cookies etc. */ 29762793Slm66018 bcopy(msg, vd->dring_task[idx].msg, msglen); 29772793Slm66018 29782336Snarayan vd->dring_task[idx].msglen = msglen; 29794838Slm66018 return (vd_process_task(&vd->dring_task[idx])); 29801991Sheppo } 29811991Sheppo 29821991Sheppo static int 29832336Snarayan vd_process_element_range(vd_t *vd, int start, int end, 29842793Slm66018 vio_msg_t *msg, size_t msglen) 29852336Snarayan { 29862336Snarayan int i, n, nelem, status = 0; 29872336Snarayan boolean_t inprogress = B_FALSE; 29882336Snarayan vd_task_type_t type; 29892336Snarayan 29902336Snarayan 29912336Snarayan ASSERT(start >= 0); 29922336Snarayan ASSERT(end >= 0); 29932336Snarayan 29942336Snarayan /* 29952336Snarayan * Arrange to acknowledge the client's message, unless an error 29962336Snarayan * processing one of the dring elements results in setting 29972336Snarayan * VIO_SUBTYPE_NACK 29982336Snarayan */ 29992336Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 30002336Snarayan 30012336Snarayan /* 30022336Snarayan * Process the dring elements in the range 30032336Snarayan */ 30042336Snarayan nelem = ((end < start) ? end + vd->dring_len : end) - start + 1; 30052336Snarayan for (i = start, n = nelem; n > 0; i = (i + 1) % vd->dring_len, n--) { 30062336Snarayan ((vio_dring_msg_t *)msg)->end_idx = i; 30072336Snarayan type = (n == 1) ? 
VD_FINAL_RANGE_TASK : VD_NONFINAL_RANGE_TASK; 30082793Slm66018 status = vd_process_element(vd, type, i, msg, msglen); 30092336Snarayan if (status == EINPROGRESS) 30102336Snarayan inprogress = B_TRUE; 30112336Snarayan else if (status != 0) 30122336Snarayan break; 30132336Snarayan } 30142336Snarayan 30152336Snarayan /* 30162336Snarayan * If some, but not all, operations of a multi-element range are in 30172336Snarayan * progress, wait for other operations to complete before returning 30182336Snarayan * (which will result in "ack" or "nack" of the message). Note that 30192336Snarayan * all outstanding operations will need to complete, not just the ones 30202336Snarayan * corresponding to the current range of dring elements; howevever, as 30212336Snarayan * this situation is an error case, performance is less critical. 30222336Snarayan */ 30232336Snarayan if ((nelem > 1) && (status != EINPROGRESS) && inprogress) 30242336Snarayan ddi_taskq_wait(vd->completionq); 30252336Snarayan 30262336Snarayan return (status); 30272336Snarayan } 30282336Snarayan 30292336Snarayan static int 30302793Slm66018 vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 30311991Sheppo { 30321991Sheppo vio_dring_msg_t *dring_msg = (vio_dring_msg_t *)msg; 30331991Sheppo 30341991Sheppo 30351991Sheppo ASSERT(msglen >= sizeof (msg->tag)); 30361991Sheppo 30371991Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 30384696Sachartre VIO_DRING_DATA)) { 30392336Snarayan PR1("Message is not a dring-data message"); 30402336Snarayan return (ENOMSG); 30411991Sheppo } 30421991Sheppo 30431991Sheppo if (msglen != sizeof (*dring_msg)) { 30442793Slm66018 PR0("Expected %lu-byte dring message; received %lu bytes", 30451991Sheppo sizeof (*dring_msg), msglen); 30461991Sheppo return (EBADMSG); 30471991Sheppo } 30481991Sheppo 30492336Snarayan if (vd_check_seq_num(vd, dring_msg->seq_num) != 0) 30501991Sheppo return (EBADMSG); 30511991Sheppo 30521991Sheppo if (dring_msg->dring_ident != 
vd->dring_ident) { 30532793Slm66018 PR0("Expected dring ident %lu; received ident %lu", 30541991Sheppo vd->dring_ident, dring_msg->dring_ident); 30551991Sheppo return (EBADMSG); 30561991Sheppo } 30571991Sheppo 30582336Snarayan if (dring_msg->start_idx >= vd->dring_len) { 30592793Slm66018 PR0("\"start_idx\" = %u; must be less than %u", 30602336Snarayan dring_msg->start_idx, vd->dring_len); 30612336Snarayan return (EBADMSG); 30622336Snarayan } 30632336Snarayan 30642336Snarayan if ((dring_msg->end_idx < 0) || 30652336Snarayan (dring_msg->end_idx >= vd->dring_len)) { 30662793Slm66018 PR0("\"end_idx\" = %u; must be >= 0 and less than %u", 30672336Snarayan dring_msg->end_idx, vd->dring_len); 30682336Snarayan return (EBADMSG); 30692336Snarayan } 30702336Snarayan 30712336Snarayan /* Valid message; process range of updated dring elements */ 30722336Snarayan PR1("Processing descriptor range, start = %u, end = %u", 30732336Snarayan dring_msg->start_idx, dring_msg->end_idx); 30742336Snarayan return (vd_process_element_range(vd, dring_msg->start_idx, 30754696Sachartre dring_msg->end_idx, msg, msglen)); 30761991Sheppo } 30771991Sheppo 30781991Sheppo static int 30791991Sheppo recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes) 30801991Sheppo { 30811991Sheppo int retry, status; 30821991Sheppo size_t size = *nbytes; 30831991Sheppo 30841991Sheppo 30851991Sheppo for (retry = 0, status = ETIMEDOUT; 30861991Sheppo retry < vds_ldc_retries && status == ETIMEDOUT; 30871991Sheppo retry++) { 30881991Sheppo PR1("ldc_read() attempt %d", (retry + 1)); 30891991Sheppo *nbytes = size; 30901991Sheppo status = ldc_read(ldc_handle, msg, nbytes); 30911991Sheppo } 30921991Sheppo 30932793Slm66018 if (status) { 30942793Slm66018 PR0("ldc_read() returned errno %d", status); 30952793Slm66018 if (status != ECONNRESET) 30962793Slm66018 return (ENOMSG); 30971991Sheppo return (status); 30981991Sheppo } else if (*nbytes == 0) { 30991991Sheppo PR1("ldc_read() returned 0 and no message read"); 
31001991Sheppo return (ENOMSG); 31011991Sheppo } 31021991Sheppo 31031991Sheppo PR1("RCVD %lu-byte message", *nbytes); 31041991Sheppo return (0); 31051991Sheppo } 31061991Sheppo 31071991Sheppo static int 31082793Slm66018 vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 31091991Sheppo { 31101991Sheppo int status; 31111991Sheppo 31121991Sheppo 31131991Sheppo PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype, 31141991Sheppo msg->tag.vio_subtype, msg->tag.vio_subtype_env); 31152793Slm66018 #ifdef DEBUG 31162793Slm66018 vd_decode_tag(msg); 31172793Slm66018 #endif 31181991Sheppo 31191991Sheppo /* 31201991Sheppo * Validate session ID up front, since it applies to all messages 31211991Sheppo * once set 31221991Sheppo */ 31231991Sheppo if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) { 31242793Slm66018 PR0("Expected SID %u, received %u", vd->sid, 31251991Sheppo msg->tag.vio_sid); 31261991Sheppo return (EBADMSG); 31271991Sheppo } 31281991Sheppo 31292793Slm66018 PR1("\tWhile in state %d (%s)", vd->state, vd_decode_state(vd->state)); 31301991Sheppo 31311991Sheppo /* 31321991Sheppo * Process the received message based on connection state 31331991Sheppo */ 31341991Sheppo switch (vd->state) { 31351991Sheppo case VD_STATE_INIT: /* expect version message */ 31362032Slm66018 if ((status = vd_process_ver_msg(vd, msg, msglen)) != 0) 31371991Sheppo return (status); 31381991Sheppo 31391991Sheppo /* Version negotiated, move to that state */ 31401991Sheppo vd->state = VD_STATE_VER; 31411991Sheppo return (0); 31421991Sheppo 31431991Sheppo case VD_STATE_VER: /* expect attribute message */ 31441991Sheppo if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0) 31451991Sheppo return (status); 31461991Sheppo 31471991Sheppo /* Attributes exchanged, move to that state */ 31481991Sheppo vd->state = VD_STATE_ATTR; 31491991Sheppo return (0); 31501991Sheppo 31511991Sheppo case VD_STATE_ATTR: 31521991Sheppo switch (vd->xfer_mode) { 31531991Sheppo case VIO_DESC_MODE: 
/* expect RDX message */ 31541991Sheppo if ((status = process_rdx_msg(msg, msglen)) != 0) 31551991Sheppo return (status); 31561991Sheppo 31571991Sheppo /* Ready to receive in-band descriptors */ 31581991Sheppo vd->state = VD_STATE_DATA; 31591991Sheppo return (0); 31601991Sheppo 31611991Sheppo case VIO_DRING_MODE: /* expect register-dring message */ 31621991Sheppo if ((status = 31634696Sachartre vd_process_dring_reg_msg(vd, msg, msglen)) != 0) 31641991Sheppo return (status); 31651991Sheppo 31661991Sheppo /* One dring negotiated, move to that state */ 31671991Sheppo vd->state = VD_STATE_DRING; 31681991Sheppo return (0); 31691991Sheppo 31701991Sheppo default: 31711991Sheppo ASSERT("Unsupported transfer mode"); 31722793Slm66018 PR0("Unsupported transfer mode"); 31731991Sheppo return (ENOTSUP); 31741991Sheppo } 31751991Sheppo 31761991Sheppo case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */ 31771991Sheppo if ((status = process_rdx_msg(msg, msglen)) == 0) { 31781991Sheppo /* Ready to receive data */ 31791991Sheppo vd->state = VD_STATE_DATA; 31801991Sheppo return (0); 31811991Sheppo } else if (status != ENOMSG) { 31821991Sheppo return (status); 31831991Sheppo } 31841991Sheppo 31851991Sheppo 31861991Sheppo /* 31871991Sheppo * If another register-dring message is received, stay in 31881991Sheppo * dring state in case the client sends RDX; although the 31891991Sheppo * protocol allows multiple drings, this server does not 31901991Sheppo * support using more than one 31911991Sheppo */ 31921991Sheppo if ((status = 31934696Sachartre vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG) 31941991Sheppo return (status); 31951991Sheppo 31961991Sheppo /* 31971991Sheppo * Acknowledge an unregister-dring message, but reset the 31981991Sheppo * connection anyway: Although the protocol allows 31991991Sheppo * unregistering drings, this server cannot serve a vdisk 32001991Sheppo * without its only dring 32011991Sheppo */ 32021991Sheppo status = 
vd_process_dring_unreg_msg(vd, msg, msglen); 32031991Sheppo return ((status == 0) ? ENOTSUP : status); 32041991Sheppo 32051991Sheppo case VD_STATE_DATA: 32061991Sheppo switch (vd->xfer_mode) { 32071991Sheppo case VIO_DESC_MODE: /* expect in-band-descriptor message */ 32082793Slm66018 return (vd_process_desc_msg(vd, msg, msglen)); 32091991Sheppo 32101991Sheppo case VIO_DRING_MODE: /* expect dring-data or unreg-dring */ 32111991Sheppo /* 32121991Sheppo * Typically expect dring-data messages, so handle 32131991Sheppo * them first 32141991Sheppo */ 32151991Sheppo if ((status = vd_process_dring_msg(vd, msg, 32164696Sachartre msglen)) != ENOMSG) 32171991Sheppo return (status); 32181991Sheppo 32191991Sheppo /* 32201991Sheppo * Acknowledge an unregister-dring message, but reset 32211991Sheppo * the connection anyway: Although the protocol 32221991Sheppo * allows unregistering drings, this server cannot 32231991Sheppo * serve a vdisk without its only dring 32241991Sheppo */ 32251991Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 32261991Sheppo return ((status == 0) ? 
ENOTSUP : status); 32271991Sheppo 32281991Sheppo default: 32291991Sheppo ASSERT("Unsupported transfer mode"); 32302793Slm66018 PR0("Unsupported transfer mode"); 32311991Sheppo return (ENOTSUP); 32321991Sheppo } 32331991Sheppo 32341991Sheppo default: 32351991Sheppo ASSERT("Invalid client connection state"); 32362793Slm66018 PR0("Invalid client connection state"); 32371991Sheppo return (ENOTSUP); 32381991Sheppo } 32391991Sheppo } 32401991Sheppo 32412336Snarayan static int 32422793Slm66018 vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 32431991Sheppo { 32441991Sheppo int status; 32451991Sheppo boolean_t reset_ldc = B_FALSE; 32464838Slm66018 vd_task_t task; 32471991Sheppo 32481991Sheppo /* 32491991Sheppo * Check that the message is at least big enough for a "tag", so that 32501991Sheppo * message processing can proceed based on tag-specified message type 32511991Sheppo */ 32521991Sheppo if (msglen < sizeof (vio_msg_tag_t)) { 32532793Slm66018 PR0("Received short (%lu-byte) message", msglen); 32541991Sheppo /* Can't "nack" short message, so drop the big hammer */ 32552793Slm66018 PR0("initiating full reset"); 32562336Snarayan vd_need_reset(vd, B_TRUE); 32572336Snarayan return (EBADMSG); 32581991Sheppo } 32591991Sheppo 32601991Sheppo /* 32611991Sheppo * Process the message 32621991Sheppo */ 32632793Slm66018 switch (status = vd_do_process_msg(vd, msg, msglen)) { 32641991Sheppo case 0: 32651991Sheppo /* "ack" valid, successfully-processed messages */ 32661991Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 32671991Sheppo break; 32681991Sheppo 32692336Snarayan case EINPROGRESS: 32702336Snarayan /* The completion handler will "ack" or "nack" the message */ 32712336Snarayan return (EINPROGRESS); 32721991Sheppo case ENOMSG: 32732793Slm66018 PR0("Received unexpected message"); 32741991Sheppo _NOTE(FALLTHROUGH); 32751991Sheppo case EBADMSG: 32761991Sheppo case ENOTSUP: 32774838Slm66018 /* "transport" error will cause NACK of invalid messages */ 32781991Sheppo 
msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 32791991Sheppo break; 32801991Sheppo 32811991Sheppo default: 32824838Slm66018 /* "transport" error will cause NACK of invalid messages */ 32831991Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 32841991Sheppo /* An LDC error probably occurred, so try resetting it */ 32851991Sheppo reset_ldc = B_TRUE; 32861991Sheppo break; 32871991Sheppo } 32881991Sheppo 32892793Slm66018 PR1("\tResulting in state %d (%s)", vd->state, 32904696Sachartre vd_decode_state(vd->state)); 32912793Slm66018 32924838Slm66018 /* populate the task so we can dispatch it on the taskq */ 32934838Slm66018 task.vd = vd; 32944838Slm66018 task.msg = msg; 32954838Slm66018 task.msglen = msglen; 32964838Slm66018 32974838Slm66018 /* 32984838Slm66018 * Queue a task to send the notification that the operation completed. 32994838Slm66018 * We need to ensure that requests are responded to in the correct 33004838Slm66018 * order and since the taskq is processed serially this ordering 33014838Slm66018 * is maintained. 33024838Slm66018 */ 33034838Slm66018 (void) ddi_taskq_dispatch(vd->completionq, vd_serial_notify, 33044838Slm66018 &task, DDI_SLEEP); 33054838Slm66018 33064838Slm66018 /* 33074838Slm66018 * To ensure handshake negotiations do not happen out of order, such 33084838Slm66018 * requests that come through this path should not be done in parallel 33094838Slm66018 * so we need to wait here until the response is sent to the client. 33104838Slm66018 */ 33114838Slm66018 ddi_taskq_wait(vd->completionq); 33121991Sheppo 33132336Snarayan /* Arrange to reset the connection for nack'ed or failed messages */ 33142793Slm66018 if ((status != 0) || reset_ldc) { 33152793Slm66018 PR0("initiating %s reset", 33162793Slm66018 (reset_ldc) ? 
"full" : "soft"); 33172336Snarayan vd_need_reset(vd, reset_ldc); 33182793Slm66018 } 33192336Snarayan 33202336Snarayan return (status); 33212336Snarayan } 33222336Snarayan 33232336Snarayan static boolean_t 33242336Snarayan vd_enabled(vd_t *vd) 33252336Snarayan { 33262336Snarayan boolean_t enabled; 33272336Snarayan 33282336Snarayan mutex_enter(&vd->lock); 33292336Snarayan enabled = vd->enabled; 33302336Snarayan mutex_exit(&vd->lock); 33312336Snarayan return (enabled); 33321991Sheppo } 33331991Sheppo 33341991Sheppo static void 33352032Slm66018 vd_recv_msg(void *arg) 33361991Sheppo { 33372032Slm66018 vd_t *vd = (vd_t *)arg; 33382793Slm66018 int rv = 0, status = 0; 33391991Sheppo 33401991Sheppo ASSERT(vd != NULL); 33412793Slm66018 33422336Snarayan PR2("New task to receive incoming message(s)"); 33432793Slm66018 33442793Slm66018 33452336Snarayan while (vd_enabled(vd) && status == 0) { 33462336Snarayan size_t msglen, msgsize; 33472793Slm66018 ldc_status_t lstatus; 33482336Snarayan 33492336Snarayan /* 33502336Snarayan * Receive and process a message 33512336Snarayan */ 33522336Snarayan vd_reset_if_needed(vd); /* can change vd->max_msglen */ 33532793Slm66018 33542793Slm66018 /* 33552793Slm66018 * check if channel is UP - else break out of loop 33562793Slm66018 */ 33572793Slm66018 status = ldc_status(vd->ldc_handle, &lstatus); 33582793Slm66018 if (lstatus != LDC_UP) { 33592793Slm66018 PR0("channel not up (status=%d), exiting recv loop\n", 33602793Slm66018 lstatus); 33612793Slm66018 break; 33622793Slm66018 } 33632793Slm66018 33642793Slm66018 ASSERT(vd->max_msglen != 0); 33652793Slm66018 33662793Slm66018 msgsize = vd->max_msglen; /* stable copy for alloc/free */ 33672793Slm66018 msglen = msgsize; /* actual len after recv_msg() */ 33682793Slm66018 33692793Slm66018 status = recv_msg(vd->ldc_handle, vd->vio_msgp, &msglen); 33702793Slm66018 switch (status) { 33712793Slm66018 case 0: 33722793Slm66018 rv = vd_process_msg(vd, (vio_msg_t *)vd->vio_msgp, 33734696Sachartre msglen); 
33742793Slm66018 /* check if max_msglen changed */ 33752793Slm66018 if (msgsize != vd->max_msglen) { 33762793Slm66018 PR0("max_msglen changed 0x%lx to 0x%lx bytes\n", 33772793Slm66018 msgsize, vd->max_msglen); 33782793Slm66018 kmem_free(vd->vio_msgp, msgsize); 33792793Slm66018 vd->vio_msgp = 33804696Sachartre kmem_alloc(vd->max_msglen, KM_SLEEP); 33812793Slm66018 } 33822793Slm66018 if (rv == EINPROGRESS) 33832793Slm66018 continue; 33842793Slm66018 break; 33852793Slm66018 33862793Slm66018 case ENOMSG: 33872793Slm66018 break; 33882793Slm66018 33892793Slm66018 case ECONNRESET: 33902793Slm66018 PR0("initiating soft reset (ECONNRESET)\n"); 33912793Slm66018 vd_need_reset(vd, B_FALSE); 33922793Slm66018 status = 0; 33932793Slm66018 break; 33942793Slm66018 33952793Slm66018 default: 33962336Snarayan /* Probably an LDC failure; arrange to reset it */ 33972793Slm66018 PR0("initiating full reset (status=0x%x)", status); 33982336Snarayan vd_need_reset(vd, B_TRUE); 33992793Slm66018 break; 34002336Snarayan } 34012032Slm66018 } 34022793Slm66018 34032336Snarayan PR2("Task finished"); 34042032Slm66018 } 34052032Slm66018 34062032Slm66018 static uint_t 34071991Sheppo vd_handle_ldc_events(uint64_t event, caddr_t arg) 34081991Sheppo { 34091991Sheppo vd_t *vd = (vd_t *)(void *)arg; 34102793Slm66018 int status; 34111991Sheppo 34121991Sheppo ASSERT(vd != NULL); 34132336Snarayan 34142336Snarayan if (!vd_enabled(vd)) 34152336Snarayan return (LDC_SUCCESS); 34162336Snarayan 34172793Slm66018 if (event & LDC_EVT_DOWN) { 34183166Ssg70180 PR0("LDC_EVT_DOWN: LDC channel went down"); 34192793Slm66018 34202793Slm66018 vd_need_reset(vd, B_TRUE); 34212793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 34222793Slm66018 DDI_SLEEP); 34232793Slm66018 if (status == DDI_FAILURE) { 34242793Slm66018 PR0("cannot schedule task to recv msg\n"); 34252793Slm66018 vd_need_reset(vd, B_TRUE); 34262793Slm66018 } 34272793Slm66018 } 34282793Slm66018 34292336Snarayan if (event & LDC_EVT_RESET) { 
34302793Slm66018 PR0("LDC_EVT_RESET: LDC channel was reset"); 34312793Slm66018 34322793Slm66018 if (vd->state != VD_STATE_INIT) { 34332793Slm66018 PR0("scheduling full reset"); 34342793Slm66018 vd_need_reset(vd, B_FALSE); 34352793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 34362793Slm66018 vd, DDI_SLEEP); 34372793Slm66018 if (status == DDI_FAILURE) { 34382793Slm66018 PR0("cannot schedule task to recv msg\n"); 34392793Slm66018 vd_need_reset(vd, B_TRUE); 34402793Slm66018 } 34412793Slm66018 34422793Slm66018 } else { 34432793Slm66018 PR0("channel already reset, ignoring...\n"); 34442793Slm66018 PR0("doing ldc up...\n"); 34452793Slm66018 (void) ldc_up(vd->ldc_handle); 34462793Slm66018 } 34472793Slm66018 34482336Snarayan return (LDC_SUCCESS); 34492336Snarayan } 34502336Snarayan 34512336Snarayan if (event & LDC_EVT_UP) { 34522793Slm66018 PR0("EVT_UP: LDC is up\nResetting client connection state"); 34532793Slm66018 PR0("initiating soft reset"); 34542336Snarayan vd_need_reset(vd, B_FALSE); 34552793Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 34562793Slm66018 vd, DDI_SLEEP); 34572793Slm66018 if (status == DDI_FAILURE) { 34582793Slm66018 PR0("cannot schedule task to recv msg\n"); 34592793Slm66018 vd_need_reset(vd, B_TRUE); 34602793Slm66018 return (LDC_SUCCESS); 34612793Slm66018 } 34622336Snarayan } 34632336Snarayan 34642336Snarayan if (event & LDC_EVT_READ) { 34652336Snarayan int status; 34662336Snarayan 34672336Snarayan PR1("New data available"); 34682336Snarayan /* Queue a task to receive the new data */ 34692336Snarayan status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 34702336Snarayan DDI_SLEEP); 34712793Slm66018 34722793Slm66018 if (status == DDI_FAILURE) { 34732793Slm66018 PR0("cannot schedule task to recv msg\n"); 34742793Slm66018 vd_need_reset(vd, B_TRUE); 34752793Slm66018 } 34762336Snarayan } 34772336Snarayan 34782336Snarayan return (LDC_SUCCESS); 34791991Sheppo } 34801991Sheppo 34811991Sheppo static uint_t 34821991Sheppo 
vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 34831991Sheppo { 34841991Sheppo _NOTE(ARGUNUSED(key, val)) 34851991Sheppo (*((uint_t *)arg))++; 34861991Sheppo return (MH_WALK_TERMINATE); 34871991Sheppo } 34881991Sheppo 34891991Sheppo 34901991Sheppo static int 34911991Sheppo vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 34921991Sheppo { 34931991Sheppo uint_t vd_present = 0; 34941991Sheppo minor_t instance; 34951991Sheppo vds_t *vds; 34961991Sheppo 34971991Sheppo 34981991Sheppo switch (cmd) { 34991991Sheppo case DDI_DETACH: 35001991Sheppo /* the real work happens below */ 35011991Sheppo break; 35021991Sheppo case DDI_SUSPEND: 35032336Snarayan PR0("No action required for DDI_SUSPEND"); 35041991Sheppo return (DDI_SUCCESS); 35051991Sheppo default: 35062793Slm66018 PR0("Unrecognized \"cmd\""); 35071991Sheppo return (DDI_FAILURE); 35081991Sheppo } 35091991Sheppo 35101991Sheppo ASSERT(cmd == DDI_DETACH); 35111991Sheppo instance = ddi_get_instance(dip); 35121991Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 35132793Slm66018 PR0("Could not get state for instance %u", instance); 35141991Sheppo ddi_soft_state_free(vds_state, instance); 35151991Sheppo return (DDI_FAILURE); 35161991Sheppo } 35171991Sheppo 35181991Sheppo /* Do no detach when serving any vdisks */ 35191991Sheppo mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present); 35201991Sheppo if (vd_present) { 35211991Sheppo PR0("Not detaching because serving vdisks"); 35221991Sheppo return (DDI_FAILURE); 35231991Sheppo } 35241991Sheppo 35251991Sheppo PR0("Detaching"); 35263297Ssb155480 if (vds->initialized & VDS_MDEG) { 35271991Sheppo (void) mdeg_unregister(vds->mdeg); 35283297Ssb155480 kmem_free(vds->ispecp->specp, sizeof (vds_prop_template)); 35293297Ssb155480 kmem_free(vds->ispecp, sizeof (mdeg_node_spec_t)); 35303297Ssb155480 vds->ispecp = NULL; 35313297Ssb155480 vds->mdeg = NULL; 35323297Ssb155480 } 35333297Ssb155480 35341991Sheppo if (vds->initialized & VDS_LDI) 
35351991Sheppo (void) ldi_ident_release(vds->ldi_ident); 35361991Sheppo mod_hash_destroy_hash(vds->vd_table); 35371991Sheppo ddi_soft_state_free(vds_state, instance); 35381991Sheppo return (DDI_SUCCESS); 35391991Sheppo } 35401991Sheppo 35411991Sheppo static boolean_t 35421991Sheppo is_pseudo_device(dev_info_t *dip) 35431991Sheppo { 35441991Sheppo dev_info_t *parent, *root = ddi_root_node(); 35451991Sheppo 35461991Sheppo 35471991Sheppo for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root); 35481991Sheppo parent = ddi_get_parent(parent)) { 35491991Sheppo if (strcmp(ddi_get_name(parent), DEVI_PSEUDO_NEXNAME) == 0) 35501991Sheppo return (B_TRUE); 35511991Sheppo } 35521991Sheppo 35531991Sheppo return (B_FALSE); 35541991Sheppo } 35551991Sheppo 35561991Sheppo static int 35572032Slm66018 vd_setup_full_disk(vd_t *vd) 35582032Slm66018 { 35592032Slm66018 int rval, status; 35602032Slm66018 major_t major = getmajor(vd->dev[0]); 35612032Slm66018 minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; 35622531Snarayan struct dk_minfo dk_minfo; 35632531Snarayan 35645081Sachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); 35655081Sachartre 35662531Snarayan /* 35672531Snarayan * At this point, vdisk_size is set to the size of partition 2 but 35682531Snarayan * this does not represent the size of the disk because partition 2 35692531Snarayan * may not cover the entire disk and its size does not include reserved 35702531Snarayan * blocks. So we update vdisk_size to be the size of the entire disk. 
35712531Snarayan */ 35722531Snarayan if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, 35735081Sachartre (intptr_t)&dk_minfo, (vd->open_flags | FKIOCTL), 35742531Snarayan kcred, &rval)) != 0) { 35753782Sachartre PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d", 35762531Snarayan status); 35772032Slm66018 return (status); 35782032Slm66018 } 35792531Snarayan vd->vdisk_size = dk_minfo.dki_capacity; 35802032Slm66018 35812032Slm66018 /* Move dev number and LDI handle to entire-disk-slice array elements */ 35822032Slm66018 vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; 35832032Slm66018 vd->dev[0] = 0; 35842032Slm66018 vd->ldi_handle[VD_ENTIRE_DISK_SLICE] = vd->ldi_handle[0]; 35852032Slm66018 vd->ldi_handle[0] = NULL; 35862032Slm66018 35872032Slm66018 /* Initialize device numbers for remaining slices and open them */ 35882032Slm66018 for (int slice = 0; slice < vd->nslices; slice++) { 35892032Slm66018 /* 35902032Slm66018 * Skip the entire-disk slice, as it's already open and its 35912032Slm66018 * device known 35922032Slm66018 */ 35932032Slm66018 if (slice == VD_ENTIRE_DISK_SLICE) 35942032Slm66018 continue; 35952032Slm66018 ASSERT(vd->dev[slice] == 0); 35962032Slm66018 ASSERT(vd->ldi_handle[slice] == NULL); 35972032Slm66018 35982032Slm66018 /* 35992032Slm66018 * Construct the device number for the current slice 36002032Slm66018 */ 36012032Slm66018 vd->dev[slice] = makedevice(major, (minor + slice)); 36022032Slm66018 36032032Slm66018 /* 36043166Ssg70180 * Open all slices of the disk to serve them to the client. 36053166Ssg70180 * Slices are opened exclusively to prevent other threads or 36063166Ssg70180 * processes in the service domain from performing I/O to 36073166Ssg70180 * slices being accessed by a client. Failure to open a slice 36083166Ssg70180 * results in vds not serving this disk, as the client could 36093166Ssg70180 * attempt (and should be able) to access any slice immediately. 
36103166Ssg70180 * Any slices successfully opened before a failure will get 36113166Ssg70180 * closed by vds_destroy_vd() as a result of the error returned 36123166Ssg70180 * by this function. 36133166Ssg70180 * 36143166Ssg70180 * We need to do the open with FNDELAY so that opening an empty 36153166Ssg70180 * slice does not fail. 36162032Slm66018 */ 36172032Slm66018 PR0("Opening device major %u, minor %u = slice %u", 36182032Slm66018 major, minor, slice); 36195081Sachartre 36205081Sachartre /* 36215081Sachartre * Try to open the device. This can fail for example if we are 36225081Sachartre * opening an empty slice. So in case of a failure, we try the 36235081Sachartre * open again but this time with the FNDELAY flag. 36245081Sachartre */ 36255081Sachartre status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 36265081Sachartre vd->open_flags, kcred, &vd->ldi_handle[slice], 36275081Sachartre vd->vds->ldi_ident); 36285081Sachartre 36295081Sachartre if (status != 0) { 36305081Sachartre status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 36315081Sachartre vd->open_flags | FNDELAY, kcred, 36325081Sachartre &vd->ldi_handle[slice], vd->vds->ldi_ident); 36335081Sachartre } 36345081Sachartre 36355081Sachartre if (status != 0) { 36363782Sachartre PRN("ldi_open_by_dev() returned errno %d " 36372032Slm66018 "for slice %u", status, slice); 36382032Slm66018 /* vds_destroy_vd() will close any open slices */ 36393782Sachartre vd->ldi_handle[slice] = NULL; 36402032Slm66018 return (status); 36412032Slm66018 } 36422032Slm66018 } 36432032Slm66018 36442032Slm66018 return (0); 36452032Slm66018 } 36462032Slm66018 36472032Slm66018 static int 36484963Sachartre vd_setup_partition_vtoc(vd_t *vd) 36494963Sachartre { 36504963Sachartre int rval, status; 36514963Sachartre char *device_path = vd->device_path; 36524963Sachartre 36534963Sachartre status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, 36545081Sachartre (intptr_t)&vd->dk_geom, (vd->open_flags | FKIOCTL), kcred, &rval); 36554963Sachartre 
36564963Sachartre if (status != 0) { 36574963Sachartre PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", 36584963Sachartre status, device_path); 36594963Sachartre return (status); 36604963Sachartre } 36614963Sachartre 36624963Sachartre /* Initialize dk_geom structure for single-slice device */ 36634963Sachartre if (vd->dk_geom.dkg_nsect == 0) { 36644963Sachartre PRN("%s geometry claims 0 sectors per track", device_path); 36654963Sachartre return (EIO); 36664963Sachartre } 36674963Sachartre if (vd->dk_geom.dkg_nhead == 0) { 36684963Sachartre PRN("%s geometry claims 0 heads", device_path); 36694963Sachartre return (EIO); 36704963Sachartre } 36714963Sachartre vd->dk_geom.dkg_ncyl = vd->vdisk_size / vd->dk_geom.dkg_nsect / 36724963Sachartre vd->dk_geom.dkg_nhead; 36734963Sachartre vd->dk_geom.dkg_acyl = 0; 36744963Sachartre vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; 36754963Sachartre 36764963Sachartre 36774963Sachartre /* Initialize vtoc structure for single-slice device */ 36784963Sachartre bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, 36794963Sachartre MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); 36804963Sachartre bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); 36814963Sachartre vd->vtoc.v_nparts = 1; 36824963Sachartre vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; 36834963Sachartre vd->vtoc.v_part[0].p_flag = 0; 36844963Sachartre vd->vtoc.v_part[0].p_start = 0; 36854963Sachartre vd->vtoc.v_part[0].p_size = vd->vdisk_size; 36864963Sachartre bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, 36874963Sachartre MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel))); 36884963Sachartre 36894963Sachartre return (0); 36904963Sachartre } 36914963Sachartre 36924963Sachartre static int 36932531Snarayan vd_setup_partition_efi(vd_t *vd) 36942531Snarayan { 36952531Snarayan efi_gpt_t *gpt; 36962531Snarayan efi_gpe_t *gpe; 36972531Snarayan struct uuid uuid = EFI_RESERVED; 36982531Snarayan uint32_t crc; 36992531Snarayan int length; 37002531Snarayan 
37012531Snarayan length = sizeof (efi_gpt_t) + sizeof (efi_gpe_t); 37022531Snarayan 37032531Snarayan gpt = kmem_zalloc(length, KM_SLEEP); 37042531Snarayan gpe = (efi_gpe_t *)(gpt + 1); 37052531Snarayan 37062531Snarayan gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 37072531Snarayan gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 37082531Snarayan gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t)); 37092531Snarayan gpt->efi_gpt_FirstUsableLBA = LE_64(0ULL); 37102531Snarayan gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1); 37112531Snarayan gpt->efi_gpt_NumberOfPartitionEntries = LE_32(1); 37122531Snarayan gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t)); 37132531Snarayan 37142531Snarayan UUID_LE_CONVERT(gpe->efi_gpe_PartitionTypeGUID, uuid); 37152531Snarayan gpe->efi_gpe_StartingLBA = gpt->efi_gpt_FirstUsableLBA; 37162531Snarayan gpe->efi_gpe_EndingLBA = gpt->efi_gpt_LastUsableLBA; 37172531Snarayan 37182531Snarayan CRC32(crc, gpe, sizeof (efi_gpe_t), -1U, crc32_table); 37192531Snarayan gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 37202531Snarayan 37212531Snarayan CRC32(crc, gpt, sizeof (efi_gpt_t), -1U, crc32_table); 37222531Snarayan gpt->efi_gpt_HeaderCRC32 = LE_32(~crc); 37232531Snarayan 37242531Snarayan vd->dk_efi.dki_lba = 0; 37252531Snarayan vd->dk_efi.dki_length = length; 37262531Snarayan vd->dk_efi.dki_data = gpt; 37272531Snarayan 37282531Snarayan return (0); 37292531Snarayan } 37302531Snarayan 37315081Sachartre /* 37325081Sachartre * Setup for a virtual disk whose backend is a file (exported as a single slice 37335081Sachartre * or as a full disk) or a pseudo device (for example a ZFS, SVM or VxVM volume) 37345081Sachartre * exported as a full disk. In these cases, the backend is accessed using the 37355081Sachartre * vnode interface. 
37365081Sachartre */ 37372531Snarayan static int 37385081Sachartre vd_setup_backend_vnode(vd_t *vd) 37393401Snarayan { 37404963Sachartre int rval, status; 37413401Snarayan vattr_t vattr; 37423401Snarayan dev_t dev; 37433401Snarayan char *file_path = vd->device_path; 37443401Snarayan char dev_path[MAXPATHLEN + 1]; 37453401Snarayan ldi_handle_t lhandle; 37463401Snarayan struct dk_cinfo dk_cinfo; 37473401Snarayan 37485081Sachartre if ((status = vn_open(file_path, UIO_SYSSPACE, vd->open_flags | FOFFMAX, 37493401Snarayan 0, &vd->file_vnode, 0, 0)) != 0) { 37503782Sachartre PRN("vn_open(%s) = errno %d", file_path, status); 37513401Snarayan return (status); 37523401Snarayan } 37533401Snarayan 37543782Sachartre /* 37553782Sachartre * We set vd->file now so that vds_destroy_vd will take care of 37563782Sachartre * closing the file and releasing the vnode in case of an error. 37573782Sachartre */ 37583782Sachartre vd->file = B_TRUE; 37593782Sachartre 37603401Snarayan vattr.va_mask = AT_SIZE; 37613401Snarayan if ((status = VOP_GETATTR(vd->file_vnode, &vattr, 0, kcred)) != 0) { 37623782Sachartre PRN("VOP_GETATTR(%s) = errno %d", file_path, status); 37633401Snarayan return (EIO); 37643401Snarayan } 37653401Snarayan 37663401Snarayan vd->file_size = vattr.va_size; 37673401Snarayan /* size should be at least sizeof(dk_label) */ 37683401Snarayan if (vd->file_size < sizeof (struct dk_label)) { 37693401Snarayan PRN("Size of file has to be at least %ld bytes", 37703401Snarayan sizeof (struct dk_label)); 37713401Snarayan return (EIO); 37723401Snarayan } 37733401Snarayan 37743782Sachartre if (vd->file_vnode->v_flag & VNOMAP) { 37753782Sachartre PRN("File %s cannot be mapped", file_path); 37763401Snarayan return (EIO); 37773401Snarayan } 37783401Snarayan 37795081Sachartre /* 37805081Sachartre * Find and validate the geometry of a disk image. For a single slice 37815081Sachartre * disk image, this will build a fake geometry and vtoc. 
37825081Sachartre */ 37834963Sachartre status = vd_file_validate_geometry(vd); 37844963Sachartre if (status != 0 && status != EINVAL) { 37854963Sachartre PRN("Fail to read label from %s", file_path); 37863782Sachartre return (EIO); 37873782Sachartre } 37883401Snarayan 37893401Snarayan /* sector size = block size = DEV_BSIZE */ 37904696Sachartre vd->vdisk_size = vd->file_size / DEV_BSIZE; 37913401Snarayan vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */ 37923401Snarayan 37935081Sachartre /* 37945081Sachartre * Get max_xfer_sz from the device where the file is or from the device 37955081Sachartre * itself if we have a pseudo device. 37965081Sachartre */ 37975081Sachartre dev_path[0] = '\0'; 37985081Sachartre 37995081Sachartre if (vd->pseudo) { 38005081Sachartre status = ldi_open_by_name(file_path, FREAD, kcred, &lhandle, 38015081Sachartre vd->vds->ldi_ident); 38025081Sachartre } else { 38035081Sachartre dev = vd->file_vnode->v_vfsp->vfs_dev; 38045081Sachartre if (ddi_dev_pathname(dev, S_IFBLK, dev_path) == DDI_SUCCESS) { 38055081Sachartre PR0("underlying device = %s\n", dev_path); 38065081Sachartre } 38075081Sachartre 38085081Sachartre status = ldi_open_by_dev(&dev, OTYP_BLK, FREAD, kcred, &lhandle, 38095081Sachartre vd->vds->ldi_ident); 38103401Snarayan } 38113401Snarayan 38125081Sachartre if (status != 0) { 38135081Sachartre PR0("ldi_open() returned errno %d for device %s", 38145081Sachartre status, (dev_path[0] == '\0')? 
file_path : dev_path); 38153401Snarayan } else { 38163401Snarayan if ((status = ldi_ioctl(lhandle, DKIOCINFO, 38175081Sachartre (intptr_t)&dk_cinfo, (vd->open_flags | FKIOCTL), kcred, 38183401Snarayan &rval)) != 0) { 38193401Snarayan PR0("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 38203401Snarayan status, dev_path); 38213401Snarayan } else { 38223401Snarayan /* 38233401Snarayan * Store the device's max transfer size for 38243401Snarayan * return to the client 38253401Snarayan */ 38263401Snarayan vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 38273401Snarayan } 38283401Snarayan 38293401Snarayan PR0("close the device %s", dev_path); 38303401Snarayan (void) ldi_close(lhandle, FREAD, kcred); 38313401Snarayan } 38323401Snarayan 38334838Slm66018 PR0("using file %s, dev %s, max_xfer = %u blks", 38343401Snarayan file_path, dev_path, vd->max_xfer_sz); 38353401Snarayan 38364696Sachartre /* Setup devid for the disk image */ 38374696Sachartre 38385081Sachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE) 38395081Sachartre return (0); 38405081Sachartre 38414963Sachartre if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 38424963Sachartre 38434963Sachartre status = vd_file_read_devid(vd, &vd->file_devid); 38444963Sachartre 38454963Sachartre if (status == 0) { 38464963Sachartre /* a valid devid was found */ 38474963Sachartre return (0); 38484963Sachartre } 38494963Sachartre 38504963Sachartre if (status != EINVAL) { 38514963Sachartre /* 38524963Sachartre * There was an error while trying to read the devid. 38534963Sachartre * So this disk image may have a devid but we are 38544963Sachartre * unable to read it. 38554963Sachartre */ 38564963Sachartre PR0("can not read devid for %s", file_path); 38574963Sachartre vd->file_devid = NULL; 38584963Sachartre return (0); 38594963Sachartre } 38604696Sachartre } 38614696Sachartre 38624696Sachartre /* 38634696Sachartre * No valid device id was found so we create one. 
Note that a failure 38644696Sachartre * to create a device id is not fatal and does not prevent the disk 38654696Sachartre * image from being attached. 38664696Sachartre */ 38674696Sachartre PR1("creating devid for %s", file_path); 38684696Sachartre 38694696Sachartre if (ddi_devid_init(vd->vds->dip, DEVID_FAB, NULL, 0, 38704696Sachartre &vd->file_devid) != DDI_SUCCESS) { 38714696Sachartre PR0("fail to create devid for %s", file_path); 38724696Sachartre vd->file_devid = NULL; 38734696Sachartre return (0); 38744696Sachartre } 38754696Sachartre 38764963Sachartre /* 38774963Sachartre * Write devid to the disk image. The devid is stored into the disk 38784963Sachartre * image if we have a valid label; otherwise the devid will be stored 38794963Sachartre * when the user writes a valid label. 38804963Sachartre */ 38814963Sachartre if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 38824963Sachartre if (vd_file_write_devid(vd, vd->file_devid) != 0) { 38834963Sachartre PR0("fail to write devid for %s", file_path); 38844963Sachartre ddi_devid_free(vd->file_devid); 38854963Sachartre vd->file_devid = NULL; 38864963Sachartre } 38874696Sachartre } 38884696Sachartre 38893401Snarayan return (0); 38903401Snarayan } 38913401Snarayan 38925081Sachartre /* 38935081Sachartre * Setup for a virtual disk which backend is a device (a physical disk, 38945081Sachartre * slice or pseudo device) that is directly exported either as a full disk 38955081Sachartre * for a physical disk or as a slice for a pseudo device or a disk slice. 38965081Sachartre * In these cases, the backend is accessed using the LDI interface. 38975081Sachartre */ 38983401Snarayan static int 38995081Sachartre vd_setup_backend_ldi(vd_t *vd) 39001991Sheppo { 39012410Slm66018 int rval, status; 39021991Sheppo struct dk_cinfo dk_cinfo; 39033401Snarayan char *device_path = vd->device_path; 39041991Sheppo 39052531Snarayan /* 39065081Sachartre * Try to open the device. 
This can fail for example if we are opening 39075081Sachartre * an empty slice. So in case of a failure, we try the open again but 39085081Sachartre * this time with the FNDELAY flag. 39092531Snarayan */ 39105081Sachartre status = ldi_open_by_name(device_path, vd->open_flags, kcred, 39115081Sachartre &vd->ldi_handle[0], vd->vds->ldi_ident); 39125081Sachartre 39135081Sachartre if (status != 0) 39145081Sachartre status = ldi_open_by_name(device_path, vd->open_flags | FNDELAY, 39155081Sachartre kcred, &vd->ldi_handle[0], vd->vds->ldi_ident); 39165081Sachartre 39175081Sachartre if (status != 0) { 39183401Snarayan PR0("ldi_open_by_name(%s) = errno %d", device_path, status); 39193782Sachartre vd->ldi_handle[0] = NULL; 39202032Slm66018 return (status); 39212032Slm66018 } 39222032Slm66018 39233401Snarayan vd->file = B_FALSE; 39242531Snarayan 39255081Sachartre /* Get device number of backing device */ 39262032Slm66018 if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) { 39271991Sheppo PRN("ldi_get_dev() returned errno %d for %s", 39282410Slm66018 status, device_path); 39291991Sheppo return (status); 39301991Sheppo } 39312410Slm66018 39324963Sachartre /* Verify backing device supports dk_cinfo */ 39332410Slm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 39345081Sachartre (intptr_t)&dk_cinfo, (vd->open_flags | FKIOCTL), kcred, 39354696Sachartre &rval)) != 0) { 39362410Slm66018 PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 39372410Slm66018 status, device_path); 39381991Sheppo return (status); 39391991Sheppo } 39402410Slm66018 if (dk_cinfo.dki_partition >= V_NUMPAR) { 39412410Slm66018 PRN("slice %u >= maximum slice %u for %s", 39422410Slm66018 dk_cinfo.dki_partition, V_NUMPAR, device_path); 39431991Sheppo return (EIO); 39441991Sheppo } 39452531Snarayan 39465081Sachartre vd->vdisk_label = vd_read_vtoc(vd, &vd->vtoc); 39472410Slm66018 39482410Slm66018 /* Store the device's max transfer size for return to the client */ 39492410Slm66018 
vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 39502410Slm66018 39515081Sachartre /* 39525081Sachartre * Export a full disk. 39535081Sachartre * 39545081Sachartre * When we use the LDI interface, we export a device as a full disk 39555081Sachartre * if we have an entire disk slice (slice 2) and if this slice is 39565081Sachartre * exported as a full disk and not as a single slice disk. 39575081Sachartre * 39585081Sachartre * Note that pseudo devices are exported as full disks using the vnode 39595081Sachartre * interface, not the LDI interface. 39605081Sachartre */ 39615081Sachartre if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE && 39625081Sachartre vd->vdisk_type == VD_DISK_TYPE_DISK) { 39635081Sachartre ASSERT(!vd->pseudo); 39645081Sachartre return (vd_setup_full_disk(vd)); 39655081Sachartre } 39665081Sachartre 39675081Sachartre /* 39685081Sachartre * Export a single slice disk. 39695081Sachartre * 39705081Sachartre * The exported device can be either a pseudo device or a disk slice. If 39715081Sachartre * it is a disk slice different from slice 2 then it is always exported 39725081Sachartre * as a single slice disk even if the "slice" option is not specified. 39735081Sachartre * If it is disk slice 2 or a pseudo device then it is exported as a 39745081Sachartre * single slice disk only if the "slice" option is specified. 
39755081Sachartre */ 39765081Sachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE || 39775081Sachartre dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE); 39785081Sachartre return (vd_setup_single_slice_disk(vd)); 39795081Sachartre } 39805081Sachartre 39815081Sachartre static int 39825081Sachartre vd_setup_single_slice_disk(vd_t *vd) 39835081Sachartre { 39845081Sachartre int status; 39855081Sachartre char *device_path = vd->device_path; 39865081Sachartre 39875081Sachartre /* Get size of backing device */ 39885081Sachartre if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) { 39895081Sachartre PRN("ldi_get_size() failed for %s", device_path); 39901991Sheppo return (EIO); 39911991Sheppo } 39925081Sachartre vd->vdisk_size = lbtodb(vd->vdisk_size); /* convert to blocks */ 39935081Sachartre 39941991Sheppo if (vd->pseudo) { 39955081Sachartre 39965081Sachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE); 39975081Sachartre 39984963Sachartre /* 39994963Sachartre * Currently we only support exporting pseudo devices which 40004963Sachartre * provide a valid disk label. 40014963Sachartre */ 40024963Sachartre if (vd->vdisk_label == VD_DISK_LABEL_UNK) { 40034963Sachartre PRN("%s is a pseudo device with an invalid disk " 40044963Sachartre "label\n", device_path); 40054963Sachartre return (EINVAL); 40064963Sachartre } 40071991Sheppo return (0); /* ...and we're done */ 40081991Sheppo } 40091991Sheppo 40104963Sachartre /* We can only export a slice if the disk has a valid label */ 40114963Sachartre if (vd->vdisk_label == VD_DISK_LABEL_UNK) { 40124963Sachartre PRN("%s is a slice from a disk with an unknown disk label\n", 40134963Sachartre device_path); 40144963Sachartre return (EINVAL); 40154963Sachartre } 40162032Slm66018 40175081Sachartre /* 40185081Sachartre * We export the slice as a single slice disk even if the "slice" 40195081Sachartre * option was not specified. 
40205081Sachartre */ 40211991Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 40221991Sheppo vd->nslices = 1; 40231991Sheppo 40242531Snarayan if (vd->vdisk_label == VD_DISK_LABEL_EFI) { 40254963Sachartre /* Slice from a disk with an EFI label */ 40262531Snarayan status = vd_setup_partition_efi(vd); 40274963Sachartre } else { 40284963Sachartre /* Slice from a disk with a VTOC label */ 40294963Sachartre ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); 40304963Sachartre status = vd_setup_partition_vtoc(vd); 40311991Sheppo } 40324963Sachartre 40334963Sachartre return (status); 40341991Sheppo } 40351991Sheppo 40361991Sheppo static int 40375081Sachartre vd_setup_vd(vd_t *vd) 40385081Sachartre { 40395081Sachartre int status; 40405081Sachartre dev_info_t *dip; 40415081Sachartre vnode_t *vnp; 40425081Sachartre char *path = vd->device_path; 40435081Sachartre 40445081Sachartre /* make sure the vdisk backend is valid */ 40455081Sachartre if ((status = lookupname(path, UIO_SYSSPACE, 40465081Sachartre FOLLOW, NULLVPP, &vnp)) != 0) { 40475081Sachartre PR0("Cannot lookup %s errno %d", path, status); 40485081Sachartre goto done; 40495081Sachartre } 40505081Sachartre 40515081Sachartre switch (vnp->v_type) { 40525081Sachartre case VREG: 40535081Sachartre /* 40545081Sachartre * Backend is a file so it is exported as a full disk or as a 40555081Sachartre * single slice disk using the vnode interface. 40565081Sachartre */ 40575081Sachartre VN_RELE(vnp); 40585081Sachartre vd->pseudo = B_FALSE; 40595081Sachartre status = vd_setup_backend_vnode(vd); 40605081Sachartre break; 40615081Sachartre 40625081Sachartre case VBLK: 40635081Sachartre case VCHR: 40645081Sachartre /* 40655081Sachartre * Backend is a device. The way it is exported depends on the 40665081Sachartre * type of the device. 40675081Sachartre * 40685081Sachartre * - A pseudo device is exported as a full disk using the vnode 40695081Sachartre * interface or as a single slice disk using the LDI 40705081Sachartre * interface. 
40715081Sachartre * 40725081Sachartre * - A disk (represented by the slice 2 of that disk) is 40735081Sachartre * exported as a full disk using the LDI interface. 40745081Sachartre * 40755081Sachartre * - A disk slice (different from slice 2) is always exported 40765081Sachartre * as a single slice disk using the LDI interface. 40775081Sachartre * 40785081Sachartre * - The slice 2 of a disk is exported as a single slice disk 40795081Sachartre * if the "slice" option is specified, otherwise the entire 40805081Sachartre * disk will be exported. In any case, the LDI interface is 40815081Sachartre * used. 40825081Sachartre */ 40835081Sachartre 40845081Sachartre /* check if this is a pseudo device */ 40855081Sachartre if ((dip = ddi_hold_devi_by_instance(getmajor(vnp->v_rdev), 40865081Sachartre dev_to_instance(vnp->v_rdev), 0)) == NULL) { 40875081Sachartre PRN("%s is no longer accessible", path); 40885081Sachartre VN_RELE(vnp); 40895081Sachartre status = EIO; 40905081Sachartre break; 40915081Sachartre } 40925081Sachartre vd->pseudo = is_pseudo_device(dip); 40935081Sachartre ddi_release_devi(dip); 40945081Sachartre VN_RELE(vnp); 40955081Sachartre 40965081Sachartre /* 40975081Sachartre * If this is a pseudo device then its usage depends if the 40985081Sachartre * "slice" option is set or not. If the "slice" option is set 40995081Sachartre * then the pseudo device will be exported as a single slice, 41005081Sachartre * otherwise it will be exported as a full disk. 
41015081Sachartre */ 41025081Sachartre if (vd->pseudo && vd->vdisk_type == VD_DISK_TYPE_DISK) 41035081Sachartre status = vd_setup_backend_vnode(vd); 41045081Sachartre else 41055081Sachartre status = vd_setup_backend_ldi(vd); 41065081Sachartre break; 41075081Sachartre 41085081Sachartre default: 41095081Sachartre PRN("Unsupported vdisk backend %s", path); 41105081Sachartre VN_RELE(vnp); 41115081Sachartre status = EBADF; 41125081Sachartre } 41135081Sachartre 41145081Sachartre done: 41155081Sachartre if (status != 0) { 41165081Sachartre /* 41175081Sachartre * If the error is retryable print an error message only 41185081Sachartre * during the first try. 41195081Sachartre */ 41205081Sachartre if (status == ENXIO || status == ENODEV || 41215081Sachartre status == ENOENT || status == EROFS) { 41225081Sachartre if (!(vd->initialized & VD_SETUP_ERROR)) { 41235081Sachartre PRN("%s is currently inaccessible (error %d)", 41245081Sachartre path, status); 41255081Sachartre } 41265081Sachartre status = EAGAIN; 41275081Sachartre } else { 41285081Sachartre PRN("%s can not be exported as a virtual disk " 41295081Sachartre "(error %d)", path, status); 41305081Sachartre } 41315081Sachartre vd->initialized |= VD_SETUP_ERROR; 41325081Sachartre 41335081Sachartre } else if (vd->initialized & VD_SETUP_ERROR) { 41345081Sachartre /* print a message only if we previously had an error */ 41355081Sachartre PRN("%s is now online", path); 41365081Sachartre vd->initialized &= ~VD_SETUP_ERROR; 41375081Sachartre } 41385081Sachartre 41395081Sachartre return (status); 41405081Sachartre } 41415081Sachartre 41425081Sachartre static int 41435081Sachartre vds_do_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t options, 41445081Sachartre uint64_t ldc_id, vd_t **vdp) 41451991Sheppo { 41461991Sheppo char tq_name[TASKQ_NAMELEN]; 41472032Slm66018 int status; 41481991Sheppo ddi_iblock_cookie_t iblock = NULL; 41491991Sheppo ldc_attr_t ldc_attr; 41501991Sheppo vd_t *vd; 41511991Sheppo 41521991Sheppo 
41531991Sheppo ASSERT(vds != NULL); 41542410Slm66018 ASSERT(device_path != NULL); 41551991Sheppo ASSERT(vdp != NULL); 41562410Slm66018 PR0("Adding vdisk for %s", device_path); 41571991Sheppo 41581991Sheppo if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) { 41591991Sheppo PRN("No memory for virtual disk"); 41601991Sheppo return (EAGAIN); 41611991Sheppo } 41621991Sheppo *vdp = vd; /* assign here so vds_destroy_vd() can cleanup later */ 41631991Sheppo vd->vds = vds; 41643401Snarayan (void) strncpy(vd->device_path, device_path, MAXPATHLEN); 41651991Sheppo 41665081Sachartre /* Setup open flags */ 41675081Sachartre vd->open_flags = FREAD; 41685081Sachartre 41695081Sachartre if (!(options & VD_OPT_RDONLY)) 41705081Sachartre vd->open_flags |= FWRITE; 41715081Sachartre 41725081Sachartre if (options & VD_OPT_EXCLUSIVE) 41735081Sachartre vd->open_flags |= FEXCL; 41745081Sachartre 41755081Sachartre /* Setup disk type */ 41765081Sachartre if (options & VD_OPT_SLICE) { 41775081Sachartre vd->vdisk_type = VD_DISK_TYPE_SLICE; 41785081Sachartre vd->nslices = 1; 41795081Sachartre } else { 41805081Sachartre vd->vdisk_type = VD_DISK_TYPE_DISK; 41815081Sachartre vd->nslices = V_NUMPAR; 41825081Sachartre } 41835081Sachartre 41845081Sachartre /* default disk label */ 41855081Sachartre vd->vdisk_label = VD_DISK_LABEL_UNK; 41865081Sachartre 41872032Slm66018 /* Open vdisk and initialize parameters */ 41883401Snarayan if ((status = vd_setup_vd(vd)) == 0) { 41893401Snarayan vd->initialized |= VD_DISK_READY; 41903401Snarayan 41913401Snarayan ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 41923401Snarayan PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u", 41933401Snarayan ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 41943401Snarayan (vd->pseudo ? "yes" : "no"), (vd->file ? 
"yes" : "no"), 41953401Snarayan vd->nslices); 41963401Snarayan } else { 41973401Snarayan if (status != EAGAIN) 41983401Snarayan return (status); 41993401Snarayan } 42001991Sheppo 42011991Sheppo /* Initialize locking */ 42021991Sheppo if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED, 42034696Sachartre &iblock) != DDI_SUCCESS) { 42041991Sheppo PRN("Could not get iblock cookie."); 42051991Sheppo return (EIO); 42061991Sheppo } 42071991Sheppo 42081991Sheppo mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock); 42091991Sheppo vd->initialized |= VD_LOCKING; 42101991Sheppo 42111991Sheppo 42122336Snarayan /* Create start and completion task queues for the vdisk */ 42132336Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_startq%lu", id); 42141991Sheppo PR1("tq_name = %s", tq_name); 42152336Snarayan if ((vd->startq = ddi_taskq_create(vds->dip, tq_name, 1, 42164696Sachartre TASKQ_DEFAULTPRI, 0)) == NULL) { 42171991Sheppo PRN("Could not create task queue"); 42181991Sheppo return (EIO); 42191991Sheppo } 42202336Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_completionq%lu", id); 42212336Snarayan PR1("tq_name = %s", tq_name); 42222336Snarayan if ((vd->completionq = ddi_taskq_create(vds->dip, tq_name, 1, 42234696Sachartre TASKQ_DEFAULTPRI, 0)) == NULL) { 42242336Snarayan PRN("Could not create task queue"); 42252336Snarayan return (EIO); 42262336Snarayan } 42272336Snarayan vd->enabled = 1; /* before callback can dispatch to startq */ 42281991Sheppo 42291991Sheppo 42301991Sheppo /* Bring up LDC */ 42311991Sheppo ldc_attr.devclass = LDC_DEV_BLK_SVC; 42321991Sheppo ldc_attr.instance = ddi_get_instance(vds->dip); 42331991Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; 42342410Slm66018 ldc_attr.mtu = VD_LDC_MTU; 42351991Sheppo if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) { 42363782Sachartre PRN("Could not initialize LDC channel %lu, " 42373782Sachartre "init failed with error %d", ldc_id, status); 42381991Sheppo return (status); 42391991Sheppo } 
42401991Sheppo vd->initialized |= VD_LDC; 42411991Sheppo 42421991Sheppo if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events, 42434696Sachartre (caddr_t)vd)) != 0) { 42443782Sachartre PRN("Could not initialize LDC channel %lu," 42453782Sachartre "reg_callback failed with error %d", ldc_id, status); 42461991Sheppo return (status); 42471991Sheppo } 42481991Sheppo 42491991Sheppo if ((status = ldc_open(vd->ldc_handle)) != 0) { 42503782Sachartre PRN("Could not initialize LDC channel %lu," 42513782Sachartre "open failed with error %d", ldc_id, status); 42521991Sheppo return (status); 42531991Sheppo } 42541991Sheppo 42552793Slm66018 if ((status = ldc_up(vd->ldc_handle)) != 0) { 42563166Ssg70180 PR0("ldc_up() returned errno %d", status); 42572793Slm66018 } 42582793Slm66018 42592531Snarayan /* Allocate the inband task memory handle */ 42602531Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, &(vd->inband_task.mhdl)); 42612531Snarayan if (status) { 42623782Sachartre PRN("Could not initialize LDC channel %lu," 42633782Sachartre "alloc_handle failed with error %d", ldc_id, status); 42642531Snarayan return (ENXIO); 42652531Snarayan } 42661991Sheppo 42671991Sheppo /* Add the successfully-initialized vdisk to the server's table */ 42681991Sheppo if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) { 42691991Sheppo PRN("Error adding vdisk ID %lu to table", id); 42701991Sheppo return (EIO); 42711991Sheppo } 42721991Sheppo 42732793Slm66018 /* Allocate the staging buffer */ 42742793Slm66018 vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 42752793Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 42762793Slm66018 42772793Slm66018 /* store initial state */ 42782793Slm66018 vd->state = VD_STATE_INIT; 42792793Slm66018 42801991Sheppo return (0); 42811991Sheppo } 42821991Sheppo 42832793Slm66018 static void 42842793Slm66018 vd_free_dring_task(vd_t *vdp) 42852793Slm66018 { 42862793Slm66018 if (vdp->dring_task != NULL) { 
42872793Slm66018 ASSERT(vdp->dring_len != 0); 42882793Slm66018 /* Free all dring_task memory handles */ 42892793Slm66018 for (int i = 0; i < vdp->dring_len; i++) { 42902793Slm66018 (void) ldc_mem_free_handle(vdp->dring_task[i].mhdl); 42912793Slm66018 kmem_free(vdp->dring_task[i].msg, vdp->max_msglen); 42922793Slm66018 vdp->dring_task[i].msg = NULL; 42932793Slm66018 } 42942793Slm66018 kmem_free(vdp->dring_task, 42952793Slm66018 (sizeof (*vdp->dring_task)) * vdp->dring_len); 42962793Slm66018 vdp->dring_task = NULL; 42972793Slm66018 } 42982793Slm66018 } 42992793Slm66018 43001991Sheppo /* 43011991Sheppo * Destroy the state associated with a virtual disk 43021991Sheppo */ 43031991Sheppo static void 43041991Sheppo vds_destroy_vd(void *arg) 43051991Sheppo { 43061991Sheppo vd_t *vd = (vd_t *)arg; 43073166Ssg70180 int retry = 0, rv; 43081991Sheppo 43091991Sheppo if (vd == NULL) 43101991Sheppo return; 43111991Sheppo 43122336Snarayan PR0("Destroying vdisk state"); 43132336Snarayan 43142531Snarayan if (vd->dk_efi.dki_data != NULL) 43152531Snarayan kmem_free(vd->dk_efi.dki_data, vd->dk_efi.dki_length); 43162531Snarayan 43171991Sheppo /* Disable queuing requests for the vdisk */ 43181991Sheppo if (vd->initialized & VD_LOCKING) { 43191991Sheppo mutex_enter(&vd->lock); 43201991Sheppo vd->enabled = 0; 43211991Sheppo mutex_exit(&vd->lock); 43221991Sheppo } 43231991Sheppo 43242336Snarayan /* Drain and destroy start queue (*before* destroying completionq) */ 43252336Snarayan if (vd->startq != NULL) 43262336Snarayan ddi_taskq_destroy(vd->startq); /* waits for queued tasks */ 43272336Snarayan 43282336Snarayan /* Drain and destroy completion queue (*before* shutting down LDC) */ 43292336Snarayan if (vd->completionq != NULL) 43302336Snarayan ddi_taskq_destroy(vd->completionq); /* waits for tasks */ 43312336Snarayan 43322793Slm66018 vd_free_dring_task(vd); 43332793Slm66018 43343166Ssg70180 /* Free the inband task memory handle */ 43353166Ssg70180 (void) 
ldc_mem_free_handle(vd->inband_task.mhdl); 43363166Ssg70180 43373166Ssg70180 /* Shut down LDC */ 43383166Ssg70180 if (vd->initialized & VD_LDC) { 43393166Ssg70180 /* unmap the dring */ 43403166Ssg70180 if (vd->initialized & VD_DRING) 43413166Ssg70180 (void) ldc_mem_dring_unmap(vd->dring_handle); 43423166Ssg70180 43433166Ssg70180 /* close LDC channel - retry on EAGAIN */ 43443166Ssg70180 while ((rv = ldc_close(vd->ldc_handle)) == EAGAIN) { 43453166Ssg70180 if (++retry > vds_ldc_retries) { 43463166Ssg70180 PR0("Timed out closing channel"); 43473166Ssg70180 break; 43483166Ssg70180 } 43493166Ssg70180 drv_usecwait(vds_ldc_delay); 43503166Ssg70180 } 43513166Ssg70180 if (rv == 0) { 43523166Ssg70180 (void) ldc_unreg_callback(vd->ldc_handle); 43533166Ssg70180 (void) ldc_fini(vd->ldc_handle); 43543166Ssg70180 } else { 43553166Ssg70180 /* 43563166Ssg70180 * Closing the LDC channel has failed. Ideally we should 43573166Ssg70180 * fail here but there is no Zeus level infrastructure 43583166Ssg70180 * to handle this. The MD has already been changed and 43593166Ssg70180 * we have to do the close. So we try to do as much 43603166Ssg70180 * clean up as we can. 
43613166Ssg70180 */ 43623166Ssg70180 (void) ldc_set_cb_mode(vd->ldc_handle, LDC_CB_DISABLE); 43633166Ssg70180 while (ldc_unreg_callback(vd->ldc_handle) == EAGAIN) 43643166Ssg70180 drv_usecwait(vds_ldc_delay); 43653166Ssg70180 } 43663166Ssg70180 } 43673166Ssg70180 43682793Slm66018 /* Free the staging buffer for msgs */ 43692793Slm66018 if (vd->vio_msgp != NULL) { 43702793Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 43712793Slm66018 vd->vio_msgp = NULL; 43722793Slm66018 } 43732793Slm66018 43742793Slm66018 /* Free the inband message buffer */ 43752793Slm66018 if (vd->inband_task.msg != NULL) { 43762793Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 43772793Slm66018 vd->inband_task.msg = NULL; 43782336Snarayan } 43793782Sachartre if (vd->file) { 43803782Sachartre /* Close file */ 43815081Sachartre (void) VOP_CLOSE(vd->file_vnode, vd->open_flags, 1, 43823782Sachartre 0, kcred); 43833782Sachartre VN_RELE(vd->file_vnode); 43844696Sachartre if (vd->file_devid != NULL) 43854696Sachartre ddi_devid_free(vd->file_devid); 43863782Sachartre } else { 43873782Sachartre /* Close any open backing-device slices */ 43883782Sachartre for (uint_t slice = 0; slice < vd->nslices; slice++) { 43893782Sachartre if (vd->ldi_handle[slice] != NULL) { 43903782Sachartre PR0("Closing slice %u", slice); 43913782Sachartre (void) ldi_close(vd->ldi_handle[slice], 43925081Sachartre vd->open_flags, kcred); 43933401Snarayan } 43941991Sheppo } 43951991Sheppo } 43961991Sheppo 43971991Sheppo /* Free lock */ 43981991Sheppo if (vd->initialized & VD_LOCKING) 43991991Sheppo mutex_destroy(&vd->lock); 44001991Sheppo 44011991Sheppo /* Finally, free the vdisk structure itself */ 44021991Sheppo kmem_free(vd, sizeof (*vd)); 44031991Sheppo } 44041991Sheppo 44051991Sheppo static int 44065081Sachartre vds_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t options, 44075081Sachartre uint64_t ldc_id) 44081991Sheppo { 44091991Sheppo int status; 44101991Sheppo vd_t *vd = NULL; 44111991Sheppo 
44121991Sheppo 44135081Sachartre if ((status = vds_do_init_vd(vds, id, device_path, options, 44145081Sachartre ldc_id, &vd)) != 0) 44151991Sheppo vds_destroy_vd(vd); 44161991Sheppo 44171991Sheppo return (status); 44181991Sheppo } 44191991Sheppo 44201991Sheppo static int 44211991Sheppo vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel, 44221991Sheppo uint64_t *ldc_id) 44231991Sheppo { 44241991Sheppo int num_channels; 44251991Sheppo 44261991Sheppo 44271991Sheppo /* Look for channel endpoint child(ren) of the vdisk MD node */ 44281991Sheppo if ((num_channels = md_scan_dag(md, vd_node, 44294696Sachartre md_find_name(md, VD_CHANNEL_ENDPOINT), 44304696Sachartre md_find_name(md, "fwd"), channel)) <= 0) { 44311991Sheppo PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT); 44321991Sheppo return (-1); 44331991Sheppo } 44341991Sheppo 44351991Sheppo /* Get the "id" value for the first channel endpoint node */ 44361991Sheppo if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) { 44371991Sheppo PRN("No \"%s\" property found for \"%s\" of vdisk", 44381991Sheppo VD_ID_PROP, VD_CHANNEL_ENDPOINT); 44391991Sheppo return (-1); 44401991Sheppo } 44411991Sheppo 44421991Sheppo if (num_channels > 1) { 44431991Sheppo PRN("Using ID of first of multiple channels for this vdisk"); 44441991Sheppo } 44451991Sheppo 44461991Sheppo return (0); 44471991Sheppo } 44481991Sheppo 44491991Sheppo static int 44501991Sheppo vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id) 44511991Sheppo { 44521991Sheppo int num_nodes, status; 44531991Sheppo size_t size; 44541991Sheppo mde_cookie_t *channel; 44551991Sheppo 44561991Sheppo 44571991Sheppo if ((num_nodes = md_node_count(md)) <= 0) { 44581991Sheppo PRN("Invalid node count in Machine Description subtree"); 44591991Sheppo return (-1); 44601991Sheppo } 44611991Sheppo size = num_nodes*(sizeof (*channel)); 44621991Sheppo channel = kmem_zalloc(size, KM_SLEEP); 44631991Sheppo status = vds_do_get_ldc_id(md, vd_node, 
channel, ldc_id); 44641991Sheppo kmem_free(channel, size); 44651991Sheppo 44661991Sheppo return (status); 44671991Sheppo } 44681991Sheppo 44695081Sachartre /* 44705081Sachartre * Function: 44715081Sachartre * vds_get_options 44725081Sachartre * 44735081Sachartre * Description: 44745081Sachartre * Parse the options of a vds node. Options are defined as an array 44755081Sachartre * of strings in the vds-block-device-opts property of the vds node 44765081Sachartre * in the machine description. Options are returned as a bitmask. The 44775081Sachartre * mapping between the bitmask options and the options strings from the 44785081Sachartre * machine description is defined in the vd_bdev_options[] array. 44795081Sachartre * 44805081Sachartre * The vds-block-device-opts property is optional. If a vds has no such 44815081Sachartre * property then no option is defined. 44825081Sachartre * 44835081Sachartre * Parameters: 44845081Sachartre * md - machine description. 44855081Sachartre * vd_node - vds node in the machine description for which 44865081Sachartre * options have to be parsed. 44875081Sachartre * options - the returned options. 44885081Sachartre * 44895081Sachartre * Return Code: 44905081Sachartre * none. 
44915081Sachartre */ 44925081Sachartre static void 44935081Sachartre vds_get_options(md_t *md, mde_cookie_t vd_node, uint64_t *options) 44945081Sachartre { 44955081Sachartre char *optstr, *opt; 44965081Sachartre int len, n, i; 44975081Sachartre 44985081Sachartre *options = 0; 44995081Sachartre 45005081Sachartre if (md_get_prop_data(md, vd_node, VD_BLOCK_DEVICE_OPTS, 45015081Sachartre (uint8_t **)&optstr, &len) != 0) { 45025081Sachartre PR0("No options found"); 45035081Sachartre return; 45045081Sachartre } 45055081Sachartre 45065081Sachartre /* parse options */ 45075081Sachartre opt = optstr; 45085081Sachartre n = sizeof (vd_bdev_options) / sizeof (vd_option_t); 45095081Sachartre 45105081Sachartre while (opt < optstr + len) { 45115081Sachartre for (i = 0; i < n; i++) { 45125081Sachartre if (strncmp(vd_bdev_options[i].vdo_name, 45135081Sachartre opt, VD_OPTION_NLEN) == 0) { 45145081Sachartre *options |= vd_bdev_options[i].vdo_value; 45155081Sachartre break; 45165081Sachartre } 45175081Sachartre } 45185081Sachartre 45195081Sachartre if (i < n) { 45205081Sachartre PR0("option: %s", opt); 45215081Sachartre } else { 45225081Sachartre PRN("option %s is unknown or unsupported", opt); 45235081Sachartre } 45245081Sachartre 45255081Sachartre opt += strlen(opt) + 1; 45265081Sachartre } 45275081Sachartre } 45285081Sachartre 45291991Sheppo static void 45301991Sheppo vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 45311991Sheppo { 45322410Slm66018 char *device_path = NULL; 45335081Sachartre uint64_t id = 0, ldc_id = 0, options = 0; 45341991Sheppo 45351991Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 45361991Sheppo PRN("Error getting vdisk \"%s\"", VD_ID_PROP); 45371991Sheppo return; 45381991Sheppo } 45391991Sheppo PR0("Adding vdisk ID %lu", id); 45401991Sheppo if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP, 45414696Sachartre &device_path) != 0) { 45421991Sheppo PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 45431991Sheppo return; 
45441991Sheppo } 45451991Sheppo 45465081Sachartre vds_get_options(md, vd_node, &options); 45475081Sachartre 45481991Sheppo if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) { 45491991Sheppo PRN("Error getting LDC ID for vdisk %lu", id); 45501991Sheppo return; 45511991Sheppo } 45521991Sheppo 45535081Sachartre if (vds_init_vd(vds, id, device_path, options, ldc_id) != 0) { 45541991Sheppo PRN("Failed to add vdisk ID %lu", id); 45551991Sheppo return; 45561991Sheppo } 45571991Sheppo } 45581991Sheppo 45591991Sheppo static void 45601991Sheppo vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 45611991Sheppo { 45621991Sheppo uint64_t id = 0; 45631991Sheppo 45641991Sheppo 45651991Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 45661991Sheppo PRN("Unable to get \"%s\" property from vdisk's MD node", 45671991Sheppo VD_ID_PROP); 45681991Sheppo return; 45691991Sheppo } 45701991Sheppo PR0("Removing vdisk ID %lu", id); 45711991Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0) 45721991Sheppo PRN("No vdisk entry found for vdisk ID %lu", id); 45731991Sheppo } 45741991Sheppo 45751991Sheppo static void 45761991Sheppo vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node, 45771991Sheppo md_t *curr_md, mde_cookie_t curr_vd_node) 45781991Sheppo { 45791991Sheppo char *curr_dev, *prev_dev; 45805081Sachartre uint64_t curr_id = 0, curr_ldc_id = 0, curr_options = 0; 45815081Sachartre uint64_t prev_id = 0, prev_ldc_id = 0, prev_options = 0; 45821991Sheppo size_t len; 45831991Sheppo 45841991Sheppo 45851991Sheppo /* Validate that vdisk ID has not changed */ 45861991Sheppo if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) { 45871991Sheppo PRN("Error getting previous vdisk \"%s\" property", 45881991Sheppo VD_ID_PROP); 45891991Sheppo return; 45901991Sheppo } 45911991Sheppo if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) { 45921991Sheppo PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP); 
45931991Sheppo return; 45941991Sheppo } 45951991Sheppo if (curr_id != prev_id) { 45961991Sheppo PRN("Not changing vdisk: ID changed from %lu to %lu", 45971991Sheppo prev_id, curr_id); 45981991Sheppo return; 45991991Sheppo } 46001991Sheppo 46011991Sheppo /* Validate that LDC ID has not changed */ 46021991Sheppo if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) { 46031991Sheppo PRN("Error getting LDC ID for vdisk %lu", prev_id); 46041991Sheppo return; 46051991Sheppo } 46061991Sheppo 46071991Sheppo if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) { 46081991Sheppo PRN("Error getting LDC ID for vdisk %lu", curr_id); 46091991Sheppo return; 46101991Sheppo } 46111991Sheppo if (curr_ldc_id != prev_ldc_id) { 46122032Slm66018 _NOTE(NOTREACHED); /* lint is confused */ 46131991Sheppo PRN("Not changing vdisk: " 46141991Sheppo "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id); 46151991Sheppo return; 46161991Sheppo } 46171991Sheppo 46181991Sheppo /* Determine whether device path has changed */ 46191991Sheppo if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP, 46204696Sachartre &prev_dev) != 0) { 46211991Sheppo PRN("Error getting previous vdisk \"%s\"", 46221991Sheppo VD_BLOCK_DEVICE_PROP); 46231991Sheppo return; 46241991Sheppo } 46251991Sheppo if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP, 46264696Sachartre &curr_dev) != 0) { 46271991Sheppo PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 46281991Sheppo return; 46291991Sheppo } 46301991Sheppo if (((len = strlen(curr_dev)) == strlen(prev_dev)) && 46311991Sheppo (strncmp(curr_dev, prev_dev, len) == 0)) 46321991Sheppo return; /* no relevant (supported) change */ 46331991Sheppo 46345081Sachartre /* Validate that options have not changed */ 46355081Sachartre vds_get_options(prev_md, prev_vd_node, &prev_options); 46365081Sachartre vds_get_options(curr_md, curr_vd_node, &curr_options); 46375081Sachartre if (prev_options != curr_options) { 46385081Sachartre 
PRN("Not changing vdisk: options changed from %lx to %lx", 46395081Sachartre prev_options, curr_options); 46405081Sachartre return; 46415081Sachartre } 46425081Sachartre 46431991Sheppo PR0("Changing vdisk ID %lu", prev_id); 46442793Slm66018 46451991Sheppo /* Remove old state, which will close vdisk and reset */ 46461991Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0) 46471991Sheppo PRN("No entry found for vdisk ID %lu", prev_id); 46482793Slm66018 46491991Sheppo /* Re-initialize vdisk with new state */ 46505081Sachartre if (vds_init_vd(vds, curr_id, curr_dev, curr_options, 46515081Sachartre curr_ldc_id) != 0) { 46521991Sheppo PRN("Failed to change vdisk ID %lu", curr_id); 46531991Sheppo return; 46541991Sheppo } 46551991Sheppo } 46561991Sheppo 46571991Sheppo static int 46581991Sheppo vds_process_md(void *arg, mdeg_result_t *md) 46591991Sheppo { 46601991Sheppo int i; 46611991Sheppo vds_t *vds = arg; 46621991Sheppo 46631991Sheppo 46641991Sheppo if (md == NULL) 46651991Sheppo return (MDEG_FAILURE); 46661991Sheppo ASSERT(vds != NULL); 46671991Sheppo 46681991Sheppo for (i = 0; i < md->removed.nelem; i++) 46691991Sheppo vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]); 46701991Sheppo for (i = 0; i < md->match_curr.nelem; i++) 46711991Sheppo vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i], 46721991Sheppo md->match_curr.mdp, md->match_curr.mdep[i]); 46731991Sheppo for (i = 0; i < md->added.nelem; i++) 46741991Sheppo vds_add_vd(vds, md->added.mdp, md->added.mdep[i]); 46751991Sheppo 46761991Sheppo return (MDEG_SUCCESS); 46771991Sheppo } 46781991Sheppo 46793401Snarayan 46801991Sheppo static int 46811991Sheppo vds_do_attach(dev_info_t *dip) 46821991Sheppo { 46833297Ssb155480 int status, sz; 46843297Ssb155480 int cfg_handle; 46851991Sheppo minor_t instance = ddi_get_instance(dip); 46861991Sheppo vds_t *vds; 46873297Ssb155480 mdeg_prop_spec_t *pspecp; 46883297Ssb155480 mdeg_node_spec_t *ispecp; 46891991Sheppo 46901991Sheppo /* 
46911991Sheppo * The "cfg-handle" property of a vds node in an MD contains the MD's 46921991Sheppo * notion of "instance", or unique identifier, for that node; OBP 46931991Sheppo * stores the value of the "cfg-handle" MD property as the value of 46941991Sheppo * the "reg" property on the node in the device tree it builds from 46951991Sheppo * the MD and passes to Solaris. Thus, we look up the devinfo node's 46961991Sheppo * "reg" property value to uniquely identify this device instance when 46971991Sheppo * registering with the MD event-generation framework. If the "reg" 46981991Sheppo * property cannot be found, the device tree state is presumably so 46991991Sheppo * broken that there is no point in continuing. 47001991Sheppo */ 47013297Ssb155480 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 47024696Sachartre VD_REG_PROP)) { 47033297Ssb155480 PRN("vds \"%s\" property does not exist", VD_REG_PROP); 47041991Sheppo return (DDI_FAILURE); 47051991Sheppo } 47061991Sheppo 47071991Sheppo /* Get the MD instance for later MDEG registration */ 47081991Sheppo cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 47093297Ssb155480 VD_REG_PROP, -1); 47101991Sheppo 47111991Sheppo if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) { 47121991Sheppo PRN("Could not allocate state for instance %u", instance); 47131991Sheppo return (DDI_FAILURE); 47141991Sheppo } 47151991Sheppo 47161991Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 47171991Sheppo PRN("Could not get state for instance %u", instance); 47181991Sheppo ddi_soft_state_free(vds_state, instance); 47191991Sheppo return (DDI_FAILURE); 47201991Sheppo } 47211991Sheppo 47221991Sheppo vds->dip = dip; 47231991Sheppo vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS, 47244696Sachartre vds_destroy_vd, sizeof (void *)); 47254696Sachartre 47261991Sheppo ASSERT(vds->vd_table != NULL); 47271991Sheppo 47281991Sheppo if ((status = ldi_ident_from_dip(dip, 
&vds->ldi_ident)) != 0) { 47291991Sheppo PRN("ldi_ident_from_dip() returned errno %d", status); 47301991Sheppo return (DDI_FAILURE); 47311991Sheppo } 47321991Sheppo vds->initialized |= VDS_LDI; 47331991Sheppo 47341991Sheppo /* Register for MD updates */ 47353297Ssb155480 sz = sizeof (vds_prop_template); 47363297Ssb155480 pspecp = kmem_alloc(sz, KM_SLEEP); 47373297Ssb155480 bcopy(vds_prop_template, pspecp, sz); 47383297Ssb155480 47393297Ssb155480 VDS_SET_MDEG_PROP_INST(pspecp, cfg_handle); 47403297Ssb155480 47413297Ssb155480 /* initialize the complete prop spec structure */ 47423297Ssb155480 ispecp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 47433297Ssb155480 ispecp->namep = "virtual-device"; 47443297Ssb155480 ispecp->specp = pspecp; 47453297Ssb155480 47463297Ssb155480 if (mdeg_register(ispecp, &vd_match, vds_process_md, vds, 47474696Sachartre &vds->mdeg) != MDEG_SUCCESS) { 47481991Sheppo PRN("Unable to register for MD updates"); 47493297Ssb155480 kmem_free(ispecp, sizeof (mdeg_node_spec_t)); 47503297Ssb155480 kmem_free(pspecp, sz); 47511991Sheppo return (DDI_FAILURE); 47521991Sheppo } 47533297Ssb155480 47543297Ssb155480 vds->ispecp = ispecp; 47551991Sheppo vds->initialized |= VDS_MDEG; 47561991Sheppo 47572032Slm66018 /* Prevent auto-detaching so driver is available whenever MD changes */ 47582032Slm66018 if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) != 47592032Slm66018 DDI_PROP_SUCCESS) { 47602032Slm66018 PRN("failed to set \"%s\" property for instance %u", 47612032Slm66018 DDI_NO_AUTODETACH, instance); 47622032Slm66018 } 47632032Slm66018 47641991Sheppo ddi_report_dev(dip); 47651991Sheppo return (DDI_SUCCESS); 47661991Sheppo } 47671991Sheppo 47681991Sheppo static int 47691991Sheppo vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 47701991Sheppo { 47711991Sheppo int status; 47721991Sheppo 47731991Sheppo switch (cmd) { 47741991Sheppo case DDI_ATTACH: 47752336Snarayan PR0("Attaching"); 47761991Sheppo if ((status = vds_do_attach(dip)) != 
DDI_SUCCESS) 47771991Sheppo (void) vds_detach(dip, DDI_DETACH); 47781991Sheppo return (status); 47791991Sheppo case DDI_RESUME: 47802336Snarayan PR0("No action required for DDI_RESUME"); 47811991Sheppo return (DDI_SUCCESS); 47821991Sheppo default: 47831991Sheppo return (DDI_FAILURE); 47841991Sheppo } 47851991Sheppo } 47861991Sheppo 47871991Sheppo static struct dev_ops vds_ops = { 47881991Sheppo DEVO_REV, /* devo_rev */ 47891991Sheppo 0, /* devo_refcnt */ 47901991Sheppo ddi_no_info, /* devo_getinfo */ 47911991Sheppo nulldev, /* devo_identify */ 47921991Sheppo nulldev, /* devo_probe */ 47931991Sheppo vds_attach, /* devo_attach */ 47941991Sheppo vds_detach, /* devo_detach */ 47951991Sheppo nodev, /* devo_reset */ 47961991Sheppo NULL, /* devo_cb_ops */ 47971991Sheppo NULL, /* devo_bus_ops */ 47981991Sheppo nulldev /* devo_power */ 47991991Sheppo }; 48001991Sheppo 48011991Sheppo static struct modldrv modldrv = { 48021991Sheppo &mod_driverops, 48034838Slm66018 "virtual disk server", 48041991Sheppo &vds_ops, 48051991Sheppo }; 48061991Sheppo 48071991Sheppo static struct modlinkage modlinkage = { 48081991Sheppo MODREV_1, 48091991Sheppo &modldrv, 48101991Sheppo NULL 48111991Sheppo }; 48121991Sheppo 48131991Sheppo 48141991Sheppo int 48151991Sheppo _init(void) 48161991Sheppo { 48171991Sheppo int i, status; 48181991Sheppo 48192336Snarayan 48201991Sheppo if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0) 48211991Sheppo return (status); 48221991Sheppo if ((status = mod_install(&modlinkage)) != 0) { 48231991Sheppo ddi_soft_state_fini(&vds_state); 48241991Sheppo return (status); 48251991Sheppo } 48261991Sheppo 48271991Sheppo /* Fill in the bit-mask of server-supported operations */ 48281991Sheppo for (i = 0; i < vds_noperations; i++) 48291991Sheppo vds_operations |= 1 << (vds_operation[i].operation - 1); 48301991Sheppo 48311991Sheppo return (0); 48321991Sheppo } 48331991Sheppo 48341991Sheppo int 48351991Sheppo _info(struct modinfo *modinfop) 48361991Sheppo { 
48371991Sheppo return (mod_info(&modlinkage, modinfop)); 48381991Sheppo } 48391991Sheppo 48401991Sheppo int 48411991Sheppo _fini(void) 48421991Sheppo { 48431991Sheppo int status; 48441991Sheppo 48452336Snarayan 48461991Sheppo if ((status = mod_remove(&modlinkage)) != 0) 48471991Sheppo return (status); 48481991Sheppo ddi_soft_state_fini(&vds_state); 48491991Sheppo return (0); 48501991Sheppo } 4851