1 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/zfs_context.h> 28 #include <sys/spa.h> 29 #include <sys/refcount.h> 30 #include <sys/vdev_disk.h> 31 #include <sys/vdev_impl.h> 32 #include <sys/fs/zfs.h> 33 #include <sys/zio.h> 34 #include <sys/sunldi.h> 35 #include <sys/fm/fs/zfs.h> 36 #include <sys/disklabel.h> 37 #include <sys/dkio.h> 38 #include <sys/workqueue.h> 39 40 /* 41 * Virtual device vector for disks. 42 */ 43 44 static void vdev_disk_io_intr(buf_t *); 45 46 static void 47 vdev_disk_flush(struct work *work, void *cookie) 48 { 49 vdev_disk_t *dvd; 50 int error, cmd; 51 buf_t *bp; 52 vnode_t *vp; 53 54 bp = (struct buf *)work; 55 vp = bp->b_vp; 56 dvd = cookie; 57 58 KASSERT(vp == dvd->vd_vn); 59 60 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 61 cmd = 1; 62 error = VOP_IOCTL(vp, DIOCCACHESYNC, &cmd, FREAD|FWRITE, 63 kauth_cred_get()); 64 VOP_UNLOCK(vp, 0); 65 bp->b_error = error; 66 vdev_disk_io_intr(bp); 67 } 68 69 static int 70 vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) 71 { 72 struct partinfo pinfo; 73 vdev_disk_t *dvd; 74 vnode_t *vp; 75 int error, cmd; 76 77 /* 78 * We must have a pathname, and it must be absolute. 79 */ 80 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 81 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 82 return (EINVAL); 83 } 84 85 dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); 86 87 /* 88 * When opening a disk device, we want to preserve the user's original 89 * intent. We always want to open the device by the path the user gave 90 * us, even if it is one of multiple paths to the save device. But we 91 * also want to be able to survive disks being removed/recabled. 92 * Therefore the sequence of opening devices is: 93 * 94 * 1. Try opening the device by path. For legacy pools without the 95 * 'whole_disk' property, attempt to fix the path by appending 's0'. 96 * 97 * 2. If the devid of the device matches the stored value, return 98 * success. 99 * 100 * 3. Otherwise, the device may have moved. Try opening the device 101 * by the devid instead. 102 * 103 */ 104 if (vd->vdev_devid != NULL) { 105 /* XXXNETBSD wedges */ 106 } 107 108 error = EINVAL; /* presume failure */ 109 110 error = vn_open(vd->vdev_path, UIO_SYSSPACE, FREAD|FWRITE, 0, 111 &vp, CRCREAT, 0); 112 if (error != 0) { 113 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 114 return error; 115 } 116 if (vp->v_type != VBLK) { 117 vrele(vp); 118 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 119 return EINVAL; 120 } 121 122 /* 123 * XXXNETBSD Compare the devid to the stored value. 124 */ 125 126 /* 127 * Determine the actual size of the device. 128 * XXXNETBSD wedges. 129 */ 130 error = VOP_IOCTL(vp, DIOCGPART, &pinfo, FREAD|FWRITE, 131 kauth_cred_get()); 132 if (error != 0) { 133 vrele(vp); 134 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 135 return error; 136 } 137 *psize = (uint64_t)pinfo.part->p_size * pinfo.disklab->d_secsize; 138 *ashift = highbit(MAX(pinfo.disklab->d_secsize, SPA_MINBLOCKSIZE)) - 1; 139 vd->vdev_wholedisk = (pinfo.part->p_offset == 0); /* XXXNETBSD */ 140 141 /* 142 * Create a workqueue to process cache-flushes concurrently. 143 */ 144 error = workqueue_create(&dvd->vd_wq, "vdevsync", 145 vdev_disk_flush, dvd, PRI_NONE, IPL_NONE, WQ_MPSAFE); 146 if (error != 0) { 147 vrele(vp); 148 return error; 149 } 150 151 /* 152 * Clear the nowritecache bit, so that on a vdev_reopen() we will 153 * try again. 154 */ 155 vd->vdev_nowritecache = B_FALSE; 156 157 dvd->vd_vn = vp; 158 return 0; 159 } 160 161 static void 162 vdev_disk_close(vdev_t *vd) 163 { 164 vdev_disk_t *dvd = vd->vdev_tsd; 165 vnode_t *vp; 166 167 if (dvd == NULL) 168 return; 169 170 dprintf("removing disk %s, devid %s\n", 171 vd->vdev_path ? vd->vdev_path : "<none>", 172 vd->vdev_devid ? vd->vdev_devid : "<none>"); 173 174 if ((vp = dvd->vd_vn) != NULL) { 175 /* XXX NetBSD Sometimes we deadlock on this why ? */ 176 // vprint("vnode close info", vp); 177 vn_close(vp, FREAD|FWRITE, kauth_cred_get()); 178 // vprint("vnode close info", vp); 179 /* XXX is this needed ? vrele(vp); */ 180 workqueue_destroy(dvd->vd_wq); 181 } 182 kmem_free(dvd, sizeof (vdev_disk_t)); 183 vd->vdev_tsd = NULL; 184 } 185 186 static void 187 vdev_disk_io_intr(buf_t *bp) 188 { 189 zio_t *zio = bp->b_private; 190 191 dprintf("vdev_disk_io_intr bp=%p\n", bp); 192 /* 193 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO. 194 * Rather than teach the rest of the stack about other error 195 * possibilities (EFAULT, etc), we normalize the error value here. 196 */ 197 if (bp->b_error == 0) { 198 if (bp->b_resid != 0) { 199 zio->io_error = EIO; 200 } else { 201 zio->io_error = 0; 202 } 203 } else { 204 zio->io_error = EIO; 205 } 206 207 putiobuf(bp); 208 zio_interrupt(zio); 209 } 210 211 static int 212 vdev_disk_io_start(zio_t *zio) 213 { 214 vdev_t *vd = zio->io_vd; 215 vdev_disk_t *dvd = vd->vdev_tsd; 216 vnode_t *vp; 217 buf_t *bp, *nbp; 218 int error, size, off, resid; 219 220 vp = dvd->vd_vn; 221 if (zio->io_type == ZIO_TYPE_IOCTL) { 222 /* XXPOLICY */ 223 if (!vdev_readable(vd)) { 224 zio->io_error = ENXIO; 225 return (ZIO_PIPELINE_CONTINUE); 226 } 227 228 switch (zio->io_cmd) { 229 case DKIOCFLUSHWRITECACHE: 230 231 if (zfs_nocacheflush) 232 break; 233 234 if (vd->vdev_nowritecache) { 235 zio->io_error = ENOTSUP; 236 break; 237 } 238 239 bp = getiobuf(vp, true); 240 bp->b_private = zio; 241 workqueue_enqueue(dvd->vd_wq, &bp->b_work, NULL); 242 return (ZIO_PIPELINE_STOP); 243 244 default: 245 zio->io_error = ENOTSUP; 246 break; 247 } 248 249 return (ZIO_PIPELINE_CONTINUE); 250 } 251 252 bp = getiobuf(vp, true); 253 bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); 254 bp->b_cflags = BC_BUSY | BC_NOCACHE; 255 bp->b_data = zio->io_data; 256 bp->b_blkno = btodb(zio->io_offset); 257 bp->b_bcount = zio->io_size; 258 bp->b_resid = zio->io_size; 259 bp->b_iodone = vdev_disk_io_intr; 260 bp->b_private = zio; 261 262 if (!(bp->b_flags & B_READ)) { 263 mutex_enter(&vp->v_interlock); 264 vp->v_numoutput++; 265 mutex_exit(&vp->v_interlock); 266 } 267 268 if (bp->b_bcount <= MAXPHYS) { 269 /* We can do this I/O in one pass. */ 270 (void)VOP_STRATEGY(vp, bp); 271 } else { 272 /* 273 * The I/O is larger than we can process in one pass. 274 * Split it into smaller pieces. 275 */ 276 resid = zio->io_size; 277 off = 0; 278 while (resid != 0) { 279 size = min(resid, MAXPHYS); 280 nbp = getiobuf(vp, true); 281 nbp->b_blkno = btodb(zio->io_offset + off); 282 /* Below call increments v_numoutput. */ 283 nestiobuf_setup(bp, nbp, off, size); 284 (void)VOP_STRATEGY(vp, nbp); 285 resid -= size; 286 off += size; 287 } 288 } 289 290 return (ZIO_PIPELINE_STOP); 291 } 292 293 static void 294 vdev_disk_io_done(zio_t *zio) 295 { 296 297 /* NetBSD: nothing */ 298 } 299 300 vdev_ops_t vdev_disk_ops = { 301 vdev_disk_open, 302 vdev_disk_close, 303 vdev_default_asize, 304 vdev_disk_io_start, 305 vdev_disk_io_done, 306 NULL, 307 VDEV_TYPE_DISK, /* name of this vdev type */ 308 B_TRUE /* leaf vdev */ 309 }; 310 311 /* 312 * Given the root disk device devid or pathname, read the label from 313 * the device, and construct a configuration nvlist. 314 */ 315 int 316 vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) 317 { 318 319 return EOPNOTSUPP; 320 } 321