xref: /netbsd-src/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c (revision ba65fde2d7fefa7d39838fa5fa855e62bd606b5e)
1 
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/zfs_context.h>
28 #include <sys/spa.h>
29 #include <sys/refcount.h>
30 #include <sys/vdev_disk.h>
31 #include <sys/vdev_impl.h>
32 #include <sys/fs/zfs.h>
33 #include <sys/zio.h>
34 #include <sys/sunldi.h>
35 #include <sys/fm/fs/zfs.h>
36 #include <sys/disklabel.h>
37 #include <sys/dkio.h>
38 #include <sys/workqueue.h>
39 
40 /*
41  * Virtual device vector for disks.
42  */
43 
44 static void	vdev_disk_io_intr(buf_t *);
45 
46 static void
47 vdev_disk_flush(struct work *work, void *cookie)
48 {
49 	vdev_disk_t *dvd;
50 	int error, cmd;
51 	buf_t *bp;
52 	vnode_t *vp;
53 
54 	bp = (struct buf *)work;
55 	vp = bp->b_vp;
56 	dvd = cookie;
57 
58 	KASSERT(vp == dvd->vd_vn);
59 
60 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
61 	cmd = 1;
62 	error = VOP_IOCTL(vp, DIOCCACHESYNC, &cmd, FREAD|FWRITE,
63 	    kauth_cred_get());
64 	VOP_UNLOCK(vp);
65 	bp->b_error = error;
66 	vdev_disk_io_intr(bp);
67 }
68 
69 static int
70 vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
71 {
72 	spa_t *spa = vd->vdev_spa;
73 	vdev_disk_t *dvd;
74 	vnode_t *vp;
75 	int error, cmd;
76 	struct partinfo pinfo;
77 
78 	/*
79 	 * We must have a pathname, and it must be absolute.
80 	 */
81 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
82 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
83 		return (EINVAL);
84 	}
85 
86 	/*
87 	 * Reopen the device if it's not currently open. Otherwise,
88 	 * just update the physical size of the device.
89 	 */
90 	if (vd->vdev_tsd != NULL) {
91 		ASSERT(vd->vdev_reopening);
92 		dvd = vd->vdev_tsd;
93 		goto skip_open;
94 	}
95 
96 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
97 
98 	/*
99 	 * When opening a disk device, we want to preserve the user's original
100 	 * intent.  We always want to open the device by the path the user gave
101 	 * us, even if it is one of multiple paths to the save device.  But we
102 	 * also want to be able to survive disks being removed/recabled.
103 	 * Therefore the sequence of opening devices is:
104 	 *
105 	 * 1. Try opening the device by path.  For legacy pools without the
106 	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
107 	 *
108 	 * 2. If the devid of the device matches the stored value, return
109 	 *    success.
110 	 *
111 	 * 3. Otherwise, the device may have moved.  Try opening the device
112 	 *    by the devid instead.
113 	 */
114 	if (vd->vdev_devid != NULL) {
115 		/* XXXNETBSD wedges */
116 	}
117 
118 	error = EINVAL;		/* presume failure */
119 
120 	error = vn_open(vd->vdev_path, UIO_SYSSPACE, FREAD|FWRITE, 0,
121 	    &vp, CRCREAT, 0);
122 	if (error != 0) {
123 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
124 		return error;
125 	}
126 	if (vp->v_type != VBLK) {
127 		vrele(vp);
128 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
129 		return EINVAL;
130 	}
131 
132 	/*
133 	 * XXXNETBSD Compare the devid to the stored value.
134 	 */
135 
136 skip_open:
137 	/*
138 	 * Determine the actual size of the device.
139 	 * XXXNETBSD wedges.
140 	 */
141 	error = VOP_IOCTL(vp, DIOCGPART, &pinfo, FREAD|FWRITE,
142 	    kauth_cred_get());
143 	if (error != 0) {
144 		vrele(vp);
145 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
146 		return error;
147 	}
148 	*psize = (uint64_t)pinfo.part->p_size * pinfo.disklab->d_secsize;
149 	*ashift = highbit(MAX(pinfo.disklab->d_secsize, SPA_MINBLOCKSIZE)) - 1;
150 	vd->vdev_wholedisk = (pinfo.part->p_offset == 0); /* XXXNETBSD */
151 
152 	/*
153 	 * Create a workqueue to process cache-flushes concurrently.
154 	 */
155 	error = workqueue_create(&dvd->vd_wq, "vdevsync",
156 	    vdev_disk_flush, dvd, PRI_NONE, IPL_NONE, WQ_MPSAFE);
157 	if (error != 0) {
158 		vrele(vp);
159 		return error;
160 	}
161 
162 	/*
163 	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
164 	 * try again.
165 	 */
166 	vd->vdev_nowritecache = B_FALSE;
167 
168 	dvd->vd_vn = vp;
169 	return 0;
170 }
171 
172 static void
173 vdev_disk_close(vdev_t *vd)
174 {
175 	vdev_disk_t *dvd = vd->vdev_tsd;
176 	vnode_t *vp;
177 
178 	if (vd->vdev_reopening || dvd == NULL)
179 		return;
180 
181 	if ((vp = dvd->vd_vn) != NULL) {
182 /* XXX NetBSD Sometimes we deadlock on this why ? */
183 //		vprint("vnode close info", vp);
184 		vn_close(vp, FREAD|FWRITE, kauth_cred_get());
185 //		vprint("vnode close info", vp);
186 /* XXX is this needed ?		vrele(vp); */
187 		workqueue_destroy(dvd->vd_wq);
188 	}
189 
190 	kmem_free(dvd, sizeof (vdev_disk_t));
191 	vd->vdev_tsd = NULL;
192 }
193 
194 static void
195 vdev_disk_io_intr(buf_t *bp)
196 {
197 	zio_t *zio = bp->b_private;
198 
199 	/*
200 	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
201 	 * Rather than teach the rest of the stack about other error
202 	 * possibilities (EFAULT, etc), we normalize the error value here.
203 	 */
204 	if (bp->b_error == 0) {
205 		if (bp->b_resid != 0) {
206 			zio->io_error = EIO;
207 		} else {
208 			zio->io_error = 0;
209 		}
210 	} else {
211 		zio->io_error = EIO;
212 	}
213 
214 
215 	putiobuf(bp);
216 	zio_interrupt(zio);
217 }
218 
219 static void
220 vdev_disk_ioctl_free(zio_t *zio)
221 {
222 	kmem_free(zio->io_vsd, sizeof (struct dk_callback));
223 }
224 
225 static const zio_vsd_ops_t vdev_disk_vsd_ops = {
226 	vdev_disk_ioctl_free,
227 	zio_vsd_default_cksum_report
228 };
229 
230 static void
231 vdev_disk_ioctl_done(void *zio_arg, int error)
232 {
233 	zio_t *zio = zio_arg;
234 
235 	zio->io_error = error;
236 
237 	zio_interrupt(zio);
238 }
239 
240 static int
241 vdev_disk_io_start(zio_t *zio)
242 {
243 	vdev_t *vd = zio->io_vd;
244 	vdev_disk_t *dvd = vd->vdev_tsd;
245 	vnode_t *vp;
246 	buf_t *bp, *nbp;
247 	int error, size, off, resid;
248 
249 	vp = dvd->vd_vn;
250 	if (zio->io_type == ZIO_TYPE_IOCTL) {
251 		/* XXPOLICY */
252 		if (!vdev_readable(vd)) {
253 			zio->io_error = ENXIO;
254 			return (ZIO_PIPELINE_CONTINUE);
255 		}
256 
257 		switch (zio->io_cmd) {
258 		case DKIOCFLUSHWRITECACHE:
259 
260 			if (zfs_nocacheflush)
261 				break;
262 
263 			if (vd->vdev_nowritecache) {
264 				zio->io_error = ENOTSUP;
265 				break;
266 			}
267 
268 			bp = getiobuf(vp, true);
269 			bp->b_private = zio;
270 			workqueue_enqueue(dvd->vd_wq, &bp->b_work, NULL);
271 			return (ZIO_PIPELINE_STOP);
272 			break;
273 
274 		default:
275 			zio->io_error = ENOTSUP;
276 			break;
277 		}
278 
279 		return (ZIO_PIPELINE_CONTINUE);
280 	}
281 
282 	bp = getiobuf(vp, true);
283 	bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
284 	bp->b_cflags = BC_BUSY | BC_NOCACHE;
285 	bp->b_data = zio->io_data;
286 	bp->b_blkno = btodb(zio->io_offset);
287 	bp->b_bcount = zio->io_size;
288 	bp->b_resid = zio->io_size;
289 	bp->b_iodone = vdev_disk_io_intr;
290 	bp->b_private = zio;
291 
292 	if (!(bp->b_flags & B_READ)) {
293 		mutex_enter(vp->v_interlock);
294 		vp->v_numoutput++;
295 		mutex_exit(vp->v_interlock);
296 	}
297 
298 	if (bp->b_bcount <= MAXPHYS) {
299 		/* We can do this I/O in one pass. */
300 		(void)VOP_STRATEGY(vp, bp);
301 	} else {
302 		/*
303 		 * The I/O is larger than we can process in one pass.
304 		 * Split it into smaller pieces.
305 		 */
306 		resid = zio->io_size;
307 		off = 0;
308 		while (resid != 0) {
309 			size = min(resid, MAXPHYS);
310 			nbp = getiobuf(vp, true);
311 			nbp->b_blkno = btodb(zio->io_offset + off);
312 			/* Below call increments v_numoutput. */
313 			nestiobuf_setup(bp, nbp, off, size);
314 			(void)VOP_STRATEGY(vp, nbp);
315 			resid -= size;
316 			off += size;
317 		}
318 	}
319 
320 	return (ZIO_PIPELINE_STOP);
321 }
322 
323 static void
324 vdev_disk_io_done(zio_t *zio)
325 {
326 
327 	/* NetBSD: nothing */
328 }
329 
330 vdev_ops_t vdev_disk_ops = {
331 	vdev_disk_open,
332 	vdev_disk_close,
333 	vdev_default_asize,
334 	vdev_disk_io_start,
335 	vdev_disk_io_done,
336 	NULL,
337 	VDEV_TYPE_DISK,		/* name of this vdev type */
338 	B_TRUE			/* leaf vdev */
339 };
340 
341 /*
342  * Given the root disk device devid or pathname, read the label from
343  * the device, and construct a configuration nvlist.
344  */
345 int
346 vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
347 {
348 
349 	return EOPNOTSUPP;
350 }
351