xref: /netbsd-src/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c (revision 3816d47b2c42fcd6e549e3407f842a5b1a1d23ad)
1 
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/zfs_context.h>
28 #include <sys/spa.h>
29 #include <sys/refcount.h>
30 #include <sys/vdev_disk.h>
31 #include <sys/vdev_impl.h>
32 #include <sys/fs/zfs.h>
33 #include <sys/zio.h>
34 #include <sys/sunldi.h>
35 #include <sys/fm/fs/zfs.h>
36 #include <sys/disklabel.h>
37 #include <sys/dkio.h>
38 #include <sys/workqueue.h>
39 
40 /*
41  * Virtual device vector for disks.
42  */
43 
44 static void	vdev_disk_io_intr(buf_t *);
45 
46 static void
47 vdev_disk_flush(struct work *work, void *cookie)
48 {
49 	vdev_disk_t *dvd;
50 	int error, cmd;
51 	buf_t *bp;
52 	vnode_t *vp;
53 
54 	bp = (struct buf *)work;
55 	vp = bp->b_vp;
56 	dvd = cookie;
57 
58 	KASSERT(vp == dvd->vd_vn);
59 
60 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
61 	cmd = 1;
62 	error = VOP_IOCTL(vp, DIOCCACHESYNC, &cmd, FREAD|FWRITE,
63 	    kauth_cred_get());
64 	VOP_UNLOCK(vp, 0);
65 	bp->b_error = error;
66 	vdev_disk_io_intr(bp);
67 }
68 
69 static int
70 vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
71 {
72 	struct partinfo pinfo;
73 	vdev_disk_t *dvd;
74 	vnode_t *vp;
75 	int error, cmd;
76 
77 	/*
78 	 * We must have a pathname, and it must be absolute.
79 	 */
80 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
81 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
82 		return (EINVAL);
83 	}
84 
85 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
86 
87 	/*
88 	 * When opening a disk device, we want to preserve the user's original
89 	 * intent.  We always want to open the device by the path the user gave
90 	 * us, even if it is one of multiple paths to the save device.  But we
91 	 * also want to be able to survive disks being removed/recabled.
92 	 * Therefore the sequence of opening devices is:
93 	 *
94 	 * 1. Try opening the device by path.  For legacy pools without the
95 	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
96 	 *
97 	 * 2. If the devid of the device matches the stored value, return
98 	 *    success.
99 	 *
100 	 * 3. Otherwise, the device may have moved.  Try opening the device
101 	 *    by the devid instead.
102 	 *
103 	 */
104 	if (vd->vdev_devid != NULL) {
105 		/* XXXNETBSD wedges */
106 	}
107 
108 	error = EINVAL;		/* presume failure */
109 
110 	error = vn_open(vd->vdev_path, UIO_SYSSPACE, FREAD|FWRITE, 0,
111 	    &vp, CRCREAT, 0);
112 	if (error != 0) {
113 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
114 		return error;
115 	}
116 	if (vp->v_type != VBLK) {
117 		vrele(vp);
118 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
119 		return EINVAL;
120 	}
121 
122 	/*
123 	 * XXXNETBSD Compare the devid to the stored value.
124 	 */
125 
126 	/*
127 	 * Determine the actual size of the device.
128 	 * XXXNETBSD wedges.
129 	 */
130 	error = VOP_IOCTL(vp, DIOCGPART, &pinfo, FREAD|FWRITE,
131 	    kauth_cred_get());
132 	if (error != 0) {
133 		vrele(vp);
134 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
135 		return error;
136 	}
137 	*psize = (uint64_t)pinfo.part->p_size * pinfo.disklab->d_secsize;
138 	*ashift = highbit(MAX(pinfo.disklab->d_secsize, SPA_MINBLOCKSIZE)) - 1;
139 	vd->vdev_wholedisk = (pinfo.part->p_offset == 0); /* XXXNETBSD */
140 
141 	/*
142 	 * Create a workqueue to process cache-flushes concurrently.
143 	 */
144 	error = workqueue_create(&dvd->vd_wq, "vdevsync",
145 	    vdev_disk_flush, dvd, PRI_NONE, IPL_NONE, WQ_MPSAFE);
146 	if (error != 0) {
147 		vrele(vp);
148 		return error;
149 	}
150 
151 	/*
152 	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
153 	 * try again.
154 	 */
155 	vd->vdev_nowritecache = B_FALSE;
156 
157 	dvd->vd_vn = vp;
158 	return 0;
159 }
160 
161 static void
162 vdev_disk_close(vdev_t *vd)
163 {
164 	vdev_disk_t *dvd = vd->vdev_tsd;
165 	vnode_t *vp;
166 
167 	if (dvd == NULL)
168 		return;
169 
170 	dprintf("removing disk %s, devid %s\n",
171 	    vd->vdev_path ? vd->vdev_path : "<none>",
172 	    vd->vdev_devid ? vd->vdev_devid : "<none>");
173 
174 	if ((vp = dvd->vd_vn) != NULL) {
175 /* XXX NetBSD Sometimes we deadlock on this why ? */
176 //		vprint("vnode close info", vp);
177 		vn_close(vp, FREAD|FWRITE, kauth_cred_get());
178 //		vprint("vnode close info", vp);
179 /* XXX is this needed ?		vrele(vp); */
180 		workqueue_destroy(dvd->vd_wq);
181 	}
182 	kmem_free(dvd, sizeof (vdev_disk_t));
183 	vd->vdev_tsd = NULL;
184 }
185 
186 static void
187 vdev_disk_io_intr(buf_t *bp)
188 {
189 	zio_t *zio = bp->b_private;
190 
191 	dprintf("vdev_disk_io_intr bp=%p\n", bp);
192 	/*
193 	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
194 	 * Rather than teach the rest of the stack about other error
195 	 * possibilities (EFAULT, etc), we normalize the error value here.
196 	 */
197 	if (bp->b_error == 0) {
198 		if (bp->b_resid != 0) {
199 			zio->io_error = EIO;
200 		} else {
201 			zio->io_error = 0;
202 		}
203 	} else {
204 		zio->io_error = EIO;
205 	}
206 
207 	putiobuf(bp);
208 	zio_interrupt(zio);
209 }
210 
211 static int
212 vdev_disk_io_start(zio_t *zio)
213 {
214 	vdev_t *vd = zio->io_vd;
215 	vdev_disk_t *dvd = vd->vdev_tsd;
216 	vnode_t *vp;
217 	buf_t *bp, *nbp;
218 	int error, size, off, resid;
219 
220 	vp = dvd->vd_vn;
221 	if (zio->io_type == ZIO_TYPE_IOCTL) {
222 		/* XXPOLICY */
223 		if (!vdev_readable(vd)) {
224 			zio->io_error = ENXIO;
225 			return (ZIO_PIPELINE_CONTINUE);
226 		}
227 
228 		switch (zio->io_cmd) {
229 		case DKIOCFLUSHWRITECACHE:
230 
231 			if (zfs_nocacheflush)
232 				break;
233 
234 			if (vd->vdev_nowritecache) {
235 				zio->io_error = ENOTSUP;
236 				break;
237 			}
238 
239 			bp = getiobuf(vp, true);
240 			bp->b_private = zio;
241 			workqueue_enqueue(dvd->vd_wq, &bp->b_work, NULL);
242 			return (ZIO_PIPELINE_STOP);
243 
244 		default:
245 			zio->io_error = ENOTSUP;
246 			break;
247 		}
248 
249 		return (ZIO_PIPELINE_CONTINUE);
250 	}
251 
252 	bp = getiobuf(vp, true);
253 	bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
254 	bp->b_cflags = BC_BUSY | BC_NOCACHE;
255 	bp->b_data = zio->io_data;
256 	bp->b_blkno = btodb(zio->io_offset);
257 	bp->b_bcount = zio->io_size;
258 	bp->b_resid = zio->io_size;
259 	bp->b_iodone = vdev_disk_io_intr;
260 	bp->b_private = zio;
261 
262 	if (!(bp->b_flags & B_READ)) {
263 		mutex_enter(&vp->v_interlock);
264 		vp->v_numoutput++;
265 		mutex_exit(&vp->v_interlock);
266 	}
267 
268 	if (bp->b_bcount <= MAXPHYS) {
269 		/* We can do this I/O in one pass. */
270 		(void)VOP_STRATEGY(vp, bp);
271 	} else {
272 		/*
273 		 * The I/O is larger than we can process in one pass.
274 		 * Split it into smaller pieces.
275 		 */
276 		resid = zio->io_size;
277 		off = 0;
278 		while (resid != 0) {
279 			size = min(resid, MAXPHYS);
280 			nbp = getiobuf(vp, true);
281 			nbp->b_blkno = btodb(zio->io_offset + off);
282 			/* Below call increments v_numoutput. */
283 			nestiobuf_setup(bp, nbp, off, size);
284 			(void)VOP_STRATEGY(vp, nbp);
285 			resid -= size;
286 			off += size;
287 		}
288 	}
289 
290 	return (ZIO_PIPELINE_STOP);
291 }
292 
293 static void
294 vdev_disk_io_done(zio_t *zio)
295 {
296 
297 	/* NetBSD: nothing */
298 }
299 
300 vdev_ops_t vdev_disk_ops = {
301 	vdev_disk_open,
302 	vdev_disk_close,
303 	vdev_default_asize,
304 	vdev_disk_io_start,
305 	vdev_disk_io_done,
306 	NULL,
307 	VDEV_TYPE_DISK,		/* name of this vdev type */
308 	B_TRUE			/* leaf vdev */
309 };
310 
311 /*
312  * Given the root disk device devid or pathname, read the label from
313  * the device, and construct a configuration nvlist.
314  */
315 int
316 vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
317 {
318 
319 	return EOPNOTSUPP;
320 }
321