xref: /onnv-gate/usr/src/uts/common/fs/zfs/vdev_disk.c (revision 789:b348f31ed315)
1*789Sahrens /*
2*789Sahrens  * CDDL HEADER START
3*789Sahrens  *
4*789Sahrens  * The contents of this file are subject to the terms of the
5*789Sahrens  * Common Development and Distribution License, Version 1.0 only
6*789Sahrens  * (the "License").  You may not use this file except in compliance
7*789Sahrens  * with the License.
8*789Sahrens  *
9*789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*789Sahrens  * or http://www.opensolaris.org/os/licensing.
11*789Sahrens  * See the License for the specific language governing permissions
12*789Sahrens  * and limitations under the License.
13*789Sahrens  *
14*789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
15*789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
17*789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
18*789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
19*789Sahrens  *
20*789Sahrens  * CDDL HEADER END
21*789Sahrens  */
22*789Sahrens /*
23*789Sahrens  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*789Sahrens  * Use is subject to license terms.
25*789Sahrens  */
26*789Sahrens 
27*789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*789Sahrens 
29*789Sahrens #include <sys/zfs_context.h>
30*789Sahrens #include <sys/spa.h>
31*789Sahrens #include <sys/vdev_disk.h>
32*789Sahrens #include <sys/vdev_impl.h>
33*789Sahrens #include <sys/fs/zfs.h>
34*789Sahrens #include <sys/zio.h>
35*789Sahrens #include <sys/sunddi.h>
36*789Sahrens 
37*789Sahrens /*
38*789Sahrens  * Virtual device vector for disks.
39*789Sahrens  */
40*789Sahrens 
/*
 * LDI identifier for ZFS, defined elsewhere.  In this file it is passed
 * as the ident argument to ldi_open_by_name() and ldi_open_by_devid().
 */
extern ldi_ident_t zfs_li;
42*789Sahrens 
/*
 * Per-I/O wrapper that pairs a buf_t with the zio it was created for.
 *
 * vdb_buf must remain the first member: vdev_disk_io_intr() receives a
 * buf_t pointer and casts it straight back to a vdev_disk_buf_t pointer.
 */
typedef struct vdev_disk_buf {
	buf_t	vdb_buf;	/* buf handed to ldi_strategy() */
	zio_t	*vdb_io;	/* zio to resume when the buf completes */
} vdev_disk_buf_t;
47*789Sahrens 
48*789Sahrens static int
49*789Sahrens vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
50*789Sahrens {
51*789Sahrens 	vdev_disk_t *dvd;
52*789Sahrens 	int error;
53*789Sahrens 
54*789Sahrens 	/*
55*789Sahrens 	 * We must have a pathname, and it must be absolute.
56*789Sahrens 	 */
57*789Sahrens 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
58*789Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
59*789Sahrens 		return (EINVAL);
60*789Sahrens 	}
61*789Sahrens 
62*789Sahrens 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
63*789Sahrens 
64*789Sahrens 	/*
65*789Sahrens 	 * When opening a disk device, we want to preserve the user's original
66*789Sahrens 	 * intent.  We always want to open the device by the path the user gave
67*789Sahrens 	 * us, even if it is one of multiple paths to the save device.  But we
68*789Sahrens 	 * also want to be able to survive disks being removed/recabled.
69*789Sahrens 	 * Therefore the sequence of opening devices is:
70*789Sahrens 	 *
71*789Sahrens 	 * 1. Try opening the device by path.
72*789Sahrens 	 *
73*789Sahrens 	 * 	a. First append "s0" to see if this is a whole disk
74*789Sahrens 	 * 	b. Fall back to path otherwise
75*789Sahrens 	 *
76*789Sahrens 	 * 2. If the devid of the device matches the stored value, return
77*789Sahrens 	 *    success.
78*789Sahrens 	 *
79*789Sahrens 	 * 3. Otherwise, the device may have moved.  Try opening the device
80*789Sahrens 	 *    by the devid instead.
81*789Sahrens 	 *
82*789Sahrens 	 */
83*789Sahrens 	if (vd->vdev_devid != NULL) {
84*789Sahrens 		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
85*789Sahrens 		    &dvd->vd_minor) != 0) {
86*789Sahrens 			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
87*789Sahrens 			return (EINVAL);
88*789Sahrens 		}
89*789Sahrens 	}
90*789Sahrens 
91*789Sahrens 	error = EINVAL;		/* presume failure */
92*789Sahrens 
93*789Sahrens 	if (vd->vdev_path != NULL) {
94*789Sahrens 		size_t len = strlen(vd->vdev_path) + 3;
95*789Sahrens 		char *buf = kmem_alloc(len, KM_SLEEP);
96*789Sahrens 		ddi_devid_t devid;
97*789Sahrens 
98*789Sahrens 		(void) snprintf(buf, len, "%ss0", vd->vdev_path);
99*789Sahrens 
100*789Sahrens 		/*
101*789Sahrens 		 * Try whole disk first, then slice name.
102*789Sahrens 		 */
103*789Sahrens 		if ((error = ldi_open_by_name(buf, spa_mode, kcred,
104*789Sahrens 		    &dvd->vd_lh, zfs_li)) != 0)
105*789Sahrens 			error = ldi_open_by_name(vd->vdev_path,
106*789Sahrens 			    spa_mode, kcred, &dvd->vd_lh, zfs_li);
107*789Sahrens 
108*789Sahrens 		kmem_free(buf, len);
109*789Sahrens 
110*789Sahrens 		/*
111*789Sahrens 		 * Compare the devid to the stored value.
112*789Sahrens 		 */
113*789Sahrens 		if (error == 0 && vd->vdev_devid != NULL &&
114*789Sahrens 		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
115*789Sahrens 			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
116*789Sahrens 				error = EINVAL;
117*789Sahrens 				(void) ldi_close(dvd->vd_lh, spa_mode, kcred);
118*789Sahrens 				dvd->vd_lh = NULL;
119*789Sahrens 			}
120*789Sahrens 			ddi_devid_free(devid);
121*789Sahrens 		}
122*789Sahrens 	}
123*789Sahrens 
124*789Sahrens 	/*
125*789Sahrens 	 * If we were unable to open by path, or the devid check fails, open by
126*789Sahrens 	 * devid instead.
127*789Sahrens 	 */
128*789Sahrens 	if (error != 0 && vd->vdev_devid != NULL)
129*789Sahrens 		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
130*789Sahrens 		    spa_mode, kcred, &dvd->vd_lh, zfs_li);
131*789Sahrens 
132*789Sahrens 	if (error) {
133*789Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
134*789Sahrens 		return (error);
135*789Sahrens 	}
136*789Sahrens 
137*789Sahrens 	/*
138*789Sahrens 	 * Determine the actual size of the device.
139*789Sahrens 	 */
140*789Sahrens 	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
141*789Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
142*789Sahrens 		return (EINVAL);
143*789Sahrens 	}
144*789Sahrens 
145*789Sahrens 	*ashift = SPA_MINBLOCKSHIFT;
146*789Sahrens 
147*789Sahrens 	return (0);
148*789Sahrens }
149*789Sahrens 
150*789Sahrens static void
151*789Sahrens vdev_disk_close(vdev_t *vd)
152*789Sahrens {
153*789Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
154*789Sahrens 
155*789Sahrens 	if (dvd == NULL)
156*789Sahrens 		return;
157*789Sahrens 
158*789Sahrens 	dprintf("removing disk %s, devid %s\n",
159*789Sahrens 	    vd->vdev_path ? vd->vdev_path : "<none>",
160*789Sahrens 	    vd->vdev_devid ? vd->vdev_devid : "<none>");
161*789Sahrens 
162*789Sahrens 	if (dvd->vd_minor != NULL)
163*789Sahrens 		ddi_devid_str_free(dvd->vd_minor);
164*789Sahrens 
165*789Sahrens 	if (dvd->vd_devid != NULL)
166*789Sahrens 		ddi_devid_free(dvd->vd_devid);
167*789Sahrens 
168*789Sahrens 	if (dvd->vd_lh != NULL)
169*789Sahrens 		(void) ldi_close(dvd->vd_lh, spa_mode, kcred);
170*789Sahrens 
171*789Sahrens 	kmem_free(dvd, sizeof (vdev_disk_t));
172*789Sahrens 	vd->vdev_tsd = NULL;
173*789Sahrens }
174*789Sahrens 
175*789Sahrens static void
176*789Sahrens vdev_disk_io_intr(buf_t *bp)
177*789Sahrens {
178*789Sahrens 	vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
179*789Sahrens 	zio_t *zio = vdb->vdb_io;
180*789Sahrens 
181*789Sahrens 	if ((zio->io_error = geterror(bp)) == 0 && bp->b_resid != 0)
182*789Sahrens 		zio->io_error = EIO;
183*789Sahrens 
184*789Sahrens 	kmem_free(vdb, sizeof (vdev_disk_buf_t));
185*789Sahrens 
186*789Sahrens 	zio_next_stage_async(zio);
187*789Sahrens }
188*789Sahrens 
189*789Sahrens static void
190*789Sahrens vdev_disk_ioctl_done(void *zio_arg, int error)
191*789Sahrens {
192*789Sahrens 	zio_t *zio = zio_arg;
193*789Sahrens 
194*789Sahrens 	zio->io_error = error;
195*789Sahrens 
196*789Sahrens 	zio_next_stage_async(zio);
197*789Sahrens }
198*789Sahrens 
199*789Sahrens static void
200*789Sahrens vdev_disk_io_start(zio_t *zio)
201*789Sahrens {
202*789Sahrens 	vdev_t *vd = zio->io_vd;
203*789Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
204*789Sahrens 	vdev_disk_buf_t *vdb;
205*789Sahrens 	buf_t *bp;
206*789Sahrens 	int flags, error;
207*789Sahrens 
208*789Sahrens 	if (zio->io_type == ZIO_TYPE_IOCTL) {
209*789Sahrens 		zio_vdev_io_bypass(zio);
210*789Sahrens 
211*789Sahrens 		/* XXPOLICY */
212*789Sahrens 		if (vdev_is_dead(vd)) {
213*789Sahrens 			zio->io_error = ENXIO;
214*789Sahrens 			zio_next_stage_async(zio);
215*789Sahrens 			return;
216*789Sahrens 		}
217*789Sahrens 
218*789Sahrens 		switch (zio->io_cmd) {
219*789Sahrens 
220*789Sahrens 		case DKIOCFLUSHWRITECACHE:
221*789Sahrens 
222*789Sahrens 			zio->io_dk_callback.dkc_callback = vdev_disk_ioctl_done;
223*789Sahrens 			zio->io_dk_callback.dkc_cookie = zio;
224*789Sahrens 
225*789Sahrens 			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
226*789Sahrens 			    (uintptr_t)&zio->io_dk_callback,
227*789Sahrens 			    FKIOCTL, kcred, NULL);
228*789Sahrens 
229*789Sahrens 			if (error == 0) {
230*789Sahrens 				/*
231*789Sahrens 				 * The ioctl will be done asychronously,
232*789Sahrens 				 * and will call vdev_disk_ioctl_done()
233*789Sahrens 				 * upon completion.
234*789Sahrens 				 */
235*789Sahrens 				return;
236*789Sahrens 			}
237*789Sahrens 			zio->io_error = error;
238*789Sahrens 			break;
239*789Sahrens 
240*789Sahrens 		default:
241*789Sahrens 			zio->io_error = ENOTSUP;
242*789Sahrens 		}
243*789Sahrens 
244*789Sahrens 		zio_next_stage_async(zio);
245*789Sahrens 		return;
246*789Sahrens 	}
247*789Sahrens 
248*789Sahrens 	if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
249*789Sahrens 		return;
250*789Sahrens 
251*789Sahrens 	if ((zio = vdev_queue_io(zio)) == NULL)
252*789Sahrens 		return;
253*789Sahrens 
254*789Sahrens 	flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
255*789Sahrens 	flags |= B_BUSY | B_NOCACHE;
256*789Sahrens 	if (zio->io_flags & ZIO_FLAG_FAILFAST)
257*789Sahrens 		flags |= B_FAILFAST;
258*789Sahrens 
259*789Sahrens 	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
260*789Sahrens 
261*789Sahrens 	vdb->vdb_io = zio;
262*789Sahrens 	bp = &vdb->vdb_buf;
263*789Sahrens 
264*789Sahrens 	bioinit(bp);
265*789Sahrens 	bp->b_flags = flags;
266*789Sahrens 	bp->b_bcount = zio->io_size;
267*789Sahrens 	bp->b_un.b_addr = zio->io_data;
268*789Sahrens 	bp->b_lblkno = lbtodb(zio->io_offset);
269*789Sahrens 	bp->b_bufsize = zio->io_size;
270*789Sahrens 	bp->b_iodone = (int (*)())vdev_disk_io_intr;
271*789Sahrens 
272*789Sahrens 	/* XXPOLICY */
273*789Sahrens 	error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
274*789Sahrens 	if (error) {
275*789Sahrens 		zio->io_error = error;
276*789Sahrens 		bioerror(bp, error);
277*789Sahrens 		bp->b_resid = bp->b_bcount;
278*789Sahrens 		bp->b_iodone(bp);
279*789Sahrens 		return;
280*789Sahrens 	}
281*789Sahrens 
282*789Sahrens 	error = ldi_strategy(dvd->vd_lh, bp);
283*789Sahrens 	/* ldi_strategy() will return non-zero only on programming errors */
284*789Sahrens 	ASSERT(error == 0);
285*789Sahrens }
286*789Sahrens 
287*789Sahrens static void
288*789Sahrens vdev_disk_io_done(zio_t *zio)
289*789Sahrens {
290*789Sahrens 	vdev_queue_io_done(zio);
291*789Sahrens 
292*789Sahrens 	if (zio->io_type == ZIO_TYPE_WRITE)
293*789Sahrens 		vdev_cache_write(zio);
294*789Sahrens 
295*789Sahrens 	zio_next_stage(zio);
296*789Sahrens }
297*789Sahrens 
/*
 * Operations vector exported for disk vdevs.
 *
 * The initializer is positional, so the entries must stay in the order
 * of the members of vdev_ops_t (declared elsewhere).  The sixth entry is
 * left NULL; NOTE(review): its meaning depends on the vdev_ops_t
 * declaration -- confirm there before filling it in.
 */
vdev_ops_t vdev_disk_ops = {
	vdev_disk_open,
	vdev_disk_close,
	vdev_default_asize,
	vdev_disk_io_start,
	vdev_disk_io_done,
	NULL,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};
308