xref: /netbsd-src/sys/dev/dkwedge/dk.c (revision e5548b402ae4c44fb816de42c7bba9581ce23ef5)
1 /*	$NetBSD: dk.c,v 1.10 2005/12/11 12:21:20 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2004 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.10 2005/12/11 12:21:20 christos Exp $");
41 
42 #include "opt_dkwedge.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/proc.h>
47 #include <sys/errno.h>
48 #include <sys/pool.h>
49 #include <sys/ioctl.h>
50 #include <sys/disklabel.h>
51 #include <sys/disk.h>
52 #include <sys/fcntl.h>
53 #include <sys/buf.h>
54 #include <sys/bufq.h>
55 #include <sys/vnode.h>
56 #include <sys/stat.h>
57 #include <sys/conf.h>
58 #include <sys/callout.h>
59 #include <sys/kernel.h>
60 #include <sys/lock.h>
61 #include <sys/malloc.h>
62 #include <sys/device.h>
63 
64 #include <miscfs/specfs/specdev.h>
65 
66 MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");
67 
68 typedef enum {
69 	DKW_STATE_LARVAL	= 0,
70 	DKW_STATE_RUNNING	= 1,
71 	DKW_STATE_DYING		= 2,
72 	DKW_STATE_DEAD		= 666
73 } dkwedge_state_t;
74 
75 struct dkwedge_softc {
76 	struct device	*sc_dev;	/* pointer to our pseudo-device */
77 	struct cfdata	sc_cfdata;	/* our cfdata structure */
78 	uint8_t		sc_wname[128];	/* wedge name (Unicode, UTF-8) */
79 
80 	dkwedge_state_t sc_state;	/* state this wedge is in */
81 
82 	struct disk	*sc_parent;	/* parent disk */
83 	daddr_t		sc_offset;	/* LBA offset of wedge in parent */
84 	uint64_t	sc_size;	/* size of wedge in blocks */
85 	char		sc_ptype[32];	/* partition type */
86 	dev_t		sc_pdev;	/* cached parent's dev_t */
87 					/* link on parent's wedge list */
88 	LIST_ENTRY(dkwedge_softc) sc_plink;
89 
90 	struct disk	sc_dk;		/* our own disk structure */
91 	struct bufq_state *sc_bufq;	/* buffer queue */
92 	struct callout	sc_restart_ch;	/* callout to restart I/O */
93 
94 	u_int		sc_iopend;	/* I/Os pending */
95 	int		sc_flags;	/* flags (splbio) */
96 };
97 
98 #define	DK_F_WAIT_DRAIN		0x0001	/* waiting for I/O to drain */
99 
100 static void	dkstart(struct dkwedge_softc *);
101 static void	dkiodone(struct buf *);
102 static void	dkrestart(void *);
103 
104 static dev_type_open(dkopen);
105 static dev_type_close(dkclose);
106 static dev_type_read(dkread);
107 static dev_type_write(dkwrite);
108 static dev_type_ioctl(dkioctl);
109 static dev_type_strategy(dkstrategy);
110 static dev_type_dump(dkdump);
111 static dev_type_size(dksize);
112 
113 const struct bdevsw dk_bdevsw = {
114 	dkopen, dkclose, dkstrategy, dkioctl, dkdump, dksize, D_DISK
115 };
116 
117 const struct cdevsw dk_cdevsw = {
118 	dkopen, dkclose, dkread, dkwrite, dkioctl,
119 	    nostop, notty, nopoll, nommap, nokqfilter, D_DISK
120 };
121 
122 static struct dkwedge_softc **dkwedges;
123 static u_int ndkwedges;
124 static struct lock dkwedges_lock = LOCK_INITIALIZER(PRIBIO, "dkwgs", 0, 0);
125 
126 static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
127 static int dkwedge_discovery_methods_initialized;
128 static struct lock dkwedge_discovery_methods_lock =
129     LOCK_INITIALIZER(PRIBIO, "dkddm", 0, 0);
130 
/*
 * dkwedge_match:
 *
 *	Match routine for the pseudo-device glue.  Wedges are
 *	pseudo-devices, so every match request succeeds; all three
 *	arguments are ignored.
 */
static int
dkwedge_match(struct device *parent, struct cfdata *match, void *aux)
{

	return 1;
}
143 
/*
 * dkwedge_attach:
 *
 *	Attach routine for the pseudo-device glue.  Intentionally empty:
 *	all wedge setup is performed by dkwedge_add().
 */
static void
dkwedge_attach(struct device *parent, struct device *self, void *aux)
{

	return;
}
155 
/*
 * dkwedge_detach:
 *
 *	Detach routine for the pseudo-device glue.  There is nothing to
 *	release here, so detaching never fails and 0 is returned.
 */
static int
dkwedge_detach(struct device *self, int flags)
{

	return 0;
}
168 
169 CFDRIVER_DECL(dk, DV_DISK, NULL);
170 CFATTACH_DECL(dk, sizeof(struct device),
171 	      dkwedge_match, dkwedge_attach, dkwedge_detach, NULL);
172 
173 static int dkwedge_cfglue_initialized;
174 static struct simplelock dkwedge_cfglue_initialized_slock =
175     SIMPLELOCK_INITIALIZER;
176 
177 static void
178 dkwedge_cfglue_init(void)
179 {
180 
181 	simple_lock(&dkwedge_cfglue_initialized_slock);
182 	if (dkwedge_cfglue_initialized == 0) {
183 		if (config_cfdriver_attach(&dk_cd) != 0)
184 			panic("dkwedge: unable to attach cfdriver");
185 		if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
186 			panic("dkwedge: unable to attach cfattach");
187 
188 		dkwedge_cfglue_initialized = 1;
189 	}
190 	simple_unlock(&dkwedge_cfglue_initialized_slock);
191 }
192 
193 /*
194  * dkwedge_wait_drain:
195  *
196  *	Wait for I/O on the wedge to drain.
197  *	NOTE: Must be called at splbio()!
198  */
199 static void
200 dkwedge_wait_drain(struct dkwedge_softc *sc)
201 {
202 
203 	while (sc->sc_iopend != 0) {
204 		sc->sc_flags |= DK_F_WAIT_DRAIN;
205 		(void) tsleep(&sc->sc_iopend, PRIBIO, "dkdrn", 0);
206 	}
207 }
208 
209 /*
210  * dkwedge_compute_pdev:
211  *
212  *	Compute the parent disk's dev_t.
213  */
214 static int
215 dkwedge_compute_pdev(const char *pname, dev_t *pdevp)
216 {
217 	const char *name, *cp;
218 	int punit, pmaj;
219 	char devname[16];
220 
221 	name = pname;
222 	if ((pmaj = devsw_name2blk(name, devname, sizeof(devname))) == -1)
223 		return (ENODEV);
224 
225 	name += strlen(devname);
226 	for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
227 		punit = (punit * 10) + (*cp - '0');
228 	if (cp == name) {
229 		/* Invalid parent disk name. */
230 		return (ENODEV);
231 	}
232 
233 	*pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);
234 
235 	return (0);
236 }
237 
238 /*
239  * dkwedge_array_expand:
240  *
241  *	Expand the dkwedges array.
242  */
243 static void
244 dkwedge_array_expand(void)
245 {
246 	int newcnt = ndkwedges + 16;
247 	struct dkwedge_softc **newarray, **oldarray;
248 
249 	newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
250 	    M_WAITOK|M_ZERO);
251 	if ((oldarray = dkwedges) != NULL)
252 		memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
253 	dkwedges = newarray;
254 	ndkwedges = newcnt;
255 	if (oldarray != NULL)
256 		free(oldarray, M_DKWEDGE);
257 }
258 
259 /*
260  * dkwedge_add:		[exported function]
261  *
262  *	Add a disk wedge based on the provided information.
263  *
264  *	The incoming dkw_devname[] is ignored, instead being
265  *	filled in and returned to the caller.
266  */
267 int
268 dkwedge_add(struct dkwedge_info *dkw)
269 {
270 	struct dkwedge_softc *sc, *lsc;
271 	struct disk *pdk;
272 	u_int unit;
273 	int error;
274 	dev_t pdev;
275 
276 	if (dkwedge_cfglue_initialized == 0)
277 		dkwedge_cfglue_init();
278 
279 	dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
280 	pdk = disk_find(dkw->dkw_parent);
281 	if (pdk == NULL)
282 		return (ENODEV);
283 
284 	error = dkwedge_compute_pdev(pdk->dk_name, &pdev);
285 	if (error)
286 		return (error);
287 
288 	if (dkw->dkw_offset < 0)
289 		return (EINVAL);
290 
291 	sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
292 	sc->sc_state = DKW_STATE_LARVAL;
293 	sc->sc_parent = pdk;
294 	sc->sc_pdev = pdev;
295 	sc->sc_offset = dkw->dkw_offset;
296 	sc->sc_size = dkw->dkw_size;
297 
298 	memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
299 	sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';
300 
301 	memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
302 	sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';
303 
304 	bufq_alloc(&sc->sc_bufq, "fcfs", 0);
305 
306 	callout_init(&sc->sc_restart_ch);
307 	callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);
308 
309 	/*
310 	 * Wedge will be added; increment the wedge count for the parent.
311 	 * Only allow this to happend if RAW_PART is the only thing open.
312 	 */
313 	(void) lockmgr(&pdk->dk_openlock, LK_EXCLUSIVE, NULL);
314 	if (pdk->dk_openmask & ~(1 << RAW_PART))
315 		error = EBUSY;
316 	else {
317 		/* Check for wedge overlap. */
318 		LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
319 			daddr_t lastblk = sc->sc_offset + sc->sc_size - 1;
320 			daddr_t llastblk = lsc->sc_offset + lsc->sc_size - 1;
321 
322 			if (sc->sc_offset >= lsc->sc_offset &&
323 			    sc->sc_offset <= llastblk) {
324 				/* Overlaps the tail of the exsiting wedge. */
325 				break;
326 			}
327 			if (lastblk >= lsc->sc_offset &&
328 			    lastblk <= llastblk) {
329 				/* Overlaps the head of the existing wedge. */
330 			    	break;
331 			}
332 		}
333 		if (lsc != NULL)
334 			error = EINVAL;
335 		else {
336 			pdk->dk_nwedges++;
337 			LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
338 		}
339 	}
340 	(void) lockmgr(&pdk->dk_openlock, LK_RELEASE, NULL);
341 	if (error) {
342 		bufq_free(sc->sc_bufq);
343 		free(sc, M_DKWEDGE);
344 		return (error);
345 	}
346 
347 	/* Fill in our cfdata for the pseudo-device glue. */
348 	sc->sc_cfdata.cf_name = dk_cd.cd_name;
349 	sc->sc_cfdata.cf_atname = dk_ca.ca_name;
350 	/* sc->sc_cfdata.cf_unit set below */
351 	sc->sc_cfdata.cf_fstate = FSTATE_STAR;
352 
353 	/* Insert the larval wedge into the array. */
354 	(void) lockmgr(&dkwedges_lock, LK_EXCLUSIVE, NULL);
355 	for (error = 0;;) {
356 		struct dkwedge_softc **scpp;
357 
358 		/*
359 		 * Check for a duplicate wname while searching for
360 		 * a slot.
361 		 */
362 		for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
363 			if (dkwedges[unit] == NULL) {
364 				if (scpp == NULL) {
365 					scpp = &dkwedges[unit];
366 					sc->sc_cfdata.cf_unit = unit;
367 				}
368 			} else {
369 				/* XXX Unicode. */
370 				if (strcmp(dkwedges[unit]->sc_wname,
371 					   sc->sc_wname) == 0) {
372 					error = EEXIST;
373 					break;
374 				}
375 			}
376 		}
377 		if (error)
378 			break;
379 		KASSERT(unit == ndkwedges);
380 		if (scpp == NULL)
381 			dkwedge_array_expand();
382 		else {
383 			KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
384 			*scpp = sc;
385 			break;
386 		}
387 	}
388 	(void) lockmgr(&dkwedges_lock, LK_RELEASE, NULL);
389 	if (error) {
390 		(void) lockmgr(&pdk->dk_openlock, LK_EXCLUSIVE, NULL);
391 		pdk->dk_nwedges--;
392 		LIST_REMOVE(sc, sc_plink);
393 		(void) lockmgr(&pdk->dk_openlock, LK_RELEASE, NULL);
394 
395 		bufq_free(sc->sc_bufq);
396 		free(sc, M_DKWEDGE);
397 		return (error);
398 	}
399 
400 	/*
401 	 * Now that we know the unit #, attach a pseudo-device for
402 	 * this wedge instance.  This will provide us with the
403 	 * "struct device" necessary for glue to other parts of the
404 	 * system.
405 	 *
406 	 * This should never fail, unless we're almost totally out of
407 	 * memory.
408 	 */
409 	if ((sc->sc_dev = config_attach_pseudo(&sc->sc_cfdata)) == NULL) {
410 		aprint_error("%s%u: unable to attach pseudo-device\n",
411 		    sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);
412 
413 		(void) lockmgr(&dkwedges_lock, LK_EXCLUSIVE, NULL);
414 		dkwedges[sc->sc_cfdata.cf_unit] = NULL;
415 		(void) lockmgr(&dkwedges_lock, LK_RELEASE, NULL);
416 
417 		(void) lockmgr(&pdk->dk_openlock, LK_EXCLUSIVE, NULL);
418 		pdk->dk_nwedges--;
419 		LIST_REMOVE(sc, sc_plink);
420 		(void) lockmgr(&pdk->dk_openlock, LK_RELEASE, NULL);
421 
422 		bufq_free(sc->sc_bufq);
423 		free(sc, M_DKWEDGE);
424 		return (ENOMEM);
425 	}
426 	sc->sc_dk.dk_name = sc->sc_dev->dv_xname;
427 
428 	/* Return the devname to the caller. */
429 	strcpy(dkw->dkw_devname, sc->sc_dev->dv_xname);
430 
431 	/*
432 	 * XXX Really ought to make the disk_attach() and the changing
433 	 * of state to RUNNING atomic.
434 	 */
435 
436 	disk_attach(&sc->sc_dk);
437 
438 	/* Disk wedge is ready for use! */
439 	sc->sc_state = DKW_STATE_RUNNING;
440 
441 	/* Announce our arrival. */
442 	aprint_normal("%s at %s: %s\n", sc->sc_dev->dv_xname, pdk->dk_name,
443 	    sc->sc_wname);	/* XXX Unicode */
444 	aprint_normal("%s: %"PRIu64" blocks at %"PRId64", type: %s\n",
445 	    sc->sc_dev->dv_xname, sc->sc_size, sc->sc_offset, sc->sc_ptype);
446 
447 	return (0);
448 }
449 
450 /*
451  * dkwedge_del:		[exported function]
452  *
453  *	Delete a disk wedge based on the provided information.
454  *	NOTE: We look up the wedge based on the wedge devname,
455  *	not wname.
456  */
457 int
458 dkwedge_del(struct dkwedge_info *dkw)
459 {
460 	struct dkwedge_softc *sc = NULL;
461 	u_int unit;
462 	int bmaj, cmaj, i, mn, s;
463 
464 	/* Find our softc. */
465 	dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
466 	(void) lockmgr(&dkwedges_lock, LK_EXCLUSIVE, NULL);
467 	for (unit = 0; unit < ndkwedges; unit++) {
468 		if ((sc = dkwedges[unit]) != NULL &&
469 		    strcmp(sc->sc_dev->dv_xname, dkw->dkw_devname) == 0 &&
470 		    strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
471 			/* Mark the wedge as dying. */
472 			sc->sc_state = DKW_STATE_DYING;
473 			break;
474 		}
475 	}
476 	(void) lockmgr(&dkwedges_lock, LK_RELEASE, NULL);
477 	if (unit == ndkwedges)
478 		return (ESRCH);
479 
480 	KASSERT(sc != NULL);
481 
482 	/* Locate the wedge major numbers. */
483 	bmaj = bdevsw_lookup_major(&dk_bdevsw);
484 	cmaj = cdevsw_lookup_major(&dk_cdevsw);
485 
486 	/* Kill any pending restart. */
487 	callout_stop(&sc->sc_restart_ch);
488 
489 	/*
490 	 * dkstart() will kill any queued buffers now that the
491 	 * state of the wedge is not RUNNING.  Once we've done
492 	 * that, wait for any other pending I/O to complete.
493 	 */
494 	s = splbio();
495 	dkstart(sc);
496 	dkwedge_wait_drain(sc);
497 	splx(s);
498 
499 	/* Nuke the vnodes for any open instances. */
500 	for (i = 0; i < MAXPARTITIONS; i++) {
501 		mn = DISKMINOR(unit, i);
502 		vdevgone(bmaj, mn, mn, VBLK);
503 		vdevgone(cmaj, mn, mn, VCHR);
504 	}
505 
506 	/* Clean up the parent. */
507 	(void) lockmgr(&sc->sc_dk.dk_openlock, LK_EXCLUSIVE, NULL);
508 	(void) lockmgr(&sc->sc_parent->dk_rawlock, LK_EXCLUSIVE, NULL);
509 	if (sc->sc_dk.dk_openmask) {
510 		if (sc->sc_parent->dk_rawopens-- == 1) {
511 			KASSERT(sc->sc_parent->dk_rawvp != NULL);
512 			(void) vn_close(sc->sc_parent->dk_rawvp, FREAD | FWRITE,
513 					NOCRED, curlwp);
514 			sc->sc_parent->dk_rawvp = NULL;
515 		}
516 		sc->sc_dk.dk_openmask = 0;
517 	}
518 	(void) lockmgr(&sc->sc_parent->dk_rawlock, LK_RELEASE, NULL);
519 	(void) lockmgr(&sc->sc_dk.dk_openlock, LK_RELEASE, NULL);
520 
521 	/* Announce our departure. */
522 	aprint_normal("%s at %s (%s) deleted\n", sc->sc_dev->dv_xname,
523 	    sc->sc_parent->dk_name,
524 	    sc->sc_wname);	/* XXX Unicode */
525 
526 	/* Delete our pseudo-device. */
527 	(void) config_detach(sc->sc_dev, DETACH_FORCE | DETACH_QUIET);
528 
529 	(void) lockmgr(&sc->sc_parent->dk_openlock, LK_EXCLUSIVE, NULL);
530 	sc->sc_parent->dk_nwedges--;
531 	LIST_REMOVE(sc, sc_plink);
532 	(void) lockmgr(&sc->sc_parent->dk_openlock, LK_RELEASE, NULL);
533 
534 	/* Delete our buffer queue. */
535 	bufq_free(sc->sc_bufq);
536 
537 	/* Detach from the disk list. */
538 	disk_detach(&sc->sc_dk);
539 
540 	/* Poof. */
541 	(void) lockmgr(&dkwedges_lock, LK_EXCLUSIVE, NULL);
542 	dkwedges[unit] = NULL;
543 	sc->sc_state = DKW_STATE_DEAD;
544 	(void) lockmgr(&dkwedges_lock, LK_RELEASE, NULL);
545 
546 	free(sc, M_DKWEDGE);
547 
548 	return (0);
549 }
550 
551 /*
552  * dkwedge_delall:	[exported function]
553  *
554  *	Delete all of the wedges on the specified disk.  Used when
555  *	a disk is being detached.
556  */
557 void
558 dkwedge_delall(struct disk *pdk)
559 {
560 	struct dkwedge_info dkw;
561 	struct dkwedge_softc *sc;
562 
563 	for (;;) {
564 		(void) lockmgr(&pdk->dk_openlock, LK_EXCLUSIVE, NULL);
565 		if ((sc = LIST_FIRST(&pdk->dk_wedges)) == NULL) {
566 			KASSERT(pdk->dk_nwedges == 0);
567 			(void) lockmgr(&pdk->dk_openlock, LK_RELEASE, NULL);
568 			return;
569 		}
570 		strcpy(dkw.dkw_parent, pdk->dk_name);
571 		strcpy(dkw.dkw_devname, sc->sc_dev->dv_xname);
572 		(void) lockmgr(&pdk->dk_openlock, LK_RELEASE, NULL);
573 		(void) dkwedge_del(&dkw);
574 	}
575 }
576 
577 /*
578  * dkwedge_list:	[exported function]
579  *
580  *	List all of the wedges on a particular disk.
581  *	If p == NULL, the buffer is in kernel space.  Otherwise, it is
582  *	in user space of the specified process.
583  */
584 int
585 dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
586 {
587 	struct uio uio;
588 	struct iovec iov;
589 	struct dkwedge_softc *sc;
590 	struct dkwedge_info dkw;
591 	int error = 0;
592 
593 	iov.iov_base = dkwl->dkwl_buf;
594 	iov.iov_len = dkwl->dkwl_bufsize;
595 
596 	uio.uio_iov = &iov;
597 	uio.uio_iovcnt = 1;
598 	uio.uio_offset = 0;
599 	uio.uio_resid = dkwl->dkwl_bufsize;
600 	uio.uio_segflg = l != NULL ? UIO_USERSPACE : UIO_SYSSPACE;
601 	uio.uio_rw = UIO_READ;
602 	uio.uio_lwp = l;
603 
604 	dkwl->dkwl_ncopied = 0;
605 
606 	(void) lockmgr(&pdk->dk_openlock, LK_EXCLUSIVE, NULL);
607 	LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
608 		if (uio.uio_resid < sizeof(dkw))
609 			break;
610 
611 		if (sc->sc_state != DKW_STATE_RUNNING)
612 			continue;
613 
614 		strcpy(dkw.dkw_devname, sc->sc_dev->dv_xname);
615 		memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
616 		dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
617 		strcpy(dkw.dkw_parent, sc->sc_parent->dk_name);
618 		dkw.dkw_offset = sc->sc_offset;
619 		dkw.dkw_size = sc->sc_size;
620 		strcpy(dkw.dkw_ptype, sc->sc_ptype);
621 
622 		error = uiomove(&dkw, sizeof(dkw), &uio);
623 		if (error)
624 			break;
625 		dkwl->dkwl_ncopied++;
626 	}
627 	dkwl->dkwl_nwedges = pdk->dk_nwedges;
628 	(void) lockmgr(&pdk->dk_openlock, LK_RELEASE, NULL);
629 
630 	return (error);
631 }
632 
633 /*
634  * dkwedge_set_bootwedge
635  *
636  *	Set the booted_wedge global based on the specified parent name
637  *	and offset/length.
638  */
639 void
640 dkwedge_set_bootwedge(struct device *parent, daddr_t startblk, uint64_t nblks)
641 {
642 	struct dkwedge_softc *sc;
643 	int i;
644 
645 	(void) lockmgr(&dkwedges_lock, LK_EXCLUSIVE, NULL);
646 	for (i = 0; i < ndkwedges; i++) {
647 		if ((sc = dkwedges[i]) == NULL)
648 			continue;
649 		if (strcmp(sc->sc_parent->dk_name, parent->dv_xname) == 0 &&
650 		    sc->sc_offset == startblk &&
651 		    sc->sc_size == nblks) {
652 			if (booted_wedge) {
653 				printf("WARNING: double match for boot wedge "
654 				    "(%s, %s)\n",
655 				    booted_wedge->dv_xname,
656 				    sc->sc_dev->dv_xname);
657 				continue;
658 			}
659 			booted_device = parent;
660 			booted_wedge = sc->sc_dev;
661 			booted_partition = 0;
662 		}
663 	}
664 	/*
665 	 * XXX What if we don't find one?  Should we create a special
666 	 * XXX root wedge?
667 	 */
668 	(void) lockmgr(&dkwedges_lock, LK_RELEASE, NULL);
669 }
670 
671 /*
672  * We need a dummy objet to stuff into the dkwedge discovery method link
673  * set to ensure that there is always at least one object in the set.
674  */
675 static struct dkwedge_discovery_method dummy_discovery_method;
676 __link_set_add_bss(dkwedge_methods, dummy_discovery_method);
677 
678 /*
679  * dkwedge_discover_init:
680  *
681  *	Initialize the disk wedge discovery method list.
682  */
683 static void
684 dkwedge_discover_init(void)
685 {
686 	__link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
687 	struct dkwedge_discovery_method * const *ddmp;
688 	struct dkwedge_discovery_method *lddm, *ddm;
689 
690 	(void) lockmgr(&dkwedge_discovery_methods_lock, LK_EXCLUSIVE, NULL);
691 
692 	if (dkwedge_discovery_methods_initialized) {
693 		(void) lockmgr(&dkwedge_discovery_methods_lock, LK_RELEASE,
694 			       NULL);
695 		return;
696 	}
697 
698 	LIST_INIT(&dkwedge_discovery_methods);
699 
700 	__link_set_foreach(ddmp, dkwedge_methods) {
701 		ddm = *ddmp;
702 		if (ddm == &dummy_discovery_method)
703 			continue;
704 		if (LIST_EMPTY(&dkwedge_discovery_methods)) {
705 			LIST_INSERT_HEAD(&dkwedge_discovery_methods,
706 					 ddm, ddm_list);
707 			continue;
708 		}
709 		LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
710 			if (ddm->ddm_priority == lddm->ddm_priority) {
711 				aprint_error("dk-method-%s: method \"%s\" "
712 				    "already exists at priority %d\n",
713 				    ddm->ddm_name, lddm->ddm_name,
714 				    lddm->ddm_priority);
715 				/* Not inserted. */
716 				break;
717 			}
718 			if (ddm->ddm_priority < lddm->ddm_priority) {
719 				/* Higher priority; insert before. */
720 				LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
721 				break;
722 			}
723 			if (LIST_NEXT(lddm, ddm_list) == NULL) {
724 				/* Last one; insert after. */
725 				KASSERT(lddm->ddm_priority < ddm->ddm_priority);
726 				LIST_INSERT_AFTER(lddm, ddm, ddm_list);
727 				break;
728 			}
729 		}
730 	}
731 
732 	dkwedge_discovery_methods_initialized = 1;
733 
734 	(void) lockmgr(&dkwedge_discovery_methods_lock, LK_RELEASE, NULL);
735 }
736 
737 #ifdef DKWEDGE_AUTODISCOVER
738 int	dkwedge_autodiscover = 1;
739 #else
740 int	dkwedge_autodiscover = 0;
741 #endif
742 
743 /*
744  * dkwedge_discover:	[exported function]
745  *
746  *	Discover the wedges on a newly attached disk.
747  */
748 void
749 dkwedge_discover(struct disk *pdk)
750 {
751 	struct dkwedge_discovery_method *ddm;
752 	struct vnode *vp;
753 	int error;
754 	dev_t pdev;
755 
756 	/*
757 	 * Require people playing with wedges to enable this explicitly.
758 	 */
759 	if (dkwedge_autodiscover == 0)
760 		return;
761 
762 	if (dkwedge_discovery_methods_initialized == 0)
763 		dkwedge_discover_init();
764 
765 	(void) lockmgr(&dkwedge_discovery_methods_lock, LK_SHARED, NULL);
766 
767 	error = dkwedge_compute_pdev(pdk->dk_name, &pdev);
768 	if (error) {
769 		aprint_error("%s: unable to compute pdev, error = %d\n",
770 		    pdk->dk_name, error);
771 		goto out;
772 	}
773 
774 	error = bdevvp(pdev, &vp);
775 	if (error) {
776 		aprint_error("%s: unable to find vnode for pdev, error = %d\n",
777 		    pdk->dk_name, error);
778 		goto out;
779 	}
780 
781 	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
782 	if (error) {
783 		aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
784 		    pdk->dk_name, error);
785 		vrele(vp);
786 		goto out;
787 	}
788 
789 	error = VOP_OPEN(vp, FREAD | FWRITE, NOCRED, 0);
790 	if (error) {
791 		aprint_error("%s: unable to open device, error = %d\n",
792 		    pdk->dk_name, error);
793 		vput(vp);
794 		goto out;
795 	}
796 	/* VOP_OPEN() doesn't do this for us. */
797 	vp->v_writecount++;
798 	VOP_UNLOCK(vp, 0);
799 
800 	/*
801 	 * For each supported partition map type, look to see if
802 	 * this map type exists.  If so, parse it and add the
803 	 * corresponding wedges.
804 	 */
805 	LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
806 		error = (*ddm->ddm_discover)(pdk, vp);
807 		if (error == 0) {
808 			/* Successfully created wedges; we're done. */
809 			break;
810 		}
811 	}
812 
813 	error = vn_close(vp, FREAD | FWRITE, NOCRED, curlwp);
814 	if (error) {
815 		aprint_error("%s: unable to close device, error = %d\n",
816 		    pdk->dk_name, error);
817 		/* We'll just assume the vnode has been cleaned up. */
818 	}
819  out:
820 	(void) lockmgr(&dkwedge_discovery_methods_lock, LK_RELEASE, NULL);
821 }
822 
823 /*
824  * dkwedge_read:
825  *
826  *	Read the some data from the specified disk, used for
827  *	partition discovery.
828  */
829 int
830 dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno, void *tbuf,
831     size_t len)
832 {
833 	struct buf b;
834 
835 	BUF_INIT(&b);
836 
837 	b.b_vp = vp;
838 	b.b_dev = vp->v_rdev;
839 	b.b_blkno = blkno;
840 	b.b_bcount = b.b_resid = len;
841 	b.b_flags = B_READ;
842 	b.b_proc = curproc;
843 	b.b_data = tbuf;
844 
845 	VOP_STRATEGY(vp, &b);
846 	return (biowait(&b));
847 }
848 
849 /*
850  * dkwedge_lookup:
851  *
852  *	Look up a dkwedge_softc based on the provided dev_t.
853  */
854 static struct dkwedge_softc *
855 dkwedge_lookup(dev_t dev)
856 {
857 	int unit = minor(dev);
858 
859 	if (unit >= ndkwedges)
860 		return (NULL);
861 
862 	KASSERT(dkwedges != NULL);
863 
864 	return (dkwedges[unit]);
865 }
866 
867 /*
868  * dkopen:		[devsw entry point]
869  *
870  *	Open a wedge.
871  */
872 static int
873 dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
874 {
875 	struct dkwedge_softc *sc = dkwedge_lookup(dev);
876 	struct vnode *vp;
877 	int error;
878 
879 	if (sc == NULL)
880 		return (ENODEV);
881 
882 	if (sc->sc_state != DKW_STATE_RUNNING)
883 		return (ENXIO);
884 
885 	/*
886 	 * We go through a complicated little dance to only open the parent
887 	 * vnode once per wedge, no matter how many times the wedge is
888 	 * opened.  The reason?  We see one dkopen() per open call, but
889 	 * only dkclose() on the last close.
890 	 */
891 	(void) lockmgr(&sc->sc_dk.dk_openlock, LK_EXCLUSIVE, NULL);
892 	(void) lockmgr(&sc->sc_parent->dk_rawlock, LK_EXCLUSIVE, NULL);
893 	if (sc->sc_dk.dk_openmask == 0) {
894 		if (sc->sc_parent->dk_rawopens++ == 0) {
895 			KASSERT(sc->sc_parent->dk_rawvp == NULL);
896 			error = bdevvp(sc->sc_pdev, &vp);
897 			if (error)
898 				goto popen_fail;
899 			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
900 			if (error) {
901 				vrele(vp);
902 				goto popen_fail;
903 			}
904 			error = VOP_OPEN(vp, FREAD | FWRITE, NOCRED, 0);
905 			if (error) {
906 				vput(vp);
907 				goto popen_fail;
908 			}
909 			/* VOP_OPEN() doesn't do this for us. */
910 			vp->v_writecount++;
911 			VOP_UNLOCK(vp, 0);
912 			sc->sc_parent->dk_rawvp = vp;
913 		}
914 		if (fmt == S_IFCHR)
915 			sc->sc_dk.dk_copenmask |= 1;
916 		else
917 			sc->sc_dk.dk_bopenmask |= 1;
918 		sc->sc_dk.dk_openmask =
919 		    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
920 	}
921 	(void) lockmgr(&sc->sc_parent->dk_rawlock, LK_RELEASE, NULL);
922 	(void) lockmgr(&sc->sc_dk.dk_openlock, LK_RELEASE, NULL);
923 
924 	return (0);
925 
926  popen_fail:
927 	(void) lockmgr(&sc->sc_parent->dk_rawlock, LK_RELEASE, NULL);
928 	return (error);
929 }
930 
931 /*
932  * dkclose:		[devsw entry point]
933  *
934  *	Close a wedge.
935  */
936 static int
937 dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
938 {
939 	struct dkwedge_softc *sc = dkwedge_lookup(dev);
940 	int error = 0;
941 
942 	KASSERT(sc->sc_dk.dk_openmask != 0);
943 
944 	(void) lockmgr(&sc->sc_dk.dk_openlock, LK_EXCLUSIVE, NULL);
945 	(void) lockmgr(&sc->sc_parent->dk_rawlock, LK_EXCLUSIVE, NULL);
946 
947 	if (fmt == S_IFCHR)
948 		sc->sc_dk.dk_copenmask &= ~1;
949 	else
950 		sc->sc_dk.dk_bopenmask &= ~1;
951 	sc->sc_dk.dk_openmask =
952 	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
953 
954 	if (sc->sc_dk.dk_openmask == 0) {
955 		if (sc->sc_parent->dk_rawopens-- == 1) {
956 			KASSERT(sc->sc_parent->dk_rawvp != NULL);
957 			error = vn_close(sc->sc_parent->dk_rawvp,
958 					 FREAD | FWRITE, NOCRED, l);
959 			sc->sc_parent->dk_rawvp = NULL;
960 		}
961 	}
962 
963 	(void) lockmgr(&sc->sc_parent->dk_rawlock, LK_RELEASE, NULL);
964 	(void) lockmgr(&sc->sc_dk.dk_openlock, LK_RELEASE, NULL);
965 
966 	return (error);
967 }
968 
969 /*
970  * dkstragegy:		[devsw entry point]
971  *
972  *	Perform I/O based on the wedge I/O strategy.
973  */
974 static void
975 dkstrategy(struct buf *bp)
976 {
977 	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
978 	int s;
979 
980 	if (sc->sc_state != DKW_STATE_RUNNING) {
981 		bp->b_error = ENXIO;
982 		bp->b_flags |= B_ERROR;
983 		goto done;
984 	}
985 
986 	/* If it's an empty transfer, wake up the top half now. */
987 	if (bp->b_bcount == 0)
988 		goto done;
989 
990 	/* Make sure it's in-range. */
991 	if (bounds_check_with_mediasize(bp, DEV_BSIZE, sc->sc_size) <= 0)
992 		goto done;
993 
994 	/* Translate it to the parent's raw LBA. */
995 	bp->b_rawblkno = bp->b_blkno + sc->sc_offset;
996 
997 	/* Place it in the queue and start I/O on the unit. */
998 	s = splbio();
999 	sc->sc_iopend++;
1000 	BUFQ_PUT(sc->sc_bufq, bp);
1001 	dkstart(sc);
1002 	splx(s);
1003 	return;
1004 
1005  done:
1006 	bp->b_resid = bp->b_bcount;
1007 	biodone(bp);
1008 }
1009 
1010 /*
1011  * dkstart:
1012  *
1013  *	Start I/O that has been enqueued on the wedge.
1014  *	NOTE: Must be called at splbio()!
1015  */
1016 static void
1017 dkstart(struct dkwedge_softc *sc)
1018 {
1019 	struct buf *bp, *nbp;
1020 
1021 	/* Do as much work as has been enqueued. */
1022 	while ((bp = BUFQ_PEEK(sc->sc_bufq)) != NULL) {
1023 		if (sc->sc_state != DKW_STATE_RUNNING) {
1024 			(void) BUFQ_GET(sc->sc_bufq);
1025 			if (sc->sc_iopend-- == 1 &&
1026 			    (sc->sc_flags & DK_F_WAIT_DRAIN) != 0) {
1027 				sc->sc_flags &= ~DK_F_WAIT_DRAIN;
1028 				wakeup(&sc->sc_iopend);
1029 			}
1030 			bp->b_error = ENXIO;
1031 			bp->b_flags |= B_ERROR;
1032 			bp->b_resid = bp->b_bcount;
1033 			biodone(bp);
1034 		}
1035 
1036 		/* Instrumentation. */
1037 		disk_busy(&sc->sc_dk);
1038 
1039 		nbp = pool_get(&bufpool, PR_NOWAIT);
1040 		if (nbp == NULL) {
1041 			/*
1042 			 * No resources to run this request; leave the
1043 			 * buffer queued up, and schedule a timer to
1044 			 * restart the queue in 1/2 a second.
1045 			 */
1046 			disk_unbusy(&sc->sc_dk, 0, bp->b_flags & B_READ);
1047 			callout_schedule(&sc->sc_restart_ch, hz / 2);
1048 			return;
1049 		}
1050 
1051 		(void) BUFQ_GET(sc->sc_bufq);
1052 
1053 		BUF_INIT(nbp);
1054 		nbp->b_data = bp->b_data;
1055 		nbp->b_flags = bp->b_flags | B_CALL;
1056 		nbp->b_iodone = dkiodone;
1057 		nbp->b_proc = bp->b_proc;
1058 		nbp->b_blkno = bp->b_rawblkno;
1059 		nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
1060 		nbp->b_vp = sc->sc_parent->dk_rawvp;
1061 		nbp->b_bcount = bp->b_bcount;
1062 		nbp->b_private = bp;
1063 		BIO_COPYPRIO(nbp, bp);
1064 
1065 		if ((nbp->b_flags & B_READ) == 0)
1066 			V_INCR_NUMOUTPUT(nbp->b_vp);
1067 		VOP_STRATEGY(nbp->b_vp, nbp);
1068 	}
1069 }
1070 
1071 /*
1072  * dkiodone:
1073  *
1074  *	I/O to a wedge has completed; alert the top half.
1075  *	NOTE: Must be called at splbio()!
1076  */
1077 static void
1078 dkiodone(struct buf *bp)
1079 {
1080 	struct buf *obp = bp->b_private;
1081 	struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);
1082 
1083 	if (bp->b_flags & B_ERROR) {
1084 		obp->b_flags |= B_ERROR;
1085 		obp->b_error = bp->b_error;
1086 	}
1087 	obp->b_resid = bp->b_resid;
1088 	pool_put(&bufpool, bp);
1089 
1090 	if (sc->sc_iopend-- == 1 && (sc->sc_flags & DK_F_WAIT_DRAIN) != 0) {
1091 		sc->sc_flags &= ~DK_F_WAIT_DRAIN;
1092 		wakeup(&sc->sc_iopend);
1093 	}
1094 
1095 	disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
1096 	    obp->b_flags & B_READ);
1097 
1098 	biodone(obp);
1099 
1100 	/* Kick the queue in case there is more work we can do. */
1101 	dkstart(sc);
1102 }
1103 
/*
 * dkrestart:
 *
 *	Callout handler scheduled by dkstart() when it could not get
 *	a buf.  Raises to splbio() and runs the queue again.  The
 *	argument is the wedge softc.
 */
static void
dkrestart(void *v)
{
	struct dkwedge_softc *sc = v;
	int s = splbio();

	dkstart(sc);
	splx(s);
}
1120 
1121 /*
1122  * dkread:		[devsw entry point]
1123  *
1124  *	Read from a wedge.
1125  */
1126 static int
1127 dkread(dev_t dev, struct uio *uio, int flags)
1128 {
1129 	struct dkwedge_softc *sc = dkwedge_lookup(dev);
1130 
1131 	if (sc->sc_state != DKW_STATE_RUNNING)
1132 		return (ENXIO);
1133 
1134 	return (physio(dkstrategy, NULL, dev, B_READ,
1135 		       sc->sc_parent->dk_driver->d_minphys, uio));
1136 }
1137 
1138 /*
1139  * dkwrite:		[devsw entry point]
1140  *
1141  *	Write to a wedge.
1142  */
1143 static int
1144 dkwrite(dev_t dev, struct uio *uio, int flags)
1145 {
1146 	struct dkwedge_softc *sc = dkwedge_lookup(dev);
1147 
1148 	if (sc->sc_state != DKW_STATE_RUNNING)
1149 		return (ENXIO);
1150 
1151 	return (physio(dkstrategy, NULL, dev, B_WRITE,
1152 		       sc->sc_parent->dk_driver->d_minphys, uio));
1153 }
1154 
1155 /*
1156  * dkioctl:		[devsw entry point]
1157  *
1158  *	Perform an ioctl request on a wedge.
1159  */
1160 static int
1161 dkioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
1162 {
1163 	struct dkwedge_softc *sc = dkwedge_lookup(dev);
1164 	int error = 0;
1165 
1166 	if (sc->sc_state != DKW_STATE_RUNNING)
1167 		return (ENXIO);
1168 
1169 	switch (cmd) {
1170 	case DIOCCACHESYNC:
1171 		/*
1172 		 * XXX Do we really need to care about having a writable
1173 		 * file descriptor here?
1174 		 */
1175 		if ((flag & FWRITE) == 0)
1176 			error = EBADF;
1177 		else
1178 			error = VOP_IOCTL(sc->sc_parent->dk_rawvp,
1179 					  cmd, data, flag,
1180 					  l != NULL ? l->l_proc->p_ucred : NOCRED, l);
1181 		break;
1182 	case DIOCGWEDGEINFO:
1183 	    {
1184 	    	struct dkwedge_info *dkw = (void *) data;
1185 
1186 		strcpy(dkw->dkw_devname, sc->sc_dev->dv_xname);
1187 	    	memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
1188 		dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
1189 		strcpy(dkw->dkw_parent, sc->sc_parent->dk_name);
1190 		dkw->dkw_offset = sc->sc_offset;
1191 		dkw->dkw_size = sc->sc_size;
1192 		strcpy(dkw->dkw_ptype, sc->sc_ptype);
1193 
1194 		break;
1195 	    }
1196 
1197 	default:
1198 		error = ENOTTY;
1199 	}
1200 
1201 	return (error);
1202 }
1203 
1204 /*
1205  * dksize:		[devsw entry point]
1206  *
1207  *	Query the size of a wedge for the purpose of performing a dump
1208  *	or for swapping to.
1209  */
1210 static int
1211 dksize(dev_t dev)
1212 {
1213 
1214 	/* XXX */
1215 	return (-1);
1216 }
1217 
1218 /*
1219  * dkdump:		[devsw entry point]
1220  *
1221  *	Perform a crash dump to a wedge.
1222  */
1223 static int
1224 dkdump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
1225 {
1226 
1227 	/* XXX */
1228 	return (ENXIO);
1229 }
1230