/*	$NetBSD: ld.c,v 1.53 2007/12/05 07:06:50 ad Exp $	*/

/*-
 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran and Charles M. Hannum.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Disk driver for use by RAID controllers.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ld.c,v 1.53 2007/12/05 07:06:50 ad Exp $");

#include "rnd.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/queue.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/endian.h>
#include <sys/disklabel.h>
#include <sys/disk.h>
#include <sys/dkio.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/mutex.h>
#if NRND > 0
#include <sys/rnd.h>
#endif

#include <dev/ldvar.h>

#include <prop/proplib.h>

static void	ldgetdefaultlabel(struct ld_softc *, struct disklabel *);
static void	ldgetdisklabel(struct ld_softc *);
static void	ldminphys(struct buf *bp);
static void	ldshutdown(void *);
static void	ldstart(struct ld_softc *, struct buf *);
static void	ld_set_properties(struct ld_softc *);
static void	ld_config_interrupts (struct device *);

extern struct	cfdriver ld_cd;

static dev_type_open(ldopen);
static dev_type_close(ldclose);
static dev_type_read(ldread);
static dev_type_write(ldwrite);
static dev_type_ioctl(ldioctl);
static dev_type_strategy(ldstrategy);
static dev_type_dump(lddump);
static dev_type_size(ldsize);

const struct bdevsw ld_bdevsw = {
	ldopen, ldclose, ldstrategy, ldioctl, lddump, ldsize, D_DISK
};

const struct cdevsw ld_cdevsw = {
	ldopen, ldclose, ldread, ldwrite, ldioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

static struct	dkdriver lddkdriver = { ldstrategy, ldminphys };
static void	*ld_sdh;

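/*
 * Finish attaching a logical disk: set up the disk(9) structure, fake a
 * geometry if the controller did not supply one, and hook into the
 * shutdown and rnd machinery.  Called by the controller back-end once
 * it has filled in the softc.
 */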
void
ldattach(struct ld_softc *sc)
{
	char tbuf[9];

	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_VM);

	if ((sc->sc_flags & LDF_ENABLED) == 0) {
		aprint_normal("%s: disabled\n", sc->sc_dv.dv_xname);
		return;
	}

	/* Initialise and attach the disk structure. */
	disk_init(&sc->sc_dk, sc->sc_dv.dv_xname, &lddkdriver);
	disk_attach(&sc->sc_dk);

	if (sc->sc_maxxfer > MAXPHYS)
		sc->sc_maxxfer = MAXPHYS;

	/* Build synthetic geometry if necessary. */
	if (sc->sc_nheads == 0 || sc->sc_nsectors == 0 ||
	    sc->sc_ncylinders == 0) {
		uint64_t ncyl;

		if (sc->sc_secperunit <= 528 * 2048)		/* 528MB */
			sc->sc_nheads = 16;
		else if (sc->sc_secperunit <= 1024 * 2048)	/* 1GB */
			sc->sc_nheads = 32;
		else if (sc->sc_secperunit <= 21504 * 2048)	/* 21GB */
			sc->sc_nheads = 64;
		else if (sc->sc_secperunit <= 43008 * 2048)	/* 42GB */
			sc->sc_nheads = 128;
		else
			sc->sc_nheads = 255;

		sc->sc_nsectors = 63;
		sc->sc_ncylinders = INT_MAX;
		ncyl = sc->sc_secperunit /
		    (sc->sc_nheads * sc->sc_nsectors);
		if (ncyl < INT_MAX)
			sc->sc_ncylinders = (int)ncyl;
	}

	format_bytes(tbuf, sizeof(tbuf), sc->sc_secperunit *
	    sc->sc_secsize);
	aprint_normal("%s: %s, %d cyl, %d head, %d sec, %d bytes/sect x %"PRIu64" sectors\n",
	    sc->sc_dv.dv_xname, tbuf, sc->sc_ncylinders, sc->sc_nheads,
	    sc->sc_nsectors, sc->sc_secsize, sc->sc_secperunit);

	ld_set_properties(sc);

#if NRND > 0
	/* Attach the device into the rnd source list. */
	rnd_attach_source(&sc->sc_rnd_source, sc->sc_dv.dv_xname,
	    RND_TYPE_DISK, 0);
#endif

	/* Set the `shutdownhook'. */
	if (ld_sdh == NULL)
		ld_sdh = shutdownhook_establish(ldshutdown, NULL);
	bufq_alloc(&sc->sc_bufq, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);

	/* Discover wedges on this disk. */
	config_interrupts(&sc->sc_dv, ld_config_interrupts);
}

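/*
 * Allow the back-end to adjust the maximum number of outstanding
 * transfers it is willing to accept.
 */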
int
ldadjqparam(struct ld_softc *sc, int xmax)
{
	int s;

	s = splbio();
	sc->sc_maxqueuecnt = xmax;
	splx(s);

	return (0);
}

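/*
 * Begin detaching the disk: refuse if it is still open (unless forced),
 * then stop accepting new transfers and wait for the queue to drain.
 */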
int
ldbegindetach(struct ld_softc *sc, int flags)
{
	int s, rv = 0;

	if ((sc->sc_flags & LDF_ENABLED) == 0)
		return (0);

	if ((flags & DETACH_FORCE) == 0 && sc->sc_dk.dk_openmask != 0)
		return (EBUSY);

	s = splbio();
	sc->sc_maxqueuecnt = 0;
	sc->sc_flags |= LDF_DETACH;
	while (sc->sc_queuecnt > 0) {
		sc->sc_flags |= LDF_DRAIN;
		rv = tsleep(&sc->sc_queuecnt, PRIBIO, "lddrn", 0);
		if (rv)
			break;
	}
	splx(s);

	return (rv);
}

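/*
 * Finish detaching the disk: wait for outstanding commands, drain the
 * buffer queue, revoke open vnodes, delete wedges and tear down the
 * disk(9) and rnd state.
 */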
void
ldenddetach(struct ld_softc *sc)
{
	int s, bmaj, cmaj, i, mn;

	if ((sc->sc_flags & LDF_ENABLED) == 0)
		return;

	/* Wait for commands queued with the hardware to complete. */
	if (sc->sc_queuecnt != 0)
		if (tsleep(&sc->sc_queuecnt, PRIBIO, "lddtch", 30 * hz))
			printf("%s: not drained\n", sc->sc_dv.dv_xname);

	/* Locate the major numbers. */
	bmaj = bdevsw_lookup_major(&ld_bdevsw);
	cmaj = cdevsw_lookup_major(&ld_cdevsw);

	/* Kill off any queued buffers. */
	s = splbio();
	bufq_drain(sc->sc_bufq);
	splx(s);

	bufq_free(sc->sc_bufq);

	/* Nuke the vnodes for any open instances. */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(&sc->sc_dv), i);
		vdevgone(bmaj, mn, mn, VBLK);
		vdevgone(cmaj, mn, mn, VCHR);
	}

	/* Delete all of our wedges. */
	dkwedge_delall(&sc->sc_dk);

	/* Detach from the disk list. */
	disk_detach(&sc->sc_dk);
	disk_destroy(&sc->sc_dk);

#if NRND > 0
	/* Unhook the entropy source. */
	rnd_detach_source(&sc->sc_rnd_source);
#endif

	/*
	 * XXX We can't really flush the cache here, because the
	 * XXX device may already be non-existent from the controller's
	 * XXX perspective.
	 */
#if 0
	/* Flush the device's cache. */
	if (sc->sc_flush != NULL)
		if ((*sc->sc_flush)(sc) != 0)
			printf("%s: unable to flush cache\n",
			    sc->sc_dv.dv_xname);
#endif
}

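/*
 * Shutdown hook: flush the write cache of every configured logical disk.
 */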
/* ARGSUSED */
static void
ldshutdown(void *cookie)
{
	struct ld_softc *sc;
	int i;

	for (i = 0; i < ld_cd.cd_ndevs; i++) {
		if ((sc = device_lookup(&ld_cd, i)) == NULL)
			continue;
		if (sc->sc_flush != NULL && (*sc->sc_flush)(sc) != 0)
			printf("%s: unable to flush cache\n",
			    sc->sc_dv.dv_xname);
	}
}

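/*
 * Handle an open on the block or character device node.
 */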
/* ARGSUSED */
static int
ldopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct ld_softc *sc;
	int error, unit, part;

	unit = DISKUNIT(dev);
	if ((sc = device_lookup(&ld_cd, unit)) == NULL)
		return (ENXIO);
	if ((sc->sc_flags & LDF_ENABLED) == 0)
		return (ENODEV);
	part = DISKPART(dev);

	mutex_enter(&sc->sc_dk.dk_openlock);

	if (sc->sc_dk.dk_openmask == 0) {
		/* Load the partition info if not already loaded. */
		if ((sc->sc_flags & LDF_VLABEL) == 0)
			ldgetdisklabel(sc);
	}

	/* Check that the partition exists. */
	if (part != RAW_PART && (part >= sc->sc_dk.dk_label->d_npartitions ||
	    sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
		error = ENXIO;
		goto bad1;
	}

	/* Record the open in the per-format open mask. */
	switch (fmt) {
	case S_IFCHR:
		sc->sc_dk.dk_copenmask |= (1 << part);
		break;
	case S_IFBLK:
		sc->sc_dk.dk_bopenmask |= (1 << part);
		break;
	}
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	error = 0;
 bad1:
	mutex_exit(&sc->sc_dk.dk_openlock);
	return (error);
}

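/*
 * Handle a close.  On the last close, flush the write cache and forget
 * the in-core label unless it has been pinned with DIOCKLABEL.
 */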
/* ARGSUSED */
static int
ldclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct ld_softc *sc;
	int part, unit;

	unit = DISKUNIT(dev);
	part = DISKPART(dev);
	sc = device_lookup(&ld_cd, unit);

	mutex_enter(&sc->sc_dk.dk_openlock);

	switch (fmt) {
	case S_IFCHR:
		sc->sc_dk.dk_copenmask &= ~(1 << part);
		break;
	case S_IFBLK:
		sc->sc_dk.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	if (sc->sc_dk.dk_openmask == 0) {
		if (sc->sc_flush != NULL && (*sc->sc_flush)(sc) != 0)
			printf("%s: unable to flush cache\n",
			    sc->sc_dv.dv_xname);
		if ((sc->sc_flags & LDF_KLABEL) == 0)
			sc->sc_flags &= ~LDF_VLABEL;
	}

	mutex_exit(&sc->sc_dk.dk_openlock);
	return (0);
}

/* ARGSUSED */
static int
ldread(dev_t dev, struct uio *uio, int ioflag)
{

	return (physio(ldstrategy, NULL, dev, B_READ, ldminphys, uio));
}

/* ARGSUSED */
static int
ldwrite(dev_t dev, struct uio *uio, int ioflag)
{

	return (physio(ldstrategy, NULL, dev, B_WRITE, ldminphys, uio));
}

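/*
 * Handle disk ioctls: disklabel management, cache synchronisation,
 * wedge operations and buffer queue strategy selection.
 */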
/* ARGSUSED */
static int
ldioctl(dev_t dev, u_long cmd, void *addr, int32_t flag, struct lwp *l)
{
	struct ld_softc *sc;
	int part, unit, error;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct disklabel *lp;

	unit = DISKUNIT(dev);
	part = DISKPART(dev);
	sc = device_lookup(&ld_cd, unit);

	error = disk_ioctl(&sc->sc_dk, cmd, addr, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	error = 0;
	switch (cmd) {
	case DIOCGDINFO:
		memcpy(addr, sc->sc_dk.dk_label, sizeof(struct disklabel));
		return (0);

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(sc->sc_dk.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(addr, &newlabel, sizeof(struct olddisklabel));
		return (0);
#endif

	case DIOCGPART:
		((struct partinfo *)addr)->disklab = sc->sc_dk.dk_label;
		((struct partinfo *)addr)->part =
		    &sc->sc_dk.dk_label->d_partitions[part];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:

		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, addr, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)addr;

		if ((flag & FWRITE) == 0)
			return (EBADF);

		mutex_enter(&sc->sc_dk.dk_openlock);
		sc->sc_flags |= LDF_LABELLING;

		error = setdisklabel(sc->sc_dk.dk_label,
		    lp, /*sc->sc_dk.dk_openmask : */0,
		    sc->sc_dk.dk_cpulabel);
		if (error == 0 && (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
		    || cmd == ODIOCWDINFO
#endif
		    ))
			error = writedisklabel(
			    MAKEDISKDEV(major(dev), DISKUNIT(dev), RAW_PART),
			    ldstrategy, sc->sc_dk.dk_label,
			    sc->sc_dk.dk_cpulabel);

		sc->sc_flags &= ~LDF_LABELLING;
		mutex_exit(&sc->sc_dk.dk_openlock);
		break;

	case DIOCKLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
		if (*(int *)addr)
			sc->sc_flags |= LDF_KLABEL;
		else
			sc->sc_flags &= ~LDF_KLABEL;
		break;

	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
		if (*(int *)addr)
			sc->sc_flags |= LDF_WLABEL;
		else
			sc->sc_flags &= ~LDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		ldgetdefaultlabel(sc, (struct disklabel *)addr);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		ldgetdefaultlabel(sc, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(addr, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCCACHESYNC:
		/*
		 * XXX Do we really need to care about having a writable
		 * file descriptor here?
		 */
		if ((flag & FWRITE) == 0)
			error = EBADF;
		else if (sc->sc_flush)
			error = (*sc->sc_flush)(sc);
		else
			error = 0;	/* XXX Error out instead? */
		break;

	case DIOCAWEDGE:
	    {
	    	struct dkwedge_info *dkw = (void *) addr;

		if ((flag & FWRITE) == 0)
			return (EBADF);

		/* If the ioctl happens here, the parent is us. */
		strcpy(dkw->dkw_parent, sc->sc_dv.dv_xname);
		return (dkwedge_add(dkw));
	    }

	case DIOCDWEDGE:
	    {
	    	struct dkwedge_info *dkw = (void *) addr;

		if ((flag & FWRITE) == 0)
			return (EBADF);

		/* If the ioctl happens here, the parent is us. */
		strcpy(dkw->dkw_parent, sc->sc_dv.dv_xname);
		return (dkwedge_del(dkw));
	    }

	case DIOCLWEDGES:
	    {
	    	struct dkwedge_list *dkwl = (void *) addr;

		return (dkwedge_list(&sc->sc_dk, dkwl, l));
	    }
	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)addr;

		mutex_enter(&sc->sc_mutex);
		strlcpy(dks->dks_name, bufq_getstrategyname(sc->sc_bufq),
		    sizeof(dks->dks_name));
		mutex_exit(&sc->sc_mutex);
		dks->dks_paramlen = 0;

		return 0;
	    }
	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)addr;
		struct bufq_state *new, *old;

		if ((flag & FWRITE) == 0)
			return EPERM;

		if (dks->dks_param != NULL)
			return EINVAL;

		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error)
			return error;

		mutex_enter(&sc->sc_mutex);
		old = sc->sc_bufq;
		bufq_move(new, old);
		sc->sc_bufq = new;
		mutex_exit(&sc->sc_mutex);
		bufq_free(old);

		return 0;
	    }
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

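/*
 * Start an I/O request: validate it, bounds-check it against the label,
 * convert the block number to an absolute device block and queue it for
 * the back-end.
 */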
static void
ldstrategy(struct buf *bp)
{
	struct ld_softc *sc;
	struct disklabel *lp;
	daddr_t blkno;
	int s, part;

	sc = device_lookup(&ld_cd, DISKUNIT(bp->b_dev));
	part = DISKPART(bp->b_dev);

	if ((sc->sc_flags & LDF_DETACH) != 0) {
		bp->b_error = EIO;
		goto done;
	}

	lp = sc->sc_dk.dk_label;

	/*
	 * The transfer must be a whole number of blocks and the offset must
	 * not be negative.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0 || bp->b_blkno < 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/* If it's a null transfer, return immediately. */
	if (bp->b_bcount == 0)
		goto done;

	/*
	 * Do bounds checking and adjust the transfer.  If error, process.
	 * If past the end of partition, just return.
	 */
	if (part != RAW_PART &&
	    bounds_check_with_label(&sc->sc_dk, bp,
	    (sc->sc_flags & (LDF_WLABEL | LDF_LABELLING)) != 0) <= 0) {
		goto done;
	}

	/*
	 * Convert the block number to absolute and put it in terms
	 * of the device's logical block size.
	 */
	if (lp->d_secsize == DEV_BSIZE)
		blkno = bp->b_blkno;
	else if (lp->d_secsize > DEV_BSIZE)
		blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
	else
		blkno = bp->b_blkno * (DEV_BSIZE / lp->d_secsize);

	if (part != RAW_PART)
		blkno += lp->d_partitions[part].p_offset;

	bp->b_rawblkno = blkno;

	s = splbio();
	ldstart(sc, bp);
	splx(s);
	return;

 done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

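/*
 * Queue the buffer, if any, and issue as many transfers to the back-end
 * as it will accept.  Pass bp as NULL to simply kick the queue, e.g.
 * after a transfer completes or resources free up.
 */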
static void
ldstart(struct ld_softc *sc, struct buf *bp)
{
	int error;

	mutex_enter(&sc->sc_mutex);

	if (bp != NULL)
		BUFQ_PUT(sc->sc_bufq, bp);

	while (sc->sc_queuecnt < sc->sc_maxqueuecnt) {
		/* See if there is work to do. */
		if ((bp = BUFQ_PEEK(sc->sc_bufq)) == NULL)
			break;

		disk_busy(&sc->sc_dk);
		sc->sc_queuecnt++;

		if (__predict_true((error = (*sc->sc_start)(sc, bp)) == 0)) {
			/*
			 * The back-end is running the job; remove it from
			 * the queue.
			 */
			(void) BUFQ_GET(sc->sc_bufq);
		} else {
			disk_unbusy(&sc->sc_dk, 0, (bp->b_flags & B_READ));
			sc->sc_queuecnt--;
			if (error == EAGAIN) {
				/*
				 * Temporary resource shortage in the
				 * back-end; just defer the job until
				 * later.
				 *
				 * XXX We might consider a watchdog timer
				 * XXX to make sure we are kicked into action.
				 */
				break;
			} else {
				(void) BUFQ_GET(sc->sc_bufq);
				bp->b_error = error;
				bp->b_resid = bp->b_bcount;
				mutex_exit(&sc->sc_mutex);
				biodone(bp);
				mutex_enter(&sc->sc_mutex);
			}
		}
	}

	mutex_exit(&sc->sc_mutex);
}

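/*
 * Called by the back-end when a transfer completes: update the disk(9)
 * statistics, complete the buffer and restart the queue if it was
 * throttled or draining.
 */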
void
lddone(struct ld_softc *sc, struct buf *bp)
{

	if (bp->b_error != 0) {
		diskerr(bp, "ld", "error", LOG_PRINTF, 0, sc->sc_dk.dk_label);
		printf("\n");
	}

	disk_unbusy(&sc->sc_dk, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
#if NRND > 0
	rnd_add_uint32(&sc->sc_rnd_source, bp->b_rawblkno);
#endif
	biodone(bp);

	mutex_enter(&sc->sc_mutex);
	if (--sc->sc_queuecnt <= sc->sc_maxqueuecnt) {
		if ((sc->sc_flags & LDF_DRAIN) != 0) {
			sc->sc_flags &= ~LDF_DRAIN;
			wakeup(&sc->sc_queuecnt);
		}
		mutex_exit(&sc->sc_mutex);
		ldstart(sc, NULL);
	} else
		mutex_exit(&sc->sc_mutex);
}

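/*
 * Return the size of the given partition in DEV_BSIZE units, for use
 * when configuring a dump device.  Only swap partitions are reported.
 */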
static int
ldsize(dev_t dev)
{
	struct ld_softc *sc;
	int part, unit, omask, size;

	unit = DISKUNIT(dev);
	if ((sc = device_lookup(&ld_cd, unit)) == NULL)
		return (ENODEV);
	if ((sc->sc_flags & LDF_ENABLED) == 0)
		return (ENODEV);
	part = DISKPART(dev);

	omask = sc->sc_dk.dk_openmask & (1 << part);

	if (omask == 0 && ldopen(dev, 0, S_IFBLK, NULL) != 0)
		return (-1);
	else if (sc->sc_dk.dk_label->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = sc->sc_dk.dk_label->d_partitions[part].p_size *
		    (sc->sc_dk.dk_label->d_secsize / DEV_BSIZE);
	if (omask == 0 && ldclose(dev, 0, S_IFBLK, NULL) != 0)
		return (-1);

	return (size);
}

/*
 * Load the label information from the specified device.
 */
static void
ldgetdisklabel(struct ld_softc *sc)
{
	const char *errstring;

	ldgetdefaultlabel(sc, sc->sc_dk.dk_label);

	/* Call the generic disklabel extraction routine. */
	errstring = readdisklabel(MAKEDISKDEV(0, device_unit(&sc->sc_dv),
	    RAW_PART), ldstrategy, sc->sc_dk.dk_label, sc->sc_dk.dk_cpulabel);
	if (errstring != NULL)
		printf("%s: %s\n", sc->sc_dv.dv_xname, errstring);

	/* In-core label now valid. */
	sc->sc_flags |= LDF_VLABEL;
}

/*
 * Construct a fictitious label.
 */
static void
ldgetdefaultlabel(struct ld_softc *sc, struct disklabel *lp)
{

	memset(lp, 0, sizeof(struct disklabel));

	lp->d_secsize = sc->sc_secsize;
	lp->d_ntracks = sc->sc_nheads;
	lp->d_nsectors = sc->sc_nsectors;
	lp->d_ncylinders = sc->sc_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
	lp->d_type = DTYPE_LD;
	strlcpy(lp->d_typename, "unknown", sizeof(lp->d_typename));
	strlcpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_secperunit = sc->sc_secperunit;
	lp->d_rpm = 7200;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size =
	    lp->d_secperunit * (lp->d_secsize / DEV_BSIZE);
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Take a dump.
 */
static int
lddump(dev_t dev, daddr_t blkno, void *vav, size_t size)
{
	char *va = vav;
	struct ld_softc *sc;
	struct disklabel *lp;
	int unit, part, nsects, sectoff, towrt, nblk, maxblkcnt, rv;
	static int dumping;

	unit = DISKUNIT(dev);
	if ((sc = device_lookup(&ld_cd, unit)) == NULL)
		return (ENXIO);
	if ((sc->sc_flags & LDF_ENABLED) == 0)
		return (ENODEV);
	if (sc->sc_dump == NULL)
		return (ENXIO);

	/* Check if recursive dump; if so, punt. */
	if (dumping)
		return (EFAULT);
	dumping = 1;

	/*
	 * Convert to disk sectors.  The request must be a multiple of
	 * the sector size.
	 */
	part = DISKPART(dev);
	lp = sc->sc_dk.dk_label;
	if ((size % lp->d_secsize) != 0)
		return (EFAULT);
	towrt = size / lp->d_secsize;
	blkno = dbtob(blkno) / lp->d_secsize;	/* blkno in DEV_BSIZE units */

	nsects = lp->d_partitions[part].p_size;
	sectoff = lp->d_partitions[part].p_offset;

	/* Check transfer bounds against partition size. */
	if ((blkno < 0) || ((blkno + towrt) > nsects))
		return (EINVAL);

	/* Offset block number to start of partition. */
	blkno += sectoff;

	/* Start dumping and return when done. */
	maxblkcnt = sc->sc_maxxfer / sc->sc_secsize - 1;
	while (towrt > 0) {
		nblk = min(maxblkcnt, towrt);

		if ((rv = (*sc->sc_dump)(sc, va, blkno, nblk)) != 0)
			return (rv);

		towrt -= nblk;
		blkno += nblk;
		va += nblk * sc->sc_secsize;
	}

	dumping = 0;
	return (0);
}

/*
 * Adjust the size of a transfer.
 */
static void
ldminphys(struct buf *bp)
{
	struct ld_softc *sc;

	sc = device_lookup(&ld_cd, DISKUNIT(bp->b_dev));

	if (bp->b_bcount > sc->sc_maxxfer)
		bp->b_bcount = sc->sc_maxxfer;
	minphys(bp);
}

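/*
 * Publish the disk geometry as a "disk-info" device property dictionary.
 */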
static void
ld_set_properties(struct ld_softc *ld)
{
	prop_dictionary_t disk_info, odisk_info, geom;

	disk_info = prop_dictionary_create();

	geom = prop_dictionary_create();

	prop_dictionary_set_uint64(geom, "sectors-per-unit",
	    ld->sc_secperunit);

	prop_dictionary_set_uint32(geom, "sector-size",
	    ld->sc_secsize);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
	    ld->sc_nsectors);

	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
	    ld->sc_nheads);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    ld->sc_ncylinders);

	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);

	prop_dictionary_set(device_properties(&ld->sc_dv),
	    "disk-info", disk_info);

	/*
	 * Don't release disk_info here; we keep a reference to it.
	 * disk_detach() will release it when we go away.
	 */

	odisk_info = ld->sc_dk.dk_info;
	ld->sc_dk.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}

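/*
 * Deferred until interrupts are running: discover wedges on this disk.
 */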
static void
ld_config_interrupts (struct device *d)
{
	struct ld_softc *sc = (struct ld_softc *)d;
	dkwedge_discover(&sc->sc_dk);
}
927