xref: /netbsd-src/sys/dev/ld.c (revision 8ac07aec990b9d2e483062509d0a9fa5b4f57cf2)
1 /*	$NetBSD: ld.c,v 1.57 2008/04/09 05:47:19 cegger Exp $	*/
2 
3 /*-
4  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Doran and Charles M. Hannum.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * Disk driver for use by RAID controllers.
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ld.c,v 1.57 2008/04/09 05:47:19 cegger Exp $");
45 
46 #include "rnd.h"
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/device.h>
52 #include <sys/queue.h>
53 #include <sys/proc.h>
54 #include <sys/buf.h>
55 #include <sys/bufq.h>
56 #include <sys/endian.h>
57 #include <sys/disklabel.h>
58 #include <sys/disk.h>
59 #include <sys/dkio.h>
60 #include <sys/stat.h>
61 #include <sys/conf.h>
62 #include <sys/fcntl.h>
63 #include <sys/vnode.h>
64 #include <sys/syslog.h>
65 #include <sys/mutex.h>
66 #if NRND > 0
67 #include <sys/rnd.h>
68 #endif
69 
70 #include <dev/ldvar.h>
71 
72 #include <prop/proplib.h>
73 
74 static void	ldgetdefaultlabel(struct ld_softc *, struct disklabel *);
75 static void	ldgetdisklabel(struct ld_softc *);
76 static void	ldminphys(struct buf *bp);
77 static bool	ld_shutdown(device_t, int);
78 static void	ldstart(struct ld_softc *, struct buf *);
79 static void	ld_set_properties(struct ld_softc *);
80 static void	ld_config_interrupts (struct device *);
81 
82 extern struct	cfdriver ld_cd;
83 
84 static dev_type_open(ldopen);
85 static dev_type_close(ldclose);
86 static dev_type_read(ldread);
87 static dev_type_write(ldwrite);
88 static dev_type_ioctl(ldioctl);
89 static dev_type_strategy(ldstrategy);
90 static dev_type_dump(lddump);
91 static dev_type_size(ldsize);
92 
93 const struct bdevsw ld_bdevsw = {
94 	ldopen, ldclose, ldstrategy, ldioctl, lddump, ldsize, D_DISK
95 };
96 
97 const struct cdevsw ld_cdevsw = {
98 	ldopen, ldclose, ldread, ldwrite, ldioctl,
99 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
100 };
101 
102 static struct	dkdriver lddkdriver = { ldstrategy, ldminphys };
103 
104 void
105 ldattach(struct ld_softc *sc)
106 {
107 	char tbuf[9];
108 
109 	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_VM);
110 
111 	if ((sc->sc_flags & LDF_ENABLED) == 0) {
112 		aprint_normal_dev(&sc->sc_dv, "disabled\n");
113 		return;
114 	}
115 
116 	/* Initialise and attach the disk structure. */
117 	disk_init(&sc->sc_dk, device_xname(&sc->sc_dv), &lddkdriver);
118 	disk_attach(&sc->sc_dk);
119 
120 	if (sc->sc_maxxfer > MAXPHYS)
121 		sc->sc_maxxfer = MAXPHYS;
122 
123 	/* Build synthetic geometry if necessary. */
124 	if (sc->sc_nheads == 0 || sc->sc_nsectors == 0 ||
125 	    sc->sc_ncylinders == 0) {
126 		uint64_t ncyl;
127 
128 		if (sc->sc_secperunit <= 528 * 2048)		/* 528MB */
129 			sc->sc_nheads = 16;
130 		else if (sc->sc_secperunit <= 1024 * 2048)	/* 1GB */
131 			sc->sc_nheads = 32;
132 		else if (sc->sc_secperunit <= 21504 * 2048)	/* 21GB */
133 			sc->sc_nheads = 64;
134 		else if (sc->sc_secperunit <= 43008 * 2048)	/* 42GB */
135 			sc->sc_nheads = 128;
136 		else
137 			sc->sc_nheads = 255;
138 
139 		sc->sc_nsectors = 63;
140 		sc->sc_ncylinders = INT_MAX;
141 		ncyl = sc->sc_secperunit /
142 		    (sc->sc_nheads * sc->sc_nsectors);
143 		if (ncyl < INT_MAX)
144 			sc->sc_ncylinders = (int)ncyl;
145 	}
146 
147 	format_bytes(tbuf, sizeof(tbuf), sc->sc_secperunit *
148 	    sc->sc_secsize);
149 	aprint_normal_dev(&sc->sc_dv, "%s, %d cyl, %d head, %d sec, %d bytes/sect x %"PRIu64" sectors\n",
150 	    tbuf, sc->sc_ncylinders, sc->sc_nheads,
151 	    sc->sc_nsectors, sc->sc_secsize, sc->sc_secperunit);
152 
153 	ld_set_properties(sc);
154 
155 #if NRND > 0
156 	/* Attach the device into the rnd source list. */
157 	rnd_attach_source(&sc->sc_rnd_source, device_xname(&sc->sc_dv),
158 	    RND_TYPE_DISK, 0);
159 #endif
160 
161 	/* Register with PMF */
162 	if (!pmf_device_register1(&sc->sc_dv, NULL, NULL, ld_shutdown))
163 		aprint_error_dev(&sc->sc_dv,
164 		    "couldn't establish power handler\n");
165 
166 	bufq_alloc(&sc->sc_bufq, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
167 
168 	/* Discover wedges on this disk. */
169 	config_interrupts(&sc->sc_dv, ld_config_interrupts);
170 }
171 
172 int
173 ldadjqparam(struct ld_softc *sc, int xmax)
174 {
175 	int s;
176 
177 	s = splbio();
178 	sc->sc_maxqueuecnt = xmax;
179 	splx(s);
180 
181 	return (0);
182 }
183 
184 int
185 ldbegindetach(struct ld_softc *sc, int flags)
186 {
187 	int s, rv = 0;
188 
189 	if ((sc->sc_flags & LDF_ENABLED) == 0)
190 		return (0);
191 
192 	if ((flags & DETACH_FORCE) == 0 && sc->sc_dk.dk_openmask != 0)
193 		return (EBUSY);
194 
195 	s = splbio();
196 	sc->sc_maxqueuecnt = 0;
197 	sc->sc_flags |= LDF_DETACH;
198 	while (sc->sc_queuecnt > 0) {
199 		sc->sc_flags |= LDF_DRAIN;
200 		rv = tsleep(&sc->sc_queuecnt, PRIBIO, "lddrn", 0);
201 		if (rv)
202 			break;
203 	}
204 	splx(s);
205 
206 	return (rv);
207 }
208 
209 void
210 ldenddetach(struct ld_softc *sc)
211 {
212 	int s, bmaj, cmaj, i, mn;
213 
214 	if ((sc->sc_flags & LDF_ENABLED) == 0)
215 		return;
216 
217 	/* Wait for commands queued with the hardware to complete. */
218 	if (sc->sc_queuecnt != 0)
219 		if (tsleep(&sc->sc_queuecnt, PRIBIO, "lddtch", 30 * hz))
220 			printf("%s: not drained\n", device_xname(&sc->sc_dv));
221 
222 	/* Locate the major numbers. */
223 	bmaj = bdevsw_lookup_major(&ld_bdevsw);
224 	cmaj = cdevsw_lookup_major(&ld_cdevsw);
225 
226 	/* Kill off any queued buffers. */
227 	s = splbio();
228 	bufq_drain(sc->sc_bufq);
229 	splx(s);
230 
231 	bufq_free(sc->sc_bufq);
232 
233 	/* Nuke the vnodes for any open instances. */
234 	for (i = 0; i < MAXPARTITIONS; i++) {
235 		mn = DISKMINOR(device_unit(&sc->sc_dv), i);
236 		vdevgone(bmaj, mn, mn, VBLK);
237 		vdevgone(cmaj, mn, mn, VCHR);
238 	}
239 
240 	/* Delete all of our wedges. */
241 	dkwedge_delall(&sc->sc_dk);
242 
243 	/* Detach from the disk list. */
244 	disk_detach(&sc->sc_dk);
245 	disk_destroy(&sc->sc_dk);
246 
247 #if NRND > 0
248 	/* Unhook the entropy source. */
249 	rnd_detach_source(&sc->sc_rnd_source);
250 #endif
251 
252 	/* Deregister with PMF */
253 	pmf_device_deregister(&sc->sc_dv);
254 
255 	/*
256 	 * XXX We can't really flush the cache here, beceause the
257 	 * XXX device may already be non-existent from the controller's
258 	 * XXX perspective.
259 	 */
260 #if 0
261 	/* Flush the device's cache. */
262 	if (sc->sc_flush != NULL)
263 		if ((*sc->sc_flush)(sc) != 0)
264 			aprint_error_dev(&sc->sc_dv, "unable to flush cache\n");
265 #endif
266 }
267 
268 /* ARGSUSED */
269 static bool
270 ld_shutdown(device_t dev, int flags)
271 {
272 	struct ld_softc *sc = device_private(dev);
273 
274 	if (sc->sc_flush != NULL && (*sc->sc_flush)(sc) != 0) {
275 		printf("%s: unable to flush cache\n", device_xname(dev));
276 		return false;
277 	}
278 
279 	return true;
280 }
281 
282 /* ARGSUSED */
283 static int
284 ldopen(dev_t dev, int flags, int fmt, struct lwp *l)
285 {
286 	struct ld_softc *sc;
287 	int error, unit, part;
288 
289 	unit = DISKUNIT(dev);
290 	if ((sc = device_lookup(&ld_cd, unit)) == NULL)
291 		return (ENXIO);
292 	if ((sc->sc_flags & LDF_ENABLED) == 0)
293 		return (ENODEV);
294 	part = DISKPART(dev);
295 
296 	mutex_enter(&sc->sc_dk.dk_openlock);
297 
298 	if (sc->sc_dk.dk_openmask == 0) {
299 		/* Load the partition info if not already loaded. */
300 		if ((sc->sc_flags & LDF_VLABEL) == 0)
301 			ldgetdisklabel(sc);
302 	}
303 
304 	/* Check that the partition exists. */
305 	if (part != RAW_PART && (part >= sc->sc_dk.dk_label->d_npartitions ||
306 	    sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
307 		error = ENXIO;
308 		goto bad1;
309 	}
310 
311 	/* Ensure only one open at a time. */
312 	switch (fmt) {
313 	case S_IFCHR:
314 		sc->sc_dk.dk_copenmask |= (1 << part);
315 		break;
316 	case S_IFBLK:
317 		sc->sc_dk.dk_bopenmask |= (1 << part);
318 		break;
319 	}
320 	sc->sc_dk.dk_openmask =
321 	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
322 
323 	error = 0;
324  bad1:
325 	mutex_exit(&sc->sc_dk.dk_openlock);
326 	return (error);
327 }
328 
329 /* ARGSUSED */
330 static int
331 ldclose(dev_t dev, int flags, int fmt, struct lwp *l)
332 {
333 	struct ld_softc *sc;
334 	int part, unit;
335 
336 	unit = DISKUNIT(dev);
337 	part = DISKPART(dev);
338 	sc = device_lookup(&ld_cd, unit);
339 
340 	mutex_enter(&sc->sc_dk.dk_openlock);
341 
342 	switch (fmt) {
343 	case S_IFCHR:
344 		sc->sc_dk.dk_copenmask &= ~(1 << part);
345 		break;
346 	case S_IFBLK:
347 		sc->sc_dk.dk_bopenmask &= ~(1 << part);
348 		break;
349 	}
350 	sc->sc_dk.dk_openmask =
351 	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
352 
353 	if (sc->sc_dk.dk_openmask == 0) {
354 		if (sc->sc_flush != NULL && (*sc->sc_flush)(sc) != 0)
355 			aprint_error_dev(&sc->sc_dv, "unable to flush cache\n");
356 		if ((sc->sc_flags & LDF_KLABEL) == 0)
357 			sc->sc_flags &= ~LDF_VLABEL;
358 	}
359 
360 	mutex_exit(&sc->sc_dk.dk_openlock);
361 	return (0);
362 }
363 
364 /* ARGSUSED */
365 static int
366 ldread(dev_t dev, struct uio *uio, int ioflag)
367 {
368 
369 	return (physio(ldstrategy, NULL, dev, B_READ, ldminphys, uio));
370 }
371 
372 /* ARGSUSED */
373 static int
374 ldwrite(dev_t dev, struct uio *uio, int ioflag)
375 {
376 
377 	return (physio(ldstrategy, NULL, dev, B_WRITE, ldminphys, uio));
378 }
379 
380 /* ARGSUSED */
381 static int
382 ldioctl(dev_t dev, u_long cmd, void *addr, int32_t flag, struct lwp *l)
383 {
384 	struct ld_softc *sc;
385 	int part, unit, error;
386 #ifdef __HAVE_OLD_DISKLABEL
387 	struct disklabel newlabel;
388 #endif
389 	struct disklabel *lp;
390 
391 	unit = DISKUNIT(dev);
392 	part = DISKPART(dev);
393 	sc = device_lookup(&ld_cd, unit);
394 
395 	error = disk_ioctl(&sc->sc_dk, cmd, addr, flag, l);
396 	if (error != EPASSTHROUGH)
397 		return (error);
398 
399 	error = 0;
400 	switch (cmd) {
401 	case DIOCGDINFO:
402 		memcpy(addr, sc->sc_dk.dk_label, sizeof(struct disklabel));
403 		return (0);
404 
405 #ifdef __HAVE_OLD_DISKLABEL
406 	case ODIOCGDINFO:
407 		newlabel = *(sc->sc_dk.dk_label);
408 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
409 			return ENOTTY;
410 		memcpy(addr, &newlabel, sizeof(struct olddisklabel));
411 		return (0);
412 #endif
413 
414 	case DIOCGPART:
415 		((struct partinfo *)addr)->disklab = sc->sc_dk.dk_label;
416 		((struct partinfo *)addr)->part =
417 		    &sc->sc_dk.dk_label->d_partitions[part];
418 		break;
419 
420 	case DIOCWDINFO:
421 	case DIOCSDINFO:
422 #ifdef __HAVE_OLD_DISKLABEL
423 	case ODIOCWDINFO:
424 	case ODIOCSDINFO:
425 
426 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
427 			memset(&newlabel, 0, sizeof newlabel);
428 			memcpy(&newlabel, addr, sizeof (struct olddisklabel));
429 			lp = &newlabel;
430 		} else
431 #endif
432 		lp = (struct disklabel *)addr;
433 
434 		if ((flag & FWRITE) == 0)
435 			return (EBADF);
436 
437 		mutex_enter(&sc->sc_dk.dk_openlock);
438 		sc->sc_flags |= LDF_LABELLING;
439 
440 		error = setdisklabel(sc->sc_dk.dk_label,
441 		    lp, /*sc->sc_dk.dk_openmask : */0,
442 		    sc->sc_dk.dk_cpulabel);
443 		if (error == 0 && (cmd == DIOCWDINFO
444 #ifdef __HAVE_OLD_DISKLABEL
445 		    || cmd == ODIOCWDINFO
446 #endif
447 		    ))
448 			error = writedisklabel(
449 			    MAKEDISKDEV(major(dev), DISKUNIT(dev), RAW_PART),
450 			    ldstrategy, sc->sc_dk.dk_label,
451 			    sc->sc_dk.dk_cpulabel);
452 
453 		sc->sc_flags &= ~LDF_LABELLING;
454 		mutex_exit(&sc->sc_dk.dk_openlock);
455 		break;
456 
457 	case DIOCKLABEL:
458 		if ((flag & FWRITE) == 0)
459 			return (EBADF);
460 		if (*(int *)addr)
461 			sc->sc_flags |= LDF_KLABEL;
462 		else
463 			sc->sc_flags &= ~LDF_KLABEL;
464 		break;
465 
466 	case DIOCWLABEL:
467 		if ((flag & FWRITE) == 0)
468 			return (EBADF);
469 		if (*(int *)addr)
470 			sc->sc_flags |= LDF_WLABEL;
471 		else
472 			sc->sc_flags &= ~LDF_WLABEL;
473 		break;
474 
475 	case DIOCGDEFLABEL:
476 		ldgetdefaultlabel(sc, (struct disklabel *)addr);
477 		break;
478 
479 #ifdef __HAVE_OLD_DISKLABEL
480 	case ODIOCGDEFLABEL:
481 		ldgetdefaultlabel(sc, &newlabel);
482 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
483 			return ENOTTY;
484 		memcpy(addr, &newlabel, sizeof (struct olddisklabel));
485 		break;
486 #endif
487 
488 	case DIOCCACHESYNC:
489 		/*
490 		 * XXX Do we really need to care about having a writable
491 		 * file descriptor here?
492 		 */
493 		if ((flag & FWRITE) == 0)
494 			error = EBADF;
495 		else if (sc->sc_flush)
496 			error = (*sc->sc_flush)(sc);
497 		else
498 			error = 0;	/* XXX Error out instead? */
499 		break;
500 
501 	case DIOCAWEDGE:
502 	    {
503 	    	struct dkwedge_info *dkw = (void *) addr;
504 
505 		if ((flag & FWRITE) == 0)
506 			return (EBADF);
507 
508 		/* If the ioctl happens here, the parent is us. */
509 		strlcpy(dkw->dkw_parent, device_xname(&sc->sc_dv),
510 			sizeof(dkw->dkw_parent));
511 		return (dkwedge_add(dkw));
512 	    }
513 
514 	case DIOCDWEDGE:
515 	    {
516 	    	struct dkwedge_info *dkw = (void *) addr;
517 
518 		if ((flag & FWRITE) == 0)
519 			return (EBADF);
520 
521 		/* If the ioctl happens here, the parent is us. */
522 		strlcpy(dkw->dkw_parent, device_xname(&sc->sc_dv),
523 			sizeof(dkw->dkw_parent));
524 		return (dkwedge_del(dkw));
525 	    }
526 
527 	case DIOCLWEDGES:
528 	    {
529 	    	struct dkwedge_list *dkwl = (void *) addr;
530 
531 		return (dkwedge_list(&sc->sc_dk, dkwl, l));
532 	    }
533 	case DIOCGSTRATEGY:
534 	    {
535 		struct disk_strategy *dks = (void *)addr;
536 
537 		mutex_enter(&sc->sc_mutex);
538 		strlcpy(dks->dks_name, bufq_getstrategyname(sc->sc_bufq),
539 		    sizeof(dks->dks_name));
540 		mutex_exit(&sc->sc_mutex);
541 		dks->dks_paramlen = 0;
542 
543 		return 0;
544 	    }
545 	case DIOCSSTRATEGY:
546 	    {
547 		struct disk_strategy *dks = (void *)addr;
548 		struct bufq_state *new, *old;
549 
550 		if ((flag & FWRITE) == 0)
551 			return EPERM;
552 
553 		if (dks->dks_param != NULL)
554 			return EINVAL;
555 
556 		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
557 		error = bufq_alloc(&new, dks->dks_name,
558 		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
559 		if (error)
560 			return error;
561 
562 		mutex_enter(&sc->sc_mutex);
563 		old = sc->sc_bufq;
564 		bufq_move(new, old);
565 		sc->sc_bufq = new;
566 		mutex_exit(&sc->sc_mutex);
567 		bufq_free(old);
568 
569 		return 0;
570 	    }
571 	default:
572 		error = ENOTTY;
573 		break;
574 	}
575 
576 	return (error);
577 }
578 
579 static void
580 ldstrategy(struct buf *bp)
581 {
582 	struct ld_softc *sc;
583 	struct disklabel *lp;
584 	daddr_t blkno;
585 	int s, part;
586 
587 	sc = device_lookup(&ld_cd, DISKUNIT(bp->b_dev));
588 	part = DISKPART(bp->b_dev);
589 
590 	if ((sc->sc_flags & LDF_DETACH) != 0) {
591 		bp->b_error = EIO;
592 		goto done;
593 	}
594 
595 	lp = sc->sc_dk.dk_label;
596 
597 	/*
598 	 * The transfer must be a whole number of blocks and the offset must
599 	 * not be negative.
600 	 */
601 	if ((bp->b_bcount % lp->d_secsize) != 0 || bp->b_blkno < 0) {
602 		bp->b_error = EINVAL;
603 		goto done;
604 	}
605 
606 	/* If it's a null transfer, return immediately. */
607 	if (bp->b_bcount == 0)
608 		goto done;
609 
610 	/*
611 	 * Do bounds checking and adjust the transfer.  If error, process.
612 	 * If past the end of partition, just return.
613 	 */
614 	if (part != RAW_PART &&
615 	    bounds_check_with_label(&sc->sc_dk, bp,
616 	    (sc->sc_flags & (LDF_WLABEL | LDF_LABELLING)) != 0) <= 0) {
617 		goto done;
618 	}
619 
620 	/*
621 	 * Convert the block number to absolute and put it in terms
622 	 * of the device's logical block size.
623 	 */
624 	if (lp->d_secsize == DEV_BSIZE)
625 		blkno = bp->b_blkno;
626 	else if (lp->d_secsize > DEV_BSIZE)
627 		blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
628 	else
629 		blkno = bp->b_blkno * (DEV_BSIZE / lp->d_secsize);
630 
631 	if (part != RAW_PART)
632 		blkno += lp->d_partitions[part].p_offset;
633 
634 	bp->b_rawblkno = blkno;
635 
636 	s = splbio();
637 	ldstart(sc, bp);
638 	splx(s);
639 	return;
640 
641  done:
642 	bp->b_resid = bp->b_bcount;
643 	biodone(bp);
644 }
645 
646 static void
647 ldstart(struct ld_softc *sc, struct buf *bp)
648 {
649 	int error;
650 
651 	mutex_enter(&sc->sc_mutex);
652 
653 	if (bp != NULL)
654 		BUFQ_PUT(sc->sc_bufq, bp);
655 
656 	while (sc->sc_queuecnt < sc->sc_maxqueuecnt) {
657 		/* See if there is work to do. */
658 		if ((bp = BUFQ_PEEK(sc->sc_bufq)) == NULL)
659 			break;
660 
661 		disk_busy(&sc->sc_dk);
662 		sc->sc_queuecnt++;
663 
664 		if (__predict_true((error = (*sc->sc_start)(sc, bp)) == 0)) {
665 			/*
666 			 * The back-end is running the job; remove it from
667 			 * the queue.
668 			 */
669 			(void) BUFQ_GET(sc->sc_bufq);
670 		} else  {
671 			disk_unbusy(&sc->sc_dk, 0, (bp->b_flags & B_READ));
672 			sc->sc_queuecnt--;
673 			if (error == EAGAIN) {
674 				/*
675 				 * Temporary resource shortage in the
676 				 * back-end; just defer the job until
677 				 * later.
678 				 *
679 				 * XXX We might consider a watchdog timer
680 				 * XXX to make sure we are kicked into action.
681 				 */
682 				break;
683 			} else {
684 				(void) BUFQ_GET(sc->sc_bufq);
685 				bp->b_error = error;
686 				bp->b_resid = bp->b_bcount;
687 				mutex_exit(&sc->sc_mutex);
688 				biodone(bp);
689 				mutex_enter(&sc->sc_mutex);
690 			}
691 		}
692 	}
693 
694 	mutex_exit(&sc->sc_mutex);
695 }
696 
697 void
698 lddone(struct ld_softc *sc, struct buf *bp)
699 {
700 
701 	if (bp->b_error != 0) {
702 		diskerr(bp, "ld", "error", LOG_PRINTF, 0, sc->sc_dk.dk_label);
703 		printf("\n");
704 	}
705 
706 	disk_unbusy(&sc->sc_dk, bp->b_bcount - bp->b_resid,
707 	    (bp->b_flags & B_READ));
708 #if NRND > 0
709 	rnd_add_uint32(&sc->sc_rnd_source, bp->b_rawblkno);
710 #endif
711 	biodone(bp);
712 
713 	mutex_enter(&sc->sc_mutex);
714 	if (--sc->sc_queuecnt <= sc->sc_maxqueuecnt) {
715 		if ((sc->sc_flags & LDF_DRAIN) != 0) {
716 			sc->sc_flags &= ~LDF_DRAIN;
717 			wakeup(&sc->sc_queuecnt);
718 		}
719 		mutex_exit(&sc->sc_mutex);
720 		ldstart(sc, NULL);
721 	} else
722 		mutex_exit(&sc->sc_mutex);
723 }
724 
725 static int
726 ldsize(dev_t dev)
727 {
728 	struct ld_softc *sc;
729 	int part, unit, omask, size;
730 
731 	unit = DISKUNIT(dev);
732 	if ((sc = device_lookup(&ld_cd, unit)) == NULL)
733 		return (ENODEV);
734 	if ((sc->sc_flags & LDF_ENABLED) == 0)
735 		return (ENODEV);
736 	part = DISKPART(dev);
737 
738 	omask = sc->sc_dk.dk_openmask & (1 << part);
739 
740 	if (omask == 0 && ldopen(dev, 0, S_IFBLK, NULL) != 0)
741 		return (-1);
742 	else if (sc->sc_dk.dk_label->d_partitions[part].p_fstype != FS_SWAP)
743 		size = -1;
744 	else
745 		size = sc->sc_dk.dk_label->d_partitions[part].p_size *
746 		    (sc->sc_dk.dk_label->d_secsize / DEV_BSIZE);
747 	if (omask == 0 && ldclose(dev, 0, S_IFBLK, NULL) != 0)
748 		return (-1);
749 
750 	return (size);
751 }
752 
753 /*
754  * Load the label information from the specified device.
755  */
756 static void
757 ldgetdisklabel(struct ld_softc *sc)
758 {
759 	const char *errstring;
760 
761 	ldgetdefaultlabel(sc, sc->sc_dk.dk_label);
762 
763 	/* Call the generic disklabel extraction routine. */
764 	errstring = readdisklabel(MAKEDISKDEV(0, device_unit(&sc->sc_dv),
765 	    RAW_PART), ldstrategy, sc->sc_dk.dk_label, sc->sc_dk.dk_cpulabel);
766 	if (errstring != NULL)
767 		printf("%s: %s\n", device_xname(&sc->sc_dv), errstring);
768 
769 	/* In-core label now valid. */
770 	sc->sc_flags |= LDF_VLABEL;
771 }
772 
773 /*
774  * Construct a ficticious label.
775  */
776 static void
777 ldgetdefaultlabel(struct ld_softc *sc, struct disklabel *lp)
778 {
779 
780 	memset(lp, 0, sizeof(struct disklabel));
781 
782 	lp->d_secsize = sc->sc_secsize;
783 	lp->d_ntracks = sc->sc_nheads;
784 	lp->d_nsectors = sc->sc_nsectors;
785 	lp->d_ncylinders = sc->sc_ncylinders;
786 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
787 	lp->d_type = DTYPE_LD;
788 	strlcpy(lp->d_typename, "unknown", sizeof(lp->d_typename));
789 	strlcpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
790 	lp->d_secperunit = sc->sc_secperunit;
791 	lp->d_rpm = 7200;
792 	lp->d_interleave = 1;
793 	lp->d_flags = 0;
794 
795 	lp->d_partitions[RAW_PART].p_offset = 0;
796 	lp->d_partitions[RAW_PART].p_size =
797 	    lp->d_secperunit * (lp->d_secsize / DEV_BSIZE);
798 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
799 	lp->d_npartitions = RAW_PART + 1;
800 
801 	lp->d_magic = DISKMAGIC;
802 	lp->d_magic2 = DISKMAGIC;
803 	lp->d_checksum = dkcksum(lp);
804 }
805 
806 /*
807  * Take a dump.
808  */
809 static int
810 lddump(dev_t dev, daddr_t blkno, void *vav, size_t size)
811 {
812 	char *va = vav;
813 	struct ld_softc *sc;
814 	struct disklabel *lp;
815 	int unit, part, nsects, sectoff, towrt, nblk, maxblkcnt, rv;
816 	static int dumping;
817 
818 	unit = DISKUNIT(dev);
819 	if ((sc = device_lookup(&ld_cd, unit)) == NULL)
820 		return (ENXIO);
821 	if ((sc->sc_flags & LDF_ENABLED) == 0)
822 		return (ENODEV);
823 	if (sc->sc_dump == NULL)
824 		return (ENXIO);
825 
826 	/* Check if recursive dump; if so, punt. */
827 	if (dumping)
828 		return (EFAULT);
829 	dumping = 1;
830 
831 	/* Convert to disk sectors.  Request must be a multiple of size. */
832 	part = DISKPART(dev);
833 	lp = sc->sc_dk.dk_label;
834 	if ((size % lp->d_secsize) != 0)
835 		return (EFAULT);
836 	towrt = size / lp->d_secsize;
837 	blkno = dbtob(blkno) / lp->d_secsize;	/* blkno in DEV_BSIZE units */
838 
839 	nsects = lp->d_partitions[part].p_size;
840 	sectoff = lp->d_partitions[part].p_offset;
841 
842 	/* Check transfer bounds against partition size. */
843 	if ((blkno < 0) || ((blkno + towrt) > nsects))
844 		return (EINVAL);
845 
846 	/* Offset block number to start of partition. */
847 	blkno += sectoff;
848 
849 	/* Start dumping and return when done. */
850 	maxblkcnt = sc->sc_maxxfer / sc->sc_secsize - 1;
851 	while (towrt > 0) {
852 		nblk = min(maxblkcnt, towrt);
853 
854 		if ((rv = (*sc->sc_dump)(sc, va, blkno, nblk)) != 0)
855 			return (rv);
856 
857 		towrt -= nblk;
858 		blkno += nblk;
859 		va += nblk * sc->sc_secsize;
860 	}
861 
862 	dumping = 0;
863 	return (0);
864 }
865 
866 /*
867  * Adjust the size of a transfer.
868  */
869 static void
870 ldminphys(struct buf *bp)
871 {
872 	struct ld_softc *sc;
873 
874 	sc = device_lookup(&ld_cd, DISKUNIT(bp->b_dev));
875 
876 	if (bp->b_bcount > sc->sc_maxxfer)
877 		bp->b_bcount = sc->sc_maxxfer;
878 	minphys(bp);
879 }
880 
881 static void
882 ld_set_properties(struct ld_softc *ld)
883 {
884 	prop_dictionary_t disk_info, odisk_info, geom;
885 
886 	disk_info = prop_dictionary_create();
887 
888 	geom = prop_dictionary_create();
889 
890 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
891 	    ld->sc_secperunit);
892 
893 	prop_dictionary_set_uint32(geom, "sector-size",
894 	    ld->sc_secsize);
895 
896 	prop_dictionary_set_uint16(geom, "sectors-per-track",
897 	    ld->sc_nsectors);
898 
899 	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
900 	    ld->sc_nheads);
901 
902 	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
903 	    ld->sc_ncylinders);
904 
905 	prop_dictionary_set(disk_info, "geometry", geom);
906 	prop_object_release(geom);
907 
908 	prop_dictionary_set(device_properties(&ld->sc_dv),
909 	    "disk-info", disk_info);
910 
911 	/*
912 	 * Don't release disk_info here; we keep a reference to it.
913 	 * disk_detach() will release it when we go away.
914 	 */
915 
916 	odisk_info = ld->sc_dk.dk_info;
917 	ld->sc_dk.dk_info = disk_info;
918 	if (odisk_info)
919 		prop_object_release(odisk_info);
920 }
921 
922 static void
923 ld_config_interrupts (struct device *d)
924 {
925 	struct ld_softc *sc = (struct ld_softc *)d;
926 	dkwedge_discover(&sc->sc_dk);
927 }
928