/* $NetBSD: dksubr.c,v 1.100 2017/10/29 09:44:17 mlelstv Exp $ */

/*-
 * Copyright (c) 1996, 1997, 1998, 1999, 2002, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Roland C. Dowdeswell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: dksubr.c,v 1.100 2017/10/29 09:44:17 mlelstv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/namei.h>
#include <sys/module.h>
#include <sys/syslog.h>

#include <dev/dkvar.h>
#include <miscfs/specfs/specdev.h> /* for v_rdev */

int	dkdebug = 0;

#ifdef DEBUG
#define DKDB_FOLLOW	0x1
#define DKDB_INIT	0x2
#define DKDB_VNODE	0x4
#define DKDB_DUMP	0x8

#define IFDEBUG(x,y)		if (dkdebug & (x)) y
#define DPRINTF(x,y)		IFDEBUG(x, printf y)
#define DPRINTF_FOLLOW(y)	DPRINTF(DKDB_FOLLOW, y)
#else
#define IFDEBUG(x,y)
#define DPRINTF(x,y)
#define DPRINTF_FOLLOW(y)
#endif

#define DKF_READYFORDUMP	(DKF_INITED|DKF_TAKEDUMP)

static int dk_subr_modcmd(modcmd_t, void *);

#define DKLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), DISKUNIT((dev)), RAW_PART))

static void	dk_makedisklabel(struct dk_softc *);
static int	dk_translate(struct dk_softc *, struct buf *);
static void	dk_done1(struct dk_softc *, struct buf *, bool);

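/*
 * dk_init --
 *	Reset the common "dk" softc state and record the device type,
 *	autoconf device and external name.  A wrapper driver is expected
 *	to call this before dk_attach() and before registering the disk
 *	with disk_attach(9).
 *
 * A rough attachment sketch (the DKTYPE_* constant, the dkdriver and
 * the bufq strategy below are illustrative placeholders, not something
 * this file prescribes):
 *
 *	dk_init(dksc, self, DKTYPE_UNKNOWN);
 *	disk_init(&dksc->sc_dkdev, dksc->sc_xname, &mydkdriver);
 *	dk_attach(dksc);
 *	disk_attach(&dksc->sc_dkdev);
 *	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);
 *	dksc->sc_flags |= DKF_INITED;
 */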
void
dk_init(struct dk_softc *dksc, device_t dev, int dtype)
{

	memset(dksc, 0x0, sizeof(*dksc));
	dksc->sc_dtype = dtype;
	dksc->sc_dev = dev;

	strlcpy(dksc->sc_xname, device_xname(dev), DK_XNAME_SIZE);
	dksc->sc_dkdev.dk_name = dksc->sc_xname;
}

void
dk_attach(struct dk_softc *dksc)
{
	KASSERT(dksc->sc_dev != NULL);

	mutex_init(&dksc->sc_iolock, MUTEX_DEFAULT, IPL_VM);
	dksc->sc_flags |= DKF_READYFORDUMP;
#ifdef DIAGNOSTIC
	dksc->sc_flags |= DKF_WARNLABEL | DKF_LABELSANITY;
#endif

	if ((dksc->sc_flags & DKF_NO_RND) == 0) {
		/* Attach the device into the rnd source list. */
		rnd_attach_source(&dksc->sc_rnd_source, dksc->sc_xname,
		    RND_TYPE_DISK, RND_FLAG_DEFAULT);
	}
}

void
dk_detach(struct dk_softc *dksc)
{
	if ((dksc->sc_flags & DKF_NO_RND) == 0) {
		/* Unhook the entropy source. */
		rnd_detach_source(&dksc->sc_rnd_source);
	}

	dksc->sc_flags &= ~DKF_READYFORDUMP;
	mutex_destroy(&dksc->sc_iolock);
}

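/*
 * dk_open --
 *	Generic open routine.  Fails with EBUSY if wedges are configured
 *	and a non-raw partition is requested, runs the driver's
 *	d_firstopen hook for the first opener, reads the disklabel if it
 *	is not valid yet, checks that the requested partition exists,
 *	and finally records the partition in the char/block open masks.
 */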
/* ARGSUSED */
int
dk_open(struct dk_softc *dksc, dev_t dev,
    int flags, int fmt, struct lwp *l)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	struct	disklabel *lp = dksc->sc_dkdev.dk_label;
	int	part = DISKPART(dev);
	int	pmask = 1 << part;
	int	ret = 0;
	struct disk *dk = &dksc->sc_dkdev;

	DPRINTF_FOLLOW(("%s(%s, %p, 0x%"PRIx64", 0x%x)\n", __func__,
	    dksc->sc_xname, dksc, dev, flags));

	mutex_enter(&dk->dk_openlock);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (dk->dk_nwedges != 0 && part != RAW_PART) {
		ret = EBUSY;
		goto done;
	}

	/*
	 * initialize driver for the first opener
	 */
	if (dk->dk_openmask == 0 && dkd->d_firstopen != NULL) {
		ret = (*dkd->d_firstopen)(dksc->sc_dev, dev, flags, fmt);
		if (ret)
			goto done;
	}

	/*
	 * If we're init'ed and the in-core disklabel isn't valid yet,
	 * then read it now.
	 */
	if ((dksc->sc_flags & DKF_INITED)) {
		if ((dksc->sc_flags & DKF_VLABEL) == 0) {
			dksc->sc_flags |= DKF_VLABEL;
			dk_getdisklabel(dksc, dev);
		}
	}

	/* Fail if we can't find the partition. */
	if (part != RAW_PART &&
	    ((dksc->sc_flags & DKF_VLABEL) == 0 ||
	     part >= lp->d_npartitions ||
	     lp->d_partitions[part].p_fstype == FS_UNUSED)) {
		ret = ENXIO;
		goto done;
	}

	/* Mark our unit as open. */
	switch (fmt) {
	case S_IFCHR:
		dk->dk_copenmask |= pmask;
		break;
	case S_IFBLK:
		dk->dk_bopenmask |= pmask;
		break;
	}

	dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;

done:
	mutex_exit(&dk->dk_openlock);
	return ret;
}

/* ARGSUSED */
int
dk_close(struct dk_softc *dksc, dev_t dev,
    int flags, int fmt, struct lwp *l)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	int	part = DISKPART(dev);
	int	pmask = 1 << part;
	struct disk *dk = &dksc->sc_dkdev;

	DPRINTF_FOLLOW(("%s(%s, %p, 0x%"PRIx64", 0x%x)\n", __func__,
	    dksc->sc_xname, dksc, dev, flags));

	mutex_enter(&dk->dk_openlock);

	switch (fmt) {
	case S_IFCHR:
		dk->dk_copenmask &= ~pmask;
		break;
	case S_IFBLK:
		dk->dk_bopenmask &= ~pmask;
		break;
	}
	dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;

	if (dk->dk_openmask == 0) {
		if (dkd->d_lastclose != NULL)
			(*dkd->d_lastclose)(dksc->sc_dev);
		if ((dksc->sc_flags & DKF_KLABEL) == 0)
			dksc->sc_flags &= ~DKF_VLABEL;
	}

	mutex_exit(&dk->dk_openlock);
	return 0;
}

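/*
 * dk_translate --
 *	Validate a buffer and translate its block address.  The request
 *	must be a whole number of sectors and within the partition (or
 *	within the media for RAW_PART).  b_blkno, given in DEV_BSIZE
 *	units, is converted to an absolute sector number in the device's
 *	logical sector size and stored in b_rawblkno.  Returns -1 if the
 *	buffer should be processed; otherwise the buffer is finished and
 *	b_error (possibly 0) is returned, and the caller must biodone()
 *	it.
 */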
static int
dk_translate(struct dk_softc *dksc, struct buf *bp)
{
	int	part;
	int	wlabel;
	daddr_t	blkno;
	struct disklabel *lp;
	struct disk *dk;
	uint64_t numsecs;
	unsigned secsize;

	lp = dksc->sc_dkdev.dk_label;
	dk = &dksc->sc_dkdev;

	part = DISKPART(bp->b_dev);
	numsecs = dk->dk_geom.dg_secperunit;
	secsize = dk->dk_geom.dg_secsize;

	/*
	 * The transfer must be a whole number of blocks and the offset must
	 * not be negative.
	 */
	if ((bp->b_bcount % secsize) != 0 || bp->b_blkno < 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/* If there is nothing to do, then we are done */
	if (bp->b_bcount == 0)
		goto done;

	wlabel = dksc->sc_flags & (DKF_WLABEL|DKF_LABELLING);
	if (part == RAW_PART) {
		uint64_t numblocks = btodb(numsecs * secsize);
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, numblocks) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&dksc->sc_dkdev, bp, wlabel) <= 0)
			goto done;
	}

	/*
	 * Convert the block number to absolute and put it in terms
	 * of the device's logical block size.
	 */
	if (secsize >= DEV_BSIZE)
		blkno = bp->b_blkno / (secsize / DEV_BSIZE);
	else
		blkno = bp->b_blkno * (DEV_BSIZE / secsize);

	if (part != RAW_PART)
		blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset;
	bp->b_rawblkno = blkno;

	return -1;

done:
	bp->b_resid = bp->b_bcount;
	return bp->b_error;
}

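/*
 * dk_strategy1 --
 *	Common entry point for dk_strategy() and dk_strategy_defer().
 *	Completes the buffer immediately (with biodone()) if the softc
 *	is not initialized or dk_translate() rejects it, and returns
 *	non-zero in that case.  dk_strategy() then queues and starts the
 *	transfer, while dk_strategy_defer() only queues it so that the
 *	driver can run the queue later via dk_start().
 *	dk_strategy_pending() reports whether anything is queued.
 */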
static int
dk_strategy1(struct dk_softc *dksc, struct buf *bp)
{
	int error;

	DPRINTF_FOLLOW(("%s(%s, %p, %p)\n", __func__,
	    dksc->sc_xname, dksc, bp));

	if (!(dksc->sc_flags & DKF_INITED)) {
		DPRINTF_FOLLOW(("%s: not inited\n", __func__));
		bp->b_error = ENXIO;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return 1;
	}

	error = dk_translate(dksc, bp);
	if (error >= 0) {
		biodone(bp);
		return 1;
	}

	return 0;
}

void
dk_strategy(struct dk_softc *dksc, struct buf *bp)
{
	int error;

	error = dk_strategy1(dksc, bp);
	if (error)
		return;

	/*
	 * Queue buffer and start unit
	 */
	dk_start(dksc, bp);
}

int
dk_strategy_defer(struct dk_softc *dksc, struct buf *bp)
{
	int error;

	error = dk_strategy1(dksc, bp);
	if (error)
		return error;

	/*
	 * Queue buffer only
	 */
	mutex_enter(&dksc->sc_iolock);
	disk_wait(&dksc->sc_dkdev);
	bufq_put(dksc->sc_bufq, bp);
	mutex_exit(&dksc->sc_iolock);

	return 0;
}

int
dk_strategy_pending(struct dk_softc *dksc)
{
	struct buf *bp;

	if (!(dksc->sc_flags & DKF_INITED)) {
		DPRINTF_FOLLOW(("%s: not inited\n", __func__));
		return 0;
	}

	mutex_enter(&dksc->sc_iolock);
	bp = bufq_peek(dksc->sc_bufq);
	mutex_exit(&dksc->sc_iolock);

	return bp != NULL;
}

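/*
 * dk_start --
 *	Queue the buffer (if not NULL) and feed the queue to the
 *	driver's d_diskstart routine.  A two-level busy counter lets a
 *	concurrent caller request that the already-running instance
 *	rescan the queue instead of running it a second time.  When
 *	d_diskstart returns EAGAIN the buffer is parked in sc_deferred
 *	and retried before anything else is dequeued.
 */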
void
dk_start(struct dk_softc *dksc, struct buf *bp)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	int error;

	if (!(dksc->sc_flags & DKF_INITED)) {
		DPRINTF_FOLLOW(("%s: not inited\n", __func__));
		return;
	}

	mutex_enter(&dksc->sc_iolock);

	if (bp != NULL) {
		disk_wait(&dksc->sc_dkdev);
		bufq_put(dksc->sc_bufq, bp);
	}

	/*
	 * If another thread is running the queue, increment
	 * busy counter to 2 so that the queue is retried,
	 * because the driver may now accept additional
	 * requests.
	 */
	if (dksc->sc_busy < 2)
		dksc->sc_busy++;
	if (dksc->sc_busy > 1)
		goto done;

	/*
	 * Peeking at the buffer queue and committing the operation
	 * only after success isn't atomic.
	 *
	 * So when a diskstart fails, the buffer is saved
	 * and tried again before the next buffer is fetched.
	 * dk_drain() handles flushing of a saved buffer.
	 *
	 * This keeps the order of I/O operations, which would be
	 * lost if the failed buffer were simply re-queued with
	 * bufq_put().
	 */

	while (dksc->sc_busy > 0) {

		bp = dksc->sc_deferred;
		dksc->sc_deferred = NULL;

		if (bp == NULL)
			bp = bufq_get(dksc->sc_bufq);

		while (bp != NULL) {

			disk_busy(&dksc->sc_dkdev);
			mutex_exit(&dksc->sc_iolock);
			error = dkd->d_diskstart(dksc->sc_dev, bp);
			mutex_enter(&dksc->sc_iolock);
			if (error == EAGAIN) {
				dksc->sc_deferred = bp;
				disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
				disk_wait(&dksc->sc_dkdev);
				break;
			}

			if (error != 0) {
				bp->b_error = error;
				bp->b_resid = bp->b_bcount;
				dk_done1(dksc, bp, false);
			}

			bp = bufq_get(dksc->sc_bufq);
		}

		dksc->sc_busy--;
	}
done:
	mutex_exit(&dksc->sc_iolock);
}

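/*
 * dk_done1 --
 *	I/O completion.  Logs a diskerr() for failed transfers, updates
 *	the disk(9) statistics (optionally taking the I/O lock), feeds
 *	the block number to the random subsystem and calls biodone().
 *	Drivers call the dk_done() wrapper, which always takes the lock.
 */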
static void
dk_done1(struct dk_softc *dksc, struct buf *bp, bool lock)
{
	struct disk *dk = &dksc->sc_dkdev;

	if (bp->b_error != 0) {
		struct cfdriver *cd = device_cfdriver(dksc->sc_dev);

		diskerr(bp, cd->cd_name, "error", LOG_PRINTF, 0,
			dk->dk_label);
		printf("\n");
	}

	if (lock)
		mutex_enter(&dksc->sc_iolock);
	disk_unbusy(dk, bp->b_bcount - bp->b_resid, (bp->b_flags & B_READ));
	if (lock)
		mutex_exit(&dksc->sc_iolock);

	if ((dksc->sc_flags & DKF_NO_RND) == 0)
		rnd_add_uint32(&dksc->sc_rnd_source, bp->b_rawblkno);

	biodone(bp);
}

void
dk_done(struct dk_softc *dksc, struct buf *bp)
{
	dk_done1(dksc, bp, true);
}

void
dk_drain(struct dk_softc *dksc)
{
	struct buf *bp;

	mutex_enter(&dksc->sc_iolock);
	bp = dksc->sc_deferred;
	dksc->sc_deferred = NULL;
	if (bp != NULL) {
		bp->b_error = EIO;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
	}
	bufq_drain(dksc->sc_bufq);
	mutex_exit(&dksc->sc_iolock);
}

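/*
 * dk_discard --
 *	Translate a byte range into device sectors and pass it to the
 *	driver's d_discard routine, splitting the range into chunks
 *	small enough to fit into b_bcount.  The range must be sector
 *	aligned.
 */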
int
dk_discard(struct dk_softc *dksc, dev_t dev, off_t pos, off_t len)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	unsigned secsize = dksc->sc_dkdev.dk_geom.dg_secsize;
	struct buf tmp, *bp = &tmp;
	int maxsz;
	int error = 0;

	KASSERT(len >= 0);

	DPRINTF_FOLLOW(("%s(%s, %p, 0x%"PRIx64", %jd, %jd)\n", __func__,
	    dksc->sc_xname, dksc, dev, (intmax_t)pos, (intmax_t)len));

	if (!(dksc->sc_flags & DKF_INITED)) {
		DPRINTF_FOLLOW(("%s: not inited\n", __func__));
		return ENXIO;
	}

	if (secsize == 0 || (pos % secsize) != 0 || (len % secsize) != 0)
		return EINVAL;

	/* largest value that b_bcount can store */
	maxsz = rounddown(INT_MAX, secsize);

	while (len > 0) {
		/* enough data to please the bounds checking code */
		bp->b_dev = dev;
		bp->b_blkno = (daddr_t)(pos / secsize);
		bp->b_bcount = min(len, maxsz);
		bp->b_flags = B_WRITE;

		error = dk_translate(dksc, bp);
		if (error >= 0)
			break;

		error = dkd->d_discard(dksc->sc_dev,
			(off_t)bp->b_rawblkno * secsize,
			(off_t)bp->b_bcount);
		if (error)
			break;

		pos += bp->b_bcount;
		len -= bp->b_bcount;
	}

	return error;
}

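/*
 * dk_size --
 *	Return the size of the given partition in DEV_BSIZE units, for
 *	use as a dump device.  Only FS_SWAP partitions qualify; -1 is
 *	returned otherwise or if the device cannot be opened.
 */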
int
dk_size(struct dk_softc *dksc, dev_t dev)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	struct	disklabel *lp;
	int	is_open;
	int	part;
	int	size;

	if ((dksc->sc_flags & DKF_INITED) == 0)
		return -1;

	part = DISKPART(dev);
	is_open = dksc->sc_dkdev.dk_openmask & (1 << part);

	if (!is_open && dkd->d_open(dev, 0, S_IFBLK, curlwp))
		return -1;

	lp = dksc->sc_dkdev.dk_label;
	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (!is_open && dkd->d_close(dev, 0, S_IFBLK, curlwp))
		return -1;

	return size;
}

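/*
 * dk_ioctl --
 *	Generic ioctl handling: permission and initialization checks,
 *	then disk_ioctl(9) for the common disk ioctls, then the label
 *	and buffer-queue-strategy ioctls handled here.  Anything else
 *	returns ENOTTY, so a wrapper driver can layer its own ioctls on
 *	top, for example (sketch only):
 *
 *		error = dk_ioctl(dksc, dev, cmd, data, flag, l);
 *		if (error != ENOTTY)
 *			return error;
 *		... handle driver-specific ioctls here ...
 */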
int
dk_ioctl(struct dk_softc *dksc, dev_t dev,
	    u_long cmd, void *data, int flag, struct lwp *l)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	struct	disklabel *lp;
	struct	disk *dk = &dksc->sc_dkdev;
#ifdef __HAVE_OLD_DISKLABEL
	struct	disklabel newlabel;
#endif
	int	error;

	DPRINTF_FOLLOW(("%s(%s, %p, 0x%"PRIx64", 0x%lx)\n", __func__,
	    dksc->sc_xname, dksc, dev, cmd));

	/* ensure that the pseudo disk is open for writes for these commands */
	switch (cmd) {
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return EBADF;
	}

	/* ensure that the pseudo-disk is initialized for these */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPARTINFO:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCMWEDGES:
	case DIOCCACHESYNC:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((dksc->sc_flags & DKF_INITED) == 0)
			return ENXIO;
	}

	error = disk_ioctl(dk, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;
	else
		error = 0;

	switch (cmd) {
	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		mutex_enter(&dk->dk_openlock);
		dksc->sc_flags |= DKF_LABELLING;

		error = setdisklabel(dksc->sc_dkdev.dk_label,
		    lp, 0, dksc->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(DKLABELDEV(dev),
				    dkd->d_strategy, dksc->sc_dkdev.dk_label,
				    dksc->sc_dkdev.dk_cpulabel);
		}

		dksc->sc_flags &= ~DKF_LABELLING;
		mutex_exit(&dk->dk_openlock);
		break;

	case DIOCKLABEL:
		if (*(int *)data != 0)
			dksc->sc_flags |= DKF_KLABEL;
		else
			dksc->sc_flags &= ~DKF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			dksc->sc_flags |= DKF_WLABEL;
		else
			dksc->sc_flags &= ~DKF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		dk_getdefaultlabel(dksc, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		dk_getdefaultlabel(dksc, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		mutex_enter(&dksc->sc_iolock);
		if (dksc->sc_bufq != NULL)
			strlcpy(dks->dks_name,
			    bufq_getstrategyname(dksc->sc_bufq),
			    sizeof(dks->dks_name));
		else
			error = EINVAL;
		mutex_exit(&dksc->sc_iolock);
		dks->dks_paramlen = 0;
		break;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		mutex_enter(&dksc->sc_iolock);
		old = dksc->sc_bufq;
		if (old)
			bufq_move(new, old);
		dksc->sc_bufq = new;
		mutex_exit(&dksc->sc_iolock);
		if (old)
			bufq_free(old);
		break;
	    }

	default:
		error = ENOTTY;
	}

	return error;
}

/*
 * dk_dump dumps all of physical memory into the partition specified.
 * This requires substantially more framework than {s,w}ddump, and hence
 * is probably much more fragile.
 */

#define DKFF_READYFORDUMP(x)	(((x) & DKF_READYFORDUMP) == DKF_READYFORDUMP)
static volatile int	dk_dumping = 0;

/* ARGSUSED */
int
dk_dump(struct dk_softc *dksc, dev_t dev,
    daddr_t blkno, void *vav, size_t size)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	char *va = vav;
	struct disklabel *lp;
	struct partition *p;
	int part, towrt, nsects, sectoff, maxblkcnt, nblk;
	int maxxfer, rv = 0;

	/*
	 * ensure that we consider this device to be safe for dumping,
	 * and that the device is configured.
	 */
	if (!DKFF_READYFORDUMP(dksc->sc_flags)) {
		DPRINTF(DKDB_DUMP, ("%s: bad dump flags 0x%x\n", __func__,
		    dksc->sc_flags));
		return ENXIO;
	}

	/* ensure that we are not already dumping */
	if (dk_dumping)
		return EFAULT;
	dk_dumping = 1;

	if (dkd->d_dumpblocks == NULL) {
		DPRINTF(DKDB_DUMP, ("%s: no dumpblocks\n", __func__));
		dk_dumping = 0;
		return ENXIO;
	}

	/* device specific max transfer size */
	maxxfer = MAXPHYS;
	if (dkd->d_iosize != NULL)
		(*dkd->d_iosize)(dksc->sc_dev, &maxxfer);

	/* Convert to disk sectors.  Request must be a multiple of the sector size. */
	part = DISKPART(dev);
	lp = dksc->sc_dkdev.dk_label;
	if ((size % lp->d_secsize) != 0) {
		DPRINTF(DKDB_DUMP, ("%s: odd size %zu\n", __func__, size));
		dk_dumping = 0;
		return EFAULT;
	}
	towrt = size / lp->d_secsize;
	blkno = dbtob(blkno) / lp->d_secsize;   /* blkno in secsize units */

	p = &lp->d_partitions[part];
	if (p->p_fstype != FS_SWAP) {
		DPRINTF(DKDB_DUMP, ("%s: bad fstype %d\n", __func__,
		    p->p_fstype));
		dk_dumping = 0;
		return ENXIO;
	}
	nsects = p->p_size;
	sectoff = p->p_offset;

	/* Check transfer bounds against partition size. */
	if ((blkno < 0) || ((blkno + towrt) > nsects)) {
		DPRINTF(DKDB_DUMP, ("%s: out of bounds blkno=%jd, towrt=%d, "
		    "nsects=%d\n", __func__, (intmax_t)blkno, towrt, nsects));
		dk_dumping = 0;
		return EINVAL;
	}

	/* Offset block number to start of partition. */
	blkno += sectoff;

	/* Start dumping and return when done. */
	maxblkcnt = howmany(maxxfer, lp->d_secsize);
	while (towrt > 0) {
		nblk = min(maxblkcnt, towrt);

		if ((rv = (*dkd->d_dumpblocks)(dksc->sc_dev, va, blkno, nblk))
		    != 0) {
			DPRINTF(DKDB_DUMP, ("%s: dumpblocks %d\n", __func__,
			    rv));
			dk_dumping = 0;
			return rv;
		}

		towrt -= nblk;
		blkno += nblk;
		va += nblk * lp->d_secsize;
	}

	dk_dumping = 0;

	return 0;
}

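/*
 * dk_getdefaultlabel --
 *	Build an in-core disklabel from the disk geometry alone: a
 *	single RAW_PART partition covering the whole unit (clamped to
 *	UINT32_MAX sectors), with the driver's d_label hook given a
 *	chance to adjust it before the checksum is computed.
 */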
/* ARGSUSED */
void
dk_getdefaultlabel(struct dk_softc *dksc, struct disklabel *lp)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(lp, 0, sizeof(*lp));

	if (dg->dg_secperunit > UINT32_MAX)
		lp->d_secperunit = UINT32_MAX;
	else
		lp->d_secperunit = dg->dg_secperunit;
	lp->d_secsize = dg->dg_secsize;
	lp->d_nsectors = dg->dg_nsectors;
	lp->d_ntracks = dg->dg_ntracks;
	lp->d_ncylinders = dg->dg_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strlcpy(lp->d_typename, dksc->sc_xname, sizeof(lp->d_typename));
	lp->d_type = dksc->sc_dtype;
	strlcpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;

	if (dkd->d_label)
		dkd->d_label(dksc->sc_dev, lp);

	lp->d_checksum = dkcksum(lp);
}

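/*
 * dk_getdisklabel --
 *	Read the on-disk label via readdisklabel(); if that fails, fall
 *	back to dk_makedisklabel().  With DKF_LABELSANITY set, warn when
 *	the label or its partitions disagree with the probed geometry.
 */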
/* ARGSUSED */
void
dk_getdisklabel(struct dk_softc *dksc, dev_t dev)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	struct	 disklabel *lp = dksc->sc_dkdev.dk_label;
	struct	 cpu_disklabel *clp = dksc->sc_dkdev.dk_cpulabel;
	struct	 disk_geom *dg = &dksc->sc_dkdev.dk_geom;
	struct	 partition *pp;
	int	 i;
	const char	*errstring;

	memset(clp, 0x0, sizeof(*clp));
	dk_getdefaultlabel(dksc, lp);
	errstring = readdisklabel(DKLABELDEV(dev), dkd->d_strategy,
	    dksc->sc_dkdev.dk_label, dksc->sc_dkdev.dk_cpulabel);
	if (errstring) {
		dk_makedisklabel(dksc);
		if (dksc->sc_flags & DKF_WARNLABEL)
			printf("%s: %s\n", dksc->sc_xname, errstring);
		return;
	}

	if ((dksc->sc_flags & DKF_LABELSANITY) == 0)
		return;

	/* Sanity check */
	if (lp->d_secperunit > dg->dg_secperunit)
		printf("WARNING: %s: total sector size in disklabel (%ju) "
		    "!= the size of %s (%ju)\n", dksc->sc_xname,
		    (uintmax_t)lp->d_secperunit, dksc->sc_xname,
		    (uintmax_t)dg->dg_secperunit);
	else if (lp->d_secperunit < UINT32_MAX &&
	         lp->d_secperunit < dg->dg_secperunit)
		printf("%s: %ju trailing sectors not covered by disklabel\n",
		    dksc->sc_xname,
		    (uintmax_t)dg->dg_secperunit - lp->d_secperunit);

	for (i=0; i < lp->d_npartitions; i++) {
		pp = &lp->d_partitions[i];
		if (pp->p_offset + pp->p_size > dg->dg_secperunit)
			printf("WARNING: %s: end of partition `%c' exceeds "
			    "the size of %s (%ju)\n", dksc->sc_xname,
			    'a' + i, dksc->sc_xname,
			    (uintmax_t)dg->dg_secperunit);
	}
}

/*
 * Heuristic to conjure a disklabel if reading a disklabel failed.
 *
 * This is to allow the raw partition to be used for a filesystem
 * without caring about the write-protected label sector.
 *
 * If the driver provides its own callback, use that instead.
 */
/* ARGSUSED */
static void
dk_makedisklabel(struct dk_softc *dksc)
{
	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
	struct  disklabel *lp = dksc->sc_dkdev.dk_label;

	strlcpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	if (dkd->d_label)
		dkd->d_label(dksc->sc_dev, lp);
	else
		lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	lp->d_checksum = dkcksum(lp);
}

/* This function is taken from ccd.c:1.76  --rcd */

/*
 * XXX this function looks too generic for dksubr.c, shouldn't we
 *     put it somewhere better?
 */

/*
 * Lookup the provided name in the filesystem.  If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
 */
int
dk_lookup(struct pathbuf *pb, struct lwp *l, struct vnode **vpp)
{
	struct nameidata nd;
	struct vnode *vp;
	int     error;

	if (l == NULL)
		return ESRCH;	/* Is ESRCH the best choice? */

	NDINIT(&nd, LOOKUP, FOLLOW, pb);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
		DPRINTF((DKDB_FOLLOW|DKDB_INIT),
		    ("%s: vn_open error = %d\n", __func__, error));
		return error;
	}

	vp = nd.ni_vp;
	if (vp->v_type != VBLK) {
		error = ENOTBLK;
		goto out;
	}

	/* Reopen as anonymous vnode to protect against forced unmount. */
	if ((error = bdevvp(vp->v_rdev, vpp)) != 0)
		goto out;
	VOP_UNLOCK(vp);
	if ((error = vn_close(vp, FREAD | FWRITE, l->l_cred)) != 0) {
		vrele(*vpp);
		return error;
	}
	if ((error = VOP_OPEN(*vpp, FREAD | FWRITE, l->l_cred)) != 0) {
		vrele(*vpp);
		return error;
	}
	mutex_enter((*vpp)->v_interlock);
	(*vpp)->v_writecount++;
	mutex_exit((*vpp)->v_interlock);

	IFDEBUG(DKDB_VNODE, vprint("dk_lookup: vnode info", *vpp));

	return 0;
out:
	VOP_UNLOCK(vp);
	(void) vn_close(vp, FREAD | FWRITE, l->l_cred);
	return error;
}

MODULE(MODULE_CLASS_MISC, dk_subr, NULL);

static int
dk_subr_modcmd(modcmd_t cmd, void *arg)
{
	switch (cmd) {
	case MODULE_CMD_INIT:
	case MODULE_CMD_FINI:
		return 0;
	case MODULE_CMD_STAT:
	case MODULE_CMD_AUTOUNLOAD:
	default:
		return ENOTTY;
	}
}