xref: /netbsd-src/sys/dev/dksubr.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* $NetBSD: dksubr.c,v 1.102 2018/05/12 10:33:06 mlelstv Exp $ */
2 
3 /*-
4  * Copyright (c) 1996, 1997, 1998, 1999, 2002, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe and Roland C. Dowdeswell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: dksubr.c,v 1.102 2018/05/12 10:33:06 mlelstv Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/stat.h>
38 #include <sys/proc.h>
39 #include <sys/ioctl.h>
40 #include <sys/device.h>
41 #include <sys/disk.h>
42 #include <sys/disklabel.h>
43 #include <sys/buf.h>
44 #include <sys/bufq.h>
45 #include <sys/vnode.h>
46 #include <sys/fcntl.h>
47 #include <sys/namei.h>
48 #include <sys/module.h>
49 #include <sys/syslog.h>
50 
51 #include <dev/dkvar.h>
52 #include <miscfs/specfs/specdev.h> /* for v_rdev */
53 
54 int	dkdebug = 0;
55 
56 #ifdef DEBUG
57 #define DKDB_FOLLOW	0x1
58 #define DKDB_INIT	0x2
59 #define DKDB_VNODE	0x4
60 #define DKDB_DUMP	0x8
61 
62 #define IFDEBUG(x,y)		if (dkdebug & (x)) y
63 #define DPRINTF(x,y)		IFDEBUG(x, printf y)
64 #define DPRINTF_FOLLOW(y)	DPRINTF(DKDB_FOLLOW, y)
65 #else
66 #define IFDEBUG(x,y)
67 #define DPRINTF(x,y)
68 #define DPRINTF_FOLLOW(y)
69 #endif
70 
71 #define DKF_READYFORDUMP	(DKF_INITED|DKF_TAKEDUMP)
72 
73 static int dk_subr_modcmd(modcmd_t, void *);
74 
75 #define DKLABELDEV(dev)	\
76 	(MAKEDISKDEV(major((dev)), DISKUNIT((dev)), RAW_PART))
77 
78 static void	dk_makedisklabel(struct dk_softc *);
79 static int	dk_translate(struct dk_softc *, struct buf *);
80 static void	dk_done1(struct dk_softc *, struct buf *, bool);
81 
82 void
83 dk_init(struct dk_softc *dksc, device_t dev, int dtype)
84 {
85 
86 	memset(dksc, 0x0, sizeof(*dksc));
87 	dksc->sc_dtype = dtype;
88 	dksc->sc_dev = dev;
89 
90 	strlcpy(dksc->sc_xname, device_xname(dev), DK_XNAME_SIZE);
91 	dksc->sc_dkdev.dk_name = dksc->sc_xname;
92 }
93 
94 void
95 dk_attach(struct dk_softc *dksc)
96 {
97 	KASSERT(dksc->sc_dev != NULL);
98 
99 	mutex_init(&dksc->sc_iolock, MUTEX_DEFAULT, IPL_VM);
100 	dksc->sc_flags |= DKF_READYFORDUMP;
101 #ifdef DIAGNOSTIC
102 	dksc->sc_flags |= DKF_WARNLABEL | DKF_LABELSANITY;
103 #endif
104 
105 	if ((dksc->sc_flags & DKF_NO_RND) == 0) {
106 		/* Attach the device to the rnd(4) entropy source list. */
107 		rnd_attach_source(&dksc->sc_rnd_source, dksc->sc_xname,
108 		    RND_TYPE_DISK, RND_FLAG_DEFAULT);
109 	}
110 }
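
/*
 * A minimal sketch of how a disk driver is expected to wire this up at
 * attach time, modeled on existing dksubr users; the xyz_* names and the
 * DKTYPE_UNKNOWN/"fcfs" choices are placeholders, not requirements:
 *
 *	struct xyz_softc *sc = device_private(self);
 *	struct dk_softc *dksc = &sc->sc_dksc;
 *
 *	dk_init(dksc, self, DKTYPE_UNKNOWN);
 *	disk_init(&dksc->sc_dkdev, dksc->sc_xname, &xyz_dkdriver);
 *	dk_attach(dksc);
 *	disk_attach(&dksc->sc_dkdev);
 *	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);
 *
 * dk_detach() below undoes dk_attach(); the driver itself remains
 * responsible for bufq_free(), disk_detach() and disk_destroy().
 */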
111 
112 void
113 dk_detach(struct dk_softc *dksc)
114 {
115 	if ((dksc->sc_flags & DKF_NO_RND) == 0) {
116 		/* Unhook the entropy source. */
117 		rnd_detach_source(&dksc->sc_rnd_source);
118 	}
119 
120 	dksc->sc_flags &= ~DKF_READYFORDUMP;
121 	mutex_destroy(&dksc->sc_iolock);
122 }
123 
124 /* ARGSUSED */
125 int
126 dk_open(struct dk_softc *dksc, dev_t dev,
127     int flags, int fmt, struct lwp *l)
128 {
129 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
130 	struct	disklabel *lp = dksc->sc_dkdev.dk_label;
131 	int	part = DISKPART(dev);
132 	int	pmask = 1 << part;
133 	int	ret = 0;
134 	struct disk *dk = &dksc->sc_dkdev;
135 
136 	DPRINTF_FOLLOW(("%s(%s, %p, 0x%"PRIx64", 0x%x)\n", __func__,
137 	    dksc->sc_xname, dksc, dev, flags));
138 
139 	mutex_enter(&dk->dk_openlock);
140 
141 	/*
142 	 * If there are wedges, and this is not RAW_PART, then we
143 	 * need to fail.
144 	 */
145 	if (dk->dk_nwedges != 0 && part != RAW_PART) {
146 		ret = EBUSY;
147 		goto done;
148 	}
149 
150 	/*
151 	 * initialize driver for the first opener
152 	 */
153 	if (dk->dk_openmask == 0 && dkd->d_firstopen != NULL) {
154 		ret = (*dkd->d_firstopen)(dksc->sc_dev, dev, flags, fmt);
155 		if (ret)
156 			goto done;
157 	}
158 
159 	/*
160 	 * If we're initialized and the in-core disklabel is not valid yet,
161 	 * read it in from the media.
162 	 */
163 	if ((dksc->sc_flags & DKF_INITED)) {
164 		if ((dksc->sc_flags & DKF_VLABEL) == 0) {
165 			dksc->sc_flags |= DKF_VLABEL;
166 			dk_getdisklabel(dksc, dev);
167 		}
168 	}
169 
170 	/* Fail if we can't find the partition. */
171 	if (part != RAW_PART &&
172 	    ((dksc->sc_flags & DKF_VLABEL) == 0 ||
173 	     part >= lp->d_npartitions ||
174 	     lp->d_partitions[part].p_fstype == FS_UNUSED)) {
175 		ret = ENXIO;
176 		goto done;
177 	}
178 
179 	/* Mark our unit as open. */
180 	switch (fmt) {
181 	case S_IFCHR:
182 		dk->dk_copenmask |= pmask;
183 		break;
184 	case S_IFBLK:
185 		dk->dk_bopenmask |= pmask;
186 		break;
187 	}
188 
189 	dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
190 
191 done:
192 	mutex_exit(&dk->dk_openlock);
193 	return ret;
194 }
195 
196 /* ARGSUSED */
197 int
198 dk_close(struct dk_softc *dksc, dev_t dev,
199     int flags, int fmt, struct lwp *l)
200 {
201 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
202 	int	part = DISKPART(dev);
203 	int	pmask = 1 << part;
204 	struct disk *dk = &dksc->sc_dkdev;
205 
206 	DPRINTF_FOLLOW(("%s(%s, %p, 0x%"PRIx64", 0x%x)\n", __func__,
207 	    dksc->sc_xname, dksc, dev, flags));
208 
209 	mutex_enter(&dk->dk_openlock);
210 
211 	switch (fmt) {
212 	case S_IFCHR:
213 		dk->dk_copenmask &= ~pmask;
214 		break;
215 	case S_IFBLK:
216 		dk->dk_bopenmask &= ~pmask;
217 		break;
218 	}
219 	dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
220 
221 	if (dk->dk_openmask == 0) {
222 		if (dkd->d_lastclose != NULL)
223 			(*dkd->d_lastclose)(dksc->sc_dev);
224 		if ((dksc->sc_flags & DKF_KLABEL) == 0)
225 			dksc->sc_flags &= ~DKF_VLABEL;
226 	}
227 
228 	mutex_exit(&dk->dk_openlock);
229 	return 0;
230 }
231 
232 static int
233 dk_translate(struct dk_softc *dksc, struct buf *bp)
234 {
235 	int	part;
236 	int	wlabel;
237 	daddr_t	blkno;
238 	struct disklabel *lp;
239 	struct disk *dk;
240 	uint64_t numsecs;
241 	unsigned secsize;
242 
243 	lp = dksc->sc_dkdev.dk_label;
244 	dk = &dksc->sc_dkdev;
245 
246 	part = DISKPART(bp->b_dev);
247 	numsecs = dk->dk_geom.dg_secperunit;
248 	secsize = dk->dk_geom.dg_secsize;
249 
250 	/*
251 	 * The transfer must be a whole number of blocks and the offset must
252 	 * not be negative.
253 	 */
254 	if ((bp->b_bcount % secsize) != 0 || bp->b_blkno < 0) {
255 		bp->b_error = EINVAL;
256 		goto done;
257 	}
258 
259 	/* If there is nothing to do, then we are done */
260 	if (bp->b_bcount == 0)
261 		goto done;
262 
263 	wlabel = dksc->sc_flags & (DKF_WLABEL|DKF_LABELLING);
264 	if (part == RAW_PART) {
265 		uint64_t numblocks = btodb(numsecs * secsize);
266 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, numblocks) <= 0)
267 			goto done;
268 	} else {
269 		if (bounds_check_with_label(&dksc->sc_dkdev, bp, wlabel) <= 0)
270 			goto done;
271 	}
272 
273 	/*
274 	 * Convert the block number to absolute and put it in terms
275 	 * of the device's logical block size.
276 	 */
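	/*
	 * A worked example (sketch): with 2048-byte device sectors and
	 * DEV_BSIZE of 512, a request at b_blkno 64 (DEV_BSIZE units)
	 * maps to device block 64 / (2048 / 512) = 16; for a non-raw
	 * partition the partition's p_offset, kept in device sectors,
	 * is then added below.
	 */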
277 	if (secsize >= DEV_BSIZE)
278 		blkno = bp->b_blkno / (secsize / DEV_BSIZE);
279 	else
280 		blkno = bp->b_blkno * (DEV_BSIZE / secsize);
281 
282 	if (part != RAW_PART)
283 		blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset;
284 	bp->b_rawblkno = blkno;
285 
286 	return -1;
287 
288 done:
289 	bp->b_resid = bp->b_bcount;
290 	return bp->b_error;
291 }
292 
293 static int
294 dk_strategy1(struct dk_softc *dksc, struct buf *bp)
295 {
296 	int error;
297 
298 	DPRINTF_FOLLOW(("%s(%s, %p, %p)\n", __func__,
299 	    dksc->sc_xname, dksc, bp));
300 
301 	if (!(dksc->sc_flags & DKF_INITED)) {
302 		DPRINTF_FOLLOW(("%s: not inited\n", __func__));
303 		bp->b_error = ENXIO;
304 		bp->b_resid = bp->b_bcount;
305 		biodone(bp);
306 		return 1;
307 	}
308 
309 	error = dk_translate(dksc, bp);
310 	if (error >= 0) {
311 		biodone(bp);
312 		return 1;
313 	}
314 
315 	return 0;
316 }
317 
318 void
319 dk_strategy(struct dk_softc *dksc, struct buf *bp)
320 {
321 	int error;
322 
323 	error = dk_strategy1(dksc, bp);
324 	if (error)
325 		return;
326 
327 	/*
328 	 * Queue buffer and start unit
329 	 */
330 	dk_start(dksc, bp);
331 }
332 
333 int
334 dk_strategy_defer(struct dk_softc *dksc, struct buf *bp)
335 {
336 	int error;
337 
338 	error = dk_strategy1(dksc, bp);
339 	if (error)
340 		return error;
341 
342 	/*
343 	 * Queue buffer only
344 	 */
345 	mutex_enter(&dksc->sc_iolock);
346 	disk_wait(&dksc->sc_dkdev);
347 	bufq_put(dksc->sc_bufq, bp);
348 	mutex_exit(&dksc->sc_iolock);
349 
350 	return 0;
351 }
352 
353 int
354 dk_strategy_pending(struct dk_softc *dksc)
355 {
356 	struct buf *bp;
357 
358 	if (!(dksc->sc_flags & DKF_INITED)) {
359 		DPRINTF_FOLLOW(("%s: not inited\n", __func__));
360 		return 0;
361 	}
362 
363 	mutex_enter(&dksc->sc_iolock);
364 	bp = bufq_peek(dksc->sc_bufq);
365 	mutex_exit(&dksc->sc_iolock);
366 
367 	return bp != NULL;
368 }
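
/*
 * dk_strategy_defer() and dk_strategy_pending() exist for drivers whose
 * d_diskstart must run in thread context.  A sketch of that pattern (not
 * lifted from any particular driver; the xyz_* names and the wakeup
 * mechanism are placeholders):
 *
 *	void
 *	xyzstrategy(struct buf *bp)
 *	{
 *		struct xyz_softc *sc = ...;
 *
 *		if (dk_strategy_defer(&sc->sc_dksc, bp) == 0)
 *			cv_signal(&sc->sc_workcv);	// wake the worker
 *	}
 *
 * and in the worker thread:
 *
 *	if (dk_strategy_pending(&sc->sc_dksc))
 *		dk_start(&sc->sc_dksc, NULL);
 */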
369 
370 void
371 dk_start(struct dk_softc *dksc, struct buf *bp)
372 {
373 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
374 	int error;
375 
376 	if (!(dksc->sc_flags & DKF_INITED)) {
377 		DPRINTF_FOLLOW(("%s: not inited\n", __func__));
378 		return;
379 	}
380 
381 	mutex_enter(&dksc->sc_iolock);
382 
383 	if (bp != NULL) {
384 		disk_wait(&dksc->sc_dkdev);
385 		bufq_put(dksc->sc_bufq, bp);
386 	}
387 
388 	/*
389 	 * If another thread is running the queue, increment
390 	 * busy counter to 2 so that the queue is retried,
391 	 * because the driver may now accept additional
392 	 * requests.
393 	 */
394 	if (dksc->sc_busy < 2)
395 		dksc->sc_busy++;
396 	if (dksc->sc_busy > 1)
397 		goto done;
398 
399 	/*
400 	 * Peeking at the buffer queue and committing the operation
401 	 * only after success isn't atomic.
402 	 *
403 	 * So when d_diskstart fails, the buffer is saved
404 	 * and retried before the next buffer is fetched.
405 	 * dk_drain() handles flushing of a saved buffer.
406 	 *
407 	 * This preserves the order of I/O operations, which re-queueing with bufq_put would not.
408 	 */
409 
410 	while (dksc->sc_busy > 0) {
411 
412 		bp = dksc->sc_deferred;
413 		dksc->sc_deferred = NULL;
414 
415 		if (bp == NULL)
416 			bp = bufq_get(dksc->sc_bufq);
417 
418 		while (bp != NULL) {
419 
420 			disk_busy(&dksc->sc_dkdev);
421 			mutex_exit(&dksc->sc_iolock);
422 			error = dkd->d_diskstart(dksc->sc_dev, bp);
423 			mutex_enter(&dksc->sc_iolock);
424 			if (error == EAGAIN) {
425 				KASSERT(dksc->sc_deferred == NULL);
426 				dksc->sc_deferred = bp;
427 				disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
428 				disk_wait(&dksc->sc_dkdev);
429 				break;
430 			}
431 
432 			if (error != 0) {
433 				bp->b_error = error;
434 				bp->b_resid = bp->b_bcount;
435 				dk_done1(dksc, bp, false);
436 			}
437 
438 			bp = bufq_get(dksc->sc_bufq);
439 		}
440 
441 		dksc->sc_busy--;
442 	}
443 done:
444 	mutex_exit(&dksc->sc_iolock);
445 }
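
/*
 * Given the retry logic above, a driver's d_diskstart callback is expected
 * to behave roughly as follows (a sketch; the xyz_* names are placeholders):
 *
 *	static int
 *	xyz_diskstart(device_t dev, struct buf *bp)
 *	{
 *		struct xyz_softc *sc = device_private(dev);
 *
 *		if (xyz_hw_queue_full(sc))
 *			return EAGAIN;	// dk_start() keeps bp and retries
 *		return xyz_submit(sc, bp);	// 0, or an errno to fail bp
 *	}
 *
 * When the transfer completes (typically from interrupt context) the driver
 * calls dk_done(dksc, bp) and then dk_start(dksc, NULL) to keep the queue
 * moving.
 */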
446 
447 static void
448 dk_done1(struct dk_softc *dksc, struct buf *bp, bool lock)
449 {
450 	struct disk *dk = &dksc->sc_dkdev;
451 
452 	if (bp->b_error != 0) {
453 		struct cfdriver *cd = device_cfdriver(dksc->sc_dev);
454 
455 		diskerr(bp, cd->cd_name, "error", LOG_PRINTF, 0,
456 			dk->dk_label);
457 		printf("\n");
458 	}
459 
460 	if (lock)
461 		mutex_enter(&dksc->sc_iolock);
462 	disk_unbusy(dk, bp->b_bcount - bp->b_resid, (bp->b_flags & B_READ));
463 	if (lock)
464 		mutex_exit(&dksc->sc_iolock);
465 
466 	if ((dksc->sc_flags & DKF_NO_RND) == 0)
467 		rnd_add_uint32(&dksc->sc_rnd_source, bp->b_rawblkno);
468 
469 	biodone(bp);
470 }
471 
472 void
473 dk_done(struct dk_softc *dksc, struct buf *bp)
474 {
475 	dk_done1(dksc, bp, true);
476 }
477 
478 void
479 dk_drain(struct dk_softc *dksc)
480 {
481 	struct buf *bp;
482 
483 	mutex_enter(&dksc->sc_iolock);
484 	bp = dksc->sc_deferred;
485 	dksc->sc_deferred = NULL;
486 	if (bp != NULL) {
487 		bp->b_error = EIO;
488 		bp->b_resid = bp->b_bcount;
489 		biodone(bp);
490 	}
491 	bufq_drain(dksc->sc_bufq);
492 	mutex_exit(&dksc->sc_iolock);
493 }
494 
495 int
496 dk_discard(struct dk_softc *dksc, dev_t dev, off_t pos, off_t len)
497 {
498 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
499 	unsigned secsize = dksc->sc_dkdev.dk_geom.dg_secsize;
500 	struct buf tmp, *bp = &tmp;
501 	int maxsz;
502 	int error = 0;
503 
504 	KASSERT(len >= 0);
505 
506 	DPRINTF_FOLLOW(("%s(%s, %p, 0x%"PRIx64", %jd, %jd)\n", __func__,
507 	    dksc->sc_xname, dksc, dev, (intmax_t)pos, (intmax_t)len));
508 
509 	if (!(dksc->sc_flags & DKF_INITED)) {
510 		DPRINTF_FOLLOW(("%s: not inited\n", __func__));
511 		return ENXIO;
512 	}
513 
514 	if (secsize == 0 || (pos % secsize) != 0 || (len % secsize) != 0)
515 		return EINVAL;
516 
517 	/* largest multiple of the sector size that fits in b_bcount */
518 	maxsz = rounddown(INT_MAX, secsize);
519 
520 	while (len > 0) {
521 		/* enough data to please the bounds checking code */
522 		bp->b_dev = dev;
523 		bp->b_blkno = (daddr_t)(pos / secsize);
524 		bp->b_bcount = min(len, maxsz);
525 		bp->b_flags = B_WRITE;
526 
527 		error = dk_translate(dksc, bp);
528 		if (error >= 0)
529 			break;
530 
531 		error = dkd->d_discard(dksc->sc_dev,
532 			(off_t)bp->b_rawblkno * secsize,
533 			(off_t)bp->b_bcount);
534 		if (error)
535 			break;
536 
537 		pos += bp->b_bcount;
538 		len -= bp->b_bcount;
539 	}
540 
541 	return error;
542 }
543 
544 int
545 dk_size(struct dk_softc *dksc, dev_t dev)
546 {
547 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
548 	struct	disklabel *lp;
549 	int	is_open;
550 	int	part;
551 	int	size;
552 
553 	if ((dksc->sc_flags & DKF_INITED) == 0)
554 		return -1;
555 
556 	part = DISKPART(dev);
557 	is_open = dksc->sc_dkdev.dk_openmask & (1 << part);
558 
559 	if (!is_open && dkd->d_open(dev, 0, S_IFBLK, curlwp))
560 		return -1;
561 
562 	lp = dksc->sc_dkdev.dk_label;
563 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
564 		size = -1;
565 	else
566 		size = lp->d_partitions[part].p_size *
567 		    (lp->d_secsize / DEV_BSIZE);
568 
569 	if (!is_open && dkd->d_close(dev, 0, S_IFBLK, curlwp))
570 		return -1;
571 
572 	return size;
573 }
574 
575 int
576 dk_ioctl(struct dk_softc *dksc, dev_t dev,
577 	    u_long cmd, void *data, int flag, struct lwp *l)
578 {
579 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
580 	struct	disklabel *lp;
581 	struct	disk *dk = &dksc->sc_dkdev;
582 #ifdef __HAVE_OLD_DISKLABEL
583 	struct	disklabel newlabel;
584 #endif
585 	int	error;
586 
587 	DPRINTF_FOLLOW(("%s(%s, %p, 0x%"PRIx64", 0x%lx)\n", __func__,
588 	    dksc->sc_xname, dksc, dev, cmd));
589 
590 	/* ensure that the pseudo-disk is open for writes for these commands */
591 	switch (cmd) {
592 	case DIOCSDINFO:
593 	case DIOCWDINFO:
594 #ifdef __HAVE_OLD_DISKLABEL
595 	case ODIOCSDINFO:
596 	case ODIOCWDINFO:
597 #endif
598 	case DIOCKLABEL:
599 	case DIOCWLABEL:
600 	case DIOCAWEDGE:
601 	case DIOCDWEDGE:
602 	case DIOCSSTRATEGY:
603 		if ((flag & FWRITE) == 0)
604 			return EBADF;
605 	}
606 
607 	/* ensure that the pseudo-disk is initialized for these */
608 	switch (cmd) {
609 	case DIOCGDINFO:
610 	case DIOCSDINFO:
611 	case DIOCWDINFO:
612 	case DIOCGPARTINFO:
613 	case DIOCKLABEL:
614 	case DIOCWLABEL:
615 	case DIOCGDEFLABEL:
616 	case DIOCAWEDGE:
617 	case DIOCDWEDGE:
618 	case DIOCLWEDGES:
619 	case DIOCMWEDGES:
620 	case DIOCCACHESYNC:
621 #ifdef __HAVE_OLD_DISKLABEL
622 	case ODIOCGDINFO:
623 	case ODIOCSDINFO:
624 	case ODIOCWDINFO:
625 	case ODIOCGDEFLABEL:
626 #endif
627 		if ((dksc->sc_flags & DKF_INITED) == 0)
628 			return ENXIO;
629 	}
630 
631 	error = disk_ioctl(dk, dev, cmd, data, flag, l);
632 	if (error != EPASSTHROUGH)
633 		return error;
634 	else
635 		error = 0;
636 
637 	switch (cmd) {
638 	case DIOCWDINFO:
639 	case DIOCSDINFO:
640 #ifdef __HAVE_OLD_DISKLABEL
641 	case ODIOCWDINFO:
642 	case ODIOCSDINFO:
643 #endif
644 #ifdef __HAVE_OLD_DISKLABEL
645 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
646 			memset(&newlabel, 0, sizeof newlabel);
647 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
648 			lp = &newlabel;
649 		} else
650 #endif
651 		lp = (struct disklabel *)data;
652 
653 		mutex_enter(&dk->dk_openlock);
654 		dksc->sc_flags |= DKF_LABELLING;
655 
656 		error = setdisklabel(dksc->sc_dkdev.dk_label,
657 		    lp, 0, dksc->sc_dkdev.dk_cpulabel);
658 		if (error == 0) {
659 			if (cmd == DIOCWDINFO
660 #ifdef __HAVE_OLD_DISKLABEL
661 			    || cmd == ODIOCWDINFO
662 #endif
663 			   )
664 				error = writedisklabel(DKLABELDEV(dev),
665 				    dkd->d_strategy, dksc->sc_dkdev.dk_label,
666 				    dksc->sc_dkdev.dk_cpulabel);
667 		}
668 
669 		dksc->sc_flags &= ~DKF_LABELLING;
670 		mutex_exit(&dk->dk_openlock);
671 		break;
672 
673 	case DIOCKLABEL:
674 		if (*(int *)data != 0)
675 			dksc->sc_flags |= DKF_KLABEL;
676 		else
677 			dksc->sc_flags &= ~DKF_KLABEL;
678 		break;
679 
680 	case DIOCWLABEL:
681 		if (*(int *)data != 0)
682 			dksc->sc_flags |= DKF_WLABEL;
683 		else
684 			dksc->sc_flags &= ~DKF_WLABEL;
685 		break;
686 
687 	case DIOCGDEFLABEL:
688 		dk_getdefaultlabel(dksc, (struct disklabel *)data);
689 		break;
690 
691 #ifdef __HAVE_OLD_DISKLABEL
692 	case ODIOCGDEFLABEL:
693 		dk_getdefaultlabel(dksc, &newlabel);
694 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
695 			return ENOTTY;
696 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
697 		break;
698 #endif
699 
700 	case DIOCGSTRATEGY:
701 	    {
702 		struct disk_strategy *dks = (void *)data;
703 
704 		mutex_enter(&dksc->sc_iolock);
705 		if (dksc->sc_bufq != NULL)
706 			strlcpy(dks->dks_name,
707 			    bufq_getstrategyname(dksc->sc_bufq),
708 			    sizeof(dks->dks_name));
709 		else
710 			error = EINVAL;
711 		mutex_exit(&dksc->sc_iolock);
712 		dks->dks_paramlen = 0;
713 		break;
714 	    }
715 
716 	case DIOCSSTRATEGY:
717 	    {
718 		struct disk_strategy *dks = (void *)data;
719 		struct bufq_state *new;
720 		struct bufq_state *old;
721 
722 		if (dks->dks_param != NULL) {
723 			return EINVAL;
724 		}
725 		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
726 		error = bufq_alloc(&new, dks->dks_name,
727 		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
728 		if (error) {
729 			return error;
730 		}
731 		mutex_enter(&dksc->sc_iolock);
732 		old = dksc->sc_bufq;
733 		if (old)
734 			bufq_move(new, old);
735 		dksc->sc_bufq = new;
736 		mutex_exit(&dksc->sc_iolock);
737 		if (old)
738 			bufq_free(old);
739 		break;
740 	    }
741 
742 	default:
743 		error = ENOTTY;
744 	}
745 
746 	return error;
747 }
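
/*
 * The DIOCGSTRATEGY/DIOCSSTRATEGY cases above are what dkctl(8) talks to;
 * for example "dkctl dk0 strategy priocscan" switches the buffer queue
 * discipline at run time (the device name here is illustrative and depends
 * on the driver).
 */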
748 
749 /*
750  * dk_dump dumps all of physical memory into the partition specified.
751  * This requires substantially more framework than {s,w}ddump, and hence
752  * is probably much more fragile.
753  *
754  */
755 
756 #define DKFF_READYFORDUMP(x)	(((x) & DKF_READYFORDUMP) == DKF_READYFORDUMP)
757 static volatile int	dk_dumping = 0;
758 
759 /* ARGSUSED */
760 int
761 dk_dump(struct dk_softc *dksc, dev_t dev,
762     daddr_t blkno, void *vav, size_t size)
763 {
764 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
765 	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
766 	char *va = vav;
767 	struct disklabel *lp;
768 	struct partition *p;
769 	int part, towrt, maxblkcnt, nblk;
770 	int maxxfer, rv = 0;
771 
772 	/*
773 	 * ensure that we consider this device to be safe for dumping,
774 	 * and that the device is configured.
775 	 */
776 	if (!DKFF_READYFORDUMP(dksc->sc_flags)) {
777 		DPRINTF(DKDB_DUMP, ("%s: bad dump flags 0x%x\n", __func__,
778 		    dksc->sc_flags));
779 		return ENXIO;
780 	}
781 
782 	/* ensure that we are not already dumping */
783 	if (dk_dumping)
784 		return EFAULT;
785 	dk_dumping = 1;
786 
787 	if (dkd->d_dumpblocks == NULL) {
788 		DPRINTF(DKDB_DUMP, ("%s: no dumpblocks\n", __func__));
789 		return ENXIO;
790 	}
791 
792 	/* device specific max transfer size */
793 	maxxfer = MAXPHYS;
794 	if (dkd->d_iosize != NULL)
795 		(*dkd->d_iosize)(dksc->sc_dev, &maxxfer);
796 
797 	/* Convert to disk sectors.  The request must be a multiple of the sector size. */
798 	part = DISKPART(dev);
799 	lp = dksc->sc_dkdev.dk_label;
800 	if ((size % lp->d_secsize) != 0) {
801 		DPRINTF(DKDB_DUMP, ("%s: odd size %zu\n", __func__, size));
802 		return EFAULT;
803 	}
804 	towrt = size / lp->d_secsize;
805 	blkno = dbtob(blkno) / lp->d_secsize;   /* blkno in secsize units */
806 
807 	p = &lp->d_partitions[part];
808 	if (part == RAW_PART) {
809 		if (p->p_fstype != FS_UNUSED) {
810 			DPRINTF(DKDB_DUMP, ("%s: bad fstype %d\n", __func__,
811 			    p->p_fstype));
812 			return ENXIO;
813 		}
814 		/* Check whether the dump goes to a wedge */
815 		if (dksc->sc_dkdev.dk_nwedges == 0) {
816 			DPRINTF(DKDB_DUMP, ("%s: dump to raw\n", __func__));
817 			return ENXIO;
818 		}
819 		/* Check transfer bounds against media size */
820 		if (blkno < 0 || (blkno + towrt) > dg->dg_secperunit) {
821 			DPRINTF(DKDB_DUMP, ("%s: out of bounds blkno=%jd, towrt=%d, "
822 			    "nsects=%jd\n", __func__, (intmax_t)blkno, towrt, (intmax_t)dg->dg_secperunit));
823 			return EINVAL;
824 		}
825 	} else {
826 		int nsects, sectoff;
827 
828 		if (p->p_fstype != FS_SWAP) {
829 			DPRINTF(DKDB_DUMP, ("%s: bad fstype %d\n", __func__,
830 			    p->p_fstype));
831 			return ENXIO;
832 		}
833 		nsects = p->p_size;
834 		sectoff = p->p_offset;
835 
836 		/* Check transfer bounds against partition size. */
837 		if ((blkno < 0) || ((blkno + towrt) > nsects)) {
838 			DPRINTF(DKDB_DUMP, ("%s: out of bounds blkno=%jd, towrt=%d, "
839 			    "nsects=%d\n", __func__, (intmax_t)blkno, towrt, nsects));
840 			return EINVAL;
841 		}
842 
843 		/* Offset block number to start of partition. */
844 		blkno += sectoff;
845 	}
846 
847 	/* Start dumping and return when done. */
848 	maxblkcnt = howmany(maxxfer, lp->d_secsize);
849 	while (towrt > 0) {
850 		nblk = min(maxblkcnt, towrt);
851 
852 		if ((rv = (*dkd->d_dumpblocks)(dksc->sc_dev, va, blkno, nblk))
853 		    != 0) {
854 			DPRINTF(DKDB_DUMP, ("%s: dumpblocks %d\n", __func__,
855 			    rv));
856 			return rv;
857 		}
858 
859 		towrt -= nblk;
860 		blkno += nblk;
861 		va += nblk * lp->d_secsize;
862 	}
863 
864 	dk_dumping = 0;
865 
866 	return 0;
867 }
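
/*
 * A driver's dump entry point is normally just a thin wrapper around
 * dk_dump(), roughly as follows (a sketch; the xyz names are placeholders
 * and the NULL check on sc is omitted):
 *
 *	static int
 *	xyzdump(dev_t dev, daddr_t blkno, void *va, size_t size)
 *	{
 *		struct xyz_softc *sc =
 *		    device_lookup_private(&xyz_cd, DISKUNIT(dev));
 *
 *		return dk_dump(&sc->sc_dksc, dev, blkno, va, size);
 *	}
 */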
868 
869 /* ARGSUSED */
870 void
871 dk_getdefaultlabel(struct dk_softc *dksc, struct disklabel *lp)
872 {
873 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
874 	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
875 
876 	memset(lp, 0, sizeof(*lp));
877 
878 	if (dg->dg_secperunit > UINT32_MAX)
879 		lp->d_secperunit = UINT32_MAX;
880 	else
881 		lp->d_secperunit = dg->dg_secperunit;
882 	lp->d_secsize = dg->dg_secsize;
883 	lp->d_nsectors = dg->dg_nsectors;
884 	lp->d_ntracks = dg->dg_ntracks;
885 	lp->d_ncylinders = dg->dg_ncylinders;
886 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
887 
888 	strlcpy(lp->d_typename, dksc->sc_xname, sizeof(lp->d_typename));
889 	lp->d_type = dksc->sc_dtype;
890 	strlcpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
891 	lp->d_rpm = 3600;
892 	lp->d_interleave = 1;
893 	lp->d_flags = 0;
894 
895 	lp->d_partitions[RAW_PART].p_offset = 0;
896 	lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
897 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
898 	lp->d_npartitions = RAW_PART + 1;
899 
900 	lp->d_magic = DISKMAGIC;
901 	lp->d_magic2 = DISKMAGIC;
902 
903 	if (dkd->d_label)
904 		dkd->d_label(dksc->sc_dev, lp);
905 
906 	lp->d_checksum = dkcksum(lp);
907 }
908 
909 /* ARGSUSED */
910 void
911 dk_getdisklabel(struct dk_softc *dksc, dev_t dev)
912 {
913 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
914 	struct	 disklabel *lp = dksc->sc_dkdev.dk_label;
915 	struct	 cpu_disklabel *clp = dksc->sc_dkdev.dk_cpulabel;
916 	struct   disk_geom *dg = &dksc->sc_dkdev.dk_geom;
917 	struct	 partition *pp;
918 	int	 i;
919 	const char	*errstring;
920 
921 	memset(clp, 0x0, sizeof(*clp));
922 	dk_getdefaultlabel(dksc, lp);
923 	errstring = readdisklabel(DKLABELDEV(dev), dkd->d_strategy,
924 	    dksc->sc_dkdev.dk_label, dksc->sc_dkdev.dk_cpulabel);
925 	if (errstring) {
926 		dk_makedisklabel(dksc);
927 		if (dksc->sc_flags & DKF_WARNLABEL)
928 			printf("%s: %s\n", dksc->sc_xname, errstring);
929 		return;
930 	}
931 
932 	if ((dksc->sc_flags & DKF_LABELSANITY) == 0)
933 		return;
934 
935 	/* Sanity check */
936 	if (lp->d_secperunit > dg->dg_secperunit)
937 		printf("WARNING: %s: total sector size in disklabel (%ju) "
938 		    "!= the size of %s (%ju)\n", dksc->sc_xname,
939 		    (uintmax_t)lp->d_secperunit, dksc->sc_xname,
940 		    (uintmax_t)dg->dg_secperunit);
941 	else if (lp->d_secperunit < UINT32_MAX &&
942 	         lp->d_secperunit < dg->dg_secperunit)
943 		printf("%s: %ju trailing sectors not covered by disklabel\n",
944 		    dksc->sc_xname,
945 		    (uintmax_t)dg->dg_secperunit - lp->d_secperunit);
946 
947 	for (i=0; i < lp->d_npartitions; i++) {
948 		pp = &lp->d_partitions[i];
949 		if (pp->p_offset + pp->p_size > dg->dg_secperunit)
950 			printf("WARNING: %s: end of partition `%c' exceeds "
951 			    "the size of %s (%ju)\n", dksc->sc_xname,
952 			    'a' + i, dksc->sc_xname,
953 			    (uintmax_t)dg->dg_secperunit);
954 	}
955 }
956 
957 /*
958  * Heuristic to conjure a disklabel if reading a disklabel failed.
959  *
960  * This is to allow the raw partition to be used for a filesystem
961  * without caring about the write-protected label sector.
962  *
963  * If the driver provides its own callback, use that instead.
964  */
965 /* ARGSUSED */
966 static void
967 dk_makedisklabel(struct dk_softc *dksc)
968 {
969 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
970 	struct  disklabel *lp = dksc->sc_dkdev.dk_label;
971 
972 	strlcpy(lp->d_packname, "default label", sizeof(lp->d_packname));
973 
974 	if (dkd->d_label)
975 		dkd->d_label(dksc->sc_dev, lp);
976 	else
977 		lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
978 
979 	lp->d_checksum = dkcksum(lp);
980 }
981 
982 /* This function is taken from ccd.c:1.76  --rcd */
983 
984 /*
985  * XXX this function looks too generic for dksubr.c, shouldn't we
986  *     put it somewhere better?
987  */
988 
989 /*
990  * Lookup the provided name in the filesystem.  If the file exists,
991  * is a valid block device, and isn't being used by anyone else,
992  * set *vpp to the file's vnode.
993  */
994 int
995 dk_lookup(struct pathbuf *pb, struct lwp *l, struct vnode **vpp)
996 {
997 	struct nameidata nd;
998 	struct vnode *vp;
999 	int     error;
1000 
1001 	if (l == NULL)
1002 		return ESRCH;	/* Is ESRCH the best choice? */
1003 
1004 	NDINIT(&nd, LOOKUP, FOLLOW, pb);
1005 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1006 		DPRINTF((DKDB_FOLLOW|DKDB_INIT),
1007 		    ("%s: vn_open error = %d\n", __func__, error));
1008 		return error;
1009 	}
1010 
1011 	vp = nd.ni_vp;
1012 	if (vp->v_type != VBLK) {
1013 		error = ENOTBLK;
1014 		goto out;
1015 	}
1016 
1017 	/* Reopen as anonymous vnode to protect against forced unmount. */
1018 	if ((error = bdevvp(vp->v_rdev, vpp)) != 0)
1019 		goto out;
1020 	VOP_UNLOCK(vp);
1021 	if ((error = vn_close(vp, FREAD | FWRITE, l->l_cred)) != 0) {
1022 		vrele(*vpp);
1023 		return error;
1024 	}
1025 	if ((error = VOP_OPEN(*vpp, FREAD | FWRITE, l->l_cred)) != 0) {
1026 		vrele(*vpp);
1027 		return error;
1028 	}
1029 	mutex_enter((*vpp)->v_interlock);
1030 	(*vpp)->v_writecount++;
1031 	mutex_exit((*vpp)->v_interlock);
1032 
1033 	IFDEBUG(DKDB_VNODE, vprint("dk_lookup: vnode info", *vpp));
1034 
1035 	return 0;
1036 out:
1037 	VOP_UNLOCK(vp);
1038 	(void) vn_close(vp, FREAD | FWRITE, l->l_cred);
1039 	return error;
1040 }
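
/*
 * A minimal sketch of the intended calling pattern for dk_lookup(),
 * modeled on drivers such as ccd(4) and cgd(4); error handling is
 * trimmed and "path" stands for a user-supplied pathname:
 *
 *	struct pathbuf *pb;
 *	struct vnode *vp;
 *	int error;
 *
 *	error = pathbuf_copyin(path, &pb);
 *	if (error)
 *		return error;
 *	error = dk_lookup(pb, curlwp, &vp);
 *	pathbuf_destroy(pb);
 *	if (error)
 *		return error;
 *	// ... use vp; release it later with vn_close() ...
 */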
1041 
1042 MODULE(MODULE_CLASS_MISC, dk_subr, NULL);
1043 
1044 static int
1045 dk_subr_modcmd(modcmd_t cmd, void *arg)
1046 {
1047 	switch (cmd) {
1048 	case MODULE_CMD_INIT:
1049 	case MODULE_CMD_FINI:
1050 		return 0;
1051 	case MODULE_CMD_STAT:
1052 	case MODULE_CMD_AUTOUNLOAD:
1053 	default:
1054 		return ENOTTY;
1055 	}
1056 }
1057