xref: /netbsd-src/sys/dev/ld.c (revision 7cc2f76925f078d01ddc9e640a98f4ccfc9f8c3b)
1 /*	$NetBSD: ld.c,v 1.2 2000/12/03 13:03:30 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Doran and Charles M. Hannum.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * Disk driver for use by RAID controllers.
41  */
42 
43 #include "rnd.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/device.h>
49 #include <sys/queue.h>
50 #include <sys/proc.h>
51 #include <sys/buf.h>
52 #include <sys/endian.h>
53 #include <sys/disklabel.h>
54 #include <sys/disk.h>
55 #include <sys/dkio.h>
56 #include <sys/stat.h>
57 #include <sys/lock.h>
58 #include <sys/conf.h>
59 #include <sys/fcntl.h>
60 #include <sys/vnode.h>
61 #include <sys/syslog.h>
62 #if NRND > 0
63 #include <sys/rnd.h>
64 #endif
65 
66 #include <dev/ldvar.h>
67 
68 static void	ldgetdefaultlabel(struct ld_softc *, struct disklabel *);
69 static void	ldgetdisklabel(struct ld_softc *);
70 static int	ldlock(struct ld_softc *);
71 static void	ldminphys(struct buf *bp);
72 static void	ldshutdown(void *);
73 static int	ldstart(struct ld_softc *, struct buf *);
74 static void	ldunlock(struct ld_softc *);
75 
76 extern struct	cfdriver ld_cd;
77 
78 static struct	dkdriver lddkdriver = { ldstrategy };
79 static void	*ld_sdh;
80 
81 void
82 ldattach(struct ld_softc *sc)
83 {
84 	char buf[9];
85 
86 	/* Initialise and attach the disk structure. */
87 	sc->sc_dk.dk_driver = &lddkdriver;
88 	sc->sc_dk.dk_name = sc->sc_dv.dv_xname;
89 	disk_attach(&sc->sc_dk);
90 
91 	if ((sc->sc_flags & LDF_ENABLED) == 0) {
92 		printf("%s: disabled\n", sc->sc_dv.dv_xname);
93 		return;
94 	}
95 	if (sc->sc_maxxfer > MAXPHYS)
96 		sc->sc_maxxfer = MAXPHYS;
97 
98 	format_bytes(buf, sizeof(buf), (u_int64_t)sc->sc_secperunit *
99 	    sc->sc_secsize);
100 	printf("%s: %s, %d cyl, %d head, %d sec, %d bytes/sect x %d sectors\n",
101 	    sc->sc_dv.dv_xname, buf, sc->sc_ncylinders, sc->sc_nheads,
102 	    sc->sc_nsectors, sc->sc_secsize, sc->sc_secperunit);
103 
104 #if NRND > 0
105 	/* Attach the device into the rnd source list. */
106 	rnd_attach_source(&sc->sc_rnd_source, sc->sc_dv.dv_xname,
107 	    RND_TYPE_DISK, 0);
108 #endif
109 
110 	/* Set the `shutdownhook'. */
111 	if (ld_sdh == NULL)
112 		ld_sdh = shutdownhook_establish(ldshutdown, NULL);
113 	BUFQ_INIT(&sc->sc_bufq);
114 }
115 
116 void
117 lddetach(struct ld_softc *sc)
118 {
119 	struct buf *bp;
120 	int s, bmaj, cmaj, mn;
121 
122 	/* Wait for commands queued with the hardware to complete. */
123 	if (sc->sc_queuecnt != 0)
124 		tsleep(&sc->sc_queuecnt, PRIBIO, "lddrn", 0);
125 
126 	/* Locate the major numbers. */
127 	for (bmaj = 0; bmaj <= nblkdev; bmaj++)
128 		if (bdevsw[bmaj].d_open == sdopen)
129 			break;
130 	for (cmaj = 0; cmaj <= nchrdev; cmaj++)
131 		if (cdevsw[cmaj].d_open == sdopen)
132 			break;
133 
134 	/* Kill off any queued buffers. */
135 	s = splbio();
136 	while ((bp = BUFQ_FIRST(&sc->sc_bufq)) != NULL) {
137 		BUFQ_REMOVE(&sc->sc_bufq, bp);
138 		bp->b_error = EIO;
139 		bp->b_flags |= B_ERROR;
140 		bp->b_resid = bp->b_bcount;
141 		biodone(bp);
142 	}
143 	splx(s);
144 
145 	/* Nuke the vnodes for any open instances. */
146 	mn = DISKUNIT(sc->sc_dv.dv_unit);
147 	vdevgone(bmaj, mn, mn + (MAXPARTITIONS - 1), VBLK);
148 	vdevgone(cmaj, mn, mn + (MAXPARTITIONS - 1), VCHR);
149 
150 	/* Detach from the disk list. */
151 	disk_detach(&sc->sc_dk);
152 
153 #if NRND > 0
154 	/* Unhook the entropy source. */
155 	rnd_detach_source(&sc->sc_rnd_source);
156 #endif
157 
158 	/* Flush the device's cache. */
159 	if (sc->sc_flush != NULL)
160 		if ((*sc->sc_flush)(sc) != 0)
161 			printf("%s: unable to flush cache\n",
162 			    sc->sc_dv.dv_xname);
163 }
164 
165 static void
166 ldshutdown(void *cookie)
167 {
168 	struct ld_softc *sc;
169 	int i;
170 
171 	for (i = 0; i < ld_cd.cd_ndevs; i++) {
172 		if ((sc = device_lookup(&ld_cd, i)) == NULL)
173 			continue;
174 		if (sc->sc_flush != NULL && (*sc->sc_flush)(sc) != 0)
175 			printf("%s: unable to flush cache\n",
176 			    sc->sc_dv.dv_xname);
177 	}
178 }
179 
180 int
181 ldopen(dev_t dev, int flags, int fmt, struct proc *p)
182 {
183 	struct ld_softc *sc;
184 	int unit, part;
185 
186 	unit = DISKUNIT(dev);
187 	if ((sc = device_lookup(&ld_cd, unit))== NULL)
188 		return (ENXIO);
189 	if ((sc->sc_flags & LDF_ENABLED) == 0)
190 		return (ENODEV);
191 	part = DISKPART(dev);
192 	ldlock(sc);
193 
194 	if (sc->sc_dk.dk_openmask == 0)
195 		ldgetdisklabel(sc);
196 
197 	/* Check that the partition exists. */
198 	if (part != RAW_PART && (part >= sc->sc_dk.dk_label->d_npartitions ||
199 	    sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
200 	     	ldunlock(sc);
201 		return (ENXIO);
202 	}
203 
204 	/* Ensure only one open at a time. */
205 	switch (fmt) {
206 	case S_IFCHR:
207 		sc->sc_dk.dk_copenmask |= (1 << part);
208 		break;
209 	case S_IFBLK:
210 		sc->sc_dk.dk_bopenmask |= (1 << part);
211 		break;
212 	}
213 	sc->sc_dk.dk_openmask =
214 	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
215 
216 	ldunlock(sc);
217 	return (0);
218 }
219 
220 int
221 ldclose(dev_t dev, int flags, int fmt, struct proc *p)
222 {
223 	struct ld_softc *sc;
224 	int part, unit;
225 
226 	unit = DISKUNIT(dev);
227 	part = DISKPART(dev);
228 	sc = device_lookup(&ld_cd, unit);
229 	ldlock(sc);
230 
231 	switch (fmt) {
232 	case S_IFCHR:
233 		sc->sc_dk.dk_copenmask &= ~(1 << part);
234 		break;
235 	case S_IFBLK:
236 		sc->sc_dk.dk_bopenmask &= ~(1 << part);
237 		break;
238 	}
239 	sc->sc_dk.dk_openmask =
240 	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
241 
242 	if (sc->sc_dk.dk_openmask == 0 && sc->sc_flush != NULL)
243 		if ((*sc->sc_flush)(sc) != 0)
244 			printf("%s: unable to flush cache\n",
245 			    sc->sc_dv.dv_xname);
246 
247 	ldunlock(sc);
248 	return (0);
249 }
250 
251 int
252 ldread(dev_t dev, struct uio *uio, int ioflag)
253 {
254 
255 	return (physio(ldstrategy, NULL, dev, B_READ, ldminphys, uio));
256 }
257 
258 int
259 ldwrite(dev_t dev, struct uio *uio, int ioflag)
260 {
261 
262 	return (physio(ldstrategy, NULL, dev, B_WRITE, ldminphys, uio));
263 }
264 
265 int
266 ldioctl(dev_t dev, u_long cmd, caddr_t addr, int32_t flag, struct proc *p)
267 {
268 	struct ld_softc *sc;
269 	int part, unit, error;
270 
271 	unit = DISKUNIT(dev);
272 	part = DISKPART(dev);
273 	sc = device_lookup(&ld_cd, unit);
274 	error = 0;
275 
276 	switch (cmd) {
277 	case DIOCGDINFO:
278 		memcpy(addr, sc->sc_dk.dk_label, sizeof(struct disklabel));
279 		return (0);
280 
281 	case DIOCGPART:
282 		((struct partinfo *)addr)->disklab = sc->sc_dk.dk_label;
283 		((struct partinfo *)addr)->part =
284 		    &sc->sc_dk.dk_label->d_partitions[part];
285 		break;
286 
287 	case DIOCWDINFO:
288 	case DIOCSDINFO:
289 		if ((flag & FWRITE) == 0)
290 			return (EBADF);
291 
292 		if ((error = ldlock(sc)) != 0)
293 			return (error);
294 		sc->sc_flags |= LDF_LABELLING;
295 
296 		error = setdisklabel(sc->sc_dk.dk_label,
297 		    (struct disklabel *)addr, /*sc->sc_dk.dk_openmask : */0,
298 		    sc->sc_dk.dk_cpulabel);
299 		if (error == 0 && cmd == DIOCWDINFO)
300 			error = writedisklabel(
301 			    MAKEDISKDEV(major(dev), DISKUNIT(dev), RAW_PART),
302 			    ldstrategy, sc->sc_dk.dk_label,
303 			    sc->sc_dk.dk_cpulabel);
304 
305 		sc->sc_flags &= ~LDF_LABELLING;
306 		ldunlock(sc);
307 		break;
308 
309 	case DIOCWLABEL:
310 		if ((flag & FWRITE) == 0)
311 			return (EBADF);
312 		if (*(int *)addr)
313 			sc->sc_flags |= LDF_WLABEL;
314 		else
315 			sc->sc_flags &= ~LDF_WLABEL;
316 		break;
317 
318 	case DIOCGDEFLABEL:
319 		ldgetdefaultlabel(sc, (struct disklabel *)addr);
320 		break;
321 
322 	default:
323 		error = ENOTTY;
324 		break;
325 	}
326 
327 	return (error);
328 }
329 
330 void
331 ldstrategy(struct buf *bp)
332 {
333 	struct ld_softc *sc;
334 	int s;
335 
336 	sc = device_lookup(&ld_cd, DISKUNIT(bp->b_dev));
337 
338 	s = splbio();
339 	if (sc->sc_queuecnt == sc->sc_maxqueuecnt) {
340 		BUFQ_INSERT_TAIL(&sc->sc_bufq, bp);
341 		splx(s);
342 		return;
343 	}
344 	splx(s);
345 	ldstart(sc, bp);
346 }
347 
348 static int
349 ldstart(struct ld_softc *sc, struct buf *bp)
350 {
351 	struct disklabel *lp;
352 	int part, s, rv;
353 
354 	if ((sc->sc_flags & LDF_DRAIN) != 0) {
355 		bp->b_error = EIO;
356 		bp->b_flags |= B_ERROR;
357 		bp->b_resid = bp->b_bcount;
358 		biodone(bp);
359 		return (-1);
360 	}
361 
362 	part = DISKPART(bp->b_dev);
363 	lp = sc->sc_dk.dk_label;
364 
365 	/*
366 	 * The transfer must be a whole number of blocks and the offset must
367 	 * not be negative.
368 	 */
369 	if ((bp->b_bcount % lp->d_secsize) != 0 || bp->b_blkno < 0) {
370 		bp->b_flags |= B_ERROR;
371 		biodone(bp);
372 		return (-1);
373 	}
374 
375 	/*
376 	 * If it's a null transfer, return.
377 	 */
378 	if (bp->b_bcount == 0) {
379 		bp->b_resid = bp->b_bcount;
380 		biodone(bp);
381 		return (-1);
382 	}
383 
384 	/*
385 	 * Do bounds checking and adjust the transfer.  If error, process.
386 	 * If past the end of partition, just return.
387 	 */
388 	if (part != RAW_PART &&
389 	    bounds_check_with_label(bp, lp,
390 	    (sc->sc_flags & (LDF_WLABEL | LDF_LABELLING)) != 0) <= 0) {
391 		bp->b_resid = bp->b_bcount;
392 		biodone(bp);
393 		return (-1);
394 	}
395 
396 	/*
397 	 * Convert the logical block number to a physical one and put it in
398 	 * terms of the device's logical block size.
399 	 */
400 	if (lp->d_secsize >= DEV_BSIZE)
401 		bp->b_rawblkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
402 	else
403 		bp->b_rawblkno = bp->b_blkno * (DEV_BSIZE / lp->d_secsize);
404 
405 	if (bp->b_dev != RAW_PART)
406 		bp->b_rawblkno += lp->d_partitions[part].p_offset;
407 
408 	s = splbio();
409 	disk_busy(&sc->sc_dk);
410 	sc->sc_queuecnt++;
411 	splx(s);
412 
413 	if ((rv = (*sc->sc_start)(sc, bp)) != 0) {
414 		bp->b_error = rv;
415 		bp->b_flags |= B_ERROR;
416 		bp->b_resid = bp->b_bcount;
417 		s = splbio();
418 		lddone(sc, bp);
419 		splx(s);
420 	}
421 
422 	return (0);
423 }
424 
425 void
426 lddone(struct ld_softc *sc, struct buf *bp)
427 {
428 
429 	if ((bp->b_flags & B_ERROR) != 0) {
430 		diskerr(bp, "ld", "error", LOG_PRINTF, 0, sc->sc_dk.dk_label);
431 		printf("\n");
432 	}
433 
434 	disk_unbusy(&sc->sc_dk, bp->b_bcount - bp->b_resid);
435 #if NRND > 0
436 	rnd_add_uint32(&sc->sc_rnd_source, bp->b_rawblkno);
437 #endif
438 	biodone(bp);
439 	if (--sc->sc_queuecnt == 0 && (sc->sc_flags & LDF_DRAIN) != 0)
440 		wakeup(&sc->sc_queuecnt);
441 
442 	while ((bp = BUFQ_FIRST(&sc->sc_bufq)) != NULL) {
443 		BUFQ_REMOVE(&sc->sc_bufq, bp);
444 		if (!ldstart(sc, bp))
445 			break;
446 	}
447 }
448 
449 int
450 ldsize(dev_t dev)
451 {
452 	struct ld_softc *sc;
453 	int part, unit, omask, size;
454 
455 	unit = DISKUNIT(dev);
456 	if ((sc = device_lookup(&ld_cd, unit)) == NULL)
457 		return (ENODEV);
458 	if ((sc->sc_flags & LDF_ENABLED) == 0)
459 		return (ENODEV);
460 	part = DISKPART(dev);
461 
462 	omask = sc->sc_dk.dk_openmask & (1 << part);
463 
464 	if (omask == 0 && ldopen(dev, 0, S_IFBLK, NULL) != 0)
465 		return (-1);
466 	else if (sc->sc_dk.dk_label->d_partitions[part].p_fstype != FS_SWAP)
467 		size = -1;
468 	else
469 		size = sc->sc_dk.dk_label->d_partitions[part].p_size *
470 		    (sc->sc_dk.dk_label->d_secsize / DEV_BSIZE);
471 	if (omask == 0 && ldclose(dev, 0, S_IFBLK, NULL) != 0)
472 		return (-1);
473 
474 	return (size);
475 }
476 
477 /*
478  * Load the label information from the specified device.
479  */
480 static void
481 ldgetdisklabel(struct ld_softc *sc)
482 {
483 	const char *errstring;
484 
485 	ldgetdefaultlabel(sc, sc->sc_dk.dk_label);
486 
487 	/* Call the generic disklabel extraction routine. */
488 	errstring = readdisklabel(MAKEDISKDEV(0, sc->sc_dv.dv_unit, RAW_PART),
489 	    ldstrategy, sc->sc_dk.dk_label, sc->sc_dk.dk_cpulabel);
490 	if (errstring != NULL)
491 		printf("%s: %s\n", sc->sc_dv.dv_xname, errstring);
492 }
493 
494 /*
495  * Construct a ficticious label.
496  */
497 static void
498 ldgetdefaultlabel(struct ld_softc *sc, struct disklabel *lp)
499 {
500 
501 	memset(lp, 0, sizeof(struct disklabel));
502 
503 	lp->d_secsize = sc->sc_secsize;
504 	lp->d_ntracks = sc->sc_nheads;
505 	lp->d_nsectors = sc->sc_nsectors;
506 	lp->d_ncylinders = sc->sc_ncylinders;
507 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
508 	lp->d_type = DTYPE_LD;
509 	strcpy(lp->d_typename, "unknown");
510 	strcpy(lp->d_packname, "fictitious");
511 	lp->d_secperunit = sc->sc_secperunit;
512 	lp->d_rpm = 7200;
513 	lp->d_interleave = 1;
514 	lp->d_flags = 0;
515 
516 	lp->d_partitions[RAW_PART].p_offset = 0;
517 	lp->d_partitions[RAW_PART].p_size =
518 	    lp->d_secperunit * (lp->d_secsize / DEV_BSIZE);
519 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
520 	lp->d_npartitions = RAW_PART + 1;
521 
522 	lp->d_magic = DISKMAGIC;
523 	lp->d_magic2 = DISKMAGIC;
524 	lp->d_checksum = dkcksum(lp);
525 }
526 
527 /*
528  * Wait interruptibly for an exclusive lock.
529  *
530  * XXX Several drivers do this; it should be abstracted and made MP-safe.
531  */
532 static int
533 ldlock(struct ld_softc *sc)
534 {
535 	int error;
536 
537 	while ((sc->sc_flags & LDF_LKHELD) != 0) {
538 		sc->sc_flags |= LDF_LKWANTED;
539 		if ((error = tsleep(sc, PRIBIO | PCATCH, "ldlck", 0)) != 0)
540 			return (error);
541 	}
542 	sc->sc_flags |= LDF_LKHELD;
543 	return (0);
544 }
545 
546 /*
547  * Unlock and wake up any waiters.
548  */
549 static void
550 ldunlock(struct ld_softc *sc)
551 {
552 
553 	sc->sc_flags &= ~LDF_LKHELD;
554 	if ((sc->sc_flags & LDF_LKWANTED) != 0) {
555 		sc->sc_flags &= ~LDF_LKWANTED;
556 		wakeup(sc);
557 	}
558 }
559 
560 /*
561  * Take a dump.
562  */
563 int
564 lddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
565 {
566 	struct ld_softc *sc;
567 	struct disklabel *lp;
568 	int unit, part, nsects, sectoff, towrt, nblk, maxblkcnt, rv;
569 	static int dumping;
570 
571 	/* Check if recursive dump; if so, punt. */
572 	if (dumping)
573 		return (EFAULT);
574 	dumping = 1;
575 	if (sc->sc_dump == NULL)
576 		return (ENXIO);
577 
578 	unit = DISKUNIT(dev);
579 	if ((sc = device_lookup(&ld_cd, unit)) == NULL)
580 		return (ENXIO);
581 	if ((sc->sc_flags & LDF_ENABLED) == 0)
582 		return (ENODEV);
583 	part = DISKPART(dev);
584 
585 	/* Convert to disk sectors.  Request must be a multiple of size. */
586 	lp = sc->sc_dk.dk_label;
587 	if ((size % lp->d_secsize) != 0)
588 		return (EFAULT);
589 	towrt = size / lp->d_secsize;
590 	blkno = dbtob(blkno) / lp->d_secsize;	/* blkno in DEV_BSIZE units */
591 
592 	nsects = lp->d_partitions[part].p_size;
593 	sectoff = lp->d_partitions[part].p_offset;
594 
595 	/* Check transfer bounds against partition size. */
596 	if ((blkno < 0) || ((blkno + towrt) > nsects))
597 		return (EINVAL);
598 
599 	/* Offset block number to start of partition. */
600 	blkno += sectoff;
601 
602 	/* Start dumping and return when done. */
603 	maxblkcnt = sc->sc_maxxfer / sc->sc_secsize;
604 	while (towrt > 0) {
605 		nblk = max(maxblkcnt, towrt);
606 
607 		if ((rv = (*sc->sc_dump)(sc, va, blkno, nblk)) != 0)
608 			return (rv);
609 
610 		towrt -= nblk;
611 		blkno += nblk;
612 		va += nblk * sc->sc_secsize;
613 	}
614 
615 	dumping = 0;
616 	return (0);
617 }
618 
619 /*
620  * Adjust the size of a transfer.
621  */
622 static void
623 ldminphys(struct buf *bp)
624 {
625 	struct ld_softc *sc;
626 
627 	sc = device_lookup(&ld_cd, DISKUNIT(bp->b_dev));
628 
629 	if (bp->b_bcount > sc->sc_maxxfer)
630 		bp->b_bcount = sc->sc_maxxfer;
631 	minphys(bp);
632 }
633