xref: /openbsd-src/sys/kern/subr_disk.c (revision 48950c12d106c85f315112191a0228d7b83b9510)
1 /*	$OpenBSD: subr_disk.c,v 1.147 2013/02/17 17:39:29 miod Exp $	*/
2 /*	$NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Jason R. Thorpe.  All rights reserved.
6  * Copyright (c) 1982, 1986, 1988, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * (c) UNIX System Laboratories, Inc.
9  * All or some portions of this file are derived from material licensed
10  * to the University of California by American Telephone and Telegraph
11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12  * the permission of UNIX System Laboratories, Inc.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)ufs_disksubr.c	8.5 (Berkeley) 1/21/94
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/fcntl.h>
46 #include <sys/buf.h>
47 #include <sys/stat.h>
48 #include <sys/syslog.h>
49 #include <sys/device.h>
50 #include <sys/time.h>
51 #include <sys/disklabel.h>
52 #include <sys/conf.h>
53 #include <sys/lock.h>
54 #include <sys/disk.h>
55 #include <sys/reboot.h>
56 #include <sys/dkio.h>
57 #include <sys/proc.h>
58 #include <sys/vnode.h>
59 #include <sys/workq.h>
60 #include <uvm/uvm_extern.h>
61 
62 #include <sys/socket.h>
63 #include <sys/socketvar.h>
64 
65 #include <net/if.h>
66 
67 #include <dev/rndvar.h>
68 #include <dev/cons.h>
69 
70 #include "softraid.h"
71 
/*
 * A global list of all disks attached to the system.  May grow or
 * shrink over time: disk_attach() appends to it and disk_detach()
 * removes from it.
 */
struct	disklist_head disklist;	/* TAILQ_HEAD */
int	disk_count;		/* number of drives in global disklist */
int	disk_change;		/* set if a disk has been attached/detached
				 * (or relabeled by setdisklabel()) since
				 * last we looked at this variable. This
				 * is reset by hw_sysctl()
				 */

u_char	bootduid[8];		/* DUID of boot disk. */
u_char	rootduid[8];		/* DUID of root disk. */

/*
 * softraid callback, do not use!  When non-NULL, disk_attach() calls it
 * with a second argument of 1 and disk_detach() calls it with -1.
 */
void (*softraid_disk_attach)(struct disk *, int);

void sr_map_root(void);

/* Task queued by disk_attach() through workq_add_task(). */
void disk_attach_callback(void *, void *);
92 
93 /*
94  * Seek sort for disks.  We depend on the driver which calls us using b_resid
95  * as the current cylinder number.
96  *
97  * The argument ap structure holds a b_actf activity chain pointer on which we
98  * keep two queues, sorted in ascending cylinder order.  The first queue holds
99  * those requests which are positioned after the current cylinder (in the first
100  * request); the second holds requests which came in after their cylinder number
101  * was passed.  Thus we implement a one way scan, retracting after reaching the
102  * end of the drive to the first request on the second queue, at which time it
103  * becomes the first queue.
104  *
105  * A one-way scan is natural because of the way UNIX read-ahead blocks are
106  * allocated.
107  */
108 
109 void
110 disksort(struct buf *ap, struct buf *bp)
111 {
112 	struct buf *bq;
113 
114 	/* If the queue is empty, then it's easy. */
115 	if (ap->b_actf == NULL) {
116 		bp->b_actf = NULL;
117 		ap->b_actf = bp;
118 		return;
119 	}
120 
121 	/*
122 	 * If we lie after the first (currently active) request, then we
123 	 * must locate the second request list and add ourselves to it.
124 	 */
125 	bq = ap->b_actf;
126 	if (bp->b_cylinder < bq->b_cylinder) {
127 		while (bq->b_actf) {
128 			/*
129 			 * Check for an ``inversion'' in the normally ascending
130 			 * cylinder numbers, indicating the start of the second
131 			 * request list.
132 			 */
133 			if (bq->b_actf->b_cylinder < bq->b_cylinder) {
134 				/*
135 				 * Search the second request list for the first
136 				 * request at a larger cylinder number.  We go
137 				 * before that; if there is no such request, we
138 				 * go at end.
139 				 */
140 				do {
141 					if (bp->b_cylinder <
142 					    bq->b_actf->b_cylinder)
143 						goto insert;
144 					if (bp->b_cylinder ==
145 					    bq->b_actf->b_cylinder &&
146 					    bp->b_blkno < bq->b_actf->b_blkno)
147 						goto insert;
148 					bq = bq->b_actf;
149 				} while (bq->b_actf);
150 				goto insert;		/* after last */
151 			}
152 			bq = bq->b_actf;
153 		}
154 		/*
155 		 * No inversions... we will go after the last, and
156 		 * be the first request in the second request list.
157 		 */
158 		goto insert;
159 	}
160 	/*
161 	 * Request is at/after the current request...
162 	 * sort in the first request list.
163 	 */
164 	while (bq->b_actf) {
165 		/*
166 		 * We want to go after the current request if there is an
167 		 * inversion after it (i.e. it is the end of the first
168 		 * request list), or if the next request is a larger cylinder
169 		 * than our request.
170 		 */
171 		if (bq->b_actf->b_cylinder < bq->b_cylinder ||
172 		    bp->b_cylinder < bq->b_actf->b_cylinder ||
173 		    (bp->b_cylinder == bq->b_actf->b_cylinder &&
174 		    bp->b_blkno < bq->b_actf->b_blkno))
175 			goto insert;
176 		bq = bq->b_actf;
177 	}
178 	/*
179 	 * Neither a second list nor a larger request... we go at the end of
180 	 * the first list, which is the same as the end of the whole schebang.
181 	 */
182 insert:	bp->b_actf = bq->b_actf;
183 	bq->b_actf = bp;
184 }
185 
186 /*
187  * Compute checksum for disk label.
188  */
189 u_int
190 dkcksum(struct disklabel *lp)
191 {
192 	u_int16_t *start, *end;
193 	u_int16_t sum = 0;
194 
195 	start = (u_int16_t *)lp;
196 	end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions];
197 	while (start < end)
198 		sum ^= *start++;
199 	return (sum);
200 }
201 
202 int
203 initdisklabel(struct disklabel *lp)
204 {
205 	int i;
206 
207 	/* minimal requirements for archetypal disk label */
208 	if (lp->d_secsize < DEV_BSIZE)
209 		lp->d_secsize = DEV_BSIZE;
210 	if (DL_GETDSIZE(lp) == 0)
211 		DL_SETDSIZE(lp, MAXDISKSIZE);
212 	if (lp->d_secpercyl == 0)
213 		return (ERANGE);
214 	lp->d_npartitions = MAXPARTITIONS;
215 	for (i = 0; i < RAW_PART; i++) {
216 		DL_SETPSIZE(&lp->d_partitions[i], 0);
217 		DL_SETPOFFSET(&lp->d_partitions[i], 0);
218 	}
219 	if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0)
220 		DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp));
221 	DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
222 	DL_SETBSTART(lp, 0);
223 	DL_SETBEND(lp, DL_GETDSIZE(lp));
224 	lp->d_version = 1;
225 	lp->d_bbsize = 8192;
226 	lp->d_sbsize = 64*1024;			/* XXX ? */
227 	return (0);
228 }
229 
230 /*
231  * Check an incoming block to make sure it is a disklabel, convert it to
232  * a newer version if needed, etc etc.
233  */
234 int
235 checkdisklabel(void *rlp, struct disklabel *lp, u_int64_t boundstart,
236     u_int64_t boundend)
237 {
238 	struct disklabel *dlp = rlp;
239 	struct __partitionv0 *v0pp;
240 	struct partition *pp;
241 	daddr64_t disksize;
242 	int error = 0;
243 	int i;
244 
245 	if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC)
246 		error = ENOENT;	/* no disk label */
247 	else if (dlp->d_npartitions > MAXPARTITIONS)
248 		error = E2BIG;	/* too many partitions */
249 	else if (dlp->d_secpercyl == 0)
250 		error = EINVAL;	/* invalid label */
251 	else if (dlp->d_secsize == 0)
252 		error = ENOSPC;	/* disk too small */
253 	else if (dkcksum(dlp) != 0)
254 		error = EINVAL;	/* incorrect checksum */
255 
256 	if (error) {
257 		u_int16_t *start, *end, sum = 0;
258 
259 		/* If it is byte-swapped, attempt to convert it */
260 		if (swap32(dlp->d_magic) != DISKMAGIC ||
261 		    swap32(dlp->d_magic2) != DISKMAGIC ||
262 		    swap16(dlp->d_npartitions) > MAXPARTITIONS)
263 			return (error);
264 
265 		/*
266 		 * Need a byte-swap aware dkcksum variant
267 		 * inlined, because dkcksum uses a sub-field
268 		 */
269 		start = (u_int16_t *)dlp;
270 		end = (u_int16_t *)&dlp->d_partitions[
271 		    swap16(dlp->d_npartitions)];
272 		while (start < end)
273 			sum ^= *start++;
274 		if (sum != 0)
275 			return (error);
276 
277 		dlp->d_magic = swap32(dlp->d_magic);
278 		dlp->d_type = swap16(dlp->d_type);
279 		dlp->d_subtype = swap16(dlp->d_subtype);
280 
281 		/* d_typename and d_packname are strings */
282 
283 		dlp->d_secsize = swap32(dlp->d_secsize);
284 		dlp->d_nsectors = swap32(dlp->d_nsectors);
285 		dlp->d_ntracks = swap32(dlp->d_ntracks);
286 		dlp->d_ncylinders = swap32(dlp->d_ncylinders);
287 		dlp->d_secpercyl = swap32(dlp->d_secpercyl);
288 		dlp->d_secperunit = swap32(dlp->d_secperunit);
289 
290 		/* d_uid is a string */
291 
292 		dlp->d_acylinders = swap32(dlp->d_acylinders);
293 
294 		dlp->d_flags = swap32(dlp->d_flags);
295 
296 		for (i = 0; i < NDDATA; i++)
297 			dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]);
298 
299 		dlp->d_secperunith = swap16(dlp->d_secperunith);
300 		dlp->d_version = swap16(dlp->d_version);
301 
302 		for (i = 0; i < NSPARE; i++)
303 			dlp->d_spare[i] = swap32(dlp->d_spare[i]);
304 
305 		dlp->d_magic2 = swap32(dlp->d_magic2);
306 
307 		dlp->d_npartitions = swap16(dlp->d_npartitions);
308 		dlp->d_bbsize = swap32(dlp->d_bbsize);
309 		dlp->d_sbsize = swap32(dlp->d_sbsize);
310 
311 		for (i = 0; i < MAXPARTITIONS; i++) {
312 			pp = &dlp->d_partitions[i];
313 			pp->p_size = swap32(pp->p_size);
314 			pp->p_offset = swap32(pp->p_offset);
315 			if (dlp->d_version == 0) {
316 				v0pp = (struct __partitionv0 *)pp;
317 				v0pp->p_fsize = swap32(v0pp->p_fsize);
318 			} else {
319 				pp->p_offseth = swap16(pp->p_offseth);
320 				pp->p_sizeh = swap16(pp->p_sizeh);
321 			}
322 			pp->p_cpg = swap16(pp->p_cpg);
323 		}
324 
325 		dlp->d_checksum = 0;
326 		dlp->d_checksum = dkcksum(dlp);
327 		error = 0;
328 	}
329 
330 	/* XXX should verify lots of other fields and whine a lot */
331 
332 	if (error)
333 		return (error);
334 
335 	/* Initial passed in lp contains the real disk size. */
336 	disksize = DL_GETDSIZE(lp);
337 
338 	if (lp != dlp)
339 		*lp = *dlp;
340 
341 	if (lp->d_version == 0) {
342 		lp->d_version = 1;
343 		lp->d_secperunith = 0;
344 
345 		v0pp = (struct __partitionv0 *)lp->d_partitions;
346 		pp = lp->d_partitions;
347 		for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) {
348 			pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp->
349 			    p_fsize, v0pp->p_frag);
350 			pp->p_offseth = 0;
351 			pp->p_sizeh = 0;
352 		}
353 	}
354 
355 #ifdef DEBUG
356 	if (DL_GETDSIZE(lp) != disksize)
357 		printf("on-disk disklabel has incorrect disksize (%lld)\n",
358 		    DL_GETDSIZE(lp));
359 	if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize)
360 		printf("on-disk disklabel RAW_PART has incorrect size (%lld)\n",
361 		    DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
362 	if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0)
363 		printf("on-disk disklabel RAW_PART offset != 0 (%lld)\n",
364 		    DL_GETPOFFSET(&lp->d_partitions[RAW_PART]));
365 #endif
366 	DL_SETDSIZE(lp, disksize);
367 	DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize);
368 	DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
369 	DL_SETBSTART(lp, boundstart);
370 	DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp));
371 
372 	lp->d_checksum = 0;
373 	lp->d_checksum = dkcksum(lp);
374 	return (0);
375 }
376 
377 /*
378  * If dos partition table requested, attempt to load it and
379  * find disklabel inside a DOS partition. Return buffer
380  * for use in signalling errors if requested.
381  *
382  * We would like to check if each MBR has a valid BOOT_MAGIC, but
383  * we cannot because it doesn't always exist. So.. we assume the
384  * MBR is valid.
385  */
386 int
387 readdoslabel(struct buf *bp, void (*strat)(struct buf *),
388     struct disklabel *lp, int *partoffp, int spoofonly)
389 {
390 	u_int64_t dospartoff = 0, dospartend = DL_GETBEND(lp);
391 	int i, ourpart = -1, wander = 1, n = 0, loop = 0, offset;
392 	struct dos_partition dp[NDOSPART], *dp2;
393 	daddr64_t part_blkno = DOSBBSECTOR;
394 	u_int32_t extoff = 0;
395 	int error;
396 
397 	if (lp->d_secpercyl == 0)
398 		return (EINVAL);	/* invalid label */
399 	if (lp->d_secsize == 0)
400 		return (ENOSPC);	/* disk too small */
401 
402 	/* do DOS partitions in the process of getting disklabel? */
403 
404 	/*
405 	 * Read dos partition table, follow extended partitions.
406 	 * Map the partitions to disklabel entries i-p
407 	 */
408 	while (wander && loop < DOS_MAXEBR) {
409 		loop++;
410 		wander = 0;
411 		if (part_blkno < extoff)
412 			part_blkno = extoff;
413 
414 		/* read boot record */
415 		bp->b_blkno = DL_BLKTOSEC(lp, part_blkno) * DL_BLKSPERSEC(lp);
416 		offset = DL_BLKOFFSET(lp, part_blkno) + DOSPARTOFF;
417 		bp->b_bcount = lp->d_secsize;
418 		bp->b_error = 0; /* B_ERROR and b_error may have stale data. */
419 		CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR);
420 		SET(bp->b_flags, B_BUSY | B_READ | B_RAW);
421 		(*strat)(bp);
422 		error = biowait(bp);
423 		if (error) {
424 /*wrong*/		if (partoffp)
425 /*wrong*/			*partoffp = -1;
426 			return (error);
427 		}
428 
429 		bcopy(bp->b_data + offset, dp, sizeof(dp));
430 
431 		if (n == 0 && part_blkno == DOSBBSECTOR) {
432 			u_int16_t mbrtest;
433 
434 			/* Check the end of sector marker. */
435 			mbrtest = ((bp->b_data[510] << 8) & 0xff00) |
436 			    (bp->b_data[511] & 0xff);
437 			if (mbrtest != 0x55aa)
438 				goto notmbr;
439 		}
440 
441 		if (ourpart == -1) {
442 			/* Search for our MBR partition */
443 			for (dp2=dp, i=0; i < NDOSPART && ourpart == -1;
444 			    i++, dp2++)
445 				if (letoh32(dp2->dp_size) &&
446 				    dp2->dp_typ == DOSPTYP_OPENBSD)
447 					ourpart = i;
448 			if (ourpart == -1)
449 				goto donot;
450 			/*
451 			 * This is our MBR partition. need sector
452 			 * address for SCSI/IDE, cylinder for
453 			 * ESDI/ST506/RLL
454 			 */
455 			dp2 = &dp[ourpart];
456 			dospartoff = letoh32(dp2->dp_start) + part_blkno;
457 			dospartend = dospartoff + letoh32(dp2->dp_size);
458 
459 			/* found our OpenBSD partition, finish up */
460 			if (partoffp)
461 				goto notfat;
462 
463 			if (lp->d_ntracks == 0)
464 				lp->d_ntracks = dp2->dp_ehd + 1;
465 			if (lp->d_nsectors == 0)
466 				lp->d_nsectors = DPSECT(dp2->dp_esect);
467 			if (lp->d_secpercyl == 0)
468 				lp->d_secpercyl = lp->d_ntracks *
469 				    lp->d_nsectors;
470 		}
471 donot:
472 		/*
473 		 * In case the disklabel read below fails, we want to
474 		 * provide a fake label in i-p.
475 		 */
476 		for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) {
477 			struct partition *pp;
478 			u_int8_t fstype;
479 
480 			if (dp2->dp_typ == DOSPTYP_OPENBSD)
481 				continue;
482 			if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp))
483 				continue;
484 			if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp))
485 				continue;
486 			if (letoh32(dp2->dp_size) == 0)
487 				continue;
488 
489 			switch (dp2->dp_typ) {
490 			case DOSPTYP_UNUSED:
491 				fstype = FS_UNUSED;
492 				break;
493 
494 			case DOSPTYP_LINUX:
495 				fstype = FS_EXT2FS;
496 				break;
497 
498 			case DOSPTYP_NTFS:
499 				fstype = FS_NTFS;
500 				break;
501 
502 			case DOSPTYP_FAT12:
503 			case DOSPTYP_FAT16S:
504 			case DOSPTYP_FAT16B:
505 			case DOSPTYP_FAT16L:
506 			case DOSPTYP_FAT32:
507 			case DOSPTYP_FAT32L:
508 				fstype = FS_MSDOS;
509 				break;
510 			case DOSPTYP_EXTEND:
511 			case DOSPTYP_EXTENDL:
512 				part_blkno = letoh32(dp2->dp_start) + extoff;
513 				if (!extoff) {
514 					extoff = letoh32(dp2->dp_start);
515 					part_blkno = 0;
516 				}
517 				wander = 1;
518 				continue;
519 				break;
520 			default:
521 				fstype = FS_OTHER;
522 				break;
523 			}
524 
525 			/*
526 			 * Don't set fstype/offset/size when just looking for
527 			 * the offset of the OpenBSD partition. It would
528 			 * invalidate the disklabel checksum!
529 			 *
530 			 * Don't try to spoof more than 8 partitions, i.e.
531 			 * 'i' -'p'.
532 			 */
533 			if (partoffp || n >= 8)
534 				continue;
535 
536 			pp = &lp->d_partitions[8+n];
537 			n++;
538 			pp->p_fstype = fstype;
539 			if (letoh32(dp2->dp_start))
540 				DL_SETPOFFSET(pp,
541 				    letoh32(dp2->dp_start) + part_blkno);
542 			DL_SETPSIZE(pp, letoh32(dp2->dp_size));
543 		}
544 	}
545 
546 notmbr:
547 	if (partoffp == NULL)
548 		/* Must not modify *lp when partoffp is set. */
549 		lp->d_npartitions = MAXPARTITIONS;
550 
551 	if (n == 0 && part_blkno == DOSBBSECTOR && ourpart == -1) {
552 		u_int16_t fattest;
553 
554 		/* Check for a valid initial jmp instruction. */
555 		switch ((u_int8_t)bp->b_data[0]) {
556 		case 0xeb:
557 			/*
558 			 * Two-byte jmp instruction. The 2nd byte is the number
559 			 * of bytes to jmp and the 3rd byte must be a NOP.
560 			 */
561 			if ((u_int8_t)bp->b_data[2] != 0x90)
562 				goto notfat;
563 			break;
564 		case 0xe9:
565 			/*
566 			 * Three-byte jmp instruction. The next two bytes are a
567 			 * little-endian 16 bit value.
568 			 */
569 			break;
570 		default:
571 			goto notfat;
572 			break;
573 		}
574 
575 		/* Check for a valid bytes per sector value. */
576 		fattest = ((bp->b_data[12] << 8) & 0xff00) |
577 		    (bp->b_data[11] & 0xff);
578 		if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0))
579 			goto notfat;
580 
581 		if (partoffp)
582 			return (ENXIO);	/* No place for disklabel on FAT! */
583 
584 		DL_SETPSIZE(&lp->d_partitions['i' - 'a'],
585 		    DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
586 		DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0);
587 		lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS;
588 
589 		spoofonly = 1;	/* No disklabel to read from disk. */
590 	}
591 
592 notfat:
593 	/* record the OpenBSD partition's placement for the caller */
594 	if (partoffp)
595 		*partoffp = dospartoff;
596 	else {
597 		DL_SETBSTART(lp, dospartoff);
598 		DL_SETBEND(lp, (dospartend < DL_GETDSIZE(lp)) ? dospartend :
599 		    DL_GETDSIZE(lp));
600 	}
601 
602 	/* don't read the on-disk label if we are in spoofed-only mode */
603 	if (spoofonly)
604 		return (0);
605 
606 	bp->b_blkno = DL_BLKTOSEC(lp, dospartoff + DOS_LABELSECTOR) *
607 	    DL_BLKSPERSEC(lp);
608 	offset = DL_BLKOFFSET(lp, dospartoff + DOS_LABELSECTOR);
609 	bp->b_bcount = lp->d_secsize;
610 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
611 	SET(bp->b_flags, B_BUSY | B_READ | B_RAW);
612 	(*strat)(bp);
613 	if (biowait(bp))
614 		return (bp->b_error);
615 
616 	/* sub-MBR disklabels are always at a LABELOFFSET of 0 */
617 	return checkdisklabel(bp->b_data + offset, lp, dospartoff, dospartend);
618 }
619 
620 /*
621  * Check new disk label for sensibility before setting it.
622  */
623 int
624 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask)
625 {
626 	struct partition *opp, *npp;
627 	struct disk *dk;
628 	u_int64_t uid;
629 	int i;
630 
631 	/* sanity clause */
632 	if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 ||
633 	    (nlp->d_secsize % DEV_BSIZE) != 0)
634 		return (EINVAL);
635 
636 	/* special case to allow disklabel to be invalidated */
637 	if (nlp->d_magic == 0xffffffff) {
638 		*olp = *nlp;
639 		return (0);
640 	}
641 
642 	if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
643 	    dkcksum(nlp) != 0)
644 		return (EINVAL);
645 
646 	/* XXX missing check if other dos partitions will be overwritten */
647 
648 	while (openmask != 0) {
649 		i = ffs(openmask) - 1;
650 		openmask &= ~(1 << i);
651 		if (nlp->d_npartitions <= i)
652 			return (EBUSY);
653 		opp = &olp->d_partitions[i];
654 		npp = &nlp->d_partitions[i];
655 		if (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) ||
656 		    DL_GETPSIZE(npp) < DL_GETPSIZE(opp))
657 			return (EBUSY);
658 		/*
659 		 * Copy internally-set partition information
660 		 * if new label doesn't include it.		XXX
661 		 */
662 		if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
663 			npp->p_fstype = opp->p_fstype;
664 			npp->p_fragblock = opp->p_fragblock;
665 			npp->p_cpg = opp->p_cpg;
666 		}
667 	}
668 
669 	/* Generate a UID if the disklabel does not already have one. */
670 	uid = 0;
671 	if (bcmp(nlp->d_uid, &uid, sizeof(nlp->d_uid)) == 0) {
672 		do {
673 			arc4random_buf(nlp->d_uid, sizeof(nlp->d_uid));
674 			TAILQ_FOREACH(dk, &disklist, dk_link)
675 				if (dk->dk_label && bcmp(dk->dk_label->d_uid,
676 				    nlp->d_uid, sizeof(nlp->d_uid)) == 0)
677 					break;
678 		} while (dk != NULL &&
679 		    bcmp(nlp->d_uid, &uid, sizeof(nlp->d_uid)) == 0);
680 	}
681 
682 	nlp->d_checksum = 0;
683 	nlp->d_checksum = dkcksum(nlp);
684 	*olp = *nlp;
685 
686 	disk_change = 1;
687 
688 	return (0);
689 }
690 
691 /*
692  * Determine the size of the transfer, and make sure it is within the
693  * boundaries of the partition. Adjust transfer if needed, and signal errors or
694  * early completion.
695  */
696 int
697 bounds_check_with_label(struct buf *bp, struct disklabel *lp)
698 {
699 	struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)];
700 	daddr64_t partblocks, sz;
701 
702 	/* Avoid division by zero, negative offsets, and negative sizes. */
703 	if (lp->d_secpercyl == 0 || bp->b_blkno < 0 || bp->b_bcount < 0)
704 		goto bad;
705 
706 	/* Ensure transfer is a whole number of aligned sectors. */
707 	if ((bp->b_blkno % DL_BLKSPERSEC(lp)) != 0 ||
708 	    (bp->b_bcount % lp->d_secsize) != 0)
709 		goto bad;
710 
711 	/* Ensure transfer starts within partition boundary. */
712 	partblocks = DL_SECTOBLK(lp, DL_GETPSIZE(p));
713 	if (bp->b_blkno > partblocks)
714 		goto bad;
715 
716 	/* If exactly at end of partition or null transfer, return EOF. */
717 	if (bp->b_blkno == partblocks || bp->b_bcount == 0)
718 		goto done;
719 
720 	/* Truncate request if it exceeds past the end of the partition. */
721 	sz = bp->b_bcount >> DEV_BSHIFT;
722 	if (sz > partblocks - bp->b_blkno) {
723 		sz = partblocks - bp->b_blkno;
724 		bp->b_bcount = sz << DEV_BSHIFT;
725 	}
726 
727 	/* calculate cylinder for disksort to order transfers with */
728 	bp->b_cylinder = (bp->b_blkno + DL_SECTOBLK(lp, DL_GETPOFFSET(p))) /
729 	    DL_SECTOBLK(lp, lp->d_secpercyl);
730 	return (0);
731 
732  bad:
733 	bp->b_error = EINVAL;
734 	bp->b_flags |= B_ERROR;
735  done:
736 	bp->b_resid = bp->b_bcount;
737 	return (-1);
738 }
739 
740 /*
741  * Disk error is the preface to plaintive error messages
742  * about failing disk transfers.  It prints messages of the form
743 
744 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
745 
746  * if the offset of the error in the transfer and a disk label
747  * are both available.  blkdone should be -1 if the position of the error
748  * is unknown; the disklabel pointer may be null from drivers that have not
749  * been converted to use them.  The message is printed with printf
750  * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
751  * The message should be completed (with at least a newline) with printf
752  * or addlog, respectively.  There is no trailing space.
753  */
754 void
755 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone,
756     struct disklabel *lp)
757 {
758 	int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev);
759     	int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */;
760 	char partname = 'a' + part;
761 	daddr64_t sn;
762 
763 	if (pri != LOG_PRINTF) {
764 		static const char fmt[] = "";
765 		log(pri, fmt);
766 		pr = addlog;
767 	} else
768 		pr = printf;
769 	(*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
770 	    bp->b_flags & B_READ ? "read" : "writ");
771 	sn = bp->b_blkno;
772 	if (bp->b_bcount <= DEV_BSIZE)
773 		(*pr)("%lld", sn);
774 	else {
775 		if (blkdone >= 0) {
776 			sn += blkdone;
777 			(*pr)("%lld of ", sn);
778 		}
779 		(*pr)("%lld-%lld", bp->b_blkno,
780 		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
781 	}
782 	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
783 		sn += DL_GETPOFFSET(&lp->d_partitions[part]);
784 		(*pr)(" (%s%d bn %lld; cn %lld", dname, unit, sn,
785 		    sn / lp->d_secpercyl);
786 		sn %= lp->d_secpercyl;
787 		(*pr)(" tn %lld sn %lld)", sn / lp->d_nsectors,
788 		    sn % lp->d_nsectors);
789 	}
790 }
791 
792 /*
793  * Initialize the disklist.  Called by main() before autoconfiguration.
794  */
795 void
796 disk_init(void)
797 {
798 
799 	TAILQ_INIT(&disklist);
800 	disk_count = disk_change = 0;
801 }
802 
803 int
804 disk_construct(struct disk *diskp)
805 {
806 	rw_init(&diskp->dk_lock, "dklk");
807 	mtx_init(&diskp->dk_mtx, IPL_BIO);
808 
809 	diskp->dk_flags |= DKF_CONSTRUCTED;
810 
811 	return (0);
812 }
813 
814 /*
815  * Attach a disk.
816  */
817 void
818 disk_attach(struct device *dv, struct disk *diskp)
819 {
820 	int majdev;
821 
822 	if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED))
823 		disk_construct(diskp);
824 
825 	/*
826 	 * Allocate and initialize the disklabel structures.  Note that
827 	 * it's not safe to sleep here, since we're probably going to be
828 	 * called during autoconfiguration.
829 	 */
830 	diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF,
831 	    M_NOWAIT|M_ZERO);
832 	if (diskp->dk_label == NULL)
833 		panic("disk_attach: can't allocate storage for disklabel");
834 
835 	/*
836 	 * Set the attached timestamp.
837 	 */
838 	microuptime(&diskp->dk_attachtime);
839 
840 	/*
841 	 * Link into the disklist.
842 	 */
843 	TAILQ_INSERT_TAIL(&disklist, diskp, dk_link);
844 	++disk_count;
845 	disk_change = 1;
846 
847 	/*
848 	 * Store device structure and number for later use.
849 	 */
850 	diskp->dk_device = dv;
851 	diskp->dk_devno = NODEV;
852 	if (dv != NULL) {
853 		majdev = findblkmajor(dv);
854 		if (majdev >= 0)
855 			diskp->dk_devno =
856 			    MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART);
857 	}
858 	if (diskp->dk_devno != NODEV)
859 		workq_add_task(NULL, 0, disk_attach_callback,
860 		    (void *)(long)(diskp->dk_devno), NULL);
861 
862 	if (softraid_disk_attach)
863 		softraid_disk_attach(diskp, 1);
864 }
865 
866 void
867 disk_attach_callback(void *arg1, void *arg2)
868 {
869 	char errbuf[100];
870 	struct disklabel dl;
871 	struct disk *dk;
872 	dev_t dev = (dev_t)(long)arg1;
873 
874 	/* Locate disk associated with device no. */
875 	TAILQ_FOREACH(dk, &disklist, dk_link) {
876 		if (dk->dk_devno == dev)
877 			break;
878 	}
879 	if (dk == NULL)
880 		return;
881 
882 	/* XXX: Assumes dk is part of the device softc. */
883 	device_ref(dk->dk_device);
884 
885 	if (dk->dk_flags & (DKF_OPENED | DKF_NOLABELREAD))
886 		goto done;
887 
888 	/* Read disklabel. */
889 	if (disk_readlabel(&dl, dev, errbuf, sizeof(errbuf)) == NULL)
890 		dk->dk_flags |= DKF_LABELVALID;
891 
892 done:
893 	dk->dk_flags |= DKF_OPENED;
894 	device_unref(dk->dk_device);
895 	wakeup(dk);
896 }
897 
898 /*
899  * Detach a disk.
900  */
901 void
902 disk_detach(struct disk *diskp)
903 {
904 
905 	if (softraid_disk_attach)
906 		softraid_disk_attach(diskp, -1);
907 
908 	/*
909 	 * Free the space used by the disklabel structures.
910 	 */
911 	free(diskp->dk_label, M_DEVBUF);
912 
913 	/*
914 	 * Remove from the disklist.
915 	 */
916 	TAILQ_REMOVE(&disklist, diskp, dk_link);
917 	disk_change = 1;
918 	if (--disk_count < 0)
919 		panic("disk_detach: disk_count < 0");
920 }
921 
922 int
923 disk_openpart(struct disk *dk, int part, int fmt, int haslabel)
924 {
925 	KASSERT(part >= 0 && part < MAXPARTITIONS);
926 
927 	/* Unless opening the raw partition, check that the partition exists. */
928 	if (part != RAW_PART && (!haslabel ||
929 	    part >= dk->dk_label->d_npartitions ||
930 	    dk->dk_label->d_partitions[part].p_fstype == FS_UNUSED))
931 		return (ENXIO);
932 
933 	/* Ensure the partition doesn't get changed under our feet. */
934 	switch (fmt) {
935 	case S_IFCHR:
936 		dk->dk_copenmask |= (1 << part);
937 		break;
938 	case S_IFBLK:
939 		dk->dk_bopenmask |= (1 << part);
940 		break;
941 	}
942 	dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
943 
944 	return (0);
945 }
946 
947 void
948 disk_closepart(struct disk *dk, int part, int fmt)
949 {
950 	KASSERT(part >= 0 && part < MAXPARTITIONS);
951 
952 	switch (fmt) {
953 	case S_IFCHR:
954 		dk->dk_copenmask &= ~(1 << part);
955 		break;
956 	case S_IFBLK:
957 		dk->dk_bopenmask &= ~(1 << part);
958 		break;
959 	}
960 	dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
961 }
962 
963 void
964 disk_gone(int (*open)(dev_t, int, int, struct proc *), int unit)
965 {
966 	int bmaj, cmaj, mn;
967 
968 	/* Locate the lowest minor number to be detached. */
969 	mn = DISKMINOR(unit, 0);
970 
971 	for (bmaj = 0; bmaj < nblkdev; bmaj++)
972 		if (bdevsw[bmaj].d_open == open)
973 			vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK);
974 	for (cmaj = 0; cmaj < nchrdev; cmaj++)
975 		if (cdevsw[cmaj].d_open == open)
976 			vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR);
977 }
978 
979 /*
980  * Increment a disk's busy counter.  If the counter is going from
981  * 0 to 1, set the timestamp.
982  */
983 void
984 disk_busy(struct disk *diskp)
985 {
986 
987 	/*
988 	 * XXX We'd like to use something as accurate as microtime(),
989 	 * but that doesn't depend on the system TOD clock.
990 	 */
991 	mtx_enter(&diskp->dk_mtx);
992 	if (diskp->dk_busy++ == 0)
993 		microuptime(&diskp->dk_timestamp);
994 	mtx_leave(&diskp->dk_mtx);
995 }
996 
997 /*
998  * Decrement a disk's busy counter, increment the byte count, total busy
999  * time, and reset the timestamp.
1000  */
1001 void
1002 disk_unbusy(struct disk *diskp, long bcount, int read)
1003 {
1004 	struct timeval dv_time, diff_time;
1005 
1006 	mtx_enter(&diskp->dk_mtx);
1007 
1008 	if (diskp->dk_busy-- == 0)
1009 		printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name);
1010 
1011 	microuptime(&dv_time);
1012 
1013 	timersub(&dv_time, &diskp->dk_timestamp, &diff_time);
1014 	timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time);
1015 
1016 	diskp->dk_timestamp = dv_time;
1017 	if (bcount > 0) {
1018 		if (read) {
1019 			diskp->dk_rbytes += bcount;
1020 			diskp->dk_rxfer++;
1021 		} else {
1022 			diskp->dk_wbytes += bcount;
1023 			diskp->dk_wxfer++;
1024 		}
1025 	} else
1026 		diskp->dk_seek++;
1027 
1028 	mtx_leave(&diskp->dk_mtx);
1029 
1030 	add_disk_randomness(bcount ^ diff_time.tv_usec);
1031 }
1032 
1033 int
1034 disk_lock(struct disk *dk)
1035 {
1036 	return (rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR));
1037 }
1038 
1039 void
1040 disk_lock_nointr(struct disk *dk)
1041 {
1042 	rw_enter_write(&dk->dk_lock);
1043 }
1044 
1045 void
1046 disk_unlock(struct disk *dk)
1047 {
1048 	rw_exit_write(&dk->dk_lock);
1049 }
1050 
1051 int
1052 dk_mountroot(void)
1053 {
1054 	char errbuf[100];
1055 	int part = DISKPART(rootdev);
1056 	int (*mountrootfn)(void);
1057 	struct disklabel dl;
1058 	char *error;
1059 
1060 	error = disk_readlabel(&dl, rootdev, errbuf, sizeof(errbuf));
1061 	if (error)
1062 		panic(error);
1063 
1064 	if (DL_GETPSIZE(&dl.d_partitions[part]) == 0)
1065 		panic("root filesystem has size 0");
1066 	switch (dl.d_partitions[part].p_fstype) {
1067 #ifdef EXT2FS
1068 	case FS_EXT2FS:
1069 		{
1070 		extern int ext2fs_mountroot(void);
1071 		mountrootfn = ext2fs_mountroot;
1072 		}
1073 		break;
1074 #endif
1075 #ifdef FFS
1076 	case FS_BSDFFS:
1077 		{
1078 		extern int ffs_mountroot(void);
1079 		mountrootfn = ffs_mountroot;
1080 		}
1081 		break;
1082 #endif
1083 #ifdef CD9660
1084 	case FS_ISO9660:
1085 		{
1086 		extern int cd9660_mountroot(void);
1087 		mountrootfn = cd9660_mountroot;
1088 		}
1089 		break;
1090 #endif
1091 	default:
1092 #ifdef FFS
1093 		{
1094 		extern int ffs_mountroot(void);
1095 
1096 		printf("filesystem type %d not known.. assuming ffs\n",
1097 		    dl.d_partitions[part].p_fstype);
1098 		mountrootfn = ffs_mountroot;
1099 		}
1100 #else
1101 		panic("disk 0x%x filesystem type %d not known",
1102 		    rootdev, dl.d_partitions[part].p_fstype);
1103 #endif
1104 	}
1105 	return (*mountrootfn)();
1106 }
1107 
1108 struct device *
1109 getdisk(char *str, int len, int defpart, dev_t *devp)
1110 {
1111 	struct device *dv;
1112 
1113 	if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
1114 		printf("use one of: exit");
1115 		TAILQ_FOREACH(dv, &alldevs, dv_list) {
1116 			if (dv->dv_class == DV_DISK)
1117 				printf(" %s[a-p]", dv->dv_xname);
1118 #if defined(NFSCLIENT)
1119 			if (dv->dv_class == DV_IFNET)
1120 				printf(" %s", dv->dv_xname);
1121 #endif
1122 		}
1123 		printf("\n");
1124 	}
1125 	return (dv);
1126 }
1127 
1128 struct device *
1129 parsedisk(char *str, int len, int defpart, dev_t *devp)
1130 {
1131 	struct device *dv;
1132 	int majdev, part = defpart;
1133 	char c;
1134 
1135 	if (len == 0)
1136 		return (NULL);
1137 	c = str[len-1];
1138 	if (c >= 'a' && (c - 'a') < MAXPARTITIONS) {
1139 		part = c - 'a';
1140 		len -= 1;
1141 	}
1142 
1143 	TAILQ_FOREACH(dv, &alldevs, dv_list) {
1144 		if (dv->dv_class == DV_DISK &&
1145 		    strncmp(str, dv->dv_xname, len) == 0 &&
1146 		    dv->dv_xname[len] == '\0') {
1147 			majdev = findblkmajor(dv);
1148 			if (majdev < 0)
1149 				return NULL;
1150 			*devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
1151 			break;
1152 		}
1153 #if defined(NFSCLIENT)
1154 		if (dv->dv_class == DV_IFNET &&
1155 		    strncmp(str, dv->dv_xname, len) == 0 &&
1156 		    dv->dv_xname[len] == '\0') {
1157 			*devp = NODEV;
1158 			break;
1159 		}
1160 #endif
1161 	}
1162 
1163 	return (dv);
1164 }
1165 
1166 void
1167 setroot(struct device *bootdv, int part, int exitflags)
1168 {
1169 	int majdev, unit, len, s, slept = 0;
1170 	struct swdevt *swp;
1171 	struct device *rootdv, *dv;
1172 	dev_t nrootdev, nswapdev = NODEV, temp = NODEV;
1173 	struct ifnet *ifp = NULL;
1174 	struct disk *dk;
1175 	u_char duid[8];
1176 	char buf[128];
1177 #if defined(NFSCLIENT)
1178 	extern char *nfsbootdevname;
1179 #endif
1180 
1181 	/* Ensure that all disk attach callbacks have completed. */
1182 	do {
1183 		TAILQ_FOREACH(dk, &disklist, dk_link) {
1184 			if (dk->dk_devno != NODEV &&
1185 			    (dk->dk_flags & DKF_OPENED) == 0) {
1186 				tsleep(dk, 0, "dkopen", hz);
1187 				slept++;
1188 				break;
1189 			}
1190 		}
1191 	} while (dk != NULL && slept < 5);
1192 
1193 	if (slept == 5) {
1194 		printf("disklabels not read:");
1195 		TAILQ_FOREACH(dk, &disklist, dk_link)
1196 			if (dk->dk_devno != NODEV &&
1197 			    (dk->dk_flags & DKF_OPENED) == 0)
1198 				printf(" %s", dk->dk_name);
1199 		printf("\n");
1200 	}
1201 
1202 	/* Locate DUID for boot disk if not already provided. */
1203 	bzero(duid, sizeof(duid));
1204 	if (bcmp(bootduid, duid, sizeof(bootduid)) == 0) {
1205 		TAILQ_FOREACH(dk, &disklist, dk_link)
1206 			if (dk->dk_device == bootdv)
1207 				break;
1208 		if (dk && (dk->dk_flags & DKF_LABELVALID))
1209 			bcopy(dk->dk_label->d_uid, bootduid, sizeof(bootduid));
1210 	}
1211 	bcopy(bootduid, rootduid, sizeof(rootduid));
1212 
1213 #if NSOFTRAID > 0
1214 	sr_map_root();
1215 #endif
1216 
1217 	/*
1218 	 * If `swap generic' and we couldn't determine boot device,
1219 	 * ask the user.
1220 	 */
1221 	dk = NULL;
1222 	if (mountroot == NULL && bootdv == NULL)
1223 		boothowto |= RB_ASKNAME;
1224 	if (boothowto & RB_ASKNAME) {
1225 		while (1) {
1226 			printf("root device");
1227 			if (bootdv != NULL) {
1228 				printf(" (default %s", bootdv->dv_xname);
1229 				if (bootdv->dv_class == DV_DISK)
1230 					printf("%c", 'a' + part);
1231 				printf(")");
1232 			}
1233 			printf(": ");
1234 			s = splhigh();
1235 			cnpollc(TRUE);
1236 			len = getsn(buf, sizeof(buf));
1237 			cnpollc(FALSE);
1238 			splx(s);
1239 			if (strcmp(buf, "exit") == 0)
1240 				boot(exitflags);
1241 			if (len == 0 && bootdv != NULL) {
1242 				strlcpy(buf, bootdv->dv_xname, sizeof buf);
1243 				len = strlen(buf);
1244 			}
1245 			if (len > 0 && buf[len - 1] == '*') {
1246 				buf[--len] = '\0';
1247 				dv = getdisk(buf, len, part, &nrootdev);
1248 				if (dv != NULL) {
1249 					rootdv = dv;
1250 					nswapdev = nrootdev;
1251 					goto gotswap;
1252 				}
1253 			}
1254 			dv = getdisk(buf, len, part, &nrootdev);
1255 			if (dv != NULL) {
1256 				rootdv = dv;
1257 				break;
1258 			}
1259 		}
1260 
1261 		if (rootdv->dv_class == DV_IFNET)
1262 			goto gotswap;
1263 
1264 		/* try to build swap device out of new root device */
1265 		while (1) {
1266 			printf("swap device");
1267 			if (rootdv != NULL)
1268 				printf(" (default %s%s)", rootdv->dv_xname,
1269 				    rootdv->dv_class == DV_DISK ? "b" : "");
1270 			printf(": ");
1271 			s = splhigh();
1272 			cnpollc(TRUE);
1273 			len = getsn(buf, sizeof(buf));
1274 			cnpollc(FALSE);
1275 			splx(s);
1276 			if (strcmp(buf, "exit") == 0)
1277 				boot(exitflags);
1278 			if (len == 0 && rootdv != NULL) {
1279 				switch (rootdv->dv_class) {
1280 				case DV_IFNET:
1281 					nswapdev = NODEV;
1282 					break;
1283 				case DV_DISK:
1284 					nswapdev = MAKEDISKDEV(major(nrootdev),
1285 					    DISKUNIT(nrootdev), 1);
1286 					if (nswapdev == nrootdev)
1287 						continue;
1288 					break;
1289 				default:
1290 					break;
1291 				}
1292 				break;
1293 			}
1294 			dv = getdisk(buf, len, 1, &nswapdev);
1295 			if (dv) {
1296 				if (dv->dv_class == DV_IFNET)
1297 					nswapdev = NODEV;
1298 				if (nswapdev == nrootdev)
1299 					continue;
1300 				break;
1301 			}
1302 		}
1303 gotswap:
1304 		rootdev = nrootdev;
1305 		dumpdev = nswapdev;
1306 		swdevt[0].sw_dev = nswapdev;
1307 		swdevt[1].sw_dev = NODEV;
1308 #if defined(NFSCLIENT)
1309 	} else if (mountroot == nfs_mountroot) {
1310 		rootdv = bootdv;
1311 		rootdev = dumpdev = swapdev = NODEV;
1312 #endif
1313 	} else if (mountroot == NULL && rootdev == NODEV) {
1314 		/*
1315 		 * `swap generic'
1316 		 */
1317 		rootdv = bootdv;
1318 
1319 		if (bootdv->dv_class == DV_DISK) {
1320 			bzero(&duid, sizeof(duid));
1321 			if (bcmp(rootduid, &duid, sizeof(rootduid)) != 0) {
1322 				TAILQ_FOREACH(dk, &disklist, dk_link)
1323 					if ((dk->dk_flags & DKF_LABELVALID) &&
1324 					    dk->dk_label && bcmp(dk->dk_label->d_uid,
1325 					    &rootduid, sizeof(rootduid)) == 0)
1326 						break;
1327 				if (dk == NULL)
1328 					panic("root device (%02hx%02hx%02hx%02hx"
1329 					    "%02hx%02hx%02hx%02hx) not found",
1330 					    rootduid[0], rootduid[1], rootduid[2],
1331 					    rootduid[3], rootduid[4], rootduid[5],
1332 					    rootduid[6], rootduid[7]);
1333 				rootdv = dk->dk_device;
1334 			}
1335 		}
1336 
1337 		majdev = findblkmajor(rootdv);
1338 		if (majdev >= 0) {
1339 			/*
1340 			 * Root and swap are on the disk.
1341 			 * Assume swap is on partition b.
1342 			 */
1343 			rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part);
1344 			nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1);
1345 		} else {
1346 			/*
1347 			 * Root and swap are on a net.
1348 			 */
1349 			nswapdev = NODEV;
1350 		}
1351 		dumpdev = nswapdev;
1352 		swdevt[0].sw_dev = nswapdev;
1353 		/* swdevt[1].sw_dev = NODEV; */
1354 	} else {
1355 		/* Completely pre-configured, but we want rootdv .. */
1356 		majdev = major(rootdev);
1357 		if (findblkname(majdev) == NULL)
1358 			return;
1359 		unit = DISKUNIT(rootdev);
1360 		part = DISKPART(rootdev);
1361 		snprintf(buf, sizeof buf, "%s%d%c",
1362 		    findblkname(majdev), unit, 'a' + part);
1363 		rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev);
1364 		if (rootdv == NULL)
1365 			panic("root device (%s) not found", buf);
1366 	}
1367 
1368 	if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET)
1369 		ifp = ifunit(rootdv->dv_xname);
1370 	else if (bootdv && bootdv->dv_class == DV_IFNET)
1371 		ifp = ifunit(bootdv->dv_xname);
1372 
1373 	if (ifp)
1374 		if_addgroup(ifp, "netboot");
1375 
1376 	switch (rootdv->dv_class) {
1377 #if defined(NFSCLIENT)
1378 	case DV_IFNET:
1379 		mountroot = nfs_mountroot;
1380 		nfsbootdevname = rootdv->dv_xname;
1381 		return;
1382 #endif
1383 	case DV_DISK:
1384 		mountroot = dk_mountroot;
1385 		part = DISKPART(rootdev);
1386 		break;
1387 	default:
1388 		printf("can't figure root, hope your kernel is right\n");
1389 		return;
1390 	}
1391 
1392 	printf("root on %s%c", rootdv->dv_xname, 'a' + part);
1393 
1394 	if (dk && dk->dk_device == rootdv)
1395 		printf(" (%02hx%02hx%02hx%02hx%02hx%02hx%02hx%02hx.%c)",
1396 		    rootduid[0], rootduid[1], rootduid[2], rootduid[3],
1397 		    rootduid[4], rootduid[5], rootduid[6], rootduid[7],
1398 		    'a' + part);
1399 
1400 	/*
1401 	 * Make the swap partition on the root drive the primary swap.
1402 	 */
1403 	for (swp = swdevt; swp->sw_dev != NODEV; swp++) {
1404 		if (major(rootdev) == major(swp->sw_dev) &&
1405 		    DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) {
1406 			temp = swdevt[0].sw_dev;
1407 			swdevt[0].sw_dev = swp->sw_dev;
1408 			swp->sw_dev = temp;
1409 			break;
1410 		}
1411 	}
1412 	if (swp->sw_dev != NODEV) {
1413 		/*
1414 		 * If dumpdev was the same as the old primary swap device,
1415 		 * move it to the new primary swap device.
1416 		 */
1417 		if (temp == dumpdev)
1418 			dumpdev = swdevt[0].sw_dev;
1419 	}
1420 	if (swdevt[0].sw_dev != NODEV)
1421 		printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)),
1422 		    DISKUNIT(swdevt[0].sw_dev),
1423 		    'a' + DISKPART(swdevt[0].sw_dev));
1424 	if (dumpdev != NODEV)
1425 		printf(" dump on %s%d%c", findblkname(major(dumpdev)),
1426 		    DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev));
1427 	printf("\n");
1428 }
1429 
1430 extern struct nam2blk nam2blk[];
1431 
1432 int
1433 findblkmajor(struct device *dv)
1434 {
1435 	char buf[16], *p;
1436 	int i;
1437 
1438 	if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf)
1439 		return (-1);
1440 	for (p = buf; *p; p++)
1441 		if (*p >= '0' && *p <= '9')
1442 			*p = '\0';
1443 
1444 	for (i = 0; nam2blk[i].name; i++)
1445 		if (!strcmp(buf, nam2blk[i].name))
1446 			return (nam2blk[i].maj);
1447 	return (-1);
1448 }
1449 
1450 char *
1451 findblkname(int maj)
1452 {
1453 	int i;
1454 
1455 	for (i = 0; nam2blk[i].name; i++)
1456 		if (nam2blk[i].maj == maj)
1457 			return (nam2blk[i].name);
1458 	return (NULL);
1459 }
1460 
1461 char *
1462 disk_readlabel(struct disklabel *dl, dev_t dev, char *errbuf, size_t errsize)
1463 {
1464 	struct vnode *vn;
1465 	dev_t chrdev, rawdev;
1466 	int error;
1467 
1468 	chrdev = blktochr(dev);
1469 	rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(chrdev), RAW_PART);
1470 
1471 #ifdef DEBUG
1472 	printf("dev=0x%x chrdev=0x%x rawdev=0x%x\n", dev, chrdev, rawdev);
1473 #endif
1474 
1475 	if (cdevvp(rawdev, &vn)) {
1476 		snprintf(errbuf, errsize,
1477 		    "cannot obtain vnode for 0x%x/0x%x", dev, rawdev);
1478 		return (errbuf);
1479 	}
1480 
1481 	error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
1482 	if (error) {
1483 		snprintf(errbuf, errsize,
1484 		    "cannot open disk, 0x%x/0x%x, error %d",
1485 		    dev, rawdev, error);
1486 		goto done;
1487 	}
1488 
1489 	error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)dl, FREAD, NOCRED, curproc);
1490 	if (error) {
1491 		snprintf(errbuf, errsize,
1492 		    "cannot read disk label, 0x%x/0x%x, error %d",
1493 		    dev, rawdev, error);
1494 	}
1495 done:
1496 	VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1497 	vput(vn);
1498 	if (error)
1499 		return (errbuf);
1500 	return (NULL);
1501 }
1502 
1503 int
1504 disk_map(char *path, char *mappath, int size, int flags)
1505 {
1506 	struct disk *dk, *mdk;
1507 	u_char uid[8];
1508 	char c, part;
1509 	int i;
1510 
1511 	/*
1512 	 * Attempt to map a request for a disklabel UID to the correct device.
1513 	 * We should be supplied with a disklabel UID which has the following
1514 	 * format:
1515 	 *
1516 	 * [disklabel uid] . [partition]
1517 	 *
1518 	 * Alternatively, if the DM_OPENPART flag is set the disklabel UID can
1519 	 * based passed on its own.
1520 	 */
1521 
1522 	if (strchr(path, '/') != NULL)
1523 		return -1;
1524 
1525 	/* Verify that the device name is properly formed. */
1526 	if (!((strlen(path) == 16 && (flags & DM_OPENPART)) ||
1527 	    (strlen(path) == 18 && path[16] == '.')))
1528 		return -1;
1529 
1530 	/* Get partition. */
1531 	if (flags & DM_OPENPART)
1532 		part = 'a' + RAW_PART;
1533 	else
1534 		part = path[17];
1535 
1536 	if (part < 'a' || part >= 'a' + MAXPARTITIONS)
1537 		return -1;
1538 
1539 	/* Derive label UID. */
1540 	bzero(uid, sizeof(uid));
1541 	for (i = 0; i < 16; i++) {
1542 		c = path[i];
1543 		if (c >= '0' && c <= '9')
1544 			c -= '0';
1545 		else if (c >= 'a' && c <= 'f')
1546 			c -= ('a' - 10);
1547                 else
1548 			return -1;
1549 
1550 		uid[i / 2] <<= 4;
1551 		uid[i / 2] |= c & 0xf;
1552 	}
1553 
1554 	mdk = NULL;
1555 	TAILQ_FOREACH(dk, &disklist, dk_link) {
1556 		if ((dk->dk_flags & DKF_LABELVALID) && dk->dk_label &&
1557 		    bcmp(dk->dk_label->d_uid, uid,
1558 		    sizeof(dk->dk_label->d_uid)) == 0) {
1559 			/* Fail if there are duplicate UIDs! */
1560 			if (mdk != NULL)
1561 				return -1;
1562 			mdk = dk;
1563 		}
1564 	}
1565 
1566 	if (mdk == NULL || mdk->dk_name == NULL)
1567 		return -1;
1568 
1569 	snprintf(mappath, size, "/dev/%s%s%c",
1570 	    (flags & DM_OPENBLCK) ? "" : "r", mdk->dk_name, part);
1571 
1572 	return 0;
1573 }
1574 
1575 /*
1576  * Lookup a disk device and verify that it has completed attaching.
1577  */
1578 struct device *
1579 disk_lookup(struct cfdriver *cd, int unit)
1580 {
1581 	struct device *dv;
1582 	struct disk *dk;
1583 
1584 	dv = device_lookup(cd, unit);
1585 	if (dv == NULL)
1586 		return (NULL);
1587 
1588 	TAILQ_FOREACH(dk, &disklist, dk_link)
1589 		if (dk->dk_device == dv)
1590 			break;
1591 
1592 	if (dk == NULL) {
1593 		device_unref(dv);
1594 		return (NULL);
1595 	}
1596 
1597 	return (dv);
1598 }
1599