xref: /openbsd-src/sys/kern/subr_disk.c (revision aa5e9e10509ffd51558f081f01cd78bfa3c4f2a5)
1 /*	$OpenBSD: subr_disk.c,v 1.148 2013/04/24 08:31:06 blambert Exp $	*/
2 /*	$NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Jason R. Thorpe.  All rights reserved.
6  * Copyright (c) 1982, 1986, 1988, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * (c) UNIX System Laboratories, Inc.
9  * All or some portions of this file are derived from material licensed
10  * to the University of California by American Telephone and Telegraph
11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12  * the permission of UNIX System Laboratories, Inc.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)ufs_disksubr.c	8.5 (Berkeley) 1/21/94
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/fcntl.h>
46 #include <sys/buf.h>
47 #include <sys/stat.h>
48 #include <sys/syslog.h>
49 #include <sys/device.h>
50 #include <sys/time.h>
51 #include <sys/disklabel.h>
52 #include <sys/conf.h>
53 #include <sys/lock.h>
54 #include <sys/disk.h>
55 #include <sys/reboot.h>
56 #include <sys/dkio.h>
57 #include <sys/proc.h>
58 #include <sys/vnode.h>
59 #include <sys/workq.h>
60 #include <uvm/uvm_extern.h>
61 
62 #include <sys/socket.h>
63 #include <sys/socketvar.h>
64 
65 #include <net/if.h>
66 
67 #include <dev/rndvar.h>
68 #include <dev/cons.h>
69 
70 #include "softraid.h"
71 
/*
 * A global list of all disks attached to the system.  May grow or
 * shrink over time: entries are linked in by disk_attach() and unlinked
 * by disk_detach().
 */
struct	disklist_head disklist;	/* TAILQ_HEAD */
int	disk_count;		/* number of drives in global disklist */
int	disk_change;		/* set if a disk has been attached/detached
				 * (or had its label replaced by
				 * setdisklabel()) since last we looked at
				 * this variable. This is reset by
				 * hw_sysctl()
				 */

u_char	bootduid[8];		/* DUID of boot disk. */
u_char	rootduid[8];		/* DUID of root disk. */

/*
 * softraid callback, do not use!
 * When non-NULL it is invoked with (disk, 1) from disk_attach() and
 * with (disk, -1) from disk_detach().
 */
void (*softraid_disk_attach)(struct disk *, int);

void sr_map_root(void);

/* Work queue task scheduled by disk_attach(); arg1 carries the dev_t. */
void disk_attach_callback(void *, void *);
92 
93 /*
94  * Seek sort for disks.  We depend on the driver which calls us using b_resid
95  * as the current cylinder number.
96  *
97  * The argument ap structure holds a b_actf activity chain pointer on which we
98  * keep two queues, sorted in ascending cylinder order.  The first queue holds
99  * those requests which are positioned after the current cylinder (in the first
100  * request); the second holds requests which came in after their cylinder number
101  * was passed.  Thus we implement a one way scan, retracting after reaching the
102  * end of the drive to the first request on the second queue, at which time it
103  * becomes the first queue.
104  *
105  * A one-way scan is natural because of the way UNIX read-ahead blocks are
106  * allocated.
107  */
108 
109 void
110 disksort(struct buf *ap, struct buf *bp)
111 {
112 	struct buf *bq;
113 
114 	/* If the queue is empty, then it's easy. */
115 	if (ap->b_actf == NULL) {
116 		bp->b_actf = NULL;
117 		ap->b_actf = bp;
118 		return;
119 	}
120 
121 	/*
122 	 * If we lie after the first (currently active) request, then we
123 	 * must locate the second request list and add ourselves to it.
124 	 */
125 	bq = ap->b_actf;
126 	if (bp->b_cylinder < bq->b_cylinder) {
127 		while (bq->b_actf) {
128 			/*
129 			 * Check for an ``inversion'' in the normally ascending
130 			 * cylinder numbers, indicating the start of the second
131 			 * request list.
132 			 */
133 			if (bq->b_actf->b_cylinder < bq->b_cylinder) {
134 				/*
135 				 * Search the second request list for the first
136 				 * request at a larger cylinder number.  We go
137 				 * before that; if there is no such request, we
138 				 * go at end.
139 				 */
140 				do {
141 					if (bp->b_cylinder <
142 					    bq->b_actf->b_cylinder)
143 						goto insert;
144 					if (bp->b_cylinder ==
145 					    bq->b_actf->b_cylinder &&
146 					    bp->b_blkno < bq->b_actf->b_blkno)
147 						goto insert;
148 					bq = bq->b_actf;
149 				} while (bq->b_actf);
150 				goto insert;		/* after last */
151 			}
152 			bq = bq->b_actf;
153 		}
154 		/*
155 		 * No inversions... we will go after the last, and
156 		 * be the first request in the second request list.
157 		 */
158 		goto insert;
159 	}
160 	/*
161 	 * Request is at/after the current request...
162 	 * sort in the first request list.
163 	 */
164 	while (bq->b_actf) {
165 		/*
166 		 * We want to go after the current request if there is an
167 		 * inversion after it (i.e. it is the end of the first
168 		 * request list), or if the next request is a larger cylinder
169 		 * than our request.
170 		 */
171 		if (bq->b_actf->b_cylinder < bq->b_cylinder ||
172 		    bp->b_cylinder < bq->b_actf->b_cylinder ||
173 		    (bp->b_cylinder == bq->b_actf->b_cylinder &&
174 		    bp->b_blkno < bq->b_actf->b_blkno))
175 			goto insert;
176 		bq = bq->b_actf;
177 	}
178 	/*
179 	 * Neither a second list nor a larger request... we go at the end of
180 	 * the first list, which is the same as the end of the whole schebang.
181 	 */
182 insert:	bp->b_actf = bq->b_actf;
183 	bq->b_actf = bp;
184 }
185 
186 /*
187  * Compute checksum for disk label.
188  */
189 u_int
190 dkcksum(struct disklabel *lp)
191 {
192 	u_int16_t *start, *end;
193 	u_int16_t sum = 0;
194 
195 	start = (u_int16_t *)lp;
196 	end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions];
197 	while (start < end)
198 		sum ^= *start++;
199 	return (sum);
200 }
201 
202 int
203 initdisklabel(struct disklabel *lp)
204 {
205 	int i;
206 
207 	/* minimal requirements for archetypal disk label */
208 	if (lp->d_secsize < DEV_BSIZE)
209 		lp->d_secsize = DEV_BSIZE;
210 	if (DL_GETDSIZE(lp) == 0)
211 		DL_SETDSIZE(lp, MAXDISKSIZE);
212 	if (lp->d_secpercyl == 0)
213 		return (ERANGE);
214 	lp->d_npartitions = MAXPARTITIONS;
215 	for (i = 0; i < RAW_PART; i++) {
216 		DL_SETPSIZE(&lp->d_partitions[i], 0);
217 		DL_SETPOFFSET(&lp->d_partitions[i], 0);
218 	}
219 	if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0)
220 		DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp));
221 	DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
222 	DL_SETBSTART(lp, 0);
223 	DL_SETBEND(lp, DL_GETDSIZE(lp));
224 	lp->d_version = 1;
225 	lp->d_bbsize = 8192;
226 	lp->d_sbsize = 64*1024;			/* XXX ? */
227 	return (0);
228 }
229 
230 /*
231  * Check an incoming block to make sure it is a disklabel, convert it to
232  * a newer version if needed, etc etc.
233  */
234 int
235 checkdisklabel(void *rlp, struct disklabel *lp, u_int64_t boundstart,
236     u_int64_t boundend)
237 {
238 	struct disklabel *dlp = rlp;
239 	struct __partitionv0 *v0pp;
240 	struct partition *pp;
241 	daddr64_t disksize;
242 	int error = 0;
243 	int i;
244 
245 	if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC)
246 		error = ENOENT;	/* no disk label */
247 	else if (dlp->d_npartitions > MAXPARTITIONS)
248 		error = E2BIG;	/* too many partitions */
249 	else if (dlp->d_secpercyl == 0)
250 		error = EINVAL;	/* invalid label */
251 	else if (dlp->d_secsize == 0)
252 		error = ENOSPC;	/* disk too small */
253 	else if (dkcksum(dlp) != 0)
254 		error = EINVAL;	/* incorrect checksum */
255 
256 	if (error) {
257 		u_int16_t *start, *end, sum = 0;
258 
259 		/* If it is byte-swapped, attempt to convert it */
260 		if (swap32(dlp->d_magic) != DISKMAGIC ||
261 		    swap32(dlp->d_magic2) != DISKMAGIC ||
262 		    swap16(dlp->d_npartitions) > MAXPARTITIONS)
263 			return (error);
264 
265 		/*
266 		 * Need a byte-swap aware dkcksum variant
267 		 * inlined, because dkcksum uses a sub-field
268 		 */
269 		start = (u_int16_t *)dlp;
270 		end = (u_int16_t *)&dlp->d_partitions[
271 		    swap16(dlp->d_npartitions)];
272 		while (start < end)
273 			sum ^= *start++;
274 		if (sum != 0)
275 			return (error);
276 
277 		dlp->d_magic = swap32(dlp->d_magic);
278 		dlp->d_type = swap16(dlp->d_type);
279 		dlp->d_subtype = swap16(dlp->d_subtype);
280 
281 		/* d_typename and d_packname are strings */
282 
283 		dlp->d_secsize = swap32(dlp->d_secsize);
284 		dlp->d_nsectors = swap32(dlp->d_nsectors);
285 		dlp->d_ntracks = swap32(dlp->d_ntracks);
286 		dlp->d_ncylinders = swap32(dlp->d_ncylinders);
287 		dlp->d_secpercyl = swap32(dlp->d_secpercyl);
288 		dlp->d_secperunit = swap32(dlp->d_secperunit);
289 
290 		/* d_uid is a string */
291 
292 		dlp->d_acylinders = swap32(dlp->d_acylinders);
293 
294 		dlp->d_flags = swap32(dlp->d_flags);
295 
296 		for (i = 0; i < NDDATA; i++)
297 			dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]);
298 
299 		dlp->d_secperunith = swap16(dlp->d_secperunith);
300 		dlp->d_version = swap16(dlp->d_version);
301 
302 		for (i = 0; i < NSPARE; i++)
303 			dlp->d_spare[i] = swap32(dlp->d_spare[i]);
304 
305 		dlp->d_magic2 = swap32(dlp->d_magic2);
306 
307 		dlp->d_npartitions = swap16(dlp->d_npartitions);
308 		dlp->d_bbsize = swap32(dlp->d_bbsize);
309 		dlp->d_sbsize = swap32(dlp->d_sbsize);
310 
311 		for (i = 0; i < MAXPARTITIONS; i++) {
312 			pp = &dlp->d_partitions[i];
313 			pp->p_size = swap32(pp->p_size);
314 			pp->p_offset = swap32(pp->p_offset);
315 			if (dlp->d_version == 0) {
316 				v0pp = (struct __partitionv0 *)pp;
317 				v0pp->p_fsize = swap32(v0pp->p_fsize);
318 			} else {
319 				pp->p_offseth = swap16(pp->p_offseth);
320 				pp->p_sizeh = swap16(pp->p_sizeh);
321 			}
322 			pp->p_cpg = swap16(pp->p_cpg);
323 		}
324 
325 		dlp->d_checksum = 0;
326 		dlp->d_checksum = dkcksum(dlp);
327 		error = 0;
328 	}
329 
330 	/* XXX should verify lots of other fields and whine a lot */
331 
332 	if (error)
333 		return (error);
334 
335 	/* Initial passed in lp contains the real disk size. */
336 	disksize = DL_GETDSIZE(lp);
337 
338 	if (lp != dlp)
339 		*lp = *dlp;
340 
341 	if (lp->d_version == 0) {
342 		lp->d_version = 1;
343 		lp->d_secperunith = 0;
344 
345 		v0pp = (struct __partitionv0 *)lp->d_partitions;
346 		pp = lp->d_partitions;
347 		for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) {
348 			pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp->
349 			    p_fsize, v0pp->p_frag);
350 			pp->p_offseth = 0;
351 			pp->p_sizeh = 0;
352 		}
353 	}
354 
355 #ifdef DEBUG
356 	if (DL_GETDSIZE(lp) != disksize)
357 		printf("on-disk disklabel has incorrect disksize (%lld)\n",
358 		    DL_GETDSIZE(lp));
359 	if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize)
360 		printf("on-disk disklabel RAW_PART has incorrect size (%lld)\n",
361 		    DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
362 	if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0)
363 		printf("on-disk disklabel RAW_PART offset != 0 (%lld)\n",
364 		    DL_GETPOFFSET(&lp->d_partitions[RAW_PART]));
365 #endif
366 	DL_SETDSIZE(lp, disksize);
367 	DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize);
368 	DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
369 	DL_SETBSTART(lp, boundstart);
370 	DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp));
371 
372 	lp->d_checksum = 0;
373 	lp->d_checksum = dkcksum(lp);
374 	return (0);
375 }
376 
377 /*
378  * If dos partition table requested, attempt to load it and
379  * find disklabel inside a DOS partition. Return buffer
380  * for use in signalling errors if requested.
381  *
382  * We would like to check if each MBR has a valid BOOT_MAGIC, but
383  * we cannot because it doesn't always exist. So.. we assume the
384  * MBR is valid.
385  */
386 int
387 readdoslabel(struct buf *bp, void (*strat)(struct buf *),
388     struct disklabel *lp, int *partoffp, int spoofonly)
389 {
390 	u_int64_t dospartoff = 0, dospartend = DL_GETBEND(lp);
391 	int i, ourpart = -1, wander = 1, n = 0, loop = 0, offset;
392 	struct dos_partition dp[NDOSPART], *dp2;
393 	daddr64_t part_blkno = DOSBBSECTOR;
394 	u_int32_t extoff = 0;
395 	int error;
396 
397 	if (lp->d_secpercyl == 0)
398 		return (EINVAL);	/* invalid label */
399 	if (lp->d_secsize == 0)
400 		return (ENOSPC);	/* disk too small */
401 
402 	/* do DOS partitions in the process of getting disklabel? */
403 
404 	/*
405 	 * Read dos partition table, follow extended partitions.
406 	 * Map the partitions to disklabel entries i-p
407 	 */
408 	while (wander && loop < DOS_MAXEBR) {
409 		loop++;
410 		wander = 0;
411 		if (part_blkno < extoff)
412 			part_blkno = extoff;
413 
414 		/* read boot record */
415 		bp->b_blkno = DL_BLKTOSEC(lp, part_blkno) * DL_BLKSPERSEC(lp);
416 		offset = DL_BLKOFFSET(lp, part_blkno) + DOSPARTOFF;
417 		bp->b_bcount = lp->d_secsize;
418 		bp->b_error = 0; /* B_ERROR and b_error may have stale data. */
419 		CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR);
420 		SET(bp->b_flags, B_BUSY | B_READ | B_RAW);
421 		(*strat)(bp);
422 		error = biowait(bp);
423 		if (error) {
424 /*wrong*/		if (partoffp)
425 /*wrong*/			*partoffp = -1;
426 			return (error);
427 		}
428 
429 		bcopy(bp->b_data + offset, dp, sizeof(dp));
430 
431 		if (n == 0 && part_blkno == DOSBBSECTOR) {
432 			u_int16_t mbrtest;
433 
434 			/* Check the end of sector marker. */
435 			mbrtest = ((bp->b_data[510] << 8) & 0xff00) |
436 			    (bp->b_data[511] & 0xff);
437 			if (mbrtest != 0x55aa)
438 				goto notmbr;
439 		}
440 
441 		if (ourpart == -1) {
442 			/* Search for our MBR partition */
443 			for (dp2=dp, i=0; i < NDOSPART && ourpart == -1;
444 			    i++, dp2++)
445 				if (letoh32(dp2->dp_size) &&
446 				    dp2->dp_typ == DOSPTYP_OPENBSD)
447 					ourpart = i;
448 			if (ourpart == -1)
449 				goto donot;
450 			/*
451 			 * This is our MBR partition. need sector
452 			 * address for SCSI/IDE, cylinder for
453 			 * ESDI/ST506/RLL
454 			 */
455 			dp2 = &dp[ourpart];
456 			dospartoff = letoh32(dp2->dp_start) + part_blkno;
457 			dospartend = dospartoff + letoh32(dp2->dp_size);
458 
459 			/* found our OpenBSD partition, finish up */
460 			if (partoffp)
461 				goto notfat;
462 
463 			if (lp->d_ntracks == 0)
464 				lp->d_ntracks = dp2->dp_ehd + 1;
465 			if (lp->d_nsectors == 0)
466 				lp->d_nsectors = DPSECT(dp2->dp_esect);
467 			if (lp->d_secpercyl == 0)
468 				lp->d_secpercyl = lp->d_ntracks *
469 				    lp->d_nsectors;
470 		}
471 donot:
472 		/*
473 		 * In case the disklabel read below fails, we want to
474 		 * provide a fake label in i-p.
475 		 */
476 		for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) {
477 			struct partition *pp;
478 			u_int8_t fstype;
479 
480 			if (dp2->dp_typ == DOSPTYP_OPENBSD)
481 				continue;
482 			if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp))
483 				continue;
484 			if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp))
485 				continue;
486 			if (letoh32(dp2->dp_size) == 0)
487 				continue;
488 
489 			switch (dp2->dp_typ) {
490 			case DOSPTYP_UNUSED:
491 				fstype = FS_UNUSED;
492 				break;
493 
494 			case DOSPTYP_LINUX:
495 				fstype = FS_EXT2FS;
496 				break;
497 
498 			case DOSPTYP_NTFS:
499 				fstype = FS_NTFS;
500 				break;
501 
502 			case DOSPTYP_FAT12:
503 			case DOSPTYP_FAT16S:
504 			case DOSPTYP_FAT16B:
505 			case DOSPTYP_FAT16L:
506 			case DOSPTYP_FAT32:
507 			case DOSPTYP_FAT32L:
508 				fstype = FS_MSDOS;
509 				break;
510 			case DOSPTYP_EXTEND:
511 			case DOSPTYP_EXTENDL:
512 				part_blkno = letoh32(dp2->dp_start) + extoff;
513 				if (!extoff) {
514 					extoff = letoh32(dp2->dp_start);
515 					part_blkno = 0;
516 				}
517 				wander = 1;
518 				continue;
519 				break;
520 			default:
521 				fstype = FS_OTHER;
522 				break;
523 			}
524 
525 			/*
526 			 * Don't set fstype/offset/size when just looking for
527 			 * the offset of the OpenBSD partition. It would
528 			 * invalidate the disklabel checksum!
529 			 *
530 			 * Don't try to spoof more than 8 partitions, i.e.
531 			 * 'i' -'p'.
532 			 */
533 			if (partoffp || n >= 8)
534 				continue;
535 
536 			pp = &lp->d_partitions[8+n];
537 			n++;
538 			pp->p_fstype = fstype;
539 			if (letoh32(dp2->dp_start))
540 				DL_SETPOFFSET(pp,
541 				    letoh32(dp2->dp_start) + part_blkno);
542 			DL_SETPSIZE(pp, letoh32(dp2->dp_size));
543 		}
544 	}
545 
546 notmbr:
547 	if (partoffp == NULL)
548 		/* Must not modify *lp when partoffp is set. */
549 		lp->d_npartitions = MAXPARTITIONS;
550 
551 	if (n == 0 && part_blkno == DOSBBSECTOR && ourpart == -1) {
552 		u_int16_t fattest;
553 
554 		/* Check for a valid initial jmp instruction. */
555 		switch ((u_int8_t)bp->b_data[0]) {
556 		case 0xeb:
557 			/*
558 			 * Two-byte jmp instruction. The 2nd byte is the number
559 			 * of bytes to jmp and the 3rd byte must be a NOP.
560 			 */
561 			if ((u_int8_t)bp->b_data[2] != 0x90)
562 				goto notfat;
563 			break;
564 		case 0xe9:
565 			/*
566 			 * Three-byte jmp instruction. The next two bytes are a
567 			 * little-endian 16 bit value.
568 			 */
569 			break;
570 		default:
571 			goto notfat;
572 			break;
573 		}
574 
575 		/* Check for a valid bytes per sector value. */
576 		fattest = ((bp->b_data[12] << 8) & 0xff00) |
577 		    (bp->b_data[11] & 0xff);
578 		if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0))
579 			goto notfat;
580 
581 		if (partoffp)
582 			return (ENXIO);	/* No place for disklabel on FAT! */
583 
584 		DL_SETPSIZE(&lp->d_partitions['i' - 'a'],
585 		    DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
586 		DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0);
587 		lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS;
588 
589 		spoofonly = 1;	/* No disklabel to read from disk. */
590 	}
591 
592 notfat:
593 	/* record the OpenBSD partition's placement for the caller */
594 	if (partoffp)
595 		*partoffp = dospartoff;
596 	else {
597 		DL_SETBSTART(lp, dospartoff);
598 		DL_SETBEND(lp, (dospartend < DL_GETDSIZE(lp)) ? dospartend :
599 		    DL_GETDSIZE(lp));
600 	}
601 
602 	/* don't read the on-disk label if we are in spoofed-only mode */
603 	if (spoofonly)
604 		return (0);
605 
606 	bp->b_blkno = DL_BLKTOSEC(lp, dospartoff + DOS_LABELSECTOR) *
607 	    DL_BLKSPERSEC(lp);
608 	offset = DL_BLKOFFSET(lp, dospartoff + DOS_LABELSECTOR);
609 	bp->b_bcount = lp->d_secsize;
610 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
611 	SET(bp->b_flags, B_BUSY | B_READ | B_RAW);
612 	(*strat)(bp);
613 	if (biowait(bp))
614 		return (bp->b_error);
615 
616 	/* sub-MBR disklabels are always at a LABELOFFSET of 0 */
617 	return checkdisklabel(bp->b_data + offset, lp, dospartoff, dospartend);
618 }
619 
620 /*
621  * Check new disk label for sensibility before setting it.
622  */
623 int
624 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask)
625 {
626 	struct partition *opp, *npp;
627 	struct disk *dk;
628 	u_int64_t uid;
629 	int i;
630 
631 	/* sanity clause */
632 	if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 ||
633 	    (nlp->d_secsize % DEV_BSIZE) != 0)
634 		return (EINVAL);
635 
636 	/* special case to allow disklabel to be invalidated */
637 	if (nlp->d_magic == 0xffffffff) {
638 		*olp = *nlp;
639 		return (0);
640 	}
641 
642 	if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
643 	    dkcksum(nlp) != 0)
644 		return (EINVAL);
645 
646 	/* XXX missing check if other dos partitions will be overwritten */
647 
648 	while (openmask != 0) {
649 		i = ffs(openmask) - 1;
650 		openmask &= ~(1 << i);
651 		if (nlp->d_npartitions <= i)
652 			return (EBUSY);
653 		opp = &olp->d_partitions[i];
654 		npp = &nlp->d_partitions[i];
655 		if (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) ||
656 		    DL_GETPSIZE(npp) < DL_GETPSIZE(opp))
657 			return (EBUSY);
658 		/*
659 		 * Copy internally-set partition information
660 		 * if new label doesn't include it.		XXX
661 		 */
662 		if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
663 			npp->p_fstype = opp->p_fstype;
664 			npp->p_fragblock = opp->p_fragblock;
665 			npp->p_cpg = opp->p_cpg;
666 		}
667 	}
668 
669 	/* Generate a UID if the disklabel does not already have one. */
670 	uid = 0;
671 	if (bcmp(nlp->d_uid, &uid, sizeof(nlp->d_uid)) == 0) {
672 		do {
673 			arc4random_buf(nlp->d_uid, sizeof(nlp->d_uid));
674 			TAILQ_FOREACH(dk, &disklist, dk_link)
675 				if (dk->dk_label && bcmp(dk->dk_label->d_uid,
676 				    nlp->d_uid, sizeof(nlp->d_uid)) == 0)
677 					break;
678 		} while (dk != NULL &&
679 		    bcmp(nlp->d_uid, &uid, sizeof(nlp->d_uid)) == 0);
680 	}
681 
682 	nlp->d_checksum = 0;
683 	nlp->d_checksum = dkcksum(nlp);
684 	*olp = *nlp;
685 
686 	disk_change = 1;
687 
688 	return (0);
689 }
690 
691 /*
692  * Determine the size of the transfer, and make sure it is within the
693  * boundaries of the partition. Adjust transfer if needed, and signal errors or
694  * early completion.
695  */
696 int
697 bounds_check_with_label(struct buf *bp, struct disklabel *lp)
698 {
699 	struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)];
700 	daddr64_t partblocks, sz;
701 
702 	/* Avoid division by zero, negative offsets, and negative sizes. */
703 	if (lp->d_secpercyl == 0 || bp->b_blkno < 0 || bp->b_bcount < 0)
704 		goto bad;
705 
706 	/* Ensure transfer is a whole number of aligned sectors. */
707 	if ((bp->b_blkno % DL_BLKSPERSEC(lp)) != 0 ||
708 	    (bp->b_bcount % lp->d_secsize) != 0)
709 		goto bad;
710 
711 	/* Ensure transfer starts within partition boundary. */
712 	partblocks = DL_SECTOBLK(lp, DL_GETPSIZE(p));
713 	if (bp->b_blkno > partblocks)
714 		goto bad;
715 
716 	/* If exactly at end of partition or null transfer, return EOF. */
717 	if (bp->b_blkno == partblocks || bp->b_bcount == 0)
718 		goto done;
719 
720 	/* Truncate request if it exceeds past the end of the partition. */
721 	sz = bp->b_bcount >> DEV_BSHIFT;
722 	if (sz > partblocks - bp->b_blkno) {
723 		sz = partblocks - bp->b_blkno;
724 		bp->b_bcount = sz << DEV_BSHIFT;
725 	}
726 
727 	/* calculate cylinder for disksort to order transfers with */
728 	bp->b_cylinder = (bp->b_blkno + DL_SECTOBLK(lp, DL_GETPOFFSET(p))) /
729 	    DL_SECTOBLK(lp, lp->d_secpercyl);
730 	return (0);
731 
732  bad:
733 	bp->b_error = EINVAL;
734 	bp->b_flags |= B_ERROR;
735  done:
736 	bp->b_resid = bp->b_bcount;
737 	return (-1);
738 }
739 
740 /*
741  * Disk error is the preface to plaintive error messages
742  * about failing disk transfers.  It prints messages of the form
743 
744 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
745 
746  * if the offset of the error in the transfer and a disk label
747  * are both available.  blkdone should be -1 if the position of the error
748  * is unknown; the disklabel pointer may be null from drivers that have not
749  * been converted to use them.  The message is printed with printf
750  * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
751  * The message should be completed (with at least a newline) with printf
752  * or addlog, respectively.  There is no trailing space.
753  */
754 void
755 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone,
756     struct disklabel *lp)
757 {
758 	int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev);
759     	int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */;
760 	char partname = 'a' + part;
761 	daddr64_t sn;
762 
763 	if (pri != LOG_PRINTF) {
764 		static const char fmt[] = "";
765 		log(pri, fmt);
766 		pr = addlog;
767 	} else
768 		pr = printf;
769 	(*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
770 	    bp->b_flags & B_READ ? "read" : "writ");
771 	sn = bp->b_blkno;
772 	if (bp->b_bcount <= DEV_BSIZE)
773 		(*pr)("%lld", sn);
774 	else {
775 		if (blkdone >= 0) {
776 			sn += blkdone;
777 			(*pr)("%lld of ", sn);
778 		}
779 		(*pr)("%lld-%lld", bp->b_blkno,
780 		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
781 	}
782 	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
783 		sn += DL_GETPOFFSET(&lp->d_partitions[part]);
784 		(*pr)(" (%s%d bn %lld; cn %lld", dname, unit, sn,
785 		    sn / lp->d_secpercyl);
786 		sn %= lp->d_secpercyl;
787 		(*pr)(" tn %lld sn %lld)", sn / lp->d_nsectors,
788 		    sn % lp->d_nsectors);
789 	}
790 }
791 
792 /*
793  * Initialize the disklist.  Called by main() before autoconfiguration.
794  */
795 void
796 disk_init(void)
797 {
798 
799 	TAILQ_INIT(&disklist);
800 	disk_count = disk_change = 0;
801 }
802 
803 int
804 disk_construct(struct disk *diskp)
805 {
806 	rw_init(&diskp->dk_lock, "dklk");
807 	mtx_init(&diskp->dk_mtx, IPL_BIO);
808 
809 	diskp->dk_flags |= DKF_CONSTRUCTED;
810 
811 	return (0);
812 }
813 
814 /*
815  * Attach a disk.
816  */
817 void
818 disk_attach(struct device *dv, struct disk *diskp)
819 {
820 	int majdev;
821 
822 	if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED))
823 		disk_construct(diskp);
824 
825 	/*
826 	 * Allocate and initialize the disklabel structures.  Note that
827 	 * it's not safe to sleep here, since we're probably going to be
828 	 * called during autoconfiguration.
829 	 */
830 	diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF,
831 	    M_NOWAIT|M_ZERO);
832 	if (diskp->dk_label == NULL)
833 		panic("disk_attach: can't allocate storage for disklabel");
834 
835 	/*
836 	 * Set the attached timestamp.
837 	 */
838 	microuptime(&diskp->dk_attachtime);
839 
840 	/*
841 	 * Link into the disklist.
842 	 */
843 	TAILQ_INSERT_TAIL(&disklist, diskp, dk_link);
844 	++disk_count;
845 	disk_change = 1;
846 
847 	/*
848 	 * Store device structure and number for later use.
849 	 */
850 	diskp->dk_device = dv;
851 	diskp->dk_devno = NODEV;
852 	if (dv != NULL) {
853 		majdev = findblkmajor(dv);
854 		if (majdev >= 0)
855 			diskp->dk_devno =
856 			    MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART);
857 	}
858 	if (diskp->dk_devno != NODEV)
859 		workq_add_task(NULL, 0, disk_attach_callback,
860 		    (void *)(long)(diskp->dk_devno), NULL);
861 
862 	if (softraid_disk_attach)
863 		softraid_disk_attach(diskp, 1);
864 }
865 
866 void
867 disk_attach_callback(void *arg1, void *arg2)
868 {
869 	char errbuf[100];
870 	struct disklabel dl;
871 	struct disk *dk;
872 	dev_t dev = (dev_t)(long)arg1;
873 
874 	/* Locate disk associated with device no. */
875 	TAILQ_FOREACH(dk, &disklist, dk_link) {
876 		if (dk->dk_devno == dev)
877 			break;
878 	}
879 	if (dk == NULL)
880 		return;
881 
882 	/* XXX: Assumes dk is part of the device softc. */
883 	device_ref(dk->dk_device);
884 
885 	if (dk->dk_flags & (DKF_OPENED | DKF_NOLABELREAD))
886 		goto done;
887 
888 	/* Read disklabel. */
889 	if (disk_readlabel(&dl, dev, errbuf, sizeof(errbuf)) == NULL) {
890 		add_timer_randomness(dl.d_checksum);
891 		dk->dk_flags |= DKF_LABELVALID;
892 	}
893 
894 done:
895 	dk->dk_flags |= DKF_OPENED;
896 	device_unref(dk->dk_device);
897 	wakeup(dk);
898 }
899 
900 /*
901  * Detach a disk.
902  */
903 void
904 disk_detach(struct disk *diskp)
905 {
906 
907 	if (softraid_disk_attach)
908 		softraid_disk_attach(diskp, -1);
909 
910 	/*
911 	 * Free the space used by the disklabel structures.
912 	 */
913 	free(diskp->dk_label, M_DEVBUF);
914 
915 	/*
916 	 * Remove from the disklist.
917 	 */
918 	TAILQ_REMOVE(&disklist, diskp, dk_link);
919 	disk_change = 1;
920 	if (--disk_count < 0)
921 		panic("disk_detach: disk_count < 0");
922 }
923 
924 int
925 disk_openpart(struct disk *dk, int part, int fmt, int haslabel)
926 {
927 	KASSERT(part >= 0 && part < MAXPARTITIONS);
928 
929 	/* Unless opening the raw partition, check that the partition exists. */
930 	if (part != RAW_PART && (!haslabel ||
931 	    part >= dk->dk_label->d_npartitions ||
932 	    dk->dk_label->d_partitions[part].p_fstype == FS_UNUSED))
933 		return (ENXIO);
934 
935 	/* Ensure the partition doesn't get changed under our feet. */
936 	switch (fmt) {
937 	case S_IFCHR:
938 		dk->dk_copenmask |= (1 << part);
939 		break;
940 	case S_IFBLK:
941 		dk->dk_bopenmask |= (1 << part);
942 		break;
943 	}
944 	dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
945 
946 	return (0);
947 }
948 
949 void
950 disk_closepart(struct disk *dk, int part, int fmt)
951 {
952 	KASSERT(part >= 0 && part < MAXPARTITIONS);
953 
954 	switch (fmt) {
955 	case S_IFCHR:
956 		dk->dk_copenmask &= ~(1 << part);
957 		break;
958 	case S_IFBLK:
959 		dk->dk_bopenmask &= ~(1 << part);
960 		break;
961 	}
962 	dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
963 }
964 
965 void
966 disk_gone(int (*open)(dev_t, int, int, struct proc *), int unit)
967 {
968 	int bmaj, cmaj, mn;
969 
970 	/* Locate the lowest minor number to be detached. */
971 	mn = DISKMINOR(unit, 0);
972 
973 	for (bmaj = 0; bmaj < nblkdev; bmaj++)
974 		if (bdevsw[bmaj].d_open == open)
975 			vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK);
976 	for (cmaj = 0; cmaj < nchrdev; cmaj++)
977 		if (cdevsw[cmaj].d_open == open)
978 			vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR);
979 }
980 
981 /*
982  * Increment a disk's busy counter.  If the counter is going from
983  * 0 to 1, set the timestamp.
984  */
985 void
986 disk_busy(struct disk *diskp)
987 {
988 
989 	/*
990 	 * XXX We'd like to use something as accurate as microtime(),
991 	 * but that doesn't depend on the system TOD clock.
992 	 */
993 	mtx_enter(&diskp->dk_mtx);
994 	if (diskp->dk_busy++ == 0)
995 		microuptime(&diskp->dk_timestamp);
996 	mtx_leave(&diskp->dk_mtx);
997 }
998 
999 /*
1000  * Decrement a disk's busy counter, increment the byte count, total busy
1001  * time, and reset the timestamp.
1002  */
1003 void
1004 disk_unbusy(struct disk *diskp, long bcount, int read)
1005 {
1006 	struct timeval dv_time, diff_time;
1007 
1008 	mtx_enter(&diskp->dk_mtx);
1009 
1010 	if (diskp->dk_busy-- == 0)
1011 		printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name);
1012 
1013 	microuptime(&dv_time);
1014 
1015 	timersub(&dv_time, &diskp->dk_timestamp, &diff_time);
1016 	timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time);
1017 
1018 	diskp->dk_timestamp = dv_time;
1019 	if (bcount > 0) {
1020 		if (read) {
1021 			diskp->dk_rbytes += bcount;
1022 			diskp->dk_rxfer++;
1023 		} else {
1024 			diskp->dk_wbytes += bcount;
1025 			diskp->dk_wxfer++;
1026 		}
1027 	} else
1028 		diskp->dk_seek++;
1029 
1030 	mtx_leave(&diskp->dk_mtx);
1031 
1032 	add_disk_randomness(bcount ^ diff_time.tv_usec);
1033 }
1034 
1035 int
1036 disk_lock(struct disk *dk)
1037 {
1038 	return (rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR));
1039 }
1040 
1041 void
1042 disk_lock_nointr(struct disk *dk)
1043 {
1044 	rw_enter_write(&dk->dk_lock);
1045 }
1046 
/*
 * Release the write hold on the disk's dk_lock taken by disk_lock()
 * or disk_lock_nointr().
 */
void
disk_unlock(struct disk *dk)
{
	rw_exit_write(&dk->dk_lock);
}
1052 
1053 int
1054 dk_mountroot(void)
1055 {
1056 	char errbuf[100];
1057 	int part = DISKPART(rootdev);
1058 	int (*mountrootfn)(void);
1059 	struct disklabel dl;
1060 	char *error;
1061 
1062 	error = disk_readlabel(&dl, rootdev, errbuf, sizeof(errbuf));
1063 	if (error)
1064 		panic(error);
1065 
1066 	if (DL_GETPSIZE(&dl.d_partitions[part]) == 0)
1067 		panic("root filesystem has size 0");
1068 	switch (dl.d_partitions[part].p_fstype) {
1069 #ifdef EXT2FS
1070 	case FS_EXT2FS:
1071 		{
1072 		extern int ext2fs_mountroot(void);
1073 		mountrootfn = ext2fs_mountroot;
1074 		}
1075 		break;
1076 #endif
1077 #ifdef FFS
1078 	case FS_BSDFFS:
1079 		{
1080 		extern int ffs_mountroot(void);
1081 		mountrootfn = ffs_mountroot;
1082 		}
1083 		break;
1084 #endif
1085 #ifdef CD9660
1086 	case FS_ISO9660:
1087 		{
1088 		extern int cd9660_mountroot(void);
1089 		mountrootfn = cd9660_mountroot;
1090 		}
1091 		break;
1092 #endif
1093 	default:
1094 #ifdef FFS
1095 		{
1096 		extern int ffs_mountroot(void);
1097 
1098 		printf("filesystem type %d not known.. assuming ffs\n",
1099 		    dl.d_partitions[part].p_fstype);
1100 		mountrootfn = ffs_mountroot;
1101 		}
1102 #else
1103 		panic("disk 0x%x filesystem type %d not known",
1104 		    rootdev, dl.d_partitions[part].p_fstype);
1105 #endif
1106 	}
1107 	return (*mountrootfn)();
1108 }
1109 
1110 struct device *
1111 getdisk(char *str, int len, int defpart, dev_t *devp)
1112 {
1113 	struct device *dv;
1114 
1115 	if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
1116 		printf("use one of: exit");
1117 		TAILQ_FOREACH(dv, &alldevs, dv_list) {
1118 			if (dv->dv_class == DV_DISK)
1119 				printf(" %s[a-p]", dv->dv_xname);
1120 #if defined(NFSCLIENT)
1121 			if (dv->dv_class == DV_IFNET)
1122 				printf(" %s", dv->dv_xname);
1123 #endif
1124 		}
1125 		printf("\n");
1126 	}
1127 	return (dv);
1128 }
1129 
1130 struct device *
1131 parsedisk(char *str, int len, int defpart, dev_t *devp)
1132 {
1133 	struct device *dv;
1134 	int majdev, part = defpart;
1135 	char c;
1136 
1137 	if (len == 0)
1138 		return (NULL);
1139 	c = str[len-1];
1140 	if (c >= 'a' && (c - 'a') < MAXPARTITIONS) {
1141 		part = c - 'a';
1142 		len -= 1;
1143 	}
1144 
1145 	TAILQ_FOREACH(dv, &alldevs, dv_list) {
1146 		if (dv->dv_class == DV_DISK &&
1147 		    strncmp(str, dv->dv_xname, len) == 0 &&
1148 		    dv->dv_xname[len] == '\0') {
1149 			majdev = findblkmajor(dv);
1150 			if (majdev < 0)
1151 				return NULL;
1152 			*devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
1153 			break;
1154 		}
1155 #if defined(NFSCLIENT)
1156 		if (dv->dv_class == DV_IFNET &&
1157 		    strncmp(str, dv->dv_xname, len) == 0 &&
1158 		    dv->dv_xname[len] == '\0') {
1159 			*devp = NODEV;
1160 			break;
1161 		}
1162 #endif
1163 	}
1164 
1165 	return (dv);
1166 }
1167 
/*
 * Work out the root, swap and dump devices and pick the mountroot
 * routine to go with them.
 *
 *	bootdv		device the system was booted from, or NULL if it
 *			could not be determined
 *	part		partition number to use on the root disk unless
 *			something more specific is found
 *	exitflags	handed to boot() when the operator answers "exit"
 *			at one of the prompts
 *
 * One of four paths is taken:
 *  - RB_ASKNAME is set (or forced because neither mountroot nor bootdv
 *    is known): the operator is prompted for root and swap devices;
 *  - mountroot is already nfs_mountroot (NFSCLIENT only);
 *  - `swap generic' (mountroot == NULL and rootdev == NODEV): root is
 *    the boot device, or the disk whose label UID matches rootduid;
 *  - everything is preconfigured and only rootdv has to be looked up.
 *
 * The function updates the globals rootdev, dumpdev, swdevt[],
 * mountroot, bootduid and rootduid as it goes and prints a one line
 * summary for a disk root.
 */
void
setroot(struct device *bootdv, int part, int exitflags)
{
	int majdev, unit, len, s, slept = 0;
	struct swdevt *swp;
	struct device *rootdv, *dv;
	dev_t nrootdev, nswapdev = NODEV, temp = NODEV;
	struct ifnet *ifp = NULL;
	struct disk *dk;
	u_char duid[8];
	char buf[128];
#if defined(NFSCLIENT)
	extern char *nfsbootdevname;
#endif

	/* Ensure that all disk attach callbacks have completed. */
	/*
	 * Each pass sleeps (timeout hz) on the first disk that has a
	 * device number but is not yet marked DKF_OPENED, then rescans.
	 * The loop ends when no such disk is left or after five sleeps.
	 */
	do {
		TAILQ_FOREACH(dk, &disklist, dk_link) {
			if (dk->dk_devno != NODEV &&
			    (dk->dk_flags & DKF_OPENED) == 0) {
				tsleep(dk, 0, "dkopen", hz);
				slept++;
				break;
			}
		}
	} while (dk != NULL && slept < 5);

	/* Gave up waiting: name the disks that are still not opened. */
	if (slept == 5) {
		printf("disklabels not read:");
		TAILQ_FOREACH(dk, &disklist, dk_link)
			if (dk->dk_devno != NODEV &&
			    (dk->dk_flags & DKF_OPENED) == 0)
				printf(" %s", dk->dk_name);
		printf("\n");
	}

	/* Locate DUID for boot disk if not already provided. */
	/* An all-zero bootduid is what counts as "not provided". */
	bzero(duid, sizeof(duid));
	if (bcmp(bootduid, duid, sizeof(bootduid)) == 0) {
		TAILQ_FOREACH(dk, &disklist, dk_link)
			if (dk->dk_device == bootdv)
				break;
		if (dk && (dk->dk_flags & DKF_LABELVALID))
			bcopy(dk->dk_label->d_uid, bootduid, sizeof(bootduid));
	}
	/* The root DUID starts out as the boot DUID. */
	bcopy(bootduid, rootduid, sizeof(rootduid));

#if NSOFTRAID > 0
	sr_map_root();
#endif

	/*
	 * If `swap generic' and we couldn't determine boot device,
	 * ask the user.
	 */
	/*
	 * dk is reset here; only the `swap generic' path below sets it
	 * again, and the DUID in the final summary depends on that.
	 */
	dk = NULL;
	if (mountroot == NULL && bootdv == NULL)
		boothowto |= RB_ASKNAME;
	if (boothowto & RB_ASKNAME) {
		/* Prompt for the root device until a usable name is given. */
		while (1) {
			printf("root device");
			if (bootdv != NULL) {
				printf(" (default %s", bootdv->dv_xname);
				if (bootdv->dv_class == DV_DISK)
					printf("%c", 'a' + part);
				printf(")");
			}
			printf(": ");
			/* Read the answer with the console in polled mode. */
			s = splhigh();
			cnpollc(TRUE);
			len = getsn(buf, sizeof(buf));
			cnpollc(FALSE);
			splx(s);
			if (strcmp(buf, "exit") == 0)
				boot(exitflags);
			/* Empty answer: fall back to the boot device name. */
			if (len == 0 && bootdv != NULL) {
				strlcpy(buf, bootdv->dv_xname, sizeof buf);
				len = strlen(buf);
			}
			/*
			 * A trailing '*' is stripped; if the rest names a
			 * device, the same dev_t is used for root and swap
			 * and the swap prompt is skipped.
			 */
			if (len > 0 && buf[len - 1] == '*') {
				buf[--len] = '\0';
				dv = getdisk(buf, len, part, &nrootdev);
				if (dv != NULL) {
					rootdv = dv;
					nswapdev = nrootdev;
					goto gotswap;
				}
			}
			dv = getdisk(buf, len, part, &nrootdev);
			if (dv != NULL) {
				rootdv = dv;
				break;
			}
		}

		/* Network root: no swap prompt, nswapdev stays NODEV. */
		if (rootdv->dv_class == DV_IFNET)
			goto gotswap;

		/* try to build swap device out of new root device */
		while (1) {
			printf("swap device");
			if (rootdv != NULL)
				printf(" (default %s%s)", rootdv->dv_xname,
				    rootdv->dv_class == DV_DISK ? "b" : "");
			printf(": ");
			s = splhigh();
			cnpollc(TRUE);
			len = getsn(buf, sizeof(buf));
			cnpollc(FALSE);
			splx(s);
			if (strcmp(buf, "exit") == 0)
				boot(exitflags);
			/* Empty answer: derive swap from the root device. */
			if (len == 0 && rootdv != NULL) {
				switch (rootdv->dv_class) {
				case DV_IFNET:
					nswapdev = NODEV;
					break;
				case DV_DISK:
					/* Partition 1 ('b') of the root disk. */
					nswapdev = MAKEDISKDEV(major(nrootdev),
					    DISKUNIT(nrootdev), 1);
					/* Root itself is 'b': ask again. */
					if (nswapdev == nrootdev)
						continue;
					break;
				default:
					break;
				}
				break;
			}
			dv = getdisk(buf, len, 1, &nswapdev);
			if (dv) {
				if (dv->dv_class == DV_IFNET)
					nswapdev = NODEV;
				/* Swap may not be the root partition. */
				if (nswapdev == nrootdev)
					continue;
				break;
			}
		}
gotswap:
		/* Commit the operator's choices. */
		rootdev = nrootdev;
		dumpdev = nswapdev;
		swdevt[0].sw_dev = nswapdev;
		swdevt[1].sw_dev = NODEV;
#if defined(NFSCLIENT)
	} else if (mountroot == nfs_mountroot) {
		/* Preconfigured NFS root: no local root, dump or swap. */
		rootdv = bootdv;
		rootdev = dumpdev = swapdev = NODEV;
#endif
	} else if (mountroot == NULL && rootdev == NODEV) {
		/*
		 * `swap generic'
		 */
		rootdv = bootdv;

		/*
		 * With a non-zero rootduid the root disk is the one whose
		 * valid label carries that UID, which need not be the
		 * boot device.  Not finding it is fatal.
		 */
		if (bootdv->dv_class == DV_DISK) {
			bzero(&duid, sizeof(duid));
			if (bcmp(rootduid, &duid, sizeof(rootduid)) != 0) {
				TAILQ_FOREACH(dk, &disklist, dk_link)
					if ((dk->dk_flags & DKF_LABELVALID) &&
					    dk->dk_label && bcmp(dk->dk_label->d_uid,
					    &rootduid, sizeof(rootduid)) == 0)
						break;
				if (dk == NULL)
					panic("root device (%02hx%02hx%02hx%02hx"
					    "%02hx%02hx%02hx%02hx) not found",
					    rootduid[0], rootduid[1], rootduid[2],
					    rootduid[3], rootduid[4], rootduid[5],
					    rootduid[6], rootduid[7]);
				rootdv = dk->dk_device;
			}
		}

		majdev = findblkmajor(rootdv);
		if (majdev >= 0) {
			/*
			 * Root and swap are on the disk.
			 * Assume swap is on partition b.
			 */
			rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part);
			nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1);
		} else {
			/*
			 * Root and swap are on a net.
			 */
			nswapdev = NODEV;
		}
		dumpdev = nswapdev;
		swdevt[0].sw_dev = nswapdev;
		/* swdevt[1].sw_dev = NODEV; */
	} else {
		/* Completely pre-configured, but we want rootdv .. */
		majdev = major(rootdev);
		/* No name for this block major: leave everything as is. */
		if (findblkname(majdev) == NULL)
			return;
		unit = DISKUNIT(rootdev);
		part = DISKPART(rootdev);
		/* Rebuild the textual name and look the device up by it. */
		snprintf(buf, sizeof buf, "%s%d%c",
		    findblkname(majdev), unit, 'a' + part);
		rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev);
		if (rootdv == NULL)
			panic("root device (%s) not found", buf);
	}

	/*
	 * If the boot device is a network interface, add it to the
	 * "netboot" interface group.
	 */
	if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET)
		ifp = ifunit(rootdv->dv_xname);
	else if (bootdv && bootdv->dv_class == DV_IFNET)
		ifp = ifunit(bootdv->dv_xname);

	if (ifp)
		if_addgroup(ifp, "netboot");

	/* Select the mountroot routine from the class of the root device. */
	switch (rootdv->dv_class) {
#if defined(NFSCLIENT)
	case DV_IFNET:
		mountroot = nfs_mountroot;
		nfsbootdevname = rootdv->dv_xname;
		return;
#endif
	case DV_DISK:
		mountroot = dk_mountroot;
		part = DISKPART(rootdev);
		break;
	default:
		printf("can't figure root, hope your kernel is right\n");
		return;
	}

	/* Only a disk root gets here; print the summary line. */
	printf("root on %s%c", rootdv->dv_xname, 'a' + part);

	/* dk is non-NULL only if the DUID search above found the disk. */
	if (dk && dk->dk_device == rootdv)
		printf(" (%02hx%02hx%02hx%02hx%02hx%02hx%02hx%02hx.%c)",
		    rootduid[0], rootduid[1], rootduid[2], rootduid[3],
		    rootduid[4], rootduid[5], rootduid[6], rootduid[7],
		    'a' + part);

	/*
	 * Make the swap partition on the root drive the primary swap.
	 */
	/*
	 * The first swdevt[] entry on the same major/unit as rootdev is
	 * exchanged with swdevt[0]; temp keeps the old primary.
	 */
	for (swp = swdevt; swp->sw_dev != NODEV; swp++) {
		if (major(rootdev) == major(swp->sw_dev) &&
		    DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) {
			temp = swdevt[0].sw_dev;
			swdevt[0].sw_dev = swp->sw_dev;
			swp->sw_dev = temp;
			break;
		}
	}
	/* swp points at a real entry only if the loop left via break. */
	if (swp->sw_dev != NODEV) {
		/*
		 * If dumpdev was the same as the old primary swap device,
		 * move it to the new primary swap device.
		 */
		if (temp == dumpdev)
			dumpdev = swdevt[0].sw_dev;
	}
	if (swdevt[0].sw_dev != NODEV)
		printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)),
		    DISKUNIT(swdevt[0].sw_dev),
		    'a' + DISKPART(swdevt[0].sw_dev));
	if (dumpdev != NODEV)
		printf(" dump on %s%d%c", findblkname(major(dumpdev)),
		    DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev));
	printf("\n");
}
1431 
1432 extern struct nam2blk nam2blk[];
1433 
1434 int
1435 findblkmajor(struct device *dv)
1436 {
1437 	char buf[16], *p;
1438 	int i;
1439 
1440 	if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf)
1441 		return (-1);
1442 	for (p = buf; *p; p++)
1443 		if (*p >= '0' && *p <= '9')
1444 			*p = '\0';
1445 
1446 	for (i = 0; nam2blk[i].name; i++)
1447 		if (!strcmp(buf, nam2blk[i].name))
1448 			return (nam2blk[i].maj);
1449 	return (-1);
1450 }
1451 
1452 char *
1453 findblkname(int maj)
1454 {
1455 	int i;
1456 
1457 	for (i = 0; nam2blk[i].name; i++)
1458 		if (nam2blk[i].maj == maj)
1459 			return (nam2blk[i].name);
1460 	return (NULL);
1461 }
1462 
1463 char *
1464 disk_readlabel(struct disklabel *dl, dev_t dev, char *errbuf, size_t errsize)
1465 {
1466 	struct vnode *vn;
1467 	dev_t chrdev, rawdev;
1468 	int error;
1469 
1470 	chrdev = blktochr(dev);
1471 	rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(chrdev), RAW_PART);
1472 
1473 #ifdef DEBUG
1474 	printf("dev=0x%x chrdev=0x%x rawdev=0x%x\n", dev, chrdev, rawdev);
1475 #endif
1476 
1477 	if (cdevvp(rawdev, &vn)) {
1478 		snprintf(errbuf, errsize,
1479 		    "cannot obtain vnode for 0x%x/0x%x", dev, rawdev);
1480 		return (errbuf);
1481 	}
1482 
1483 	error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
1484 	if (error) {
1485 		snprintf(errbuf, errsize,
1486 		    "cannot open disk, 0x%x/0x%x, error %d",
1487 		    dev, rawdev, error);
1488 		goto done;
1489 	}
1490 
1491 	error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)dl, FREAD, NOCRED, curproc);
1492 	if (error) {
1493 		snprintf(errbuf, errsize,
1494 		    "cannot read disk label, 0x%x/0x%x, error %d",
1495 		    dev, rawdev, error);
1496 	}
1497 done:
1498 	VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1499 	vput(vn);
1500 	if (error)
1501 		return (errbuf);
1502 	return (NULL);
1503 }
1504 
1505 int
1506 disk_map(char *path, char *mappath, int size, int flags)
1507 {
1508 	struct disk *dk, *mdk;
1509 	u_char uid[8];
1510 	char c, part;
1511 	int i;
1512 
1513 	/*
1514 	 * Attempt to map a request for a disklabel UID to the correct device.
1515 	 * We should be supplied with a disklabel UID which has the following
1516 	 * format:
1517 	 *
1518 	 * [disklabel uid] . [partition]
1519 	 *
1520 	 * Alternatively, if the DM_OPENPART flag is set the disklabel UID can
1521 	 * based passed on its own.
1522 	 */
1523 
1524 	if (strchr(path, '/') != NULL)
1525 		return -1;
1526 
1527 	/* Verify that the device name is properly formed. */
1528 	if (!((strlen(path) == 16 && (flags & DM_OPENPART)) ||
1529 	    (strlen(path) == 18 && path[16] == '.')))
1530 		return -1;
1531 
1532 	/* Get partition. */
1533 	if (flags & DM_OPENPART)
1534 		part = 'a' + RAW_PART;
1535 	else
1536 		part = path[17];
1537 
1538 	if (part < 'a' || part >= 'a' + MAXPARTITIONS)
1539 		return -1;
1540 
1541 	/* Derive label UID. */
1542 	bzero(uid, sizeof(uid));
1543 	for (i = 0; i < 16; i++) {
1544 		c = path[i];
1545 		if (c >= '0' && c <= '9')
1546 			c -= '0';
1547 		else if (c >= 'a' && c <= 'f')
1548 			c -= ('a' - 10);
1549                 else
1550 			return -1;
1551 
1552 		uid[i / 2] <<= 4;
1553 		uid[i / 2] |= c & 0xf;
1554 	}
1555 
1556 	mdk = NULL;
1557 	TAILQ_FOREACH(dk, &disklist, dk_link) {
1558 		if ((dk->dk_flags & DKF_LABELVALID) && dk->dk_label &&
1559 		    bcmp(dk->dk_label->d_uid, uid,
1560 		    sizeof(dk->dk_label->d_uid)) == 0) {
1561 			/* Fail if there are duplicate UIDs! */
1562 			if (mdk != NULL)
1563 				return -1;
1564 			mdk = dk;
1565 		}
1566 	}
1567 
1568 	if (mdk == NULL || mdk->dk_name == NULL)
1569 		return -1;
1570 
1571 	snprintf(mappath, size, "/dev/%s%s%c",
1572 	    (flags & DM_OPENBLCK) ? "" : "r", mdk->dk_name, part);
1573 
1574 	return 0;
1575 }
1576 
1577 /*
1578  * Lookup a disk device and verify that it has completed attaching.
1579  */
1580 struct device *
1581 disk_lookup(struct cfdriver *cd, int unit)
1582 {
1583 	struct device *dv;
1584 	struct disk *dk;
1585 
1586 	dv = device_lookup(cd, unit);
1587 	if (dv == NULL)
1588 		return (NULL);
1589 
1590 	TAILQ_FOREACH(dk, &disklist, dk_link)
1591 		if (dk->dk_device == dv)
1592 			break;
1593 
1594 	if (dk == NULL) {
1595 		device_unref(dv);
1596 		return (NULL);
1597 	}
1598 
1599 	return (dv);
1600 }
1601