xref: /dflybsd-src/sys/kern/subr_diskslice.c (revision ad30b684748061ca0c68e4a5ca21b45c240c52c5)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
43  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
45  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
47  * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.41 2007/05/21 04:21:05 dillon Exp $
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/conf.h>
54 #include <sys/disklabel.h>
55 #include <sys/diskslice.h>
56 #include <sys/disk.h>
57 #include <sys/diskmbr.h>
58 #include <sys/fcntl.h>
59 #include <sys/malloc.h>
60 #include <sys/stat.h>
61 #include <sys/syslog.h>
62 #include <sys/proc.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/thread2.h>
66 
67 #include <vfs/ufs/dinode.h>	/* XXX used only for fs.h */
68 #include <vfs/ufs/fs.h>		/* XXX used only to get BBSIZE/SBSIZE */
69 
70 #define TRACE(str)	do { if (ds_debug) kprintf str; } while (0)
71 
72 typedef	u_char	bool_t;
73 
74 static volatile bool_t ds_debug;
75 
76 static struct disklabel *clone_label (struct disk_info *info,
77 					struct diskslice *sp);
78 static void dsiodone (struct bio *bio);
79 static char *fixlabel (const char *sname, struct diskslice *sp,
80 			   struct disklabel *lp, int writeflag);
81 static int  dsreadandsetlabel(cdev_t dev, u_int flags,
82 			   struct diskslices *ssp, struct diskslice *sp,
83 			   struct disk_info *info);
84 static void free_ds_label (struct diskslices *ssp, int slice);
85 static void partition_info (const char *sname, int part, struct partition *pp);
86 static void slice_info (const char *sname, struct diskslice *sp);
87 static void set_ds_label (struct diskslices *ssp, int slice,
88 			      struct disklabel *lp);
89 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
90 
91 /*
92  * Create a disklabel based on a disk_info structure, initializing
93  * the appropriate fields and creating a raw partition that covers the
94  * whole disk.
95  *
96  * If a diskslice is passed, the label is truncated to the slice
97  */
98 static struct disklabel *
99 clone_label(struct disk_info *info, struct diskslice *sp)
100 {
101 	struct disklabel *lp1;
102 
103 	lp1 = kmalloc(sizeof *lp1, M_DEVBUF, M_WAITOK | M_ZERO);
104 	lp1->d_nsectors = info->d_secpertrack;
105 	lp1->d_ntracks = info->d_nheads;
106 	lp1->d_secpercyl = info->d_secpercyl;
107 	lp1->d_secsize = info->d_media_blksize;
108 
109 	if (sp)
110 		lp1->d_secperunit = (u_int)sp->ds_size;
111 	else
112 		lp1->d_secperunit = (u_int)info->d_media_blocks;
113 
114 	if (lp1->d_typename[0] == '\0')
115 		strncpy(lp1->d_typename, "amnesiac", sizeof(lp1->d_typename));
116 	if (lp1->d_packname[0] == '\0')
117 		strncpy(lp1->d_packname, "fictitious", sizeof(lp1->d_packname));
118 	if (lp1->d_nsectors == 0)
119 		lp1->d_nsectors = 32;
120 	if (lp1->d_ntracks == 0)
121 		lp1->d_ntracks = 64;
122 	lp1->d_secpercyl = lp1->d_nsectors * lp1->d_ntracks;
123 	lp1->d_ncylinders = lp1->d_secperunit / lp1->d_secpercyl;
124 	if (lp1->d_rpm == 0)
125 		lp1->d_rpm = 3600;
126 	if (lp1->d_interleave == 0)
127 		lp1->d_interleave = 1;
128 	if (lp1->d_npartitions < RAW_PART + 1)
129 		lp1->d_npartitions = MAXPARTITIONS;
130 	if (lp1->d_bbsize == 0)
131 		lp1->d_bbsize = BBSIZE;
132 	if (lp1->d_sbsize == 0)
133 		lp1->d_sbsize = SBSIZE;
134 
135 	/*
136 	 * Used by various devices to create a compatibility slice which
137 	 * allows us to mount root from devices which do not have a
138 	 * disklabel.  Particularly: CDs.
139 	 */
140 	lp1->d_partitions[RAW_PART].p_size = lp1->d_secperunit;
141 	if (info->d_dsflags & DSO_COMPATPARTA) {
142 		lp1->d_partitions[0].p_size = lp1->d_secperunit;
143 		lp1->d_partitions[0].p_fstype = FS_OTHER;
144 	}
145 	lp1->d_magic = DISKMAGIC;
146 	lp1->d_magic2 = DISKMAGIC;
147 	lp1->d_checksum = dkcksum(lp1);
148 	return (lp1);
149 }
150 
151 /*
152  * Determine the size of the transfer, and make sure it is
153  * within the boundaries of the partition. Adjust transfer
154  * if needed, and signal errors or early completion.
155  *
156  * XXX TODO:
157  *	o Split buffers that are too big for the device.
158  *	o Check for overflow.
159  *	o Finish cleaning this up.
160  *
161  * This function returns 1 on success, 0 if transfer equates
162  * to EOF (end of disk) or -1 on failure.  The appropriate
163  * 'errno' value is also set in bp->b_error and bp->b_flags
164  * is marked with B_ERROR.
165  */
166 struct bio *
167 dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
168 {
169 	struct buf *bp = bio->bio_buf;
170 	struct bio *nbio;
171 	struct disklabel *lp;
172 	char *msg;
173 	long nsec;
174 	u_int64_t secno;
175 	u_int64_t endsecno;
176 	u_int64_t labelsect;
177 	u_int64_t slicerel_secno;
178 	struct diskslice *sp;
179 	u_int32_t part;
180 	u_int32_t slice;
181 	int shift;
182 	int mask;
183 
184 	slice = dkslice(dev);
185 	part  = dkpart(dev);
186 
187 	if (bio->bio_offset < 0) {
188 		kprintf("dscheck(%s): negative bio_offset %lld\n",
189 			devtoname(dev), bio->bio_offset);
190 		goto bad;
191 	}
192 	if (slice >= ssp->dss_nslices) {
193 		kprintf("dscheck(%s): slice too large %d/%d\n",
194 			devtoname(dev), slice, ssp->dss_nslices);
195 		goto bad;
196 	}
197 	sp = &ssp->dss_slices[slice];
198 
199 	/*
200 	 * Calculate secno and nsec
201 	 */
202 	if (ssp->dss_secmult == 1) {
203 		shift = DEV_BSHIFT;
204 		goto doshift;
205 	} else if (ssp->dss_secshift != -1) {
206 		shift = DEV_BSHIFT + ssp->dss_secshift;
207 doshift:
208 		mask = (1 << shift) - 1;
209 		if ((int)bp->b_bcount & mask)
210 			goto bad_bcount;
211 		if ((int)bio->bio_offset & mask)
212 			goto bad_blkno;
213 		secno = bio->bio_offset >> shift;
214 		nsec = bp->b_bcount >> shift;
215 	} else {
216 		if (bp->b_bcount % ssp->dss_secsize)
217 			goto bad_bcount;
218 		if (bio->bio_offset % ssp->dss_secsize)
219 			goto bad_blkno;
220 		secno = bio->bio_offset / ssp->dss_secsize;
221 		nsec = bp->b_bcount / ssp->dss_secsize;
222 	}
223 
224 	/*
225 	 * Calculate slice-relative sector number end slice-relative
226 	 * limit.
227 	 */
228 	if (slice == WHOLE_DISK_SLICE) {
229 		/*
230 		 * Labels have not been allowed on whole-disks for a while.
231 		 * This really puts the nail in the coffin... no disk
232 		 * snooping will occur even if you tried to write a label
233 		 * without a slice structure.
234 		 *
235 		 * Accesses to the WHOLE_DISK_SLICE do not use a disklabel
236 		 * and partition numbers are special-cased.  Currently numbers
237 		 * less then 128 are not allowed.  Partition numbers >= 128
238 		 * are encoded in the high 8 bits of the 64 bit buffer offset
239 		 * and are fed directly through to the device with no
240 		 * further interpretation.  In particular, no sector
241 		 * translation interpretation should occur because the
242 		 * sector size for the special raw access may not be the
243 		 * same as the nominal sector size for the device.
244 		 */
245 		lp = NULL;
246 		if (part < 128) {
247 			kprintf("dscheck(%s): illegal partition number (%d) "
248 				"for WHOLE_DISK_SLICE access\n",
249 				devtoname(dev), part);
250 			goto bad;
251 		} else if (part != WHOLE_SLICE_PART) {
252 			nbio = push_bio(bio);
253 			nbio->bio_offset = bio->bio_offset |
254 					   (u_int64_t)part << 56;
255 			return(nbio);
256 		}
257 
258 		/*
259 		 * sp->ds_size is for the whole disk in the WHOLE_DISK_SLICE.
260 		 */
261 		labelsect = 0;	/* ignore any reserved sectors, do not sniff */
262 		endsecno = sp->ds_size;
263 		slicerel_secno = secno;
264 	} else if (part == WHOLE_SLICE_PART) {
265 		/*
266 		 * We are accessing a slice.  Snoop the label and check
267 		 * reserved blocks only if a label is present, otherwise
268 		 * do not.  A label may be present if (1) there are active
269 		 * opens on the disk (not necessarily this slice) or
270 		 * (2) the disklabel program has written an in-core label
271 		 * and now wants to write it out, or (3) the management layer
272 		 * is trying to write out an in-core layer.  In case (2) and
273 		 * (3) we MUST snoop the write or the on-disk version of the
274 		 * disklabel will not be properly translated.
275 		 *
276 		 * NOTE! opens on a whole-slice partition will not attempt
277 		 * to read a disklabel in.
278 		 */
279 		if ((lp = sp->ds_label) != NULL) {
280 			labelsect = sp->ds_skip_bsdlabel;
281 		} else {
282 			labelsect = 0;
283 		}
284 		endsecno = sp->ds_size;
285 		slicerel_secno = secno;
286 	} else if ((lp = sp->ds_label) && part < lp->d_npartitions) {
287 		/*
288 		 * Acesss through disklabel, partition present.
289 		 */
290 		struct partition *pp;
291 
292 		labelsect = sp->ds_skip_bsdlabel;
293 		pp = &lp->d_partitions[dkpart(dev)];
294 		endsecno = pp->p_size;
295 		slicerel_secno = pp->p_offset + secno;
296 	} else if (lp) {
297 		/*
298 		 * Partition out of bounds
299 		 */
300 		kprintf("dscheck(%s): partition out of bounds %d/%d\n",
301 			devtoname(dev),
302 			part, lp->d_npartitions);
303 		goto bad;
304 	} else {
305 		/*
306 		 * Attempt to access partition when no disklabel present
307 		 */
308 		kprintf("dscheck(%s): attempt to access non-existant partition\n",
309 			devtoname(dev));
310 		goto bad;
311 	}
312 
313 	/*
314 	 * labelsect will reflect the extent of any reserved blocks from
315 	 * the beginning of the slice.  We only check the slice reserved
316 	 * fields (sp->ds_skip_platform and sp->ds_skip_bsdlabel) if
317 	 * labelsect is non-zero, otherwise we ignore them.  When labelsect
318 	 * is non-zero, sp->ds_skip_platform indicates the sector where the
319 	 * disklabel begins.
320 	 *
321 	 * First determine if an attempt is being made to write to a
322 	 * reserved area when such writes are not allowed.
323 	 */
324 #if 0
325 	if (slicerel_secno < 16 && nsec &&
326 	    bp->b_cmd != BUF_CMD_READ) {
327 		kprintf("Attempt to write to reserved sector %lld labelsect %lld label %p/%p skip_plat %d skip_bsd %d WLABEL %d\n",
328 			slicerel_secno,
329 			labelsect,
330 			sp->ds_label, lp,
331 			sp->ds_skip_platform,
332 			sp->ds_skip_bsdlabel,
333 			sp->ds_wlabel);
334 	}
335 #endif
336 	if (slicerel_secno < labelsect && nsec &&
337 	    bp->b_cmd != BUF_CMD_READ && sp->ds_wlabel == 0) {
338 		bp->b_error = EROFS;
339 		goto error;
340 	}
341 
342 	/*
343 	 * If we get here, bio_offset must be on a block boundary and
344 	 * the sector size must be a power of 2.
345 	 */
346 	if ((bio->bio_offset & (ssp->dss_secsize - 1)) ||
347 	    (ssp->dss_secsize ^ (ssp->dss_secsize - 1)) !=
348 	    ((ssp->dss_secsize << 1) - 1)) {
349 		kprintf("%s: invalid BIO offset, not sector aligned or"
350 			" invalid sector size (not power of 2) %08llx %d\n",
351 			devtoname(dev), bio->bio_offset, ssp->dss_secsize);
352 		goto bad;
353 	}
354 
355 	/*
356 	 * EOF handling
357 	 */
358 	if (secno + nsec > endsecno) {
359 		/*
360 		 * Return an error if beyond the end of the disk, or
361 		 * if B_BNOCLIP is set.  Tell the system that we do not
362 		 * need to keep the buffer around.
363 		 */
364 		if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
365 			goto bad;
366 
367 		/*
368 		 * If exactly at end of disk, return an EOF.  Throw away
369 		 * the buffer contents, if any, by setting B_INVAL.
370 		 */
371 		if (secno == endsecno) {
372 			bp->b_resid = bp->b_bcount;
373 			bp->b_flags |= B_INVAL;
374 			goto done;
375 		}
376 
377 		/*
378 		 * Else truncate
379 		 */
380 		nsec = endsecno - secno;
381 		bp->b_bcount = nsec * ssp->dss_secsize;
382 	}
383 
384 	nbio = push_bio(bio);
385 	nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
386 			   ssp->dss_secsize;
387 
388 	/*
389 	 * Snoop writes to the label area when labelsect is non-zero.
390 	 * The label sector starts at sector sp->ds_skip_platform within
391 	 * the slice and ends before sector sp->ds_skip_bsdlabel.  The
392 	 * write must contain the label sector for us to be able to snoop it.
393 	 *
394 	 * We have to adjust the label's fields to the on-disk format on
395 	 * a write and then adjust them back on completion of the write,
396 	 * or on a read.
397 	 *
398 	 * SNOOPs are required for disklabel -r and the DIOC* ioctls also
399 	 * depend on it on the backend for label operations.  XXX
400 	 *
401 	 * NOTE! ds_skip_platform is usually set to non-zero by the slice
402 	 * scanning code, indicating that the slice has reserved boot
403 	 * sector(s).  It is also set for compatibility reasons via
404 	 * the DSO_COMPATMBR flag.  But it is not a requirement and it
405 	 * can be 0, indicating that the disklabel (if present) is stored
406 	 * at the beginning of the slice.  In most cases ds_skip_platform
407 	 * will be '1'.
408 	 *
409 	 * ds_skip_bsdlabel is inclusive of ds_skip_platform.  If they are
410 	 * the same then there is no label present, even if non-zero.
411 	 */
412 	if (slicerel_secno < labelsect &&	/* also checks labelsect!=0 */
413 	    sp->ds_skip_platform < labelsect && /* degenerate case */
414 	    slicerel_secno <= sp->ds_skip_platform &&
415 	    slicerel_secno + nsec > sp->ds_skip_platform) {
416 		/*
417 		 * Set up our own callback on I/O completion to handle
418 		 * undoing the fixup we did for the write as well as
419 		 * doing the fixup for a read.
420 		 */
421 		nbio->bio_done = dsiodone;
422 		nbio->bio_caller_info1.ptr = sp;
423 		nbio->bio_caller_info2.offset =
424 		    (sp->ds_skip_platform - slicerel_secno) * ssp->dss_secsize;
425 		if (bp->b_cmd != BUF_CMD_READ) {
426 			msg = fixlabel(
427 				NULL, sp,
428 			       (struct disklabel *)
429 			       (bp->b_data + (int)nbio->bio_caller_info2.offset),
430 			       TRUE);
431 			if (msg != NULL) {
432 				kprintf("dscheck(%s): %s\n",
433 				    devtoname(dev), msg);
434 				bp->b_error = EROFS;
435 				pop_bio(nbio);
436 				goto error;
437 			}
438 		}
439 	}
440 	return (nbio);
441 
442 bad_bcount:
443 	kprintf(
444 	"dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
445 	    devtoname(dev), bp->b_bcount, ssp->dss_secsize);
446 	goto bad;
447 
448 bad_blkno:
449 	kprintf(
450 	"dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
451 	    devtoname(dev), bio->bio_offset, ssp->dss_secsize);
452 bad:
453 	bp->b_error = EINVAL;
454 	/* fall through */
455 error:
456 	/*
457 	 * Terminate the I/O with a ranging error.  Since the buffer is
458 	 * either illegal or beyond the file EOF, mark it B_INVAL as well.
459 	 */
460 	bp->b_resid = bp->b_bcount;
461 	bp->b_flags |= B_ERROR | B_INVAL;
462 done:
463 	/*
464 	 * Caller must biodone() the originally passed bio if NULL is
465 	 * returned.
466 	 */
467 	return (NULL);
468 }
469 
470 void
471 dsclose(cdev_t dev, int mode, struct diskslices *ssp)
472 {
473 	u_int32_t part;
474 	u_int32_t slice;
475 	struct diskslice *sp;
476 
477 	slice = dkslice(dev);
478 	part  = dkpart(dev);
479 	if (slice < ssp->dss_nslices) {
480 		sp = &ssp->dss_slices[slice];
481 		if (part < sizeof(sp->ds_openmask) * 8)
482 			sp->ds_openmask &= ~(1 << part);
483 	}
484 }
485 
486 void
487 dsgone(struct diskslices **sspp)
488 {
489 	int slice;
490 	struct diskslice *sp;
491 	struct diskslices *ssp;
492 
493 	for (slice = 0, ssp = *sspp; slice < ssp->dss_nslices; slice++) {
494 		sp = &ssp->dss_slices[slice];
495 		free_ds_label(ssp, slice);
496 	}
497 	kfree(ssp, M_DEVBUF);
498 	*sspp = NULL;
499 }
500 
501 /*
502  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
503  * is subject to the same restriction as dsopen().
504  */
505 int
506 dsioctl(cdev_t dev, u_long cmd, caddr_t data, int flags,
507 	struct diskslices **sspp, struct disk_info *info)
508 {
509 	int error;
510 	struct disklabel *lp;
511 	int old_wlabel;
512 	u_char openmask;
513 	int part;
514 	int slice;
515 	struct diskslice *sp;
516 	struct diskslices *ssp;
517 	struct partition *pp;
518 
519 	slice = dkslice(dev);
520 	part = dkpart(dev);
521 	ssp = *sspp;
522 	if (slice >= ssp->dss_nslices)
523 		return (EINVAL);
524 	sp = &ssp->dss_slices[slice];
525 	lp = sp->ds_label;
526 	switch (cmd) {
527 
528 	case DIOCGDVIRGIN:
529 		/*
530 		 * You can only retrieve a virgin disklabel on the whole
531 		 * disk slice or whole-slice partition.
532 		 */
533 		if (slice != WHOLE_DISK_SLICE &&
534 		    part != WHOLE_SLICE_PART) {
535 			return(EINVAL);
536 		}
537 
538 		lp = (struct disklabel *)data;
539 		if (ssp->dss_slices[WHOLE_DISK_SLICE].ds_label) {
540 			*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
541 		} else {
542 			bzero(lp, sizeof(struct disklabel));
543 		}
544 		lp->d_magic = DISKMAGIC;
545 		lp->d_magic2 = DISKMAGIC;
546 
547 		lp->d_npartitions = MAXPARTITIONS;
548 		if (lp->d_interleave == 0)
549 			lp->d_interleave = 1;
550 		if (lp->d_rpm == 0)
551 			lp->d_rpm = 3600;
552 		if (lp->d_nsectors == 0)
553 			lp->d_nsectors = 32;
554 		if (lp->d_ntracks == 0)
555 			lp->d_ntracks = 64;
556 
557 		lp->d_bbsize = BBSIZE;
558 		lp->d_sbsize = SBSIZE;
559 		lp->d_secpercyl = lp->d_nsectors * lp->d_ntracks;
560 		lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
561 
562 		/*
563 		 * Set or Modify the partition sizes to accomodate the slice,
564 		 * since we started with a copy of the virgin label stored
565 		 * in the whole-disk-slice and we are probably not a
566 		 * whole-disk slice.
567 		 */
568 		lp->d_secperunit = sp->ds_size;
569 		pp = &lp->d_partitions[RAW_PART];
570 		pp->p_offset = 0;
571 		pp->p_size = lp->d_secperunit;
572 		if (info->d_dsflags & DSO_COMPATPARTA) {
573 			pp = &lp->d_partitions[0];
574 			pp->p_offset = 0;
575 			pp->p_size = lp->d_secperunit;
576 			pp->p_fstype = FS_OTHER;
577 		}
578 		lp->d_checksum = 0;
579 		lp->d_checksum = dkcksum(lp);
580 		return (0);
581 
582 	case DIOCGDINFO:
583 		/*
584 		 * You can only retrieve a disklabel on the whole
585 		 * slice partition.
586 		 *
587 		 * We do not support labels directly on whole-disks
588 		 * any more (that is, disks without slices), unless the
589 		 * device driver has asked for a compatible label (e.g.
590 		 * for a CD) to allow booting off of storage that is
591 		 * otherwise unlabeled.
592 		 */
593 		error = 0;
594 		if (part != WHOLE_SLICE_PART)
595 			return(EINVAL);
596 		if (slice == WHOLE_DISK_SLICE &&
597 		    (info->d_dsflags & DSO_COMPATLABEL) == 0) {
598 			return (ENODEV);
599 		}
600 		if (sp->ds_label == NULL) {
601 			error = dsreadandsetlabel(dev, info->d_dsflags,
602 						  ssp, sp, info);
603 		}
604 		if (error == 0)
605 			*(struct disklabel *)data = *sp->ds_label;
606 		return (error);
607 
608 	case DIOCGPART:
609 		{
610 			struct partinfo *dpart = (void *)data;
611 
612 			/*
613 			 * If accessing a whole-slice partition the disk
614 			 * management layer may not have tried to read the
615 			 * disklabel.  We have to try to read the label
616 			 * in order to properly initialize the ds_skip_*
617 			 * fields.
618 			 *
619 			 * We ignore any error.
620 			 */
621 			if (sp->ds_label == NULL && part == WHOLE_SLICE_PART &&
622 			    slice != WHOLE_DISK_SLICE) {
623 				dsreadandsetlabel(dev, info->d_dsflags,
624 						  ssp, sp, info);
625 			}
626 
627 			bzero(dpart, sizeof(*dpart));
628 			dpart->media_offset   = (u_int64_t)sp->ds_offset *
629 						info->d_media_blksize;
630 			dpart->media_size     = (u_int64_t)sp->ds_size *
631 						info->d_media_blksize;
632 			dpart->media_blocks   = sp->ds_size;
633 			dpart->media_blksize  = info->d_media_blksize;
634 			dpart->skip_platform = sp->ds_skip_platform;
635 			dpart->skip_bsdlabel = sp->ds_skip_bsdlabel;
636 
637 			if (slice != WHOLE_DISK_SLICE &&
638 			    part != WHOLE_SLICE_PART) {
639 				struct partition *p;
640 
641 				if (lp == NULL || part >= lp->d_npartitions)
642 					return(EINVAL);
643 
644 				p = &lp->d_partitions[part];
645 				dpart->fstype = p->p_fstype;
646 				dpart->media_offset += (u_int64_t)p->p_offset *
647 						       info->d_media_blksize;
648 				dpart->media_size = (u_int64_t)p->p_size *
649 						    info->d_media_blksize;
650 				dpart->media_blocks = (u_int64_t)p->p_size;
651 
652 				/*
653 				 * partition starting sector (p_offset)
654 				 * requires slice's reserved areas to be
655 				 * adjusted.
656 				 */
657 				if (dpart->skip_platform > p->p_offset)
658 					dpart->skip_platform -= p->p_offset;
659 				else
660 					dpart->skip_platform = 0;
661 				if (dpart->skip_bsdlabel > p->p_offset)
662 					dpart->skip_bsdlabel -= p->p_offset;
663 				else
664 					dpart->skip_bsdlabel = 0;
665 			}
666 
667 			/*
668 			 * Load remaining fields from the info structure
669 			 */
670 			dpart->d_nheads =	info->d_nheads;
671 			dpart->d_ncylinders =	info->d_ncylinders;
672 			dpart->d_secpertrack =	info->d_secpertrack;
673 			dpart->d_secpercyl =	info->d_secpercyl;
674 		}
675 		return (0);
676 
677 	case DIOCGSLICEINFO:
678 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
679 				 (char *)ssp);
680 		return (0);
681 
682 	case DIOCSDINFO:
683 		/*
684 		 * You can write a disklabel on the whole disk slice or
685 		 * whole-slice partition.
686 		 */
687 		if (slice != WHOLE_DISK_SLICE &&
688 		    part != WHOLE_SLICE_PART) {
689 			return(EINVAL);
690 		}
691 
692 		/*
693 		 * We no longer support writing disklabels directly to media
694 		 * without there being a slice.  Keep this as a separate
695 		 * conditional.
696 		 */
697 		if (slice == WHOLE_DISK_SLICE)
698 			return (ENODEV);
699 
700 		if (!(flags & FWRITE))
701 			return (EBADF);
702 		lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK);
703 		if (sp->ds_label == NULL)
704 			bzero(lp, sizeof *lp);
705 		else
706 			bcopy(sp->ds_label, lp, sizeof *lp);
707 		if (sp->ds_label == NULL) {
708 			openmask = 0;
709 		} else {
710 			openmask = sp->ds_openmask;
711 			if (slice == COMPATIBILITY_SLICE) {
712 				openmask |= ssp->dss_slices[
713 				    ssp->dss_first_bsd_slice].ds_openmask;
714 			} else if (slice == ssp->dss_first_bsd_slice) {
715 				openmask |= ssp->dss_slices[
716 				    COMPATIBILITY_SLICE].ds_openmask;
717 			}
718 		}
719 		error = setdisklabel(lp, (struct disklabel *)data,
720 				     (u_long)openmask);
721 		/* XXX why doesn't setdisklabel() check this? */
722 		if (error == 0 && lp->d_partitions[RAW_PART].p_offset != 0)
723 			error = EXDEV;
724 		if (error == 0) {
725 			if (lp->d_secperunit > sp->ds_size)
726 				error = ENOSPC;
727 			for (part = 0; part < lp->d_npartitions; part++)
728 				if (lp->d_partitions[part].p_size > sp->ds_size)
729 					error = ENOSPC;
730 		}
731 		if (error != 0) {
732 			kfree(lp, M_DEVBUF);
733 			return (error);
734 		}
735 		free_ds_label(ssp, slice);
736 		set_ds_label(ssp, slice, lp);
737 		return (0);
738 
739 	case DIOCSYNCSLICEINFO:
740 		/*
741 		 * This ioctl can only be done on the whole disk
742 		 */
743 		if (slice != WHOLE_DISK_SLICE || part != WHOLE_SLICE_PART)
744 			return (EINVAL);
745 
746 		if (*(int *)data == 0) {
747 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
748 				openmask = ssp->dss_slices[slice].ds_openmask;
749 				if (openmask &&
750 				    (slice != WHOLE_DISK_SLICE ||
751 				     openmask & ~(1 << RAW_PART))) {
752 					return (EBUSY);
753 				}
754 			}
755 		}
756 
757 		/*
758 		 * Temporarily forget the current slices struct and read
759 		 * the current one.
760 		 *
761 		 * NOTE:
762 		 *
763 		 * XXX should wait for current accesses on this disk to
764 		 * complete, then lock out future accesses and opens.
765 		 */
766 		*sspp = NULL;
767 		lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK);
768 		*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
769 		error = dsopen(dev, S_IFCHR, ssp->dss_oflags, sspp, info);
770 		if (error != 0) {
771 			kfree(lp, M_DEVBUF);
772 			*sspp = ssp;
773 			return (error);
774 		}
775 
776 		/*
777 		 * Reopen everything.  This is a no-op except in the "force"
778 		 * case and when the raw bdev and cdev are both open.  Abort
779 		 * if anything fails.
780 		 */
781 		for (slice = 0; slice < ssp->dss_nslices; slice++) {
782 			for (openmask = ssp->dss_slices[slice].ds_openmask,
783 			     part = 0; openmask; openmask >>= 1, part++) {
784 				if (!(openmask & 1))
785 					continue;
786 				error = dsopen(dkmodslice(dkmodpart(dev, part),
787 							  slice),
788 					       S_IFCHR, ssp->dss_oflags, sspp,
789 					       info);
790 				if (error != 0) {
791 					kfree(lp, M_DEVBUF);
792 					*sspp = ssp;
793 					return (EBUSY);
794 				}
795 			}
796 		}
797 
798 		kfree(lp, M_DEVBUF);
799 		dsgone(&ssp);
800 		return (0);
801 
802 	case DIOCWDINFO:
803 		error = dsioctl(dev, DIOCSDINFO, data, flags, &ssp, info);
804 		if (error != 0)
805 			return (error);
806 		/*
807 		 * XXX this used to hack on dk_openpart to fake opening
808 		 * partition 0 in case that is used instead of dkpart(dev).
809 		 */
810 		old_wlabel = sp->ds_wlabel;
811 		set_ds_wlabel(ssp, slice, TRUE);
812 		error = writedisklabel(dev, sp->ds_label);
813 		/* XXX should invalidate in-core label if write failed. */
814 		set_ds_wlabel(ssp, slice, old_wlabel);
815 		return (error);
816 
817 	case DIOCWLABEL:
818 		if (slice == WHOLE_DISK_SLICE)
819 			return (ENODEV);
820 		if (!(flags & FWRITE))
821 			return (EBADF);
822 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
823 		return (0);
824 
825 	default:
826 		return (ENOIOCTL);
827 	}
828 }
829 
830 /*
831  * Chain the bio_done.  b_cmd remains valid through such chaining.
832  */
833 static void
834 dsiodone(struct bio *bio)
835 {
836 	struct buf *bp = bio->bio_buf;
837 	char *msg;
838 
839 	if (bp->b_cmd != BUF_CMD_READ
840 	    || (!(bp->b_flags & B_ERROR) && bp->b_error == 0)) {
841 		msg = fixlabel(NULL, bio->bio_caller_info1.ptr,
842 			       (struct disklabel *)
843 			       (bp->b_data + (int)bio->bio_caller_info2.offset),
844 			       FALSE);
845 		if (msg != NULL)
846 			kprintf("%s\n", msg);
847 	}
848 	biodone(bio->bio_prev);
849 }
850 
851 int
852 dsisopen(struct diskslices *ssp)
853 {
854 	int slice;
855 
856 	if (ssp == NULL)
857 		return (0);
858 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
859 		if (ssp->dss_slices[slice].ds_openmask)
860 			return (1);
861 	}
862 	return (0);
863 }
864 
865 /*
866  * Allocate a slices "struct" and initialize it to contain only an empty
867  * compatibility slice (pointing to itself), a whole disk slice (covering
868  * the disk as described by the label), and (nslices - BASE_SLICES) empty
869  * slices beginning at BASE_SLICE.
870  */
871 struct diskslices *
872 dsmakeslicestruct(int nslices, struct disk_info *info)
873 {
874 	struct diskslice *sp;
875 	struct diskslices *ssp;
876 
877 	ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
878 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
879 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
880 	ssp->dss_nslices = nslices;
881 	ssp->dss_oflags = 0;
882 
883 	/*
884 	 * Figure out if we can use shifts or whether we have to
885 	 * use mod/multply to translate byte offsets into sector numbers.
886 	 */
887 	if ((info->d_media_blksize ^ (info->d_media_blksize - 1)) ==
888 	     (info->d_media_blksize << 1) - 1) {
889 		ssp->dss_secmult = info->d_media_blksize / DEV_BSIZE;
890 		if (ssp->dss_secmult & (ssp->dss_secmult - 1))
891 			ssp->dss_secshift = -1;
892 		else
893 			ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
894 	} else {
895 		ssp->dss_secmult = 0;
896 		ssp->dss_secshift = -1;
897 	}
898 	ssp->dss_secsize = info->d_media_blksize;
899 	sp = &ssp->dss_slices[0];
900 	bzero(sp, nslices * sizeof *sp);
901 	sp[WHOLE_DISK_SLICE].ds_size = info->d_media_blocks;
902 	return (ssp);
903 }
904 
905 char *
906 dsname(cdev_t dev, int unit, int slice, int part, char *partname)
907 {
908 	static char name[32];
909 	const char *dname;
910 	int used;
911 
912 	dname = dev_dname(dev);
913 	if (strlen(dname) > 16)
914 		dname = "nametoolong";
915 	ksnprintf(name, sizeof(name), "%s%d", dname, unit);
916 	partname[0] = '\0';
917 	used = strlen(name);
918 
919 	if (slice != WHOLE_DISK_SLICE) {
920 		/*
921 		 * slice or slice + partition.  BASE_SLICE is s1, but
922 		 * the compatibility slice (0) needs to be s0.
923 		 */
924 		used += ksnprintf(name + used, sizeof(name) - used,
925 				  "s%d", (slice ? slice - BASE_SLICE + 1 : 0));
926 		if (part != WHOLE_SLICE_PART) {
927 			used += ksnprintf(name + used, sizeof(name) - used,
928 					  "%c", 'a' + part);
929 			partname[0] = 'a' + part;
930 			partname[1] = 0;
931 		}
932 	} else if (part == WHOLE_SLICE_PART) {
933 		/*
934 		 * whole-disk-device, raw access to disk
935 		 */
936 		/* no string extension */
937 	} else if (part > 128) {
938 		/*
939 		 * whole-disk-device, extended raw access partitions.
940 		 * (typically used to access CD audio tracks)
941 		 */
942 		used += ksnprintf(name + used, sizeof(name) - used,
943 					  "t%d", part - 128);
944 	} else {
945 		/*
946 		 * whole-disk-device, illegal partition number
947 		 */
948 		used += ksnprintf(name + used, sizeof(name) - used,
949 					  "?%d", part);
950 	}
951 	return (name);
952 }
953 
954 /*
955  * This should only be called when the unit is inactive and the strategy
956  * routine should not allow it to become active unless we call it.  Our
957  * strategy routine must be special to allow activity.
958  */
959 int
960 dsopen(cdev_t dev, int mode, u_int flags,
961 	struct diskslices **sspp, struct disk_info *info)
962 {
963 	cdev_t dev1;
964 	int error;
965 	bool_t need_init;
966 	struct diskslice *sp;
967 	struct diskslices *ssp;
968 	int slice;
969 	int part;
970 
971 	dev->si_bsize_phys = info->d_media_blksize;
972 
973 	/*
974 	 * Do not attempt to read the slice table or disk label when
975 	 * accessing the whole-disk slice or a while-slice partition.
976 	 */
977 	if (dkslice(dev) == WHOLE_DISK_SLICE)
978 		flags |= DSO_ONESLICE | DSO_NOLABELS;
979 	if (dkpart(dev) == WHOLE_SLICE_PART)
980 		flags |= DSO_NOLABELS;
981 
982 	/*
983 	 * Reinitialize the slice table unless there is an open device
984 	 * on the unit.
985 	 *
986 	 * It would be nice if we didn't have to do this but when a
987 	 * user is slicing and partitioning up a disk it is a lot safer
988 	 * to not take any chances.
989 	 */
990 	ssp = *sspp;
991 	need_init = !dsisopen(ssp);
992 	if (ssp != NULL && need_init)
993 		dsgone(sspp);
994 	if (need_init) {
995 		/*
996 		 * Allocate a minimal slices "struct".  This will become
997 		 * the final slices "struct" if we don't want real slices
998 		 * or if we can't find any real slices.
999 		 *
1000 		 * Then scan the disk
1001 		 */
1002 		*sspp = dsmakeslicestruct(BASE_SLICE, info);
1003 
1004 		if ((flags & DSO_ONESLICE) == 0) {
1005 			TRACE(("mbrinit\n"));
1006 			error = mbrinit(dev, info, sspp);
1007 			if (error != 0) {
1008 				dsgone(sspp);
1009 				return (error);
1010 			}
1011 		}
1012 		ssp = *sspp;
1013 		ssp->dss_oflags = flags;
1014 
1015 		/*
1016 		 * If there are no real slices, then make the compatiblity
1017 		 * slice cover the whole disk.
1018 		 *
1019 		 * no sectors are reserved for the platform (ds_skip_platform
1020 		 * will be 0) in this case.  This means that if a disklabel
1021 		 * is installed it will be directly installed in sector 0
1022 		 * unless DSO_COMPATMBR is requested.
1023 		 */
1024 		if (ssp->dss_nslices == BASE_SLICE) {
1025 			sp = &ssp->dss_slices[COMPATIBILITY_SLICE];
1026 
1027 			sp->ds_size = info->d_media_blocks;
1028 			if (info->d_dsflags & DSO_COMPATMBR) {
1029 				sp->ds_skip_platform = 1;
1030 				sp->ds_skip_bsdlabel = sp->ds_skip_platform;
1031 			} else {
1032 				sp->ds_skip_platform = 0;
1033 				sp->ds_skip_bsdlabel = 0;
1034 			}
1035 		}
1036 
1037 		/*
1038 		 * Point the compatibility slice at the BSD slice, if any.
1039 		 */
1040 		for (slice = BASE_SLICE; slice < ssp->dss_nslices; slice++) {
1041 			sp = &ssp->dss_slices[slice];
1042 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */) {
1043 				struct diskslice *csp;
1044 
1045 				csp = &ssp->dss_slices[COMPATIBILITY_SLICE];
1046 				ssp->dss_first_bsd_slice = slice;
1047 				csp->ds_offset = sp->ds_offset;
1048 				csp->ds_size = sp->ds_size;
1049 				csp->ds_type = sp->ds_type;
1050 				csp->ds_skip_platform = sp->ds_skip_platform;
1051 				csp->ds_skip_bsdlabel = sp->ds_skip_bsdlabel;
1052 				break;
1053 			}
1054 		}
1055 
1056 		/*
1057 		 * By definition accesses via the whole-disk device do not
1058 		 * specify any reserved areas.  The whole disk may be read
1059 		 * or written by the whole-disk device.
1060 		 *
1061 		 * ds_label for a whole-disk device is only used as a
1062 		 * template.
1063 		 */
1064 		sp = &ssp->dss_slices[WHOLE_DISK_SLICE];
1065 		sp->ds_label = clone_label(info, NULL);
1066 		sp->ds_wlabel = TRUE;
1067 		sp->ds_skip_platform = 0;
1068 		sp->ds_skip_bsdlabel = 0;
1069 	}
1070 
1071 	/*
1072 	 * Load the disklabel for the slice being accessed unless it is
1073 	 * a whole-disk-slice or a whole-slice-partition (as determined
1074 	 * by DSO_NOLABELS).
1075 	 *
1076 	 * We could scan all slices here and try to load up their
1077 	 * disklabels, but that would cause us to access slices that
1078 	 * the user may otherwise not intend us to access, or corrupted
1079 	 * slices, etc.
1080 	 *
1081 	 * XXX if there are no opens on the slice we may want to re-read
1082 	 * the disklabel anyway, even if we have one cached.
1083 	 */
1084 	slice = dkslice(dev);
1085 	if (slice >= ssp->dss_nslices)
1086 		return (ENXIO);
1087 	sp = &ssp->dss_slices[slice];
1088 	part = dkpart(dev);
1089 
1090 	if ((flags & DSO_NOLABELS) == 0 && sp->ds_label == NULL) {
1091 		dev1 = dkmodslice(dkmodpart(dev, WHOLE_SLICE_PART), slice);
1092 
1093 		/*
1094 		 * If opening a raw disk we do not try to
1095 		 * read the disklabel now.  No interpretation of raw disks
1096 		 * (e.g. like 'da0') ever occurs.  We will try to read the
1097 		 * disklabel for a raw slice if asked to via DIOC* ioctls.
1098 		 *
1099 		 * Access to the label area is disallowed by default.  Note
1100 		 * however that accesses via WHOLE_DISK_SLICE, and accesses
1101 		 * via WHOLE_SLICE_PART for slices without valid disklabels,
1102 		 * will allow writes and ignore the flag.
1103 		 */
1104 		set_ds_wlabel(ssp, slice, FALSE);
1105 		dsreadandsetlabel(dev1, flags, ssp, sp, info);
1106 	}
1107 
1108 	/*
1109 	 * If opening a particular partition the disklabel must exist and
1110 	 * the partition must be present in the label.
1111 	 *
1112 	 * If the partition is the special whole-disk-slice no partition
1113 	 * table need exist.
1114 	 */
1115 	if (part != WHOLE_SLICE_PART && slice != WHOLE_DISK_SLICE) {
1116 		if (sp->ds_label == NULL || part >= sp->ds_label->d_npartitions)
1117 			return (EINVAL);
1118 		if (part < sizeof(sp->ds_openmask) * 8) {
1119 			sp->ds_openmask |= 1 << part;
1120 		}
1121 	}
1122 
1123 	/*
1124 	 * Do not allow special raw-extension partitions to be opened
1125 	 * if the device doesn't support them.  Raw-extension partitions
1126 	 * are typically used to handle CD tracks.
1127 	 */
1128 	if (slice == WHOLE_DISK_SLICE && part >= 128 &&
1129 	    part != WHOLE_SLICE_PART) {
1130 		if ((info->d_dsflags & DSO_RAWEXTENSIONS) == 0)
1131 			return (EINVAL);
1132 	}
1133 	return (0);
1134 }
1135 
1136 /*
1137  * Attempt to read the disklabel.  If successful, store it in sp->ds_label.
1138  *
1139  * If we cannot read the disklabel and DSO_COMPATLABEL is set, we construct
1140  * a fake label covering the whole disk.
1141  */
1142 static
1143 int
1144 dsreadandsetlabel(cdev_t dev, u_int flags,
1145 		  struct diskslices *ssp, struct diskslice *sp,
1146 		  struct disk_info *info)
1147 {
1148 	struct disklabel *lp1;
1149 	const char *msg;
1150 	const char *sname;
1151 	char partname[2];
1152 	int slice = dkslice(dev);
1153 
1154 	sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART, partname);
1155 	lp1 = clone_label(info, sp);
1156 	msg = readdisklabel(dev, lp1);
1157 
1158 	if (msg != NULL && (flags & DSO_COMPATLABEL)) {
1159 		msg = NULL;
1160 		kfree(lp1, M_DEVBUF);
1161 		lp1 = clone_label(info, sp);
1162 	}
1163 	if (msg == NULL)
1164 		msg = fixlabel(sname, sp, lp1, FALSE);
1165 	if (msg == NULL && lp1->d_secsize != info->d_media_blksize)
1166 		msg = "inconsistent sector size";
1167 	if (msg != NULL) {
1168 		if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
1169 			log(LOG_WARNING, "%s: cannot find label (%s)\n",
1170 			    sname, msg);
1171 		kfree(lp1, M_DEVBUF);
1172 	} else {
1173 		set_ds_label(ssp, slice, lp1);
1174 		set_ds_wlabel(ssp, slice, FALSE);
1175 	}
1176 	return (msg ? EINVAL : 0);
1177 }
1178 
1179 int64_t
1180 dssize(cdev_t dev, struct diskslices **sspp)
1181 {
1182 	struct disklabel *lp;
1183 	int part;
1184 	int slice;
1185 	struct diskslices *ssp;
1186 
1187 	slice = dkslice(dev);
1188 	part = dkpart(dev);
1189 	ssp = *sspp;
1190 	if (ssp == NULL || slice >= ssp->dss_nslices
1191 	    || !(ssp->dss_slices[slice].ds_openmask & (1 << part))) {
1192 		if (dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred) != 0)
1193 			return (-1);
1194 		dev_dclose(dev, FREAD, S_IFCHR);
1195 		ssp = *sspp;
1196 	}
1197 	lp = ssp->dss_slices[slice].ds_label;
1198 	if (lp == NULL)
1199 		return (-1);
1200 	return ((int64_t)lp->d_partitions[part].p_size);
1201 }
1202 
1203 static void
1204 free_ds_label(struct diskslices *ssp, int slice)
1205 {
1206 	struct disklabel *lp;
1207 	struct diskslice *sp;
1208 
1209 	sp = &ssp->dss_slices[slice];
1210 	lp = sp->ds_label;
1211 	if (lp == NULL)
1212 		return;
1213 	kfree(lp, M_DEVBUF);
1214 	set_ds_label(ssp, slice, (struct disklabel *)NULL);
1215 }
1216 
1217 static char *
1218 fixlabel(const char *sname, struct diskslice *sp, struct disklabel *lp, int writeflag)
1219 {
1220 	u_int64_t start;
1221 	u_int64_t end;
1222 	u_int64_t offset;
1223 	int part;
1224 	struct partition *pp;
1225 	bool_t warned;
1226 
1227 	/* These errors "can't happen" so don't bother reporting details. */
1228 	if (lp->d_magic != DISKMAGIC || lp->d_magic2 != DISKMAGIC)
1229 		return ("fixlabel: invalid magic");
1230 	if (dkcksum(lp) != 0)
1231 		return ("fixlabel: invalid checksum");
1232 
1233 	pp = &lp->d_partitions[RAW_PART];
1234 
1235 	/*
1236 	 * What a mess.  For ages old backwards compatibility the disklabel
1237 	 * on-disk stores absolute offsets instead of slice-relative offsets.
1238 	 * So fix it up when reading, writing, or snooping.
1239 	 *
1240 	 * The in-core label is always slice-relative.
1241 	 */
1242 	if (writeflag) {
1243 		start = 0;
1244 		offset = sp->ds_offset;
1245 	} else {
1246 		start = sp->ds_offset;
1247 		offset = -sp->ds_offset;
1248 	}
1249 	if (pp->p_offset != start) {
1250 		if (sname != NULL) {
1251 			kprintf(
1252 "%s: rejecting BSD label: raw partition offset != slice offset\n",
1253 			       sname);
1254 			slice_info(sname, sp);
1255 			partition_info(sname, RAW_PART, pp);
1256 		}
1257 		return ("fixlabel: raw partition offset != slice offset");
1258 	}
1259 	if (pp->p_size != sp->ds_size) {
1260 		if (sname != NULL) {
1261 			kprintf("%s: raw partition size != slice size\n", sname);
1262 			slice_info(sname, sp);
1263 			partition_info(sname, RAW_PART, pp);
1264 		}
1265 		if (pp->p_size > sp->ds_size) {
1266 			if (sname == NULL)
1267 				return ("fixlabel: raw partition size > slice size");
1268 			kprintf("%s: truncating raw partition\n", sname);
1269 			pp->p_size = sp->ds_size;
1270 		}
1271 	}
1272 	end = start + sp->ds_size;
1273 	if (start > end)
1274 		return ("fixlabel: slice wraps");
1275 	if (lp->d_secpercyl <= 0)
1276 		return ("fixlabel: d_secpercyl <= 0");
1277 	pp -= RAW_PART;
1278 	warned = FALSE;
1279 	for (part = 0; part < lp->d_npartitions; part++, pp++) {
1280 		if (pp->p_offset != 0 || pp->p_size != 0) {
1281 			if (pp->p_offset < start
1282 			    || pp->p_offset + pp->p_size > end
1283 			    || pp->p_offset + pp->p_size < pp->p_offset) {
1284 				if (sname != NULL) {
1285 					kprintf(
1286 "%s: rejecting partition in BSD label: it isn't entirely within the slice\n",
1287 					       sname);
1288 					if (!warned) {
1289 						slice_info(sname, sp);
1290 						warned = TRUE;
1291 					}
1292 					partition_info(sname, part, pp);
1293 				}
1294 				/* XXX else silently discard junk. */
1295 				bzero(pp, sizeof *pp);
1296 			} else {
1297 				pp->p_offset += offset;
1298 			}
1299 		}
1300 	}
1301 	lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
1302 	lp->d_secperunit = sp->ds_size;
1303  	lp->d_checksum = 0;
1304  	lp->d_checksum = dkcksum(lp);
1305 	return (NULL);
1306 }
1307 
1308 static void
1309 partition_info(const char *sname, int part, struct partition *pp)
1310 {
1311 	kprintf("%s%c: start %lu, end %lu, size %lu\n", sname, 'a' + part,
1312 	       (u_long)pp->p_offset, (u_long)(pp->p_offset + pp->p_size - 1),
1313 	       (u_long)pp->p_size);
1314 }
1315 
1316 static void
1317 slice_info(const char *sname, struct diskslice *sp)
1318 {
1319 	kprintf("%s: start %llu, end %llu, size %llu\n", sname,
1320 	       sp->ds_offset, sp->ds_offset + sp->ds_size - 1, sp->ds_size);
1321 }
1322 
1323 static void
1324 set_ds_label(struct diskslices *ssp, int slice, struct disklabel *lp)
1325 {
1326 	struct diskslice *sp1 = &ssp->dss_slices[slice];
1327 	struct diskslice *sp2;
1328 
1329 	if (slice == COMPATIBILITY_SLICE)
1330 		sp2 = &ssp->dss_slices[ssp->dss_first_bsd_slice];
1331 	else if (slice == ssp->dss_first_bsd_slice)
1332 		sp2 = &ssp->dss_slices[COMPATIBILITY_SLICE];
1333 	else
1334 		sp2 = NULL;
1335 	sp1->ds_label = lp;
1336 	if (sp2)
1337 		sp2->ds_label = lp;
1338 
1339 	/*
1340 	 * If the slice is not the whole-disk slice, setup the reserved
1341 	 * area(s).
1342 	 *
1343 	 * The reserved area for the original bsd disklabel, inclusive of
1344 	 * the label and space for boot2, is 15 sectors.  If you've
1345 	 * noticed people traditionally skipping 16 sectors its because
1346 	 * the sector numbers start at the beginning of the slice rather
1347 	 * then the beginning of the disklabel and traditional dos slices
1348 	 * reserve a sector at the beginning for the boot code.
1349 	 *
1350 	 * NOTE! With the traditional bsdlabel, the first N bytes of boot2
1351 	 * overlap with the disklabel.  The disklabel program checks that
1352 	 * they are 0.
1353 	 *
1354 	 * When clearing a label, the bsdlabel reserved area is reset.
1355 	 */
1356 	if (slice != WHOLE_DISK_SLICE) {
1357 		if (lp) {
1358 			/*
1359 			 * leave room for the disklabel and boot2 -
1360 			 * traditional label only.  XXX bad hack.  Such
1361 			 * labels cannot install a boot area due to
1362 			 * insufficient space.
1363 			 */
1364 			int lsects = SBSIZE / ssp->dss_secsize -
1365 				     sp1->ds_skip_platform;
1366 			if (lsects <= 0)
1367 				lsects = 1;
1368 			sp1->ds_skip_bsdlabel = sp1->ds_skip_platform + lsects;
1369 			if (sp2)
1370 				sp2->ds_skip_bsdlabel = sp1->ds_skip_bsdlabel;
1371 		} else {
1372 			sp1->ds_skip_bsdlabel = sp1->ds_skip_platform;
1373 			if (sp2)
1374 				sp2->ds_skip_bsdlabel = sp1->ds_skip_platform;
1375 		}
1376 	}
1377 }
1378 
1379 static void
1380 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
1381 {
1382 	ssp->dss_slices[slice].ds_wlabel = wlabel;
1383 	if (slice == COMPATIBILITY_SLICE)
1384 		ssp->dss_slices[ssp->dss_first_bsd_slice].ds_wlabel = wlabel;
1385 	else if (slice == ssp->dss_first_bsd_slice)
1386 		ssp->dss_slices[COMPATIBILITY_SLICE].ds_wlabel = wlabel;
1387 }
1388