xref: /dflybsd-src/sys/kern/subr_diskslice.c (revision fc2504ef909281659f2c55bc181e08911a69ea34)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
43  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
45  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
47  * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.42 2007/06/13 20:58:37 dillon Exp $
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/conf.h>
54 #include <sys/disklabel.h>
55 #include <sys/diskslice.h>
56 #include <sys/disk.h>
57 #include <sys/diskmbr.h>
58 #include <sys/fcntl.h>
59 #include <sys/malloc.h>
60 #include <sys/stat.h>
61 #include <sys/syslog.h>
62 #include <sys/proc.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/thread2.h>
66 
67 #include <vfs/ufs/dinode.h>	/* XXX used only for fs.h */
68 #include <vfs/ufs/fs.h>		/* XXX used only to get BBSIZE/SBSIZE */
69 
70 #define TRACE(str)	do { if (ds_debug) kprintf str; } while (0)
71 
72 typedef	u_char	bool_t;
73 
74 static volatile bool_t ds_debug;
75 
76 static struct disklabel *clone_label (struct disk_info *info,
77 					struct diskslice *sp);
78 static void dsiodone (struct bio *bio);
79 static char *fixlabel (const char *sname, struct diskslice *sp,
80 			   struct disklabel *lp, int writeflag);
81 static int  dsreadandsetlabel(cdev_t dev, u_int flags,
82 			   struct diskslices *ssp, struct diskslice *sp,
83 			   struct disk_info *info);
84 static void free_ds_label (struct diskslices *ssp, int slice);
85 static void partition_info (const char *sname, int part, struct partition *pp);
86 static void slice_info (const char *sname, struct diskslice *sp);
87 static void set_ds_label (struct diskslices *ssp, int slice,
88 			      struct disklabel *lp);
89 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
90 
91 /*
92  * Create a disklabel based on a disk_info structure, initializing
93  * the appropriate fields and creating a raw partition that covers the
94  * whole disk.
95  *
96  * If a diskslice is passed, the label is truncated to the slice
97  */
98 static struct disklabel *
99 clone_label(struct disk_info *info, struct diskslice *sp)
100 {
101 	struct disklabel *lp1;
102 
103 	lp1 = kmalloc(sizeof *lp1, M_DEVBUF, M_WAITOK | M_ZERO);
104 	lp1->d_nsectors = info->d_secpertrack;
105 	lp1->d_ntracks = info->d_nheads;
106 	lp1->d_secpercyl = info->d_secpercyl;
107 	lp1->d_secsize = info->d_media_blksize;
108 
109 	if (sp)
110 		lp1->d_secperunit = (u_int)sp->ds_size;
111 	else
112 		lp1->d_secperunit = (u_int)info->d_media_blocks;
113 
114 	if (lp1->d_typename[0] == '\0')
115 		strncpy(lp1->d_typename, "amnesiac", sizeof(lp1->d_typename));
116 	if (lp1->d_packname[0] == '\0')
117 		strncpy(lp1->d_packname, "fictitious", sizeof(lp1->d_packname));
118 	if (lp1->d_nsectors == 0)
119 		lp1->d_nsectors = 32;
120 	if (lp1->d_ntracks == 0)
121 		lp1->d_ntracks = 64;
122 	lp1->d_secpercyl = lp1->d_nsectors * lp1->d_ntracks;
123 	lp1->d_ncylinders = lp1->d_secperunit / lp1->d_secpercyl;
124 	if (lp1->d_rpm == 0)
125 		lp1->d_rpm = 3600;
126 	if (lp1->d_interleave == 0)
127 		lp1->d_interleave = 1;
128 	if (lp1->d_npartitions < RAW_PART + 1)
129 		lp1->d_npartitions = MAXPARTITIONS;
130 	if (lp1->d_bbsize == 0)
131 		lp1->d_bbsize = BBSIZE;
132 	if (lp1->d_sbsize == 0)
133 		lp1->d_sbsize = SBSIZE;
134 
135 	/*
136 	 * Used by various devices to create a compatibility slice which
137 	 * allows us to mount root from devices which do not have a
138 	 * disklabel.  Particularly: CDs.
139 	 */
140 	lp1->d_partitions[RAW_PART].p_size = lp1->d_secperunit;
141 	if (info->d_dsflags & DSO_COMPATPARTA) {
142 		lp1->d_partitions[0].p_size = lp1->d_secperunit;
143 		lp1->d_partitions[0].p_fstype = FS_OTHER;
144 	}
145 	lp1->d_magic = DISKMAGIC;
146 	lp1->d_magic2 = DISKMAGIC;
147 	lp1->d_checksum = dkcksum(lp1);
148 	return (lp1);
149 }
150 
/*
 * Validate a block I/O request against the slice and partition encoded
 * in dev and translate it into a request on the underlying media.
 *
 * The transfer size and offset must be sector aligned.  A transfer that
 * starts inside the slice/partition but runs past its end is truncated
 * (unless B_BNOCLIP is set), and writes into the reserved label area
 * are refused unless the slice's ds_wlabel is set.
 *
 * XXX TODO:
 *	o Split buffers that are too big for the device.
 *	o Check for overflow.
 *	o Finish cleaning this up.
 *
 * Returns a bio pushed on top of the passed bio (see push_bio()) whose
 * bio_offset has been translated; the caller issues that bio to the
 * device.  Returns NULL when no I/O should be issued, in which case the
 * caller must biodone() the bio it passed in:
 *
 *	- on failure bp->b_error is set (EINVAL or EROFS), b_resid is set
 *	  to b_bcount and b_flags is marked B_ERROR | B_INVAL;
 *	- for a transfer that starts exactly at the end of the slice or
 *	  partition (EOF), b_resid is set to b_bcount and B_INVAL is set,
 *	  without B_ERROR.
 */
struct bio *
dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
{
	struct buf *bp = bio->bio_buf;
	struct bio *nbio;
	struct disklabel *lp;
	char *msg;
	long nsec;
	u_int64_t secno;
	u_int64_t endsecno;
	u_int64_t labelsect;
	u_int64_t slicerel_secno;
	struct diskslice *sp;
	u_int32_t part;
	u_int32_t slice;
	int shift;
	int mask;

	slice = dkslice(dev);
	part  = dkpart(dev);

	if (bio->bio_offset < 0) {
		kprintf("dscheck(%s): negative bio_offset %lld\n",
			devtoname(dev), bio->bio_offset);
		goto bad;
	}
	if (slice >= ssp->dss_nslices) {
		kprintf("dscheck(%s): slice too large %d/%d\n",
			devtoname(dev), slice, ssp->dss_nslices);
		goto bad;
	}
	sp = &ssp->dss_slices[slice];

	/*
	 * Calculate secno and nsec.  Shifts are used when the slices
	 * structure recorded a usable shift (dss_secmult == 1 or
	 * dss_secshift != -1), otherwise fall back to divide/modulo by
	 * the sector size.
	 */
	if (ssp->dss_secmult == 1) {
		shift = DEV_BSHIFT;
		goto doshift;
	} else if (ssp->dss_secshift != -1) {
		shift = DEV_BSHIFT + ssp->dss_secshift;
doshift:
		/* also entered via the goto in the dss_secmult == 1 case */
		mask = (1 << shift) - 1;
		if ((int)bp->b_bcount & mask)
			goto bad_bcount;
		if ((int)bio->bio_offset & mask)
			goto bad_blkno;
		secno = bio->bio_offset >> shift;
		nsec = bp->b_bcount >> shift;
	} else {
		if (bp->b_bcount % ssp->dss_secsize)
			goto bad_bcount;
		if (bio->bio_offset % ssp->dss_secsize)
			goto bad_blkno;
		secno = bio->bio_offset / ssp->dss_secsize;
		nsec = bp->b_bcount / ssp->dss_secsize;
	}

	/*
	 * Calculate the slice-relative sector number and the
	 * slice-relative limit.
	 */
	if (slice == WHOLE_DISK_SLICE) {
		/*
		 * Labels have not been allowed on whole-disks for a while.
		 * This really puts the nail in the coffin... no disk
		 * snooping will occur even if you tried to write a label
		 * without a slice structure.
		 *
		 * Accesses to the WHOLE_DISK_SLICE do not use a disklabel
		 * and partition numbers are special-cased.  Currently numbers
		 * less than 128 are not allowed.  Partition numbers >= 128
		 * are encoded in the high 8 bits of the 64 bit buffer offset
		 * and are fed directly through to the device with no
		 * further interpretation.  In particular, no sector
		 * translation interpretation should occur because the
		 * sector size for the special raw access may not be the
		 * same as the nominal sector size for the device.
		 */
		lp = NULL;
		if (part < 128) {
			kprintf("dscheck(%s): illegal partition number (%d) "
				"for WHOLE_DISK_SLICE access\n",
				devtoname(dev), part);
			goto bad;
		} else if (part != WHOLE_SLICE_PART) {
			/*
			 * Special raw partition: pass straight through,
			 * skipping every range and label check below.
			 */
			nbio = push_bio(bio);
			nbio->bio_offset = bio->bio_offset |
					   (u_int64_t)part << 56;
			return(nbio);
		}

		/*
		 * sp->ds_size is for the whole disk in the WHOLE_DISK_SLICE.
		 */
		labelsect = 0;	/* ignore any reserved sectors, do not sniff */
		endsecno = sp->ds_size;
		slicerel_secno = secno;
	} else if (part == WHOLE_SLICE_PART) {
		/*
		 * We are accessing a slice.  Snoop the label and check
		 * reserved blocks only if a label is present, otherwise
		 * do not.  A label may be present if (1) there are active
		 * opens on the disk (not necessarily this slice) or
		 * (2) the disklabel program has written an in-core label
		 * and now wants to write it out, or (3) the management layer
		 * is trying to write out an in-core layer.  In case (2) and
		 * (3) we MUST snoop the write or the on-disk version of the
		 * disklabel will not be properly translated.
		 *
		 * NOTE! opens on a whole-slice partition will not attempt
		 * to read a disklabel in.
		 */
		if ((lp = sp->ds_label) != NULL) {
			labelsect = sp->ds_skip_bsdlabel;
		} else {
			labelsect = 0;
		}
		endsecno = sp->ds_size;
		slicerel_secno = secno;
	} else if ((lp = sp->ds_label) && part < lp->d_npartitions) {
		/*
		 * Access through disklabel, partition present.  secno is
		 * partition-relative here, so the limit is the partition
		 * size and the slice-relative sector adds p_offset.
		 */
		struct partition *pp;

		labelsect = sp->ds_skip_bsdlabel;
		pp = &lp->d_partitions[dkpart(dev)];
		endsecno = pp->p_size;
		slicerel_secno = pp->p_offset + secno;
	} else if (lp) {
		/*
		 * Partition out of bounds
		 */
		kprintf("dscheck(%s): partition out of bounds %d/%d\n",
			devtoname(dev),
			part, lp->d_npartitions);
		goto bad;
	} else {
		/*
		 * Attempt to access partition when no disklabel present
		 */
		kprintf("dscheck(%s): attempt to access non-existant partition\n",
			devtoname(dev));
		goto bad;
	}

	/*
	 * labelsect will reflect the extent of any reserved blocks from
	 * the beginning of the slice.  We only check the slice reserved
	 * fields (sp->ds_skip_platform and sp->ds_skip_bsdlabel) if
	 * labelsect is non-zero, otherwise we ignore them.  When labelsect
	 * is non-zero, sp->ds_skip_platform indicates the sector where the
	 * disklabel begins.
	 *
	 * First determine if an attempt is being made to write to a
	 * reserved area when such writes are not allowed.
	 */
#if 0
	if (slicerel_secno < 16 && nsec &&
	    bp->b_cmd != BUF_CMD_READ) {
		kprintf("Attempt to write to reserved sector %lld labelsect %lld label %p/%p skip_plat %d skip_bsd %d WLABEL %d\n",
			slicerel_secno,
			labelsect,
			sp->ds_label, lp,
			sp->ds_skip_platform,
			sp->ds_skip_bsdlabel,
			sp->ds_wlabel);
	}
#endif
	if (slicerel_secno < labelsect && nsec &&
	    bp->b_cmd != BUF_CMD_READ && sp->ds_wlabel == 0) {
		bp->b_error = EROFS;
		goto error;
	}

	/*
	 * If we get here, bio_offset must be on a block boundary and
	 * the sector size must be a power of 2.
	 */
	if ((bio->bio_offset & (ssp->dss_secsize - 1)) ||
	    (ssp->dss_secsize ^ (ssp->dss_secsize - 1)) !=
	    ((ssp->dss_secsize << 1) - 1)) {
		kprintf("%s: invalid BIO offset, not sector aligned or"
			" invalid sector size (not power of 2) %08llx %d\n",
			devtoname(dev), bio->bio_offset, ssp->dss_secsize);
		goto bad;
	}

	/*
	 * EOF handling
	 */
	if (secno + nsec > endsecno) {
		/*
		 * Return an error if beyond the end of the disk, or
		 * if B_BNOCLIP is set.  Tell the system that we do not
		 * need to keep the buffer around.
		 */
		if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
			goto bad;

		/*
		 * If exactly at end of disk, return an EOF.  Throw away
		 * the buffer contents, if any, by setting B_INVAL.
		 */
		if (secno == endsecno) {
			bp->b_resid = bp->b_bcount;
			bp->b_flags |= B_INVAL;
			goto done;
		}

		/*
		 * Else truncate
		 */
		nsec = endsecno - secno;
		bp->b_bcount = nsec * ssp->dss_secsize;
	}

	/*
	 * Translate to a media-relative byte offset: slice start plus
	 * slice-relative sector, scaled by the sector size.
	 */
	nbio = push_bio(bio);
	nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
			   ssp->dss_secsize;

	/*
	 * Snoop writes to the label area when labelsect is non-zero.
	 * The label sector starts at sector sp->ds_skip_platform within
	 * the slice and ends before sector sp->ds_skip_bsdlabel.  The
	 * write must contain the label sector for us to be able to snoop it.
	 *
	 * We have to adjust the label's fields to the on-disk format on
	 * a write and then adjust them back on completion of the write,
	 * or on a read.
	 *
	 * SNOOPs are required for disklabel -r and the DIOC* ioctls also
	 * depend on it on the backend for label operations.  XXX
	 *
	 * NOTE! ds_skip_platform is usually set to non-zero by the slice
	 * scanning code, indicating that the slice has reserved boot
	 * sector(s).  It is also set for compatibility reasons via
	 * the DSO_COMPATMBR flag.  But it is not a requirement and it
	 * can be 0, indicating that the disklabel (if present) is stored
	 * at the beginning of the slice.  In most cases ds_skip_platform
	 * will be '1'.
	 *
	 * ds_skip_bsdlabel is inclusive of ds_skip_platform.  If they are
	 * the same then there is no label present, even if non-zero.
	 */
	if (slicerel_secno < labelsect &&	/* also checks labelsect!=0 */
	    sp->ds_skip_platform < labelsect && /* degenerate case */
	    slicerel_secno <= sp->ds_skip_platform &&
	    slicerel_secno + nsec > sp->ds_skip_platform) {
		/*
		 * Set up our own callback on I/O completion to handle
		 * undoing the fixup we did for the write as well as
		 * doing the fixup for a read.
		 *
		 * bio_caller_info2.offset is the byte offset of the label
		 * sector within the buffer; dsiodone() uses it to find the
		 * label again.
		 */
		nbio->bio_done = dsiodone;
		nbio->bio_caller_info1.ptr = sp;
		nbio->bio_caller_info2.offset =
		    (sp->ds_skip_platform - slicerel_secno) * ssp->dss_secsize;
		if (bp->b_cmd != BUF_CMD_READ) {
			msg = fixlabel(
				NULL, sp,
			       (struct disklabel *)
			       (bp->b_data + (int)nbio->bio_caller_info2.offset),
			       TRUE);
			if (msg != NULL) {
				kprintf("dscheck(%s): %s\n",
				    devtoname(dev), msg);
				bp->b_error = EROFS;
				/* undo the push_bio() before failing */
				pop_bio(nbio);
				goto error;
			}
		}
	}
	return (nbio);

bad_bcount:
	kprintf(
	"dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
	    devtoname(dev), bp->b_bcount, ssp->dss_secsize);
	goto bad;

bad_blkno:
	kprintf(
	"dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
	    devtoname(dev), bio->bio_offset, ssp->dss_secsize);
bad:
	bp->b_error = EINVAL;
	/* fall through */
error:
	/*
	 * Terminate the I/O with a ranging error.  Since the buffer is
	 * either illegal or beyond the file EOF, mark it B_INVAL as well.
	 */
	bp->b_resid = bp->b_bcount;
	bp->b_flags |= B_ERROR | B_INVAL;
done:
	/*
	 * Caller must biodone() the originally passed bio if NULL is
	 * returned.
	 */
	return (NULL);
}
469 
470 void
471 dsclose(cdev_t dev, int mode, struct diskslices *ssp)
472 {
473 	u_int32_t part;
474 	u_int32_t slice;
475 	struct diskslice *sp;
476 
477 	slice = dkslice(dev);
478 	part  = dkpart(dev);
479 	if (slice < ssp->dss_nslices) {
480 		sp = &ssp->dss_slices[slice];
481 		dsclrmask(sp, part);
482 	}
483 }
484 
485 void
486 dsgone(struct diskslices **sspp)
487 {
488 	int slice;
489 	struct diskslice *sp;
490 	struct diskslices *ssp;
491 
492 	for (slice = 0, ssp = *sspp; slice < ssp->dss_nslices; slice++) {
493 		sp = &ssp->dss_slices[slice];
494 		free_ds_label(ssp, slice);
495 	}
496 	kfree(ssp, M_DEVBUF);
497 	*sspp = NULL;
498 }
499 
500 /*
501  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
502  * is subject to the same restriction as dsopen().
503  */
504 int
505 dsioctl(cdev_t dev, u_long cmd, caddr_t data, int flags,
506 	struct diskslices **sspp, struct disk_info *info)
507 {
508 	int error;
509 	struct disklabel *lp;
510 	int old_wlabel;
511 	u_int32_t openmask[DKMAXPARTITIONS/sizeof(u_int32_t)];
512 	int part;
513 	int slice;
514 	struct diskslice *sp;
515 	struct diskslices *ssp;
516 	struct partition *pp;
517 
518 	slice = dkslice(dev);
519 	part = dkpart(dev);
520 	ssp = *sspp;
521 	if (slice >= ssp->dss_nslices)
522 		return (EINVAL);
523 	sp = &ssp->dss_slices[slice];
524 	lp = sp->ds_label;
525 	switch (cmd) {
526 
527 	case DIOCGDVIRGIN:
528 		/*
529 		 * You can only retrieve a virgin disklabel on the whole
530 		 * disk slice or whole-slice partition.
531 		 */
532 		if (slice != WHOLE_DISK_SLICE &&
533 		    part != WHOLE_SLICE_PART) {
534 			return(EINVAL);
535 		}
536 
537 		lp = (struct disklabel *)data;
538 		if (ssp->dss_slices[WHOLE_DISK_SLICE].ds_label) {
539 			*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
540 		} else {
541 			bzero(lp, sizeof(struct disklabel));
542 		}
543 		lp->d_magic = DISKMAGIC;
544 		lp->d_magic2 = DISKMAGIC;
545 
546 		lp->d_npartitions = MAXPARTITIONS;
547 		if (lp->d_interleave == 0)
548 			lp->d_interleave = 1;
549 		if (lp->d_rpm == 0)
550 			lp->d_rpm = 3600;
551 		if (lp->d_nsectors == 0)
552 			lp->d_nsectors = 32;
553 		if (lp->d_ntracks == 0)
554 			lp->d_ntracks = 64;
555 
556 		lp->d_bbsize = BBSIZE;
557 		lp->d_sbsize = SBSIZE;
558 		lp->d_secpercyl = lp->d_nsectors * lp->d_ntracks;
559 		lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
560 
561 		/*
562 		 * Set or Modify the partition sizes to accomodate the slice,
563 		 * since we started with a copy of the virgin label stored
564 		 * in the whole-disk-slice and we are probably not a
565 		 * whole-disk slice.
566 		 */
567 		lp->d_secperunit = sp->ds_size;
568 		pp = &lp->d_partitions[RAW_PART];
569 		pp->p_offset = 0;
570 		pp->p_size = lp->d_secperunit;
571 		if (info->d_dsflags & DSO_COMPATPARTA) {
572 			pp = &lp->d_partitions[0];
573 			pp->p_offset = 0;
574 			pp->p_size = lp->d_secperunit;
575 			pp->p_fstype = FS_OTHER;
576 		}
577 		lp->d_checksum = 0;
578 		lp->d_checksum = dkcksum(lp);
579 		return (0);
580 
581 	case DIOCGDINFO:
582 		/*
583 		 * You can only retrieve a disklabel on the whole
584 		 * slice partition.
585 		 *
586 		 * We do not support labels directly on whole-disks
587 		 * any more (that is, disks without slices), unless the
588 		 * device driver has asked for a compatible label (e.g.
589 		 * for a CD) to allow booting off of storage that is
590 		 * otherwise unlabeled.
591 		 */
592 		error = 0;
593 		if (part != WHOLE_SLICE_PART)
594 			return(EINVAL);
595 		if (slice == WHOLE_DISK_SLICE &&
596 		    (info->d_dsflags & DSO_COMPATLABEL) == 0) {
597 			return (ENODEV);
598 		}
599 		if (sp->ds_label == NULL) {
600 			error = dsreadandsetlabel(dev, info->d_dsflags,
601 						  ssp, sp, info);
602 		}
603 		if (error == 0)
604 			*(struct disklabel *)data = *sp->ds_label;
605 		return (error);
606 
607 	case DIOCGPART:
608 		{
609 			struct partinfo *dpart = (void *)data;
610 
611 			/*
612 			 * If accessing a whole-slice partition the disk
613 			 * management layer may not have tried to read the
614 			 * disklabel.  We have to try to read the label
615 			 * in order to properly initialize the ds_skip_*
616 			 * fields.
617 			 *
618 			 * We ignore any error.
619 			 */
620 			if (sp->ds_label == NULL && part == WHOLE_SLICE_PART &&
621 			    slice != WHOLE_DISK_SLICE) {
622 				dsreadandsetlabel(dev, info->d_dsflags,
623 						  ssp, sp, info);
624 			}
625 
626 			bzero(dpart, sizeof(*dpart));
627 			dpart->media_offset   = (u_int64_t)sp->ds_offset *
628 						info->d_media_blksize;
629 			dpart->media_size     = (u_int64_t)sp->ds_size *
630 						info->d_media_blksize;
631 			dpart->media_blocks   = sp->ds_size;
632 			dpart->media_blksize  = info->d_media_blksize;
633 			dpart->skip_platform = sp->ds_skip_platform;
634 			dpart->skip_bsdlabel = sp->ds_skip_bsdlabel;
635 
636 			if (slice != WHOLE_DISK_SLICE &&
637 			    part != WHOLE_SLICE_PART) {
638 				struct partition *p;
639 
640 				if (lp == NULL || part >= lp->d_npartitions)
641 					return(EINVAL);
642 
643 				p = &lp->d_partitions[part];
644 				dpart->fstype = p->p_fstype;
645 				dpart->media_offset += (u_int64_t)p->p_offset *
646 						       info->d_media_blksize;
647 				dpart->media_size = (u_int64_t)p->p_size *
648 						    info->d_media_blksize;
649 				dpart->media_blocks = (u_int64_t)p->p_size;
650 
651 				/*
652 				 * partition starting sector (p_offset)
653 				 * requires slice's reserved areas to be
654 				 * adjusted.
655 				 */
656 				if (dpart->skip_platform > p->p_offset)
657 					dpart->skip_platform -= p->p_offset;
658 				else
659 					dpart->skip_platform = 0;
660 				if (dpart->skip_bsdlabel > p->p_offset)
661 					dpart->skip_bsdlabel -= p->p_offset;
662 				else
663 					dpart->skip_bsdlabel = 0;
664 			}
665 
666 			/*
667 			 * Load remaining fields from the info structure
668 			 */
669 			dpart->d_nheads =	info->d_nheads;
670 			dpart->d_ncylinders =	info->d_ncylinders;
671 			dpart->d_secpertrack =	info->d_secpertrack;
672 			dpart->d_secpercyl =	info->d_secpercyl;
673 		}
674 		return (0);
675 
676 	case DIOCGSLICEINFO:
677 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
678 				 (char *)ssp);
679 		return (0);
680 
681 	case DIOCSDINFO:
682 		/*
683 		 * You can write a disklabel on the whole disk slice or
684 		 * whole-slice partition.
685 		 */
686 		if (slice != WHOLE_DISK_SLICE &&
687 		    part != WHOLE_SLICE_PART) {
688 			return(EINVAL);
689 		}
690 
691 		/*
692 		 * We no longer support writing disklabels directly to media
693 		 * without there being a slice.  Keep this as a separate
694 		 * conditional.
695 		 */
696 		if (slice == WHOLE_DISK_SLICE)
697 			return (ENODEV);
698 
699 		if (!(flags & FWRITE))
700 			return (EBADF);
701 		lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK);
702 		if (sp->ds_label == NULL)
703 			bzero(lp, sizeof *lp);
704 		else
705 			bcopy(sp->ds_label, lp, sizeof *lp);
706 		if (sp->ds_label == NULL) {
707 			bzero(openmask, sizeof(openmask));
708 		} else {
709 			bcopy(sp->ds_openmask, openmask, sizeof(openmask));
710 			if (slice == COMPATIBILITY_SLICE) {
711 				dssetmaskfrommask(&ssp->dss_slices[
712 						  ssp->dss_first_bsd_slice],
713 						  openmask);
714 			} else if (slice == ssp->dss_first_bsd_slice) {
715 				dssetmaskfrommask(&ssp->dss_slices[
716 						  COMPATIBILITY_SLICE],
717 						  openmask);
718 			}
719 		}
720 		error = setdisklabel(lp, (struct disklabel *)data, openmask);
721 		/* XXX why doesn't setdisklabel() check this? */
722 		if (error == 0 && lp->d_partitions[RAW_PART].p_offset != 0)
723 			error = EXDEV;
724 		if (error == 0) {
725 			if (lp->d_secperunit > sp->ds_size)
726 				error = ENOSPC;
727 			for (part = 0; part < lp->d_npartitions; part++)
728 				if (lp->d_partitions[part].p_size > sp->ds_size)
729 					error = ENOSPC;
730 		}
731 		if (error != 0) {
732 			kfree(lp, M_DEVBUF);
733 			return (error);
734 		}
735 		free_ds_label(ssp, slice);
736 		set_ds_label(ssp, slice, lp);
737 		return (0);
738 
739 	case DIOCSYNCSLICEINFO:
740 		/*
741 		 * This ioctl can only be done on the whole disk
742 		 */
743 		if (slice != WHOLE_DISK_SLICE || part != WHOLE_SLICE_PART)
744 			return (EINVAL);
745 
746 		if (*(int *)data == 0) {
747 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
748 				struct diskslice *ds = &ssp->dss_slices[slice];
749 
750 				switch(dscountmask(ds)) {
751 				case 0:
752 					break;
753 				case 1:
754 					if (slice != WHOLE_DISK_SLICE)
755 						return (EBUSY);
756 					if (!dschkmask(ds, RAW_PART))
757 						return (EBUSY);
758 					break;
759 				default:
760 					return (EBUSY);
761 				}
762 			}
763 		}
764 
765 		/*
766 		 * Temporarily forget the current slices struct and read
767 		 * the current one.
768 		 *
769 		 * NOTE:
770 		 *
771 		 * XXX should wait for current accesses on this disk to
772 		 * complete, then lock out future accesses and opens.
773 		 */
774 		*sspp = NULL;
775 		lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK);
776 		*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
777 		error = dsopen(dev, S_IFCHR, ssp->dss_oflags, sspp, info);
778 		if (error != 0) {
779 			kfree(lp, M_DEVBUF);
780 			*sspp = ssp;
781 			return (error);
782 		}
783 
784 		/*
785 		 * Reopen everything.  This is a no-op except in the "force"
786 		 * case and when the raw bdev and cdev are both open.  Abort
787 		 * if anything fails.
788 		 */
789 		for (slice = 0; slice < ssp->dss_nslices; slice++) {
790 			for (part = 0; part < DKMAXPARTITIONS; ++part) {
791 				if (!dschkmask(&ssp->dss_slices[slice], part))
792 					continue;
793 				error = dsopen(dkmodslice(dkmodpart(dev, part),
794 							  slice),
795 					       S_IFCHR, ssp->dss_oflags, sspp,
796 					       info);
797 				if (error != 0) {
798 					kfree(lp, M_DEVBUF);
799 					*sspp = ssp;
800 					return (EBUSY);
801 				}
802 			}
803 		}
804 
805 		kfree(lp, M_DEVBUF);
806 		dsgone(&ssp);
807 		return (0);
808 
809 	case DIOCWDINFO:
810 		error = dsioctl(dev, DIOCSDINFO, data, flags, &ssp, info);
811 		if (error != 0)
812 			return (error);
813 		/*
814 		 * XXX this used to hack on dk_openpart to fake opening
815 		 * partition 0 in case that is used instead of dkpart(dev).
816 		 */
817 		old_wlabel = sp->ds_wlabel;
818 		set_ds_wlabel(ssp, slice, TRUE);
819 		error = writedisklabel(dev, sp->ds_label);
820 		/* XXX should invalidate in-core label if write failed. */
821 		set_ds_wlabel(ssp, slice, old_wlabel);
822 		return (error);
823 
824 	case DIOCWLABEL:
825 		if (slice == WHOLE_DISK_SLICE)
826 			return (ENODEV);
827 		if (!(flags & FWRITE))
828 			return (EBADF);
829 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
830 		return (0);
831 
832 	default:
833 		return (ENOIOCTL);
834 	}
835 }
836 
837 /*
838  * Chain the bio_done.  b_cmd remains valid through such chaining.
839  */
840 static void
841 dsiodone(struct bio *bio)
842 {
843 	struct buf *bp = bio->bio_buf;
844 	char *msg;
845 
846 	if (bp->b_cmd != BUF_CMD_READ
847 	    || (!(bp->b_flags & B_ERROR) && bp->b_error == 0)) {
848 		msg = fixlabel(NULL, bio->bio_caller_info1.ptr,
849 			       (struct disklabel *)
850 			       (bp->b_data + (int)bio->bio_caller_info2.offset),
851 			       FALSE);
852 		if (msg != NULL)
853 			kprintf("%s\n", msg);
854 	}
855 	biodone(bio->bio_prev);
856 }
857 
858 int
859 dsisopen(struct diskslices *ssp)
860 {
861 	int slice;
862 
863 	if (ssp == NULL)
864 		return (0);
865 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
866 		if (dscountmask(&ssp->dss_slices[slice]))
867 			return (1);
868 	}
869 	return (0);
870 }
871 
872 /*
873  * Allocate a slices "struct" and initialize it to contain only an empty
874  * compatibility slice (pointing to itself), a whole disk slice (covering
875  * the disk as described by the label), and (nslices - BASE_SLICES) empty
876  * slices beginning at BASE_SLICE.
877  */
878 struct diskslices *
879 dsmakeslicestruct(int nslices, struct disk_info *info)
880 {
881 	struct diskslice *sp;
882 	struct diskslices *ssp;
883 
884 	ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
885 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
886 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
887 	ssp->dss_nslices = nslices;
888 	ssp->dss_oflags = 0;
889 
890 	/*
891 	 * Figure out if we can use shifts or whether we have to
892 	 * use mod/multply to translate byte offsets into sector numbers.
893 	 */
894 	if ((info->d_media_blksize ^ (info->d_media_blksize - 1)) ==
895 	     (info->d_media_blksize << 1) - 1) {
896 		ssp->dss_secmult = info->d_media_blksize / DEV_BSIZE;
897 		if (ssp->dss_secmult & (ssp->dss_secmult - 1))
898 			ssp->dss_secshift = -1;
899 		else
900 			ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
901 	} else {
902 		ssp->dss_secmult = 0;
903 		ssp->dss_secshift = -1;
904 	}
905 	ssp->dss_secsize = info->d_media_blksize;
906 	sp = &ssp->dss_slices[0];
907 	bzero(sp, nslices * sizeof *sp);
908 	sp[WHOLE_DISK_SLICE].ds_size = info->d_media_blocks;
909 	return (ssp);
910 }
911 
912 char *
913 dsname(cdev_t dev, int unit, int slice, int part, char *partname)
914 {
915 	static char name[32];
916 	const char *dname;
917 	int used;
918 
919 	dname = dev_dname(dev);
920 	if (strlen(dname) > 16)
921 		dname = "nametoolong";
922 	ksnprintf(name, sizeof(name), "%s%d", dname, unit);
923 	partname[0] = '\0';
924 	used = strlen(name);
925 
926 	if (slice != WHOLE_DISK_SLICE) {
927 		/*
928 		 * slice or slice + partition.  BASE_SLICE is s1, but
929 		 * the compatibility slice (0) needs to be s0.
930 		 */
931 		used += ksnprintf(name + used, sizeof(name) - used,
932 				  "s%d", (slice ? slice - BASE_SLICE + 1 : 0));
933 		if (part != WHOLE_SLICE_PART) {
934 			used += ksnprintf(name + used, sizeof(name) - used,
935 					  "%c", 'a' + part);
936 			partname[0] = 'a' + part;
937 			partname[1] = 0;
938 		}
939 	} else if (part == WHOLE_SLICE_PART) {
940 		/*
941 		 * whole-disk-device, raw access to disk
942 		 */
943 		/* no string extension */
944 	} else if (part > 128) {
945 		/*
946 		 * whole-disk-device, extended raw access partitions.
947 		 * (typically used to access CD audio tracks)
948 		 */
949 		used += ksnprintf(name + used, sizeof(name) - used,
950 					  "t%d", part - 128);
951 	} else {
952 		/*
953 		 * whole-disk-device, illegal partition number
954 		 */
955 		used += ksnprintf(name + used, sizeof(name) - used,
956 					  "?%d", part);
957 	}
958 	return (name);
959 }
960 
961 /*
962  * This should only be called when the unit is inactive and the strategy
963  * routine should not allow it to become active unless we call it.  Our
964  * strategy routine must be special to allow activity.
965  */
966 int
967 dsopen(cdev_t dev, int mode, u_int flags,
968 	struct diskslices **sspp, struct disk_info *info)
969 {
970 	cdev_t dev1;
971 	int error;
972 	bool_t need_init;
973 	struct diskslice *sp;
974 	struct diskslices *ssp;
975 	int slice;
976 	int part;
977 
978 	dev->si_bsize_phys = info->d_media_blksize;
979 
980 	/*
981 	 * Do not attempt to read the slice table or disk label when
982 	 * accessing the whole-disk slice or a while-slice partition.
983 	 */
984 	if (dkslice(dev) == WHOLE_DISK_SLICE)
985 		flags |= DSO_ONESLICE | DSO_NOLABELS;
986 	if (dkpart(dev) == WHOLE_SLICE_PART)
987 		flags |= DSO_NOLABELS;
988 
989 	/*
990 	 * Reinitialize the slice table unless there is an open device
991 	 * on the unit.
992 	 *
993 	 * It would be nice if we didn't have to do this but when a
994 	 * user is slicing and partitioning up a disk it is a lot safer
995 	 * to not take any chances.
996 	 */
997 	ssp = *sspp;
998 	need_init = !dsisopen(ssp);
999 	if (ssp != NULL && need_init)
1000 		dsgone(sspp);
1001 	if (need_init) {
1002 		/*
1003 		 * Allocate a minimal slices "struct".  This will become
1004 		 * the final slices "struct" if we don't want real slices
1005 		 * or if we can't find any real slices.
1006 		 *
1007 		 * Then scan the disk
1008 		 */
1009 		*sspp = dsmakeslicestruct(BASE_SLICE, info);
1010 
1011 		if ((flags & DSO_ONESLICE) == 0) {
1012 			TRACE(("mbrinit\n"));
1013 			error = mbrinit(dev, info, sspp);
1014 			if (error != 0) {
1015 				dsgone(sspp);
1016 				return (error);
1017 			}
1018 		}
1019 		ssp = *sspp;
1020 		ssp->dss_oflags = flags;
1021 
1022 		/*
1023 		 * If there are no real slices, then make the compatiblity
1024 		 * slice cover the whole disk.
1025 		 *
1026 		 * no sectors are reserved for the platform (ds_skip_platform
1027 		 * will be 0) in this case.  This means that if a disklabel
1028 		 * is installed it will be directly installed in sector 0
1029 		 * unless DSO_COMPATMBR is requested.
1030 		 */
1031 		if (ssp->dss_nslices == BASE_SLICE) {
1032 			sp = &ssp->dss_slices[COMPATIBILITY_SLICE];
1033 
1034 			sp->ds_size = info->d_media_blocks;
1035 			if (info->d_dsflags & DSO_COMPATMBR) {
1036 				sp->ds_skip_platform = 1;
1037 				sp->ds_skip_bsdlabel = sp->ds_skip_platform;
1038 			} else {
1039 				sp->ds_skip_platform = 0;
1040 				sp->ds_skip_bsdlabel = 0;
1041 			}
1042 		}
1043 
1044 		/*
1045 		 * Point the compatibility slice at the BSD slice, if any.
1046 		 */
1047 		for (slice = BASE_SLICE; slice < ssp->dss_nslices; slice++) {
1048 			sp = &ssp->dss_slices[slice];
1049 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */) {
1050 				struct diskslice *csp;
1051 
1052 				csp = &ssp->dss_slices[COMPATIBILITY_SLICE];
1053 				ssp->dss_first_bsd_slice = slice;
1054 				csp->ds_offset = sp->ds_offset;
1055 				csp->ds_size = sp->ds_size;
1056 				csp->ds_type = sp->ds_type;
1057 				csp->ds_skip_platform = sp->ds_skip_platform;
1058 				csp->ds_skip_bsdlabel = sp->ds_skip_bsdlabel;
1059 				break;
1060 			}
1061 		}
1062 
1063 		/*
1064 		 * By definition accesses via the whole-disk device do not
1065 		 * specify any reserved areas.  The whole disk may be read
1066 		 * or written by the whole-disk device.
1067 		 *
1068 		 * ds_label for a whole-disk device is only used as a
1069 		 * template.
1070 		 */
1071 		sp = &ssp->dss_slices[WHOLE_DISK_SLICE];
1072 		sp->ds_label = clone_label(info, NULL);
1073 		sp->ds_wlabel = TRUE;
1074 		sp->ds_skip_platform = 0;
1075 		sp->ds_skip_bsdlabel = 0;
1076 	}
1077 
1078 	/*
1079 	 * Load the disklabel for the slice being accessed unless it is
1080 	 * a whole-disk-slice or a whole-slice-partition (as determined
1081 	 * by DSO_NOLABELS).
1082 	 *
1083 	 * We could scan all slices here and try to load up their
1084 	 * disklabels, but that would cause us to access slices that
1085 	 * the user may otherwise not intend us to access, or corrupted
1086 	 * slices, etc.
1087 	 *
1088 	 * XXX if there are no opens on the slice we may want to re-read
1089 	 * the disklabel anyway, even if we have one cached.
1090 	 */
1091 	slice = dkslice(dev);
1092 	if (slice >= ssp->dss_nslices)
1093 		return (ENXIO);
1094 	sp = &ssp->dss_slices[slice];
1095 	part = dkpart(dev);
1096 
1097 	if ((flags & DSO_NOLABELS) == 0 && sp->ds_label == NULL) {
1098 		dev1 = dkmodslice(dkmodpart(dev, WHOLE_SLICE_PART), slice);
1099 
1100 		/*
1101 		 * If opening a raw disk we do not try to
1102 		 * read the disklabel now.  No interpretation of raw disks
1103 		 * (e.g. like 'da0') ever occurs.  We will try to read the
1104 		 * disklabel for a raw slice if asked to via DIOC* ioctls.
1105 		 *
1106 		 * Access to the label area is disallowed by default.  Note
1107 		 * however that accesses via WHOLE_DISK_SLICE, and accesses
1108 		 * via WHOLE_SLICE_PART for slices without valid disklabels,
1109 		 * will allow writes and ignore the flag.
1110 		 */
1111 		set_ds_wlabel(ssp, slice, FALSE);
1112 		dsreadandsetlabel(dev1, flags, ssp, sp, info);
1113 	}
1114 
1115 	/*
1116 	 * If opening a particular partition the disklabel must exist and
1117 	 * the partition must be present in the label.
1118 	 *
1119 	 * If the partition is the special whole-disk-slice no partition
1120 	 * table need exist.
1121 	 */
1122 	if (part != WHOLE_SLICE_PART && slice != WHOLE_DISK_SLICE) {
1123 		if (sp->ds_label == NULL || part >= sp->ds_label->d_npartitions)
1124 			return (EINVAL);
1125 	}
1126 	dssetmask(sp, part);
1127 
1128 	/*
1129 	 * Do not allow special raw-extension partitions to be opened
1130 	 * if the device doesn't support them.  Raw-extension partitions
1131 	 * are typically used to handle CD tracks.
1132 	 */
1133 	if (slice == WHOLE_DISK_SLICE && part >= 128 &&
1134 	    part != WHOLE_SLICE_PART) {
1135 		if ((info->d_dsflags & DSO_RAWEXTENSIONS) == 0)
1136 			return (EINVAL);
1137 	}
1138 	return (0);
1139 }
1140 
1141 /*
1142  * Attempt to read the disklabel.  If successful, store it in sp->ds_label.
1143  *
1144  * If we cannot read the disklabel and DSO_COMPATLABEL is set, we construct
1145  * a fake label covering the whole disk.
1146  */
1147 static
1148 int
1149 dsreadandsetlabel(cdev_t dev, u_int flags,
1150 		  struct diskslices *ssp, struct diskslice *sp,
1151 		  struct disk_info *info)
1152 {
1153 	struct disklabel *lp1;
1154 	const char *msg;
1155 	const char *sname;
1156 	char partname[2];
1157 	int slice = dkslice(dev);
1158 
1159 	sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART, partname);
1160 	lp1 = clone_label(info, sp);
1161 	msg = readdisklabel(dev, lp1);
1162 
1163 	if (msg != NULL && (flags & DSO_COMPATLABEL)) {
1164 		msg = NULL;
1165 		kfree(lp1, M_DEVBUF);
1166 		lp1 = clone_label(info, sp);
1167 	}
1168 	if (msg == NULL)
1169 		msg = fixlabel(sname, sp, lp1, FALSE);
1170 	if (msg == NULL && lp1->d_secsize != info->d_media_blksize)
1171 		msg = "inconsistent sector size";
1172 	if (msg != NULL) {
1173 		if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
1174 			log(LOG_WARNING, "%s: cannot find label (%s)\n",
1175 			    sname, msg);
1176 		kfree(lp1, M_DEVBUF);
1177 	} else {
1178 		set_ds_label(ssp, slice, lp1);
1179 		set_ds_wlabel(ssp, slice, FALSE);
1180 	}
1181 	return (msg ? EINVAL : 0);
1182 }
1183 
1184 int64_t
1185 dssize(cdev_t dev, struct diskslices **sspp)
1186 {
1187 	struct disklabel *lp;
1188 	int part;
1189 	int slice;
1190 	struct diskslices *ssp;
1191 
1192 	slice = dkslice(dev);
1193 	part = dkpart(dev);
1194 	ssp = *sspp;
1195 	if (ssp == NULL || slice >= ssp->dss_nslices
1196 	    || !dschkmask(&ssp->dss_slices[slice], part)) {
1197 		if (dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred) != 0)
1198 			return (-1);
1199 		dev_dclose(dev, FREAD, S_IFCHR);
1200 		ssp = *sspp;
1201 	}
1202 	lp = ssp->dss_slices[slice].ds_label;
1203 	if (lp == NULL)
1204 		return (-1);
1205 	return ((int64_t)lp->d_partitions[part].p_size);
1206 }
1207 
1208 static void
1209 free_ds_label(struct diskslices *ssp, int slice)
1210 {
1211 	struct disklabel *lp;
1212 	struct diskslice *sp;
1213 
1214 	sp = &ssp->dss_slices[slice];
1215 	lp = sp->ds_label;
1216 	if (lp == NULL)
1217 		return;
1218 	kfree(lp, M_DEVBUF);
1219 	set_ds_label(ssp, slice, (struct disklabel *)NULL);
1220 }
1221 
1222 static char *
1223 fixlabel(const char *sname, struct diskslice *sp, struct disklabel *lp, int writeflag)
1224 {
1225 	u_int64_t start;
1226 	u_int64_t end;
1227 	u_int64_t offset;
1228 	int part;
1229 	struct partition *pp;
1230 	bool_t warned;
1231 
1232 	/* These errors "can't happen" so don't bother reporting details. */
1233 	if (lp->d_magic != DISKMAGIC || lp->d_magic2 != DISKMAGIC)
1234 		return ("fixlabel: invalid magic");
1235 	if (dkcksum(lp) != 0)
1236 		return ("fixlabel: invalid checksum");
1237 
1238 	pp = &lp->d_partitions[RAW_PART];
1239 
1240 	/*
1241 	 * What a mess.  For ages old backwards compatibility the disklabel
1242 	 * on-disk stores absolute offsets instead of slice-relative offsets.
1243 	 * So fix it up when reading, writing, or snooping.
1244 	 *
1245 	 * The in-core label is always slice-relative.
1246 	 */
1247 	if (writeflag) {
1248 		start = 0;
1249 		offset = sp->ds_offset;
1250 	} else {
1251 		start = sp->ds_offset;
1252 		offset = -sp->ds_offset;
1253 	}
1254 	if (pp->p_offset != start) {
1255 		if (sname != NULL) {
1256 			kprintf(
1257 "%s: rejecting BSD label: raw partition offset != slice offset\n",
1258 			       sname);
1259 			slice_info(sname, sp);
1260 			partition_info(sname, RAW_PART, pp);
1261 		}
1262 		return ("fixlabel: raw partition offset != slice offset");
1263 	}
1264 	if (pp->p_size != sp->ds_size) {
1265 		if (sname != NULL) {
1266 			kprintf("%s: raw partition size != slice size\n", sname);
1267 			slice_info(sname, sp);
1268 			partition_info(sname, RAW_PART, pp);
1269 		}
1270 		if (pp->p_size > sp->ds_size) {
1271 			if (sname == NULL)
1272 				return ("fixlabel: raw partition size > slice size");
1273 			kprintf("%s: truncating raw partition\n", sname);
1274 			pp->p_size = sp->ds_size;
1275 		}
1276 	}
1277 	end = start + sp->ds_size;
1278 	if (start > end)
1279 		return ("fixlabel: slice wraps");
1280 	if (lp->d_secpercyl <= 0)
1281 		return ("fixlabel: d_secpercyl <= 0");
1282 	pp -= RAW_PART;
1283 	warned = FALSE;
1284 	for (part = 0; part < lp->d_npartitions; part++, pp++) {
1285 		if (pp->p_offset != 0 || pp->p_size != 0) {
1286 			if (pp->p_offset < start
1287 			    || pp->p_offset + pp->p_size > end
1288 			    || pp->p_offset + pp->p_size < pp->p_offset) {
1289 				if (sname != NULL) {
1290 					kprintf(
1291 "%s: rejecting partition in BSD label: it isn't entirely within the slice\n",
1292 					       sname);
1293 					if (!warned) {
1294 						slice_info(sname, sp);
1295 						warned = TRUE;
1296 					}
1297 					partition_info(sname, part, pp);
1298 				}
1299 				/* XXX else silently discard junk. */
1300 				bzero(pp, sizeof *pp);
1301 			} else {
1302 				pp->p_offset += offset;
1303 			}
1304 		}
1305 	}
1306 	lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
1307 	lp->d_secperunit = sp->ds_size;
1308  	lp->d_checksum = 0;
1309  	lp->d_checksum = dkcksum(lp);
1310 	return (NULL);
1311 }
1312 
1313 static void
1314 partition_info(const char *sname, int part, struct partition *pp)
1315 {
1316 	kprintf("%s%c: start %lu, end %lu, size %lu\n", sname, 'a' + part,
1317 	       (u_long)pp->p_offset, (u_long)(pp->p_offset + pp->p_size - 1),
1318 	       (u_long)pp->p_size);
1319 }
1320 
1321 static void
1322 slice_info(const char *sname, struct diskslice *sp)
1323 {
1324 	kprintf("%s: start %llu, end %llu, size %llu\n", sname,
1325 	       sp->ds_offset, sp->ds_offset + sp->ds_size - 1, sp->ds_size);
1326 }
1327 
1328 static void
1329 set_ds_label(struct diskslices *ssp, int slice, struct disklabel *lp)
1330 {
1331 	struct diskslice *sp1 = &ssp->dss_slices[slice];
1332 	struct diskslice *sp2;
1333 
1334 	if (slice == COMPATIBILITY_SLICE)
1335 		sp2 = &ssp->dss_slices[ssp->dss_first_bsd_slice];
1336 	else if (slice == ssp->dss_first_bsd_slice)
1337 		sp2 = &ssp->dss_slices[COMPATIBILITY_SLICE];
1338 	else
1339 		sp2 = NULL;
1340 	sp1->ds_label = lp;
1341 	if (sp2)
1342 		sp2->ds_label = lp;
1343 
1344 	/*
1345 	 * If the slice is not the whole-disk slice, setup the reserved
1346 	 * area(s).
1347 	 *
1348 	 * The reserved area for the original bsd disklabel, inclusive of
1349 	 * the label and space for boot2, is 15 sectors.  If you've
1350 	 * noticed people traditionally skipping 16 sectors its because
1351 	 * the sector numbers start at the beginning of the slice rather
1352 	 * then the beginning of the disklabel and traditional dos slices
1353 	 * reserve a sector at the beginning for the boot code.
1354 	 *
1355 	 * NOTE! With the traditional bsdlabel, the first N bytes of boot2
1356 	 * overlap with the disklabel.  The disklabel program checks that
1357 	 * they are 0.
1358 	 *
1359 	 * When clearing a label, the bsdlabel reserved area is reset.
1360 	 */
1361 	if (slice != WHOLE_DISK_SLICE) {
1362 		if (lp) {
1363 			/*
1364 			 * leave room for the disklabel and boot2 -
1365 			 * traditional label only.  XXX bad hack.  Such
1366 			 * labels cannot install a boot area due to
1367 			 * insufficient space.
1368 			 */
1369 			int lsects = SBSIZE / ssp->dss_secsize -
1370 				     sp1->ds_skip_platform;
1371 			if (lsects <= 0)
1372 				lsects = 1;
1373 			sp1->ds_skip_bsdlabel = sp1->ds_skip_platform + lsects;
1374 			if (sp2)
1375 				sp2->ds_skip_bsdlabel = sp1->ds_skip_bsdlabel;
1376 		} else {
1377 			sp1->ds_skip_bsdlabel = sp1->ds_skip_platform;
1378 			if (sp2)
1379 				sp2->ds_skip_bsdlabel = sp1->ds_skip_platform;
1380 		}
1381 	}
1382 }
1383 
1384 static void
1385 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
1386 {
1387 	ssp->dss_slices[slice].ds_wlabel = wlabel;
1388 	if (slice == COMPATIBILITY_SLICE)
1389 		ssp->dss_slices[ssp->dss_first_bsd_slice].ds_wlabel = wlabel;
1390 	else if (slice == ssp->dss_first_bsd_slice)
1391 		ssp->dss_slices[COMPATIBILITY_SLICE].ds_wlabel = wlabel;
1392 }
1393