xref: /dflybsd-src/sys/kern/subr_diskslice.c (revision 9eb43faf72ce76f389a1809874498ae8e6299709)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
43  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
45  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
47  * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.20 2006/07/04 19:54:08 dillon Exp $
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/conf.h>
54 #include <sys/disklabel.h>
55 #include <sys/diskslice.h>
56 #include <sys/diskmbr.h>
57 #include <sys/fcntl.h>
58 #include <sys/malloc.h>
59 #include <sys/stat.h>
60 #include <sys/syslog.h>
61 #include <sys/vnode.h>
62 #include <sys/device.h>
63 #include <sys/thread2.h>
64 
65 #include <vfs/ufs/dinode.h>	/* XXX used only for fs.h */
66 #include <vfs/ufs/fs.h>		/* XXX used only to get BBSIZE/SBSIZE */
67 
68 #define TRACE(str)	do { if (ds_debug) printf str; } while (0)
69 
70 typedef	u_char	bool_t;
71 
72 static volatile bool_t ds_debug;
73 
74 static struct disklabel *clone_label (struct disklabel *lp);
75 static void dsiodone (struct bio *bio);
76 static char *fixlabel (char *sname, struct diskslice *sp,
77 			   struct disklabel *lp, int writeflag);
78 static void free_ds_label (struct diskslices *ssp, int slice);
79 static void partition_info (char *sname, int part, struct partition *pp);
80 static void slice_info (char *sname, struct diskslice *sp);
81 static void set_ds_label (struct diskslices *ssp, int slice,
82 			      struct disklabel *lp);
83 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
84 
85 /*
86  * Duplicate a label for the whole disk, and initialize defaults in the
87  * copy for fields that are not already initialized.  The caller only
88  * needs to initialize d_secsize and d_secperunit, and zero the fields
89  * that are to be defaulted.
90  */
91 static struct disklabel *
92 clone_label(struct disklabel *lp)
93 {
94 	struct disklabel *lp1;
95 
96 	lp1 = malloc(sizeof *lp1, M_DEVBUF, M_WAITOK);
97 	*lp1 = *lp;
98 	lp = NULL;
99 	if (lp1->d_typename[0] == '\0')
100 		strncpy(lp1->d_typename, "amnesiac", sizeof(lp1->d_typename));
101 	if (lp1->d_packname[0] == '\0')
102 		strncpy(lp1->d_packname, "fictitious", sizeof(lp1->d_packname));
103 	if (lp1->d_nsectors == 0)
104 		lp1->d_nsectors = 32;
105 	if (lp1->d_ntracks == 0)
106 		lp1->d_ntracks = 64;
107 	lp1->d_secpercyl = lp1->d_nsectors * lp1->d_ntracks;
108 	lp1->d_ncylinders = lp1->d_secperunit / lp1->d_secpercyl;
109 	if (lp1->d_rpm == 0)
110 		lp1->d_rpm = 3600;
111 	if (lp1->d_interleave == 0)
112 		lp1->d_interleave = 1;
113 	if (lp1->d_npartitions < RAW_PART + 1)
114 		lp1->d_npartitions = MAXPARTITIONS;
115 	if (lp1->d_bbsize == 0)
116 		lp1->d_bbsize = BBSIZE;
117 	if (lp1->d_sbsize == 0)
118 		lp1->d_sbsize = SBSIZE;
119 	lp1->d_partitions[RAW_PART].p_size = lp1->d_secperunit;
120 	lp1->d_magic = DISKMAGIC;
121 	lp1->d_magic2 = DISKMAGIC;
122 	lp1->d_checksum = dkcksum(lp1);
123 	return (lp1);
124 }
125 
126 /*
127  * Determine the size of the transfer, and make sure it is
128  * within the boundaries of the partition. Adjust transfer
129  * if needed, and signal errors or early completion.
130  *
131  * XXX TODO:
132  *	o Split buffers that are too big for the device.
133  *	o Check for overflow.
134  *	o Finish cleaning this up.
135  *
136  * This function returns 1 on success, 0 if transfer equates
137  * to EOF (end of disk) or -1 on failure.  The appropriate
138  * 'errno' value is also set in bp->b_error and bp->b_flags
139  * is marked with B_ERROR.
140  */
141 struct bio *
142 dscheck(dev_t dev, struct bio *bio, struct diskslices *ssp)
143 {
144 	struct buf *bp = bio->bio_buf;
145 	struct bio *nbio;
146 	u_long	endsecno;
147 	daddr_t	labelsect;
148 	struct disklabel *lp;
149 	char *msg;
150 	long nsec;
151 	struct partition *pp;
152 	daddr_t	secno;
153 	daddr_t	slicerel_secno;
154 	struct diskslice *sp;
155 	int shift;
156 	int mask;
157 
158 	if (bio->bio_offset < 0) {
159 		printf("dscheck(%s): negative bio_offset %lld\n",
160 		    devtoname(dev), bio->bio_offset);
161 		goto bad;
162 	}
163 	sp = &ssp->dss_slices[dkslice(dev)];
164 	lp = sp->ds_label;
165 
166 	if (ssp->dss_secmult == 1) {
167 		shift = DEV_BSHIFT;
168 		goto doshift;
169 	} else if (ssp->dss_secshift != -1) {
170 		shift = DEV_BSHIFT + ssp->dss_secshift;
171 doshift:
172 		mask = (1 << shift) - 1;
173 		if ((int)bp->b_bcount & mask)
174 			goto bad_bcount;
175 		if ((int)bio->bio_offset & mask)
176 			goto bad_blkno;
177 		secno = (daddr_t)(bio->bio_offset >> shift);
178 		nsec = bp->b_bcount >> shift;
179 	} else {
180 		if (bp->b_bcount % ssp->dss_secsize)
181 			goto bad_bcount;
182 		if (bio->bio_offset % ssp->dss_secsize)
183 			goto bad_blkno;
184 		secno = (daddr_t)(bio->bio_offset / ssp->dss_secsize);
185 		nsec = bp->b_bcount / ssp->dss_secsize;
186 	}
187 	if (lp == NULL) {
188 		labelsect = -LABELSECTOR - 1;
189 		endsecno = sp->ds_size;
190 		slicerel_secno = secno;
191 	} else {
192 		labelsect = lp->d_partitions[LABEL_PART].p_offset;
193 		if (labelsect != 0)
194 			Debugger("labelsect != 0 in dscheck()");
195 		pp = &lp->d_partitions[dkpart(dev)];
196 		endsecno = pp->p_size;
197 		slicerel_secno = pp->p_offset + secno;
198 	}
199 
200 	/* overwriting disk label ? */
201 	/* XXX should also protect bootstrap in first 8K */
202 	if (slicerel_secno <= LABELSECTOR + labelsect &&
203 #if LABELSECTOR != 0
204 	    slicerel_secno + nsec > LABELSECTOR + labelsect &&
205 #endif
206 	    bp->b_cmd != BUF_CMD_READ && sp->ds_wlabel == 0) {
207 		bp->b_error = EROFS;
208 		goto error;
209 	}
210 
211 #if defined(DOSBBSECTOR) && defined(notyet)
212 	/* overwriting master boot record? */
213 	if (slicerel_secno <= DOSBBSECTOR && bp->b_cmd != BUF_CMD_READ &&
214 	    sp->ds_wlabel == 0) {
215 		bp->b_error = EROFS;
216 		goto error;
217 	}
218 #endif
219 
220 	/*
221 	 * EOF handling
222 	 */
223 	if (secno + nsec > endsecno) {
224 		/*
225 		 * Return an error if beyond the end of the disk, or
226 		 * if B_BNOCLIP is set.  Tell the system that we do not
227 		 * need to keep the buffer around.
228 		 */
229 		if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
230 			goto bad;
231 
232 		/*
233 		 * If exactly at end of disk, return an EOF.  Throw away
234 		 * the buffer contents, if any, by setting B_INVAL.
235 		 */
236 		if (secno == endsecno) {
237 			bp->b_resid = bp->b_bcount;
238 			bp->b_flags |= B_INVAL;
239 			goto done;
240 		}
241 
242 		/*
243 		 * Else truncate
244 		 */
245 		nsec = endsecno - secno;
246 		bp->b_bcount = nsec * ssp->dss_secsize;
247 	}
248 
249 	nbio = push_bio(bio);
250 	nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
251 			   ssp->dss_secsize;
252 
253 	/*
254 	 * Snoop on label accesses if the slice offset is nonzero.  Fudge
255 	 * offsets in the label to keep the in-core label coherent with
256 	 * the on-disk one.
257 	 */
258 	if (slicerel_secno <= LABELSECTOR + labelsect
259 #if LABELSECTOR != 0
260 	    && slicerel_secno + nsec > LABELSECTOR + labelsect
261 #endif
262 	    && sp->ds_offset != 0) {
263 		nbio->bio_done = dsiodone;
264 		nbio->bio_caller_info1.ptr = sp;
265 		nbio->bio_caller_info2.offset = (off_t)(LABELSECTOR + labelsect -
266 					 slicerel_secno) * ssp->dss_secsize;
267 		if (bp->b_cmd != BUF_CMD_READ) {
268 			/*
269 			 * XXX even disklabel(8) writes directly so we need
270 			 * to adjust writes.  Perhaps we should drop support
271 			 * for DIOCWLABEL (always write protect labels) and
272 			 * require the use of DIOCWDINFO.
273 			 *
274 			 * XXX probably need to copy the data to avoid even
275 			 * temporarily corrupting the in-core copy.
276 			 */
277 			/* XXX need name here. */
278 			msg = fixlabel(
279 				NULL, sp,
280 			       (struct disklabel *)
281 			       (bp->b_data + (int)nbio->bio_caller_info2.offset),
282 			       TRUE);
283 			if (msg != NULL) {
284 				printf("dscheck(%s): %s\n",
285 				    devtoname(dev), msg);
286 				bp->b_error = EROFS;
287 				pop_bio(nbio);
288 				goto error;
289 			}
290 		}
291 	}
292 	return (nbio);
293 
294 bad_bcount:
295 	printf(
296 	"dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
297 	    devtoname(dev), bp->b_bcount, ssp->dss_secsize);
298 	goto bad;
299 
300 bad_blkno:
301 	printf(
302 	"dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
303 	    devtoname(dev), bio->bio_offset, ssp->dss_secsize);
304 bad:
305 	bp->b_error = EINVAL;
306 	/* fall through */
307 error:
308 	/*
309 	 * Terminate the I/O with a ranging error.  Since the buffer is
310 	 * either illegal or beyond the file EOF, mark it B_INVAL as well.
311 	 */
312 	bp->b_resid = bp->b_bcount;
313 	bp->b_flags |= B_ERROR | B_INVAL;
314 done:
315 	/*
316 	 * Caller must biodone() the originally passed bio if NULL is
317 	 * returned.
318 	 */
319 	return (NULL);
320 }
321 
322 void
323 dsclose(dev_t dev, int mode, struct diskslices *ssp)
324 {
325 	u_char mask;
326 	struct diskslice *sp;
327 
328 	sp = &ssp->dss_slices[dkslice(dev)];
329 	mask = 1 << dkpart(dev);
330 	sp->ds_openmask &= ~mask;
331 }
332 
333 void
334 dsgone(struct diskslices **sspp)
335 {
336 	int slice;
337 	struct diskslice *sp;
338 	struct diskslices *ssp;
339 
340 	for (slice = 0, ssp = *sspp; slice < ssp->dss_nslices; slice++) {
341 		sp = &ssp->dss_slices[slice];
342 		free_ds_label(ssp, slice);
343 	}
344 	free(ssp, M_DEVBUF);
345 	*sspp = NULL;
346 }
347 
348 /*
349  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
350  * is subject to the same restriction as dsopen().
351  */
352 int
353 dsioctl(dev_t dev, u_long cmd, caddr_t data,
354 	int flags, struct diskslices **sspp)
355 {
356 	int error;
357 	struct disklabel *lp;
358 	int old_wlabel;
359 	u_char openmask;
360 	int part;
361 	int slice;
362 	struct diskslice *sp;
363 	struct diskslices *ssp;
364 	struct partition *pp;
365 
366 	slice = dkslice(dev);
367 	ssp = *sspp;
368 	sp = &ssp->dss_slices[slice];
369 	lp = sp->ds_label;
370 	switch (cmd) {
371 
372 	case DIOCGDVIRGIN:
373 		lp = (struct disklabel *)data;
374 		if (ssp->dss_slices[WHOLE_DISK_SLICE].ds_label) {
375 			*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
376 		} else {
377 			bzero(lp, sizeof(struct disklabel));
378 		}
379 
380 		lp->d_magic = DISKMAGIC;
381 		lp->d_magic2 = DISKMAGIC;
382 		pp = &lp->d_partitions[RAW_PART];
383 		pp->p_offset = 0;
384 		pp->p_size = sp->ds_size;
385 
386 		lp->d_npartitions = MAXPARTITIONS;
387 		if (lp->d_interleave == 0)
388 			lp->d_interleave = 1;
389 		if (lp->d_rpm == 0)
390 			lp->d_rpm = 3600;
391 		if (lp->d_nsectors == 0)
392 			lp->d_nsectors = 32;
393 		if (lp->d_ntracks == 0)
394 			lp->d_ntracks = 64;
395 
396 		lp->d_bbsize = BBSIZE;
397 		lp->d_sbsize = SBSIZE;
398 		lp->d_secpercyl = lp->d_nsectors * lp->d_ntracks;
399 		lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
400 		lp->d_secperunit = sp->ds_size;
401 		lp->d_checksum = 0;
402 		lp->d_checksum = dkcksum(lp);
403 		return (0);
404 
405 	case DIOCGDINFO:
406 		if (lp == NULL)
407 			return (EINVAL);
408 		*(struct disklabel *)data = *lp;
409 		return (0);
410 
411 #ifdef notyet
412 	case DIOCGDINFOP:
413 		if (lp == NULL)
414 			return (EINVAL);
415 		*(struct disklabel **)data = lp;
416 		return (0);
417 #endif
418 
419 	case DIOCGPART:
420 		if (lp == NULL)
421 			return (EINVAL);
422 		((struct partinfo *)data)->disklab = lp;
423 		((struct partinfo *)data)->part
424 			= &lp->d_partitions[dkpart(dev)];
425 		return (0);
426 
427 	case DIOCGSLICEINFO:
428 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
429 				 (char *)ssp);
430 		return (0);
431 
432 	case DIOCSDINFO:
433 		if (slice == WHOLE_DISK_SLICE)
434 			return (ENODEV);
435 		if (!(flags & FWRITE))
436 			return (EBADF);
437 		lp = malloc(sizeof *lp, M_DEVBUF, M_WAITOK);
438 		if (sp->ds_label == NULL)
439 			bzero(lp, sizeof *lp);
440 		else
441 			bcopy(sp->ds_label, lp, sizeof *lp);
442 		if (sp->ds_label == NULL)
443 			openmask = 0;
444 		else {
445 			openmask = sp->ds_openmask;
446 			if (slice == COMPATIBILITY_SLICE)
447 				openmask |= ssp->dss_slices[
448 				    ssp->dss_first_bsd_slice].ds_openmask;
449 			else if (slice == ssp->dss_first_bsd_slice)
450 				openmask |= ssp->dss_slices[
451 				    COMPATIBILITY_SLICE].ds_openmask;
452 		}
453 		error = setdisklabel(lp, (struct disklabel *)data,
454 				     (u_long)openmask);
455 		/* XXX why doesn't setdisklabel() check this? */
456 		if (error == 0 && lp->d_partitions[RAW_PART].p_offset != 0)
457 			error = EXDEV;
458 		if (error == 0) {
459 			if (lp->d_secperunit > sp->ds_size)
460 				error = ENOSPC;
461 			for (part = 0; part < lp->d_npartitions; part++)
462 				if (lp->d_partitions[part].p_size > sp->ds_size)
463 					error = ENOSPC;
464 		}
465 		if (error != 0) {
466 			free(lp, M_DEVBUF);
467 			return (error);
468 		}
469 		free_ds_label(ssp, slice);
470 		set_ds_label(ssp, slice, lp);
471 		return (0);
472 
473 	case DIOCSYNCSLICEINFO:
474 		if (slice != WHOLE_DISK_SLICE || dkpart(dev) != RAW_PART)
475 			return (EINVAL);
476 		if (!*(int *)data)
477 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
478 				openmask = ssp->dss_slices[slice].ds_openmask;
479 				if (openmask
480 				    && (slice != WHOLE_DISK_SLICE
481 					|| openmask & ~(1 << RAW_PART)))
482 					return (EBUSY);
483 			}
484 
485 		/*
486 		 * Temporarily forget the current slices struct and read
487 		 * the current one.
488 		 * XXX should wait for current accesses on this disk to
489 		 * complete, then lock out future accesses and opens.
490 		 */
491 		*sspp = NULL;
492 		lp = malloc(sizeof *lp, M_DEVBUF, M_WAITOK);
493 		*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
494 		error = dsopen(dev, S_IFCHR, ssp->dss_oflags, sspp, lp);
495 		if (error != 0) {
496 			free(lp, M_DEVBUF);
497 			*sspp = ssp;
498 			return (error);
499 		}
500 
501 		/*
502 		 * Reopen everything.  This is a no-op except in the "force"
503 		 * case and when the raw bdev and cdev are both open.  Abort
504 		 * if anything fails.
505 		 */
506 		for (slice = 0; slice < ssp->dss_nslices; slice++) {
507 			for (openmask = ssp->dss_slices[slice].ds_openmask,
508 			     part = 0; openmask; openmask >>= 1, part++) {
509 				if (!(openmask & 1))
510 					continue;
511 				error = dsopen(dkmodslice(dkmodpart(dev, part),
512 							  slice),
513 					       S_IFCHR, ssp->dss_oflags, sspp,
514 					       lp);
515 				if (error != 0) {
516 					free(lp, M_DEVBUF);
517 					*sspp = ssp;
518 					return (EBUSY);
519 				}
520 			}
521 		}
522 
523 		free(lp, M_DEVBUF);
524 		dsgone(&ssp);
525 		return (0);
526 
527 	case DIOCWDINFO:
528 		error = dsioctl(dev, DIOCSDINFO, data, flags, &ssp);
529 		if (error != 0)
530 			return (error);
531 		/*
532 		 * XXX this used to hack on dk_openpart to fake opening
533 		 * partition 0 in case that is used instead of dkpart(dev).
534 		 */
535 		old_wlabel = sp->ds_wlabel;
536 		set_ds_wlabel(ssp, slice, TRUE);
537 		error = writedisklabel(dev, sp->ds_label);
538 		/* XXX should invalidate in-core label if write failed. */
539 		set_ds_wlabel(ssp, slice, old_wlabel);
540 		return (error);
541 
542 	case DIOCWLABEL:
543 		if (slice == WHOLE_DISK_SLICE)
544 			return (ENODEV);
545 		if (!(flags & FWRITE))
546 			return (EBADF);
547 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
548 		return (0);
549 
550 	default:
551 		return (ENOIOCTL);
552 	}
553 }
554 
555 /*
556  * Chain the bio_done.  b_cmd remains valid through such chaining.
557  */
558 static void
559 dsiodone(struct bio *bio)
560 {
561 	struct buf *bp = bio->bio_buf;
562 	char *msg;
563 
564 	if (bp->b_cmd != BUF_CMD_READ
565 	    || (!(bp->b_flags & B_ERROR) && bp->b_error == 0)) {
566 		msg = fixlabel(NULL, bio->bio_caller_info1.ptr,
567 			       (struct disklabel *)
568 			       (bp->b_data + (int)bio->bio_caller_info2.offset),
569 			       FALSE);
570 		if (msg != NULL)
571 			printf("%s\n", msg);
572 	}
573 	biodone(bio->bio_prev);
574 }
575 
576 int
577 dsisopen(struct diskslices *ssp)
578 {
579 	int slice;
580 
581 	if (ssp == NULL)
582 		return (0);
583 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
584 		if (ssp->dss_slices[slice].ds_openmask)
585 			return (1);
586 	}
587 	return (0);
588 }
589 
590 /*
591  * Allocate a slices "struct" and initialize it to contain only an empty
592  * compatibility slice (pointing to itself), a whole disk slice (covering
593  * the disk as described by the label), and (nslices - BASE_SLICES) empty
594  * slices beginning at BASE_SLICE.
595  */
596 struct diskslices *
597 dsmakeslicestruct(int nslices, struct disklabel *lp)
598 {
599 	struct diskslice *sp;
600 	struct diskslices *ssp;
601 
602 	ssp = malloc(offsetof(struct diskslices, dss_slices) +
603 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
604 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
605 	ssp->dss_nslices = nslices;
606 	ssp->dss_oflags = 0;
607 	ssp->dss_secmult = lp->d_secsize / DEV_BSIZE;
608 	if (ssp->dss_secmult & (ssp->dss_secmult - 1))
609 		ssp->dss_secshift = -1;
610 	else
611 		ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
612 	ssp->dss_secsize = lp->d_secsize;
613 	sp = &ssp->dss_slices[0];
614 	bzero(sp, nslices * sizeof *sp);
615 	sp[WHOLE_DISK_SLICE].ds_size = lp->d_secperunit;
616 	return (ssp);
617 }
618 
619 char *
620 dsname(dev_t dev, int unit, int slice, int part, char *partname)
621 {
622 	static char name[32];
623 	const char *dname;
624 
625 	dname = dev_dname(dev);
626 	if (strlen(dname) > 16)
627 		dname = "nametoolong";
628 	snprintf(name, sizeof(name), "%s%d", dname, unit);
629 	partname[0] = '\0';
630 	if (slice != WHOLE_DISK_SLICE || part != RAW_PART) {
631 		partname[0] = 'a' + part;
632 		partname[1] = '\0';
633 		if (slice != COMPATIBILITY_SLICE) {
634 			snprintf(name + strlen(name),
635 			    sizeof(name) - strlen(name), "s%d", slice - 1);
636 		}
637 	}
638 	return (name);
639 }
640 
641 /*
642  * This should only be called when the unit is inactive and the strategy
643  * routine should not allow it to become active unless we call it.  Our
644  * strategy routine must be special to allow activity.
645  */
646 int
647 dsopen(dev_t dev, int mode, u_int flags,
648 	struct diskslices **sspp, struct disklabel *lp)
649 {
650 	dev_t dev1;
651 	int error;
652 	struct disklabel *lp1;
653 	char *msg;
654 	u_char mask;
655 	bool_t need_init;
656 	int part;
657 	char partname[2];
658 	int slice;
659 	char *sname;
660 	struct diskslice *sp;
661 	struct diskslices *ssp;
662 	int unit;
663 
664 	dev->si_bsize_phys = lp->d_secsize;
665 
666 	unit = dkunit(dev);
667 	if (lp->d_secsize % DEV_BSIZE) {
668 		printf("%s: invalid sector size %lu\n", devtoname(dev),
669 		    (u_long)lp->d_secsize);
670 		return (EINVAL);
671 	}
672 
673 	/*
674 	 * Do not attempt to read the slice table or disk label when
675 	 * accessing the raw disk.
676 	 */
677 	if (dkslice(dev) == WHOLE_DISK_SLICE && dkpart(dev) == RAW_PART) {
678 		flags |= DSO_ONESLICE | DSO_NOLABELS;
679 	}
680 
681 	/*
682 	 * XXX reinitialize the slice table unless there is an open device
683 	 * on the unit.  This should only be done if the media has changed.
684 	 */
685 	ssp = *sspp;
686 	need_init = !dsisopen(ssp);
687 	if (ssp != NULL && need_init)
688 		dsgone(sspp);
689 	if (need_init) {
690 		/*
691 		 * Allocate a minimal slices "struct".  This will become
692 		 * the final slices "struct" if we don't want real slices
693 		 * or if we can't find any real slices.
694 		 */
695 		*sspp = dsmakeslicestruct(BASE_SLICE, lp);
696 
697 		if (!(flags & DSO_ONESLICE)) {
698 			TRACE(("dsinit\n"));
699 			error = dsinit(dev, lp, sspp);
700 			if (error != 0) {
701 				dsgone(sspp);
702 				return (error);
703 			}
704 		}
705 		ssp = *sspp;
706 		ssp->dss_oflags = flags;
707 
708 		/*
709 		 * If there are no real slices, then make the compatiblity
710 		 * slice cover the whole disk.
711 		 */
712 		if (ssp->dss_nslices == BASE_SLICE)
713 			ssp->dss_slices[COMPATIBILITY_SLICE].ds_size
714 				= lp->d_secperunit;
715 
716 		/* Point the compatibility slice at the BSD slice, if any. */
717 		for (slice = BASE_SLICE; slice < ssp->dss_nslices; slice++) {
718 			sp = &ssp->dss_slices[slice];
719 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */) {
720 				ssp->dss_first_bsd_slice = slice;
721 				ssp->dss_slices[COMPATIBILITY_SLICE].ds_offset
722 					= sp->ds_offset;
723 				ssp->dss_slices[COMPATIBILITY_SLICE].ds_size
724 					= sp->ds_size;
725 				ssp->dss_slices[COMPATIBILITY_SLICE].ds_type
726 					= sp->ds_type;
727 				break;
728 			}
729 		}
730 
731 		ssp->dss_slices[WHOLE_DISK_SLICE].ds_label = clone_label(lp);
732 		ssp->dss_slices[WHOLE_DISK_SLICE].ds_wlabel = TRUE;
733 	}
734 
735 	/*
736 	 * Initialize secondary info for all slices.  It is needed for more
737 	 * than the current slice in the DEVFS case.  XXX DEVFS is no more.
738 	 */
739 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
740 		sp = &ssp->dss_slices[slice];
741 		if (sp->ds_label != NULL)
742 			continue;
743 		dev1 = dkmodslice(dkmodpart(dev, RAW_PART), slice);
744 		sname = dsname(dev, unit, slice, RAW_PART, partname);
745 		/*
746 		 * XXX this should probably only be done for the need_init
747 		 * case, but there may be a problem with DIOCSYNCSLICEINFO.
748 		 */
749 		set_ds_wlabel(ssp, slice, TRUE);	/* XXX invert */
750 		lp1 = clone_label(lp);
751 		TRACE(("readdisklabel\n"));
752 		if (flags & DSO_NOLABELS)
753 			msg = NULL;
754 		else {
755 			msg = readdisklabel(dev1, lp1);
756 
757 			/*
758 			 * readdisklabel() returns NULL for success, and an
759 			 * error string for failure.
760 			 *
761 			 * If there isn't a label on the disk, and if the
762 			 * DSO_COMPATLABEL is set, we want to use the
763 			 * faked-up label provided by the caller.
764 			 *
765 			 * So we set msg to NULL to indicate that there is
766 			 * no failure (since we have a faked-up label),
767 			 * free lp1, and then clone it again from lp.
768 			 * (In case readdisklabel() modified lp1.)
769 			 */
770 			if (msg != NULL && (flags & DSO_COMPATLABEL)) {
771 				msg = NULL;
772 				free(lp1, M_DEVBUF);
773 				lp1 = clone_label(lp);
774 			}
775 		}
776 		if (msg == NULL)
777 			msg = fixlabel(sname, sp, lp1, FALSE);
778 		if (msg == NULL && lp1->d_secsize != ssp->dss_secsize)
779 			msg = "inconsistent sector size";
780 		if (msg != NULL) {
781 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
782 				log(LOG_WARNING, "%s: cannot find label (%s)\n",
783 				    sname, msg);
784 			free(lp1, M_DEVBUF);
785 			continue;
786 		}
787 		if (lp1->d_flags & D_BADSECT) {
788 			log(LOG_ERR, "%s: bad sector table not supported\n",
789 			    sname);
790 			free(lp1, M_DEVBUF);
791 			continue;
792 		}
793 		set_ds_label(ssp, slice, lp1);
794 		set_ds_wlabel(ssp, slice, FALSE);
795 	}
796 
797 	slice = dkslice(dev);
798 	if (slice >= ssp->dss_nslices)
799 		return (ENXIO);
800 	sp = &ssp->dss_slices[slice];
801 	part = dkpart(dev);
802 	if (part != RAW_PART
803 	    && (sp->ds_label == NULL || part >= sp->ds_label->d_npartitions))
804 		return (EINVAL);	/* XXX needs translation */
805 	mask = 1 << part;
806 	sp->ds_openmask |= mask;
807 	return (0);
808 }
809 
810 int
811 dssize(dev_t dev, struct diskslices **sspp)
812 {
813 	struct disklabel *lp;
814 	int part;
815 	int slice;
816 	struct diskslices *ssp;
817 
818 	slice = dkslice(dev);
819 	part = dkpart(dev);
820 	ssp = *sspp;
821 	if (ssp == NULL || slice >= ssp->dss_nslices
822 	    || !(ssp->dss_slices[slice].ds_openmask & (1 << part))) {
823 		if (dev_dopen(dev, FREAD, S_IFCHR, NULL) != 0)
824 			return (-1);
825 		dev_dclose(dev, FREAD, S_IFCHR, NULL);
826 		ssp = *sspp;
827 	}
828 	lp = ssp->dss_slices[slice].ds_label;
829 	if (lp == NULL)
830 		return (-1);
831 	return ((int)lp->d_partitions[part].p_size);
832 }
833 
834 static void
835 free_ds_label(struct diskslices *ssp, int slice)
836 {
837 	struct disklabel *lp;
838 	struct diskslice *sp;
839 
840 	sp = &ssp->dss_slices[slice];
841 	lp = sp->ds_label;
842 	if (lp == NULL)
843 		return;
844 	free(lp, M_DEVBUF);
845 	set_ds_label(ssp, slice, (struct disklabel *)NULL);
846 }
847 
848 static char *
849 fixlabel(char *sname, struct diskslice *sp, struct disklabel *lp, int writeflag)
850 {
851 	u_long end;
852 	u_long offset;
853 	int part;
854 	struct partition *pp;
855 	u_long start;
856 	bool_t warned;
857 
858 	/* These errors "can't happen" so don't bother reporting details. */
859 	if (lp->d_magic != DISKMAGIC || lp->d_magic2 != DISKMAGIC)
860 		return ("fixlabel: invalid magic");
861 	if (dkcksum(lp) != 0)
862 		return ("fixlabel: invalid checksum");
863 
864 	pp = &lp->d_partitions[RAW_PART];
865 	if (writeflag) {
866 		start = 0;
867 		offset = sp->ds_offset;
868 	} else {
869 		start = sp->ds_offset;
870 		offset = -sp->ds_offset;
871 	}
872 	if (pp->p_offset != start) {
873 		if (sname != NULL) {
874 			printf(
875 "%s: rejecting BSD label: raw partition offset != slice offset\n",
876 			       sname);
877 			slice_info(sname, sp);
878 			partition_info(sname, RAW_PART, pp);
879 		}
880 		return ("fixlabel: raw partition offset != slice offset");
881 	}
882 	if (pp->p_size != sp->ds_size) {
883 		if (sname != NULL) {
884 			printf("%s: raw partition size != slice size\n", sname);
885 			slice_info(sname, sp);
886 			partition_info(sname, RAW_PART, pp);
887 		}
888 		if (pp->p_size > sp->ds_size) {
889 			if (sname == NULL)
890 				return ("fixlabel: raw partition size > slice size");
891 			printf("%s: truncating raw partition\n", sname);
892 			pp->p_size = sp->ds_size;
893 		}
894 	}
895 	end = start + sp->ds_size;
896 	if (start > end)
897 		return ("fixlabel: slice wraps");
898 	if (lp->d_secpercyl <= 0)
899 		return ("fixlabel: d_secpercyl <= 0");
900 	pp -= RAW_PART;
901 	warned = FALSE;
902 	for (part = 0; part < lp->d_npartitions; part++, pp++) {
903 		if (pp->p_offset != 0 || pp->p_size != 0) {
904 			if (pp->p_offset < start
905 			    || pp->p_offset + pp->p_size > end
906 			    || pp->p_offset + pp->p_size < pp->p_offset) {
907 				if (sname != NULL) {
908 					printf(
909 "%s: rejecting partition in BSD label: it isn't entirely within the slice\n",
910 					       sname);
911 					if (!warned) {
912 						slice_info(sname, sp);
913 						warned = TRUE;
914 					}
915 					partition_info(sname, part, pp);
916 				}
917 				/* XXX else silently discard junk. */
918 				bzero(pp, sizeof *pp);
919 			} else
920 				pp->p_offset += offset;
921 		}
922 	}
923 	lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
924 	lp->d_secperunit = sp->ds_size;
925  	lp->d_checksum = 0;
926  	lp->d_checksum = dkcksum(lp);
927 	return (NULL);
928 }
929 
930 static void
931 partition_info(char *sname, int part, struct partition *pp)
932 {
933 	printf("%s%c: start %lu, end %lu, size %lu\n", sname, 'a' + part,
934 	       (u_long)pp->p_offset, (u_long)(pp->p_offset + pp->p_size - 1),
935 	       (u_long)pp->p_size);
936 }
937 
938 static void
939 slice_info(char *sname, struct diskslice *sp)
940 {
941 	printf("%s: start %lu, end %lu, size %lu\n", sname,
942 	       sp->ds_offset, sp->ds_offset + sp->ds_size - 1, sp->ds_size);
943 }
944 
945 static void
946 set_ds_label(struct diskslices *ssp, int slice, struct disklabel *lp)
947 {
948 	ssp->dss_slices[slice].ds_label = lp;
949 	if (slice == COMPATIBILITY_SLICE)
950 		ssp->dss_slices[ssp->dss_first_bsd_slice].ds_label = lp;
951 	else if (slice == ssp->dss_first_bsd_slice)
952 		ssp->dss_slices[COMPATIBILITY_SLICE].ds_label = lp;
953 }
954 
955 static void
956 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
957 {
958 	ssp->dss_slices[slice].ds_wlabel = wlabel;
959 	if (slice == COMPATIBILITY_SLICE)
960 		ssp->dss_slices[ssp->dss_first_bsd_slice].ds_wlabel = wlabel;
961 	else if (slice == ssp->dss_first_bsd_slice)
962 		ssp->dss_slices[COMPATIBILITY_SLICE].ds_wlabel = wlabel;
963 }
964