xref: /dflybsd-src/sys/kern/subr_diskslice.c (revision 3e82b46c18bc48fdb3c1d60729c7661b3a0bf6bf)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
43  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
45  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
47  * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.51 2008/08/29 20:08:36 dillon Exp $
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/conf.h>
54 #include <sys/disklabel.h>
55 #include <sys/disklabel32.h>
56 #include <sys/disklabel64.h>
57 #include <sys/diskslice.h>
58 #include <sys/disk.h>
59 #include <sys/diskmbr.h>
60 #include <sys/fcntl.h>
61 #include <sys/malloc.h>
62 #include <sys/stat.h>
63 #include <sys/syslog.h>
64 #include <sys/proc.h>
65 #include <sys/vnode.h>
66 #include <sys/device.h>
67 #include <sys/thread2.h>
68 
69 #include <vfs/ufs/dinode.h>	/* XXX used only for fs.h */
70 #include <vfs/ufs/fs.h>		/* XXX used only to get BBSIZE/SBSIZE */
71 
72 static int  dsreadandsetlabel(cdev_t dev, u_int flags,
73 			   struct diskslices *ssp, struct diskslice *sp,
74 			   struct disk_info *info);
75 static void free_ds_label (struct diskslices *ssp, int slice);
76 static void set_ds_label (struct diskslices *ssp, int slice, disklabel_t lp,
77 			   disklabel_ops_t ops);
78 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
79 
80 /*
81  * Determine the size of the transfer, and make sure it is
82  * within the boundaries of the partition. Adjust transfer
83  * if needed, and signal errors or early completion.
84  *
85  * XXX TODO:
86  *	o Split buffers that are too big for the device.
87  *	o Check for overflow.
88  *	o Finish cleaning this up.
89  *
90  * This function returns 1 on success, 0 if transfer equates
91  * to EOF (end of disk) or -1 on failure.  The appropriate
92  * 'errno' value is also set in bp->b_error and bp->b_flags
93  * is marked with B_ERROR.
94  */
95 struct bio *
96 dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
97 {
98 	struct buf *bp = bio->bio_buf;
99 	struct bio *nbio;
100 	disklabel_t lp;
101 	disklabel_ops_t ops;
102 	long nsec;
103 	u_int64_t secno;
104 	u_int64_t endsecno;
105 	u_int64_t slicerel_secno;
106 	struct diskslice *sp;
107 	u_int32_t part;
108 	u_int32_t slice;
109 	int shift;
110 	int mask;
111 
112 	slice = dkslice(dev);
113 	part  = dkpart(dev);
114 
115 	if (bio->bio_offset < 0) {
116 		kprintf("dscheck(%s): negative bio_offset %lld\n",
117 			devtoname(dev), (long long)bio->bio_offset);
118 		goto bad;
119 	}
120 	if (slice >= ssp->dss_nslices) {
121 		kprintf("dscheck(%s): slice too large %d/%d\n",
122 			devtoname(dev), slice, ssp->dss_nslices);
123 		goto bad;
124 	}
125 	sp = &ssp->dss_slices[slice];
126 	/*
127 	 * Calculate secno and nsec
128 	 */
129 	if (ssp->dss_secmult == 1) {
130 		shift = DEV_BSHIFT;
131 		goto doshift;
132 	} else if (ssp->dss_secshift != -1) {
133 		shift = DEV_BSHIFT + ssp->dss_secshift;
134 doshift:
135 		mask = (1 << shift) - 1;
136 		if ((int)bp->b_bcount & mask)
137 			goto bad_bcount;
138 		if ((int)bio->bio_offset & mask)
139 			goto bad_blkno;
140 		secno = bio->bio_offset >> shift;
141 		nsec = bp->b_bcount >> shift;
142 	} else {
143 		if (bp->b_bcount % ssp->dss_secsize)
144 			goto bad_bcount;
145 		if (bio->bio_offset % ssp->dss_secsize)
146 			goto bad_blkno;
147 		secno = bio->bio_offset / ssp->dss_secsize;
148 		nsec = bp->b_bcount / ssp->dss_secsize;
149 	}
150 
151 	/*
152 	 * Calculate slice-relative sector number end slice-relative
153 	 * limit.
154 	 */
155 	if (slice == WHOLE_DISK_SLICE) {
156 		/*
157 		 * Labels have not been allowed on whole-disks for a while.
158 		 * This really puts the nail in the coffin.
159 		 *
160 		 * Accesses to the WHOLE_DISK_SLICE do not use a disklabel
161 		 * and partition numbers are special-cased.  Currently numbers
162 		 * less then 128 are not allowed.  Partition numbers >= 128
163 		 * are encoded in the high 8 bits of the 64 bit buffer offset
164 		 * and are fed directly through to the device with no
165 		 * further interpretation.  In particular, no sector
166 		 * translation interpretation should occur because the
167 		 * sector size for the special raw access may not be the
168 		 * same as the nominal sector size for the device.
169 		 */
170 		lp.opaque = NULL;
171 		if (part < 128) {
172 			kprintf("dscheck(%s): illegal partition number (%d) "
173 				"for WHOLE_DISK_SLICE access\n",
174 				devtoname(dev), part);
175 			goto bad;
176 		} else if (part != WHOLE_SLICE_PART) {
177 			nbio = push_bio(bio);
178 			nbio->bio_offset = bio->bio_offset |
179 					   (u_int64_t)part << 56;
180 			return(nbio);
181 		}
182 
183 		/*
184 		 * sp->ds_size is for the whole disk in the WHOLE_DISK_SLICE,
185 		 * there are no reserved areas.
186 		 */
187 		endsecno = sp->ds_size;
188 		slicerel_secno = secno;
189 	} else if (part == WHOLE_SLICE_PART) {
190 		/*
191 		 * NOTE! opens on a whole-slice partition will not attempt
192 		 * to read a disklabel in, so there may not be an in-core
193 		 * disklabel even if there is one on the disk.
194 		 */
195 		endsecno = sp->ds_size;
196 		slicerel_secno = secno;
197 	} else if ((lp = sp->ds_label).opaque != NULL) {
198 		/*
199 		 * A label is present, extract the partition.  Snooping of
200 		 * the disklabel is not supported even if accessible.  Of
201 		 * course, the reserved area is still write protected.
202 		 */
203 		ops = sp->ds_ops;
204 		if (ops->op_getpartbounds(ssp, lp, part,
205 					  &slicerel_secno, &endsecno)) {
206 			kprintf("dscheck(%s): partition %d out of bounds\n",
207 				devtoname(dev), part);
208 			goto bad;
209 		}
210 		slicerel_secno += secno;
211 	} else {
212 		/*
213 		 * Attempt to access partition when no disklabel present
214 		 */
215 		kprintf("dscheck(%s): attempt to access non-existent partition\n",
216 			devtoname(dev));
217 		goto bad;
218 	}
219 
220 	/*
221 	 * Disallow writes to reserved areas unless ds_wlabel allows it.
222 	 */
223 	if (slicerel_secno < sp->ds_reserved && nsec &&
224 	    bp->b_cmd == BUF_CMD_WRITE && sp->ds_wlabel == 0) {
225 		bp->b_error = EROFS;
226 		goto error;
227 	}
228 
229 	/*
230 	 * If we get here, bio_offset must be on a block boundary and
231 	 * the sector size must be a power of 2.
232 	 */
233 	if ((bio->bio_offset & (ssp->dss_secsize - 1)) ||
234 	    (ssp->dss_secsize ^ (ssp->dss_secsize - 1)) !=
235 	    ((ssp->dss_secsize << 1) - 1)) {
236 		kprintf("%s: invalid BIO offset, not sector aligned or"
237 			" invalid sector size (not power of 2) %08llx %d\n",
238 			devtoname(dev), (long long)bio->bio_offset,
239 			ssp->dss_secsize);
240 		goto bad;
241 	}
242 
243 	/*
244 	 * EOF handling
245 	 */
246 	if (secno + nsec > endsecno) {
247 		/*
248 		 * Return an error if beyond the end of the disk, or
249 		 * if B_BNOCLIP is set.  Tell the system that we do not
250 		 * need to keep the buffer around.
251 		 */
252 		if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
253 			goto bad;
254 
255 		/*
256 		 * If exactly at end of disk, return an EOF.  Throw away
257 		 * the buffer contents, if any, by setting B_INVAL.
258 		 */
259 		if (secno == endsecno) {
260 			bp->b_resid = bp->b_bcount;
261 			bp->b_flags |= B_INVAL;
262 			goto done;
263 		}
264 
265 		/*
266 		 * Else truncate
267 		 */
268 		nsec = endsecno - secno;
269 		bp->b_bcount = nsec * ssp->dss_secsize;
270 	}
271 
272 	nbio = push_bio(bio);
273 	nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
274 			   ssp->dss_secsize;
275 	return (nbio);
276 
277 bad_bcount:
278 	kprintf(
279 	"dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
280 	    devtoname(dev), bp->b_bcount, ssp->dss_secsize);
281 	goto bad;
282 
283 bad_blkno:
284 	kprintf(
285 	"dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
286 	    devtoname(dev), (long long)bio->bio_offset, ssp->dss_secsize);
287 bad:
288 	bp->b_error = EINVAL;
289 	/* fall through */
290 error:
291 	/*
292 	 * Terminate the I/O with a ranging error.  Since the buffer is
293 	 * either illegal or beyond the file EOF, mark it B_INVAL as well.
294 	 */
295 	bp->b_resid = bp->b_bcount;
296 	bp->b_flags |= B_ERROR | B_INVAL;
297 done:
298 	/*
299 	 * Caller must biodone() the originally passed bio if NULL is
300 	 * returned.
301 	 */
302 	return (NULL);
303 }
304 
305 void
306 dsclose(cdev_t dev, int mode, struct diskslices *ssp)
307 {
308 	u_int32_t part;
309 	u_int32_t slice;
310 	struct diskslice *sp;
311 
312 	slice = dkslice(dev);
313 	part  = dkpart(dev);
314 	if (slice < ssp->dss_nslices) {
315 		sp = &ssp->dss_slices[slice];
316 		dsclrmask(sp, part);
317 	}
318 }
319 
320 void
321 dsgone(struct diskslices **sspp)
322 {
323 	int slice;
324 	struct diskslice *sp;
325 	struct diskslices *ssp;
326 
327 	kprintf("dsgone is called... fear!\n");
328 
329 	for (slice = 0, ssp = *sspp; slice < ssp->dss_nslices; slice++) {
330 		sp = &ssp->dss_slices[slice];
331 		free_ds_label(ssp, slice);
332 	}
333 	kfree(ssp, M_DEVBUF);
334 	*sspp = NULL;
335 }
336 
337 /*
338  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
339  * is subject to the same restriction as dsopen().
340  */
341 int
342 dsioctl(cdev_t dev, u_long cmd, caddr_t data, int flags,
343 	struct diskslices **sspp, struct disk_info *info)
344 {
345 	int error;
346 	disklabel_t lp;
347 	disklabel_t lptmp;
348 	disklabel_ops_t ops;
349 	int old_wlabel;
350 	u_int32_t openmask[DKMAXPARTITIONS/(sizeof(u_int32_t)*8)];
351 	int part;
352 	int slice;
353 	struct diskslice *sp;
354 	struct diskslices *ssp;
355 
356 	slice = dkslice(dev);
357 	part = dkpart(dev);
358 	ssp = *sspp;
359 	if (slice >= ssp->dss_nslices)
360 		return (EINVAL);
361 	sp = &ssp->dss_slices[slice];
362 	lp = sp->ds_label;
363 	ops = sp->ds_ops;	/* may be NULL if no label */
364 
365 	switch (cmd) {
366 	case DIOCGDVIRGIN32:
367 		ops = &disklabel32_ops;
368 		/* fall through */
369 	case DIOCGDVIRGIN64:
370 		if (cmd != DIOCGDVIRGIN32)
371 			ops = &disklabel64_ops;
372 		/*
373 		 * You can only retrieve a virgin disklabel on the whole
374 		 * disk slice or whole-slice partition.
375 		 */
376 		if (slice != WHOLE_DISK_SLICE &&
377 		    part != WHOLE_SLICE_PART) {
378 			return(EINVAL);
379 		}
380 
381 		lp.opaque = data;
382 		ops->op_makevirginlabel(lp, ssp, sp, info);
383 		return (0);
384 
385 	case DIOCGDINFO32:
386 	case DIOCGDINFO64:
387 		/*
388 		 * You can only retrieve a disklabel on the whole
389 		 * slice partition.
390 		 *
391 		 * We do not support labels directly on whole-disks
392 		 * any more (that is, disks without slices), unless the
393 		 * device driver has asked for a compatible label (e.g.
394 		 * for a CD) to allow booting off of storage that is
395 		 * otherwise unlabeled.
396 		 */
397 		error = 0;
398 		if (part != WHOLE_SLICE_PART)
399 			return(EINVAL);
400 		if (slice == WHOLE_DISK_SLICE &&
401 		    (info->d_dsflags & DSO_COMPATLABEL) == 0) {
402 			return (ENODEV);
403 		}
404 		if (sp->ds_label.opaque == NULL) {
405 			error = dsreadandsetlabel(dev, info->d_dsflags,
406 						  ssp, sp, info);
407 			ops = sp->ds_ops;	/* may be NULL */
408 		}
409 
410 		/*
411 		 * The type of label we found must match the type of
412 		 * label requested.
413 		 */
414 		if (error == 0 && IOCPARM_LEN(cmd) != ops->labelsize)
415 			error = ENOATTR;
416 		if (error == 0)
417 			bcopy(sp->ds_label.opaque, data, ops->labelsize);
418 		return (error);
419 
420 	case DIOCGPART:
421 		{
422 			struct partinfo *dpart = (void *)data;
423 
424 			/*
425 			 * The disk management layer may not have read the
426 			 * disklabel yet because simply opening a slice no
427 			 * longer 'probes' the disk that way.  Be sure we
428 			 * have tried.
429 			 *
430 			 * We ignore any error.
431 			 */
432 			if (sp->ds_label.opaque == NULL &&
433 			    part == WHOLE_SLICE_PART &&
434 			    slice != WHOLE_DISK_SLICE) {
435 				kprintf("dsioctl: I shouldn't be here...\n");
436 				dsreadandsetlabel(dev, info->d_dsflags,
437 						  ssp, sp, info);
438 				ops = sp->ds_ops;	/* may be NULL */
439 			}
440 
441 			bzero(dpart, sizeof(*dpart));
442 			dpart->media_offset   = (u_int64_t)sp->ds_offset *
443 						info->d_media_blksize;
444 			dpart->media_size     = (u_int64_t)sp->ds_size *
445 						info->d_media_blksize;
446 			dpart->media_blocks   = sp->ds_size;
447 			dpart->media_blksize  = info->d_media_blksize;
448 			dpart->reserved_blocks= sp->ds_reserved;
449 			dpart->fstype_uuid = sp->ds_type_uuid;
450 			dpart->storage_uuid = sp->ds_stor_uuid;
451 
452 			if (slice != WHOLE_DISK_SLICE &&
453 			    part != WHOLE_SLICE_PART) {
454 				u_int64_t start;
455 				u_int64_t blocks;
456 				if (lp.opaque == NULL)
457 					return(EINVAL);
458 				if (ops->op_getpartbounds(ssp, lp, part,
459 							  &start, &blocks)) {
460 					return(EINVAL);
461 				}
462 				ops->op_loadpartinfo(lp, part, dpart);
463 				dpart->media_offset += start *
464 						       info->d_media_blksize;
465 				dpart->media_size = blocks *
466 						    info->d_media_blksize;
467 				dpart->media_blocks = blocks;
468 
469 				/*
470 				 * partition starting sector (p_offset)
471 				 * requires slice's reserved areas to be
472 				 * adjusted.
473 				 */
474 				if (dpart->reserved_blocks > start)
475 					dpart->reserved_blocks -= start;
476 				else
477 					dpart->reserved_blocks = 0;
478 			}
479 
480 			/*
481 			 * Load remaining fields from the info structure
482 			 */
483 			dpart->d_nheads =	info->d_nheads;
484 			dpart->d_ncylinders =	info->d_ncylinders;
485 			dpart->d_secpertrack =	info->d_secpertrack;
486 			dpart->d_secpercyl =	info->d_secpercyl;
487 		}
488 		return (0);
489 
490 	case DIOCGSLICEINFO:
491 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
492 				 (char *)ssp);
493 		return (0);
494 
495 	case DIOCSDINFO32:
496 		ops = &disklabel32_ops;
497 		/* fall through */
498 	case DIOCSDINFO64:
499 		if (cmd != DIOCSDINFO32)
500 			ops = &disklabel64_ops;
501 		/*
502 		 * You can write a disklabel on the whole disk slice or
503 		 * whole-slice partition.
504 		 */
505 		if (slice != WHOLE_DISK_SLICE &&
506 		    part != WHOLE_SLICE_PART) {
507 			return(EINVAL);
508 		}
509 
510 		/*
511 		 * We no longer support writing disklabels directly to media
512 		 * without there being a slice.  Keep this as a separate
513 		 * conditional.
514 		 */
515 		if (slice == WHOLE_DISK_SLICE)
516 			return (ENODEV);
517 		if (!(flags & FWRITE))
518 			return (EBADF);
519 
520 		/*
521 		 * If an existing label is present it must be the same
522 		 * type as the label being passed by the ioctl.
523 		 */
524 		if (sp->ds_label.opaque && sp->ds_ops != ops)
525 			return (ENOATTR);
526 
527 		/*
528 		 * Create a temporary copy of the existing label
529 		 * (if present) so setdisklabel can compare it against
530 		 * the new label.
531 		 */
532 		lp.opaque = kmalloc(ops->labelsize, M_DEVBUF, M_WAITOK);
533 		if (sp->ds_label.opaque == NULL)
534 			bzero(lp.opaque, ops->labelsize);
535 		else
536 			bcopy(sp->ds_label.opaque, lp.opaque, ops->labelsize);
537 		if (sp->ds_label.opaque == NULL) {
538 			bzero(openmask, sizeof(openmask));
539 		} else {
540 			bcopy(sp->ds_openmask, openmask, sizeof(openmask));
541 		}
542 		lptmp.opaque = data;
543 		error = ops->op_setdisklabel(lp, lptmp, ssp, sp, openmask);
544 		//XXX: send reprobe message here.
545 		disk_msg_send(DISK_SLICE_REPROBE, dev->si_disk, sp);
546 		if (error != 0) {
547 			kfree(lp.opaque, M_DEVBUF);
548 			return (error);
549 		}
550 		free_ds_label(ssp, slice);
551 		set_ds_label(ssp, slice, lp, ops);
552 		return (0);
553 
554 	case DIOCSYNCSLICEINFO:
555 		/*
556 		 * This ioctl can only be done on the whole disk
557 		 */
558 		if (slice != WHOLE_DISK_SLICE || part != WHOLE_SLICE_PART)
559 			return (EINVAL);
560 
561 		if (*(int *)data == 0) {
562 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
563 				struct diskslice *ds = &ssp->dss_slices[slice];
564 
565 				switch(dscountmask(ds)) {
566 				case 0:
567 					break;
568 				case 1:
569 					if (slice != WHOLE_DISK_SLICE)
570 						return (EBUSY);
571 					if (!dschkmask(ds, RAW_PART))
572 						return (EBUSY);
573 					break;
574 				default:
575 					return (EBUSY);
576 				}
577 			}
578 		}
579 
580 		disk_msg_send(DISK_DISK_REPROBE, dev->si_disk, NULL);
581 		return 0;
582 
583 		/*
584 		 * Temporarily forget the current slices struct and read
585 		 * the current one.
586 		 *
587 		 * NOTE:
588 		 *
589 		 * XXX should wait for current accesses on this disk to
590 		 * complete, then lock out future accesses and opens.
591 		 */
592 		kprintf("dsioctl messed with our stuff!\n");
593 		*sspp = NULL;
594 		error = dsopen(dev, S_IFCHR, ssp->dss_oflags, sspp, info);
595 		if (error != 0) {
596 			*sspp = ssp;
597 			return (error);
598 		}
599 
600 		/*
601 		 * Reopen everything.  This is a no-op except in the "force"
602 		 * case and when the raw bdev and cdev are both open.  Abort
603 		 * if anything fails.
604 		 */
605 		for (slice = 0; slice < ssp->dss_nslices; slice++) {
606 			for (part = 0; part < DKMAXPARTITIONS; ++part) {
607 				if (!dschkmask(&ssp->dss_slices[slice], part))
608 					continue;
609 				error = dsopen(dkmodslice(dkmodpart(dev, part),
610 							  slice),
611 					       S_IFCHR, ssp->dss_oflags, sspp,
612 					       info);
613 				if (error != 0) {
614 					*sspp = ssp;
615 					return (EBUSY);
616 				}
617 			}
618 		}
619 
620 		//XXX: recheck this...
621 		dsgone(&ssp);
622 		return (0);
623 
624 	case DIOCWDINFO32:
625 	case DIOCWDINFO64:
626 		error = dsioctl(dev, ((cmd == DIOCWDINFO32) ?
627 					DIOCSDINFO32 : DIOCSDINFO64),
628 				data, flags, &ssp, info);
629 		if (error == 0 && sp->ds_label.opaque == NULL)
630 			error = EINVAL;
631 		if (error != 0)
632 			return (error);
633 
634 		/*
635 		 * Allow the reserved area to be written, reload ops
636 		 * because the DIOCSDINFO op above may have installed
637 		 * a new label type.
638 		 */
639 		ops = sp->ds_ops;
640 		old_wlabel = sp->ds_wlabel;
641 		set_ds_wlabel(ssp, slice, TRUE);
642 		error = ops->op_writedisklabel(dev, ssp, sp, sp->ds_label);
643 		disk_msg_send(DISK_SLICE_REPROBE, dev->si_disk, sp);
644 		set_ds_wlabel(ssp, slice, old_wlabel);
645 		/* XXX should invalidate in-core label if write failed. */
646 		return (error);
647 
648 	case DIOCWLABEL:
649 		if (slice == WHOLE_DISK_SLICE)
650 			return (ENODEV);
651 		if (!(flags & FWRITE))
652 			return (EBADF);
653 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
654 		return (0);
655 
656 	default:
657 		return (ENOIOCTL);
658 	}
659 }
660 
661 int
662 dsisopen(struct diskslices *ssp)
663 {
664 	int slice;
665 
666 	if (ssp == NULL)
667 		return (0);
668 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
669 		if (dscountmask(&ssp->dss_slices[slice]))
670 			return (1);
671 	}
672 	return (0);
673 }
674 
675 /*
676  * Allocate a slices "struct" and initialize it to contain only an empty
677  * compatibility slice (pointing to itself), a whole disk slice (covering
678  * the disk as described by the label), and (nslices - BASE_SLICES) empty
679  * slices beginning at BASE_SLICE.
680  *
681  * Note that the compatibility slice is no longer really a compatibility
682  * slice.  It is slice 0 if a GPT label is present, and the dangerously
683  * dedicated slice if no slice table otherwise exists.  Else it is 0-sized.
684  */
685 struct diskslices *
686 dsmakeslicestruct(int nslices, struct disk_info *info)
687 {
688 	struct diskslice *sp;
689 	struct diskslices *ssp;
690 
691 	ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
692 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
693 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
694 	ssp->dss_nslices = nslices;
695 	ssp->dss_oflags = 0;
696 
697 	/*
698 	 * Figure out if we can use shifts or whether we have to
699 	 * use mod/multply to translate byte offsets into sector numbers.
700 	 */
701 	if ((info->d_media_blksize ^ (info->d_media_blksize - 1)) ==
702 	     (info->d_media_blksize << 1) - 1) {
703 		ssp->dss_secmult = info->d_media_blksize / DEV_BSIZE;
704 		if (ssp->dss_secmult & (ssp->dss_secmult - 1))
705 			ssp->dss_secshift = -1;
706 		else
707 			ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
708 	} else {
709 		ssp->dss_secmult = 0;
710 		ssp->dss_secshift = -1;
711 	}
712 	ssp->dss_secsize = info->d_media_blksize;
713 	sp = &ssp->dss_slices[0];
714 	bzero(sp, nslices * sizeof *sp);
715 	sp[WHOLE_DISK_SLICE].ds_size = info->d_media_blocks;
716 	return (ssp);
717 }
718 
719 char *
720 dsname(cdev_t dev, int unit, int slice, int part, char *partname)
721 {
722 	return dev->si_name;
723 }
724 
725 /*
726  * This should only be called when the unit is inactive and the strategy
727  * routine should not allow it to become active unless we call it.  Our
728  * strategy routine must be special to allow activity.
729  */
730 int
731 dsopen(cdev_t dev, int mode, u_int flags,
732 	struct diskslices **sspp, struct disk_info *info)
733 {
734 	cdev_t dev1;
735 	int error;
736 	int need_init;
737 	struct diskslice *sp;
738 	struct diskslices *ssp;
739 	int slice;
740 	int part;
741 
742 	ssp = *sspp;
743 	dev->si_bsize_phys = info->d_media_blksize;
744 	slice = dkslice(dev);
745 	part = dkpart(dev);
746 	sp = &ssp->dss_slices[slice];
747 	dssetmask(sp, part);
748 
749 	return 0;
750 
751 	/*
752 	 * Do not attempt to read the slice table or disk label when
753 	 * accessing the whole-disk slice or a while-slice partition.
754 	 */
755 	if (dkslice(dev) == WHOLE_DISK_SLICE)
756 		flags |= DSO_ONESLICE | DSO_NOLABELS;
757 	if (dkpart(dev) == WHOLE_SLICE_PART)
758 		flags |= DSO_NOLABELS;
759 
760 	/*
761 	 * Reinitialize the slice table unless there is an open device
762 	 * on the unit.
763 	 *
764 	 * It would be nice if we didn't have to do this but when a
765 	 * user is slicing and partitioning up a disk it is a lot safer
766 	 * to not take any chances.
767 	 */
768 	ssp = *sspp;
769 	need_init = !dsisopen(ssp);
770 	if (ssp != NULL && need_init)
771 		dsgone(sspp);
772 	if (need_init) {
773 		/*
774 		 * Allocate a minimal slices "struct".  This will become
775 		 * the final slices "struct" if we don't want real slices
776 		 * or if we can't find any real slices.
777 		 *
778 		 * Then scan the disk
779 		 */
780 		*sspp = dsmakeslicestruct(BASE_SLICE, info);
781 
782 		if ((flags & DSO_ONESLICE) == 0) {
783 			error = mbrinit(dev, info, sspp);
784 			if (error != 0) {
785 				dsgone(sspp);
786 				return (error);
787 			}
788 		}
789 		ssp = *sspp;
790 		ssp->dss_oflags = flags;
791 
792 		/*
793 		 * If there are no real slices, then make the compatiblity
794 		 * slice cover the whole disk.
795 		 */
796 		if (ssp->dss_nslices == BASE_SLICE) {
797 			sp = &ssp->dss_slices[COMPATIBILITY_SLICE];
798 
799 			sp->ds_size = info->d_media_blocks;
800 			sp->ds_reserved = 0;
801 		}
802 
803 		/*
804 		 * Set dss_first_bsd_slice to point at the first BSD
805 		 * slice, if any.
806 		 */
807 		for (slice = BASE_SLICE; slice < ssp->dss_nslices; slice++) {
808 			sp = &ssp->dss_slices[slice];
809 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */) {
810 #if 0
811 				struct diskslice *csp;
812 #endif
813 
814 				ssp->dss_first_bsd_slice = slice;
815 #if 0
816 				/*
817 				 * no longer supported, s0 is a real slice
818 				 * for GPT
819 				 */
820 				csp = &ssp->dss_slices[COMPATIBILITY_SLICE];
821 				csp->ds_offset = sp->ds_offset;
822 				csp->ds_size = sp->ds_size;
823 				csp->ds_type = sp->ds_type;
824 				csp->ds_reserved = sp->ds_reserved;
825 #endif
826 				break;
827 			}
828 		}
829 
830 		/*
831 		 * By definition accesses via the whole-disk device do not
832 		 * specify any reserved areas.  The whole disk may be read
833 		 * or written by the whole-disk device.
834 		 *
835 		 * The whole-disk slice does not ever have a label.
836 		 */
837 		sp = &ssp->dss_slices[WHOLE_DISK_SLICE];
838 		sp->ds_wlabel = TRUE;
839 		sp->ds_reserved = 0;
840 	}
841 
842 	/*
843 	 * Load the disklabel for the slice being accessed unless it is
844 	 * a whole-disk-slice or a whole-slice-partition (as determined
845 	 * by DSO_NOLABELS).
846 	 *
847 	 * We could scan all slices here and try to load up their
848 	 * disklabels, but that would cause us to access slices that
849 	 * the user may otherwise not intend us to access, or corrupted
850 	 * slices, etc.
851 	 *
852 	 * XXX if there are no opens on the slice we may want to re-read
853 	 * the disklabel anyway, even if we have one cached.
854 	 */
855 	slice = dkslice(dev);
856 	if (slice >= ssp->dss_nslices)
857 		return (ENXIO);
858 	sp = &ssp->dss_slices[slice];
859 	part = dkpart(dev);
860 
861 	if ((flags & DSO_NOLABELS) == 0 && sp->ds_label.opaque == NULL) {
862 		dev1 = dkmodslice(dkmodpart(dev, WHOLE_SLICE_PART), slice);
863 
864 		/*
865 		 * If opening a raw disk we do not try to
866 		 * read the disklabel now.  No interpretation of raw disks
867 		 * (e.g. like 'da0') ever occurs.  We will try to read the
868 		 * disklabel for a raw slice if asked to via DIOC* ioctls.
869 		 *
870 		 * Access to the label area is disallowed by default.  Note
871 		 * however that accesses via WHOLE_DISK_SLICE, and accesses
872 		 * via WHOLE_SLICE_PART for slices without valid disklabels,
873 		 * will allow writes and ignore the flag.
874 		 */
875 		set_ds_wlabel(ssp, slice, FALSE);
876 		dsreadandsetlabel(dev1, flags, ssp, sp, info);
877 	}
878 
879 	/*
880 	 * If opening a particular partition the disklabel must exist and
881 	 * the partition must be present in the label.
882 	 *
883 	 * If the partition is the special whole-disk-slice no partition
884 	 * table need exist.
885 	 */
886 	if (part != WHOLE_SLICE_PART && slice != WHOLE_DISK_SLICE) {
887 		if (sp->ds_label.opaque == NULL ||
888 		    part >= sp->ds_ops->op_getnumparts(sp->ds_label)) {
889 			return (EINVAL);
890 		}
891 	}
892 
893 	/*
894 	 * Do not allow special raw-extension partitions to be opened
895 	 * if the device doesn't support them.  Raw-extension partitions
896 	 * are typically used to handle CD tracks.
897 	 */
898 	if (slice == WHOLE_DISK_SLICE && part >= 128 &&
899 	    part != WHOLE_SLICE_PART) {
900 		if ((info->d_dsflags & DSO_RAWEXTENSIONS) == 0)
901 			return (EINVAL);
902 	}
903 
904 	/*
905 	 * Ok, we are open
906 	 */
907 	dssetmask(sp, part);
908 	return (0);
909 }
910 
911 /*
912  * Attempt to read the disklabel.  If successful, store it in sp->ds_label.
913  *
914  * If we cannot read the disklabel and DSO_COMPATLABEL is set, we construct
915  * a fake label covering the whole disk.
916  */
917 static
918 int
919 dsreadandsetlabel(cdev_t dev, u_int flags,
920 		  struct diskslices *ssp, struct diskslice *sp,
921 		  struct disk_info *info)
922 {
923 	disklabel_t lp;
924 	disklabel_ops_t ops;
925 	const char *msg;
926 	const char *sname;
927 	char partname[2];
928 	int slice = dkslice(dev);
929 
930 	/*
931 	 * Probe the disklabel
932 	 */
933 	lp.opaque = NULL;
934 	sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART, partname);
935 	ops = &disklabel32_ops;
936 	msg = ops->op_readdisklabel(dev, sp, &lp, info);
937 	if (msg && strcmp(msg, "no disk label") == 0) {
938 		ops = &disklabel64_ops;
939 		msg = disklabel64_ops.op_readdisklabel(dev, sp, &lp, info);
940 	}
941 
942 	/*
943 	 * If we failed and COMPATLABEL is set, create a dummy disklabel.
944 	 */
945 	if (msg != NULL && (flags & DSO_COMPATLABEL)) {
946 		msg = NULL;
947 		if (sp->ds_size >= 0x100000000ULL)
948 			ops = &disklabel64_ops;
949 		else
950 			ops = &disklabel32_ops;
951 		lp = ops->op_clone_label(info, sp);
952 	}
953 	if (msg != NULL) {
954 		if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
955 			log(LOG_WARNING, "%s: cannot find label (%s)\n",
956 			    sname, msg);
957 		if (lp.opaque)
958 			kfree(lp.opaque, M_DEVBUF);
959 	} else {
960 		set_ds_label(ssp, slice, lp, ops);
961 		set_ds_wlabel(ssp, slice, FALSE);
962 	}
963 	return (msg ? EINVAL : 0);
964 }
965 
966 int64_t
967 dssize(cdev_t dev, struct diskslices **sspp)
968 {
969 	disklabel_t lp;
970 	disklabel_ops_t ops;
971 	int part;
972 	int slice;
973 	struct diskslices *ssp;
974 	u_int64_t start;
975 	u_int64_t blocks;
976 
977 	slice = dkslice(dev);
978 	part = dkpart(dev);
979 	ssp = *sspp;
980 	if (ssp == NULL || slice >= ssp->dss_nslices
981 	    || !dschkmask(&ssp->dss_slices[slice], part)) {
982 		if (dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred) != 0)
983 			return (-1);
984 		dev_dclose(dev, FREAD, S_IFCHR);
985 		ssp = *sspp;
986 	}
987 	lp = ssp->dss_slices[slice].ds_label;
988 	if (lp.opaque == NULL)
989 		return (-1);
990 	ops = ssp->dss_slices[slice].ds_ops;
991 	if (ops->op_getpartbounds(ssp, lp, part, &start, &blocks))
992 		return (-1);
993 	return ((int64_t)blocks);
994 }
995 
996 static void
997 free_ds_label(struct diskslices *ssp, int slice)
998 {
999 	struct diskslice *sp;
1000 	disklabel_t lp;
1001 
1002 	sp = &ssp->dss_slices[slice];
1003 	lp = sp->ds_label;
1004 	if (lp.opaque != NULL) {
1005 		kfree(lp.opaque, M_DEVBUF);
1006 		lp.opaque = NULL;
1007 		set_ds_label(ssp, slice, lp, NULL);
1008 	}
1009 }
1010 
1011 static void
1012 set_ds_label(struct diskslices *ssp, int slice,
1013 	     disklabel_t lp, disklabel_ops_t ops)
1014 {
1015 	struct diskslice *sp = &ssp->dss_slices[slice];
1016 
1017 	sp->ds_label = lp;
1018 	sp->ds_ops = ops;
1019 	if (lp.opaque && slice != WHOLE_DISK_SLICE)
1020 		ops->op_adjust_label_reserved(ssp, slice, sp);
1021 	else
1022 		sp->ds_reserved = 0;
1023 }
1024 
1025 static void
1026 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
1027 {
1028 	ssp->dss_slices[slice].ds_wlabel = wlabel;
1029 }
1030 
1031