xref: /dflybsd-src/sys/kern/subr_diskslice.c (revision 4efe7afba0805dbbcb59524efb839339a65b0b04)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
43  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
45  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
47  * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.51 2008/08/29 20:08:36 dillon Exp $
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/conf.h>
54 #include <sys/disklabel.h>
55 #include <sys/disklabel32.h>
56 #include <sys/disklabel64.h>
57 #include <sys/diskslice.h>
58 #include <sys/disk.h>
59 #include <sys/diskmbr.h>
60 #include <sys/fcntl.h>
61 #include <sys/malloc.h>
62 #include <sys/stat.h>
63 #include <sys/syslog.h>
64 #include <sys/proc.h>
65 #include <sys/vnode.h>
66 #include <sys/device.h>
67 #include <sys/thread2.h>
68 
69 #include <vfs/ufs/dinode.h>	/* XXX used only for fs.h */
70 #include <vfs/ufs/fs.h>		/* XXX used only to get BBSIZE/SBSIZE */
71 
72 static int  dsreadandsetlabel(cdev_t dev, u_int flags,
73 			   struct diskslices *ssp, struct diskslice *sp,
74 			   struct disk_info *info);
75 static void free_ds_label (struct diskslices *ssp, int slice);
76 static void set_ds_label (struct diskslices *ssp, int slice, disklabel_t lp,
77 			   disklabel_ops_t ops);
78 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
79 
80 /*
81  * Determine the size of the transfer, and make sure it is
82  * within the boundaries of the partition. Adjust transfer
83  * if needed, and signal errors or early completion.
84  *
85  * XXX TODO:
86  *	o Split buffers that are too big for the device.
87  *	o Check for overflow.
88  *	o Finish cleaning this up.
89  *
90  * This function returns 1 on success, 0 if transfer equates
91  * to EOF (end of disk) or -1 on failure.  The appropriate
92  * 'errno' value is also set in bp->b_error and bp->b_flags
93  * is marked with B_ERROR.
94  */
95 struct bio *
96 dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
97 {
98 	struct buf *bp = bio->bio_buf;
99 	struct bio *nbio;
100 	disklabel_t lp;
101 	disklabel_ops_t ops;
102 	long nsec;
103 	u_int64_t secno;
104 	u_int64_t endsecno;
105 	u_int64_t slicerel_secno;
106 	struct diskslice *sp;
107 	u_int32_t part;
108 	u_int32_t slice;
109 	int shift;
110 	int mask;
111 
112 	slice = dkslice(dev);
113 	part  = dkpart(dev);
114 
115 	if (bio->bio_offset < 0) {
116 		kprintf("dscheck(%s): negative bio_offset %lld\n",
117 			devtoname(dev), (long long)bio->bio_offset);
118 		goto bad;
119 	}
120 	if (slice >= ssp->dss_nslices) {
121 		kprintf("dscheck(%s): slice too large %d/%d\n",
122 			devtoname(dev), slice, ssp->dss_nslices);
123 		goto bad;
124 	}
125 	sp = &ssp->dss_slices[slice];
126 
127 	/*
128 	 * Calculate secno and nsec
129 	 */
130 	if (ssp->dss_secmult == 1) {
131 		shift = DEV_BSHIFT;
132 		goto doshift;
133 	} else if (ssp->dss_secshift != -1) {
134 		shift = DEV_BSHIFT + ssp->dss_secshift;
135 doshift:
136 		mask = (1 << shift) - 1;
137 		if ((int)bp->b_bcount & mask)
138 			goto bad_bcount;
139 		if ((int)bio->bio_offset & mask)
140 			goto bad_blkno;
141 		secno = bio->bio_offset >> shift;
142 		nsec = bp->b_bcount >> shift;
143 	} else {
144 		if (bp->b_bcount % ssp->dss_secsize)
145 			goto bad_bcount;
146 		if (bio->bio_offset % ssp->dss_secsize)
147 			goto bad_blkno;
148 		secno = bio->bio_offset / ssp->dss_secsize;
149 		nsec = bp->b_bcount / ssp->dss_secsize;
150 	}
151 
152 	/*
153 	 * Calculate slice-relative sector number end slice-relative
154 	 * limit.
155 	 */
156 	if (slice == WHOLE_DISK_SLICE) {
157 		/*
158 		 * Labels have not been allowed on whole-disks for a while.
159 		 * This really puts the nail in the coffin.
160 		 *
161 		 * Accesses to the WHOLE_DISK_SLICE do not use a disklabel
162 		 * and partition numbers are special-cased.  Currently numbers
163 		 * less then 128 are not allowed.  Partition numbers >= 128
164 		 * are encoded in the high 8 bits of the 64 bit buffer offset
165 		 * and are fed directly through to the device with no
166 		 * further interpretation.  In particular, no sector
167 		 * translation interpretation should occur because the
168 		 * sector size for the special raw access may not be the
169 		 * same as the nominal sector size for the device.
170 		 */
171 		lp.opaque = NULL;
172 		if (part < 128) {
173 			kprintf("dscheck(%s): illegal partition number (%d) "
174 				"for WHOLE_DISK_SLICE access\n",
175 				devtoname(dev), part);
176 			goto bad;
177 		} else if (part != WHOLE_SLICE_PART) {
178 			nbio = push_bio(bio);
179 			nbio->bio_offset = bio->bio_offset |
180 					   (u_int64_t)part << 56;
181 			return(nbio);
182 		}
183 
184 		/*
185 		 * sp->ds_size is for the whole disk in the WHOLE_DISK_SLICE,
186 		 * there are no reserved areas.
187 		 */
188 		endsecno = sp->ds_size;
189 		slicerel_secno = secno;
190 	} else if (part == WHOLE_SLICE_PART) {
191 		/*
192 		 * NOTE! opens on a whole-slice partition will not attempt
193 		 * to read a disklabel in, so there may not be an in-core
194 		 * disklabel even if there is one on the disk.
195 		 */
196 		endsecno = sp->ds_size;
197 		slicerel_secno = secno;
198 	} else if ((lp = sp->ds_label).opaque != NULL) {
199 		/*
200 		 * A label is present, extract the partition.  Snooping of
201 		 * the disklabel is not supported even if accessible.  Of
202 		 * course, the reserved area is still write protected.
203 		 */
204 		ops = sp->ds_ops;
205 		if (ops->op_getpartbounds(ssp, lp, part,
206 					  &slicerel_secno, &endsecno)) {
207 			kprintf("dscheck(%s): partition %d out of bounds\n",
208 				devtoname(dev), part);
209 			goto bad;
210 		}
211 		slicerel_secno += secno;
212 	} else {
213 		/*
214 		 * Attempt to access partition when no disklabel present
215 		 */
216 		kprintf("dscheck(%s): attempt to access non-existent partition\n",
217 			devtoname(dev));
218 		goto bad;
219 	}
220 
221 	/*
222 	 * Disallow writes to reserved areas unless ds_wlabel allows it.
223 	 */
224 	if (slicerel_secno < sp->ds_reserved && nsec &&
225 	    bp->b_cmd == BUF_CMD_WRITE && sp->ds_wlabel == 0) {
226 		bp->b_error = EROFS;
227 		goto error;
228 	}
229 
230 	/*
231 	 * If we get here, bio_offset must be on a block boundary and
232 	 * the sector size must be a power of 2.
233 	 */
234 	if ((bio->bio_offset & (ssp->dss_secsize - 1)) ||
235 	    (ssp->dss_secsize ^ (ssp->dss_secsize - 1)) !=
236 	    ((ssp->dss_secsize << 1) - 1)) {
237 		kprintf("%s: invalid BIO offset, not sector aligned or"
238 			" invalid sector size (not power of 2) %08llx %d\n",
239 			devtoname(dev), (long long)bio->bio_offset,
240 			ssp->dss_secsize);
241 		goto bad;
242 	}
243 
244 	/*
245 	 * EOF handling
246 	 */
247 	if (secno + nsec > endsecno) {
248 		/*
249 		 * Return an error if beyond the end of the disk, or
250 		 * if B_BNOCLIP is set.  Tell the system that we do not
251 		 * need to keep the buffer around.
252 		 */
253 		if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
254 			goto bad;
255 
256 		/*
257 		 * If exactly at end of disk, return an EOF.  Throw away
258 		 * the buffer contents, if any, by setting B_INVAL.
259 		 */
260 		if (secno == endsecno) {
261 			bp->b_resid = bp->b_bcount;
262 			bp->b_flags |= B_INVAL;
263 			goto done;
264 		}
265 
266 		/*
267 		 * Else truncate
268 		 */
269 		nsec = endsecno - secno;
270 		bp->b_bcount = nsec * ssp->dss_secsize;
271 	}
272 
273 	nbio = push_bio(bio);
274 	nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
275 			   ssp->dss_secsize;
276 	return (nbio);
277 
278 bad_bcount:
279 	kprintf(
280 	"dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
281 	    devtoname(dev), bp->b_bcount, ssp->dss_secsize);
282 	goto bad;
283 
284 bad_blkno:
285 	kprintf(
286 	"dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
287 	    devtoname(dev), (long long)bio->bio_offset, ssp->dss_secsize);
288 bad:
289 	bp->b_error = EINVAL;
290 	/* fall through */
291 error:
292 	/*
293 	 * Terminate the I/O with a ranging error.  Since the buffer is
294 	 * either illegal or beyond the file EOF, mark it B_INVAL as well.
295 	 */
296 	bp->b_resid = bp->b_bcount;
297 	bp->b_flags |= B_ERROR | B_INVAL;
298 done:
299 	/*
300 	 * Caller must biodone() the originally passed bio if NULL is
301 	 * returned.
302 	 */
303 	return (NULL);
304 }
305 
306 void
307 dsclose(cdev_t dev, int mode, struct diskslices *ssp)
308 {
309 	u_int32_t part;
310 	u_int32_t slice;
311 	struct diskslice *sp;
312 
313 	slice = dkslice(dev);
314 	part  = dkpart(dev);
315 	if (slice < ssp->dss_nslices) {
316 		sp = &ssp->dss_slices[slice];
317 		dsclrmask(sp, part);
318 	}
319 }
320 
321 void
322 dsgone(struct diskslices **sspp)
323 {
324 	int slice;
325 	struct diskslice *sp;
326 	struct diskslices *ssp;
327 
328 	for (slice = 0, ssp = *sspp; slice < ssp->dss_nslices; slice++) {
329 		sp = &ssp->dss_slices[slice];
330 		free_ds_label(ssp, slice);
331 	}
332 	kfree(ssp, M_DEVBUF);
333 	*sspp = NULL;
334 }
335 
336 /*
337  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
338  * is subject to the same restriction as dsopen().
339  */
340 int
341 dsioctl(cdev_t dev, u_long cmd, caddr_t data, int flags,
342 	struct diskslices **sspp, struct disk_info *info)
343 {
344 	int error;
345 	disklabel_t lp;
346 	disklabel_t lptmp;
347 	disklabel_ops_t ops;
348 	int old_wlabel;
349 	u_int32_t openmask[DKMAXPARTITIONS/(sizeof(u_int32_t)*8)];
350 	int part;
351 	int slice;
352 	struct diskslice *sp;
353 	struct diskslices *ssp;
354 
355 	slice = dkslice(dev);
356 	part = dkpart(dev);
357 	ssp = *sspp;
358 	if (slice >= ssp->dss_nslices)
359 		return (EINVAL);
360 	sp = &ssp->dss_slices[slice];
361 	lp = sp->ds_label;
362 	ops = sp->ds_ops;	/* may be NULL if no label */
363 
364 	switch (cmd) {
365 	case DIOCGDVIRGIN32:
366 		ops = &disklabel32_ops;
367 		/* fall through */
368 	case DIOCGDVIRGIN64:
369 		if (cmd != DIOCGDVIRGIN32)
370 			ops = &disklabel64_ops;
371 		/*
372 		 * You can only retrieve a virgin disklabel on the whole
373 		 * disk slice or whole-slice partition.
374 		 */
375 		if (slice != WHOLE_DISK_SLICE &&
376 		    part != WHOLE_SLICE_PART) {
377 			return(EINVAL);
378 		}
379 
380 		lp.opaque = data;
381 		ops->op_makevirginlabel(lp, ssp, sp, info);
382 		return (0);
383 
384 	case DIOCGDINFO32:
385 	case DIOCGDINFO64:
386 		/*
387 		 * You can only retrieve a disklabel on the whole
388 		 * slice partition.
389 		 *
390 		 * We do not support labels directly on whole-disks
391 		 * any more (that is, disks without slices), unless the
392 		 * device driver has asked for a compatible label (e.g.
393 		 * for a CD) to allow booting off of storage that is
394 		 * otherwise unlabeled.
395 		 */
396 		error = 0;
397 		if (part != WHOLE_SLICE_PART)
398 			return(EINVAL);
399 		if (slice == WHOLE_DISK_SLICE &&
400 		    (info->d_dsflags & DSO_COMPATLABEL) == 0) {
401 			return (ENODEV);
402 		}
403 		if (sp->ds_label.opaque == NULL) {
404 			error = dsreadandsetlabel(dev, info->d_dsflags,
405 						  ssp, sp, info);
406 			ops = sp->ds_ops;	/* may be NULL */
407 		}
408 
409 		/*
410 		 * The type of label we found must match the type of
411 		 * label requested.
412 		 */
413 		if (error == 0 && IOCPARM_LEN(cmd) != ops->labelsize)
414 			error = ENOATTR;
415 		if (error == 0)
416 			bcopy(sp->ds_label.opaque, data, ops->labelsize);
417 		return (error);
418 
419 	case DIOCGPART:
420 		{
421 			struct partinfo *dpart = (void *)data;
422 
423 			/*
424 			 * The disk management layer may not have read the
425 			 * disklabel yet because simply opening a slice no
426 			 * longer 'probes' the disk that way.  Be sure we
427 			 * have tried.
428 			 *
429 			 * We ignore any error.
430 			 */
431 			if (sp->ds_label.opaque == NULL &&
432 			    part == WHOLE_SLICE_PART &&
433 			    slice != WHOLE_DISK_SLICE) {
434 				dsreadandsetlabel(dev, info->d_dsflags,
435 						  ssp, sp, info);
436 				ops = sp->ds_ops;	/* may be NULL */
437 			}
438 
439 			bzero(dpart, sizeof(*dpart));
440 			dpart->media_offset   = (u_int64_t)sp->ds_offset *
441 						info->d_media_blksize;
442 			dpart->media_size     = (u_int64_t)sp->ds_size *
443 						info->d_media_blksize;
444 			dpart->media_blocks   = sp->ds_size;
445 			dpart->media_blksize  = info->d_media_blksize;
446 			dpart->reserved_blocks= sp->ds_reserved;
447 			dpart->fstype_uuid = sp->ds_type_uuid;
448 			dpart->storage_uuid = sp->ds_stor_uuid;
449 
450 			if (slice != WHOLE_DISK_SLICE &&
451 			    part != WHOLE_SLICE_PART) {
452 				u_int64_t start;
453 				u_int64_t blocks;
454 				if (lp.opaque == NULL)
455 					return(EINVAL);
456 				if (ops->op_getpartbounds(ssp, lp, part,
457 							  &start, &blocks)) {
458 					return(EINVAL);
459 				}
460 				ops->op_loadpartinfo(lp, part, dpart);
461 				dpart->media_offset += start *
462 						       info->d_media_blksize;
463 				dpart->media_size = blocks *
464 						    info->d_media_blksize;
465 				dpart->media_blocks = blocks;
466 
467 				/*
468 				 * partition starting sector (p_offset)
469 				 * requires slice's reserved areas to be
470 				 * adjusted.
471 				 */
472 				if (dpart->reserved_blocks > start)
473 					dpart->reserved_blocks -= start;
474 				else
475 					dpart->reserved_blocks = 0;
476 			}
477 
478 			/*
479 			 * Load remaining fields from the info structure
480 			 */
481 			dpart->d_nheads =	info->d_nheads;
482 			dpart->d_ncylinders =	info->d_ncylinders;
483 			dpart->d_secpertrack =	info->d_secpertrack;
484 			dpart->d_secpercyl =	info->d_secpercyl;
485 		}
486 		return (0);
487 
488 	case DIOCGSLICEINFO:
489 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
490 				 (char *)ssp);
491 		return (0);
492 
493 	case DIOCSDINFO32:
494 		ops = &disklabel32_ops;
495 		/* fall through */
496 	case DIOCSDINFO64:
497 		if (cmd != DIOCSDINFO32)
498 			ops = &disklabel64_ops;
499 		/*
500 		 * You can write a disklabel on the whole disk slice or
501 		 * whole-slice partition.
502 		 */
503 		if (slice != WHOLE_DISK_SLICE &&
504 		    part != WHOLE_SLICE_PART) {
505 			return(EINVAL);
506 		}
507 
508 		/*
509 		 * We no longer support writing disklabels directly to media
510 		 * without there being a slice.  Keep this as a separate
511 		 * conditional.
512 		 */
513 		if (slice == WHOLE_DISK_SLICE)
514 			return (ENODEV);
515 		if (!(flags & FWRITE))
516 			return (EBADF);
517 
518 		/*
519 		 * If an existing label is present it must be the same
520 		 * type as the label being passed by the ioctl.
521 		 */
522 		if (sp->ds_label.opaque && sp->ds_ops != ops)
523 			return (ENOATTR);
524 
525 		/*
526 		 * Create a temporary copy of the existing label
527 		 * (if present) so setdisklabel can compare it against
528 		 * the new label.
529 		 */
530 		lp.opaque = kmalloc(ops->labelsize, M_DEVBUF, M_WAITOK);
531 		if (sp->ds_label.opaque == NULL)
532 			bzero(lp.opaque, ops->labelsize);
533 		else
534 			bcopy(sp->ds_label.opaque, lp.opaque, ops->labelsize);
535 		if (sp->ds_label.opaque == NULL) {
536 			bzero(openmask, sizeof(openmask));
537 		} else {
538 			bcopy(sp->ds_openmask, openmask, sizeof(openmask));
539 		}
540 		lptmp.opaque = data;
541 		error = ops->op_setdisklabel(lp, lptmp, ssp, sp, openmask);
542 		if (error != 0) {
543 			kfree(lp.opaque, M_DEVBUF);
544 			return (error);
545 		}
546 		free_ds_label(ssp, slice);
547 		set_ds_label(ssp, slice, lp, ops);
548 		return (0);
549 
550 	case DIOCSYNCSLICEINFO:
551 		/*
552 		 * This ioctl can only be done on the whole disk
553 		 */
554 		if (slice != WHOLE_DISK_SLICE || part != WHOLE_SLICE_PART)
555 			return (EINVAL);
556 
557 		if (*(int *)data == 0) {
558 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
559 				struct diskslice *ds = &ssp->dss_slices[slice];
560 
561 				switch(dscountmask(ds)) {
562 				case 0:
563 					break;
564 				case 1:
565 					if (slice != WHOLE_DISK_SLICE)
566 						return (EBUSY);
567 					if (!dschkmask(ds, RAW_PART))
568 						return (EBUSY);
569 					break;
570 				default:
571 					return (EBUSY);
572 				}
573 			}
574 		}
575 
576 		/*
577 		 * Temporarily forget the current slices struct and read
578 		 * the current one.
579 		 *
580 		 * NOTE:
581 		 *
582 		 * XXX should wait for current accesses on this disk to
583 		 * complete, then lock out future accesses and opens.
584 		 */
585 		*sspp = NULL;
586 		error = dsopen(dev, S_IFCHR, ssp->dss_oflags, sspp, info);
587 		if (error != 0) {
588 			*sspp = ssp;
589 			return (error);
590 		}
591 
592 		/*
593 		 * Reopen everything.  This is a no-op except in the "force"
594 		 * case and when the raw bdev and cdev are both open.  Abort
595 		 * if anything fails.
596 		 */
597 		for (slice = 0; slice < ssp->dss_nslices; slice++) {
598 			for (part = 0; part < DKMAXPARTITIONS; ++part) {
599 				if (!dschkmask(&ssp->dss_slices[slice], part))
600 					continue;
601 				error = dsopen(dkmodslice(dkmodpart(dev, part),
602 							  slice),
603 					       S_IFCHR, ssp->dss_oflags, sspp,
604 					       info);
605 				if (error != 0) {
606 					*sspp = ssp;
607 					return (EBUSY);
608 				}
609 			}
610 		}
611 
612 		dsgone(&ssp);
613 		return (0);
614 
615 	case DIOCWDINFO32:
616 	case DIOCWDINFO64:
617 		error = dsioctl(dev, ((cmd == DIOCWDINFO32) ?
618 					DIOCSDINFO32 : DIOCSDINFO64),
619 				data, flags, &ssp, info);
620 		if (error == 0 && sp->ds_label.opaque == NULL)
621 			error = EINVAL;
622 		if (error != 0)
623 			return (error);
624 
625 		/*
626 		 * Allow the reserved area to be written, reload ops
627 		 * because the DIOCSDINFO op above may have installed
628 		 * a new label type.
629 		 */
630 		ops = sp->ds_ops;
631 		old_wlabel = sp->ds_wlabel;
632 		set_ds_wlabel(ssp, slice, TRUE);
633 		error = ops->op_writedisklabel(dev, ssp, sp, sp->ds_label);
634 		set_ds_wlabel(ssp, slice, old_wlabel);
635 		/* XXX should invalidate in-core label if write failed. */
636 		return (error);
637 
638 	case DIOCWLABEL:
639 		if (slice == WHOLE_DISK_SLICE)
640 			return (ENODEV);
641 		if (!(flags & FWRITE))
642 			return (EBADF);
643 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
644 		return (0);
645 
646 	default:
647 		return (ENOIOCTL);
648 	}
649 }
650 
651 int
652 dsisopen(struct diskslices *ssp)
653 {
654 	int slice;
655 
656 	if (ssp == NULL)
657 		return (0);
658 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
659 		if (dscountmask(&ssp->dss_slices[slice]))
660 			return (1);
661 	}
662 	return (0);
663 }
664 
665 /*
666  * Allocate a slices "struct" and initialize it to contain only an empty
667  * compatibility slice (pointing to itself), a whole disk slice (covering
668  * the disk as described by the label), and (nslices - BASE_SLICES) empty
669  * slices beginning at BASE_SLICE.
670  *
671  * Note that the compatibility slice is no longer really a compatibility
672  * slice.  It is slice 0 if a GPT label is present, and the dangerously
673  * dedicated slice if no slice table otherwise exists.  Else it is 0-sized.
674  */
675 struct diskslices *
676 dsmakeslicestruct(int nslices, struct disk_info *info)
677 {
678 	struct diskslice *sp;
679 	struct diskslices *ssp;
680 
681 	ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
682 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
683 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
684 	ssp->dss_nslices = nslices;
685 	ssp->dss_oflags = 0;
686 
687 	/*
688 	 * Figure out if we can use shifts or whether we have to
689 	 * use mod/multply to translate byte offsets into sector numbers.
690 	 */
691 	if ((info->d_media_blksize ^ (info->d_media_blksize - 1)) ==
692 	     (info->d_media_blksize << 1) - 1) {
693 		ssp->dss_secmult = info->d_media_blksize / DEV_BSIZE;
694 		if (ssp->dss_secmult & (ssp->dss_secmult - 1))
695 			ssp->dss_secshift = -1;
696 		else
697 			ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
698 	} else {
699 		ssp->dss_secmult = 0;
700 		ssp->dss_secshift = -1;
701 	}
702 	ssp->dss_secsize = info->d_media_blksize;
703 	sp = &ssp->dss_slices[0];
704 	bzero(sp, nslices * sizeof *sp);
705 	sp[WHOLE_DISK_SLICE].ds_size = info->d_media_blocks;
706 	return (ssp);
707 }
708 
709 char *
710 dsname(cdev_t dev, int unit, int slice, int part, char *partname)
711 {
712 	static char name[32];
713 	const char *dname;
714 	int used;
715 
716 	dname = dev_dname(dev);
717 	if (strlen(dname) > 16)
718 		dname = "nametoolong";
719 	ksnprintf(name, sizeof(name), "%s%d", dname, unit);
720 	partname[0] = '\0';
721 	used = strlen(name);
722 
723 	if (slice != WHOLE_DISK_SLICE) {
724 		/*
725 		 * slice or slice + partition.  BASE_SLICE is s1, but
726 		 * the compatibility slice (0) needs to be s0.
727 		 */
728 		used += ksnprintf(name + used, sizeof(name) - used,
729 				  "s%d", (slice ? slice - BASE_SLICE + 1 : 0));
730 		if (part != WHOLE_SLICE_PART) {
731 			used += ksnprintf(name + used, sizeof(name) - used,
732 					  "%c", 'a' + part);
733 			partname[0] = 'a' + part;
734 			partname[1] = 0;
735 		}
736 	} else if (part == WHOLE_SLICE_PART) {
737 		/*
738 		 * whole-disk-device, raw access to disk
739 		 */
740 		/* no string extension */
741 	} else if (part > 128) {
742 		/*
743 		 * whole-disk-device, extended raw access partitions.
744 		 * (typically used to access CD audio tracks)
745 		 */
746 		used += ksnprintf(name + used, sizeof(name) - used,
747 					  "t%d", part - 128);
748 	} else {
749 		/*
750 		 * whole-disk-device, illegal partition number
751 		 */
752 		used += ksnprintf(name + used, sizeof(name) - used,
753 					  "?%d", part);
754 	}
755 	return (name);
756 }
757 
758 /*
759  * This should only be called when the unit is inactive and the strategy
760  * routine should not allow it to become active unless we call it.  Our
761  * strategy routine must be special to allow activity.
762  */
763 int
764 dsopen(cdev_t dev, int mode, u_int flags,
765 	struct diskslices **sspp, struct disk_info *info)
766 {
767 	cdev_t dev1;
768 	int error;
769 	int need_init;
770 	struct diskslice *sp;
771 	struct diskslices *ssp;
772 	int slice;
773 	int part;
774 
775 	dev->si_bsize_phys = info->d_media_blksize;
776 
777 	/*
778 	 * Do not attempt to read the slice table or disk label when
779 	 * accessing the whole-disk slice or a while-slice partition.
780 	 */
781 	if (dkslice(dev) == WHOLE_DISK_SLICE)
782 		flags |= DSO_ONESLICE | DSO_NOLABELS;
783 	if (dkpart(dev) == WHOLE_SLICE_PART)
784 		flags |= DSO_NOLABELS;
785 
786 	/*
787 	 * Reinitialize the slice table unless there is an open device
788 	 * on the unit.
789 	 *
790 	 * It would be nice if we didn't have to do this but when a
791 	 * user is slicing and partitioning up a disk it is a lot safer
792 	 * to not take any chances.
793 	 */
794 	ssp = *sspp;
795 	need_init = !dsisopen(ssp);
796 	if (ssp != NULL && need_init)
797 		dsgone(sspp);
798 	if (need_init) {
799 		/*
800 		 * Allocate a minimal slices "struct".  This will become
801 		 * the final slices "struct" if we don't want real slices
802 		 * or if we can't find any real slices.
803 		 *
804 		 * Then scan the disk
805 		 */
806 		*sspp = dsmakeslicestruct(BASE_SLICE, info);
807 
808 		if ((flags & DSO_ONESLICE) == 0) {
809 			error = mbrinit(dev, info, sspp);
810 			if (error != 0) {
811 				dsgone(sspp);
812 				return (error);
813 			}
814 		}
815 		ssp = *sspp;
816 		ssp->dss_oflags = flags;
817 
818 		/*
819 		 * If there are no real slices, then make the compatiblity
820 		 * slice cover the whole disk.
821 		 */
822 		if (ssp->dss_nslices == BASE_SLICE) {
823 			sp = &ssp->dss_slices[COMPATIBILITY_SLICE];
824 
825 			sp->ds_size = info->d_media_blocks;
826 			sp->ds_reserved = 0;
827 		}
828 
829 		/*
830 		 * Set dss_first_bsd_slice to point at the first BSD
831 		 * slice, if any.
832 		 */
833 		for (slice = BASE_SLICE; slice < ssp->dss_nslices; slice++) {
834 			sp = &ssp->dss_slices[slice];
835 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */) {
836 #if 0
837 				struct diskslice *csp;
838 #endif
839 
840 				ssp->dss_first_bsd_slice = slice;
841 #if 0
842 				/*
843 				 * no longer supported, s0 is a real slice
844 				 * for GPT
845 				 */
846 				csp = &ssp->dss_slices[COMPATIBILITY_SLICE];
847 				csp->ds_offset = sp->ds_offset;
848 				csp->ds_size = sp->ds_size;
849 				csp->ds_type = sp->ds_type;
850 				csp->ds_reserved = sp->ds_reserved;
851 #endif
852 				break;
853 			}
854 		}
855 
856 		/*
857 		 * By definition accesses via the whole-disk device do not
858 		 * specify any reserved areas.  The whole disk may be read
859 		 * or written by the whole-disk device.
860 		 *
861 		 * The whole-disk slice does not ever have a label.
862 		 */
863 		sp = &ssp->dss_slices[WHOLE_DISK_SLICE];
864 		sp->ds_wlabel = TRUE;
865 		sp->ds_reserved = 0;
866 	}
867 
868 	/*
869 	 * Load the disklabel for the slice being accessed unless it is
870 	 * a whole-disk-slice or a whole-slice-partition (as determined
871 	 * by DSO_NOLABELS).
872 	 *
873 	 * We could scan all slices here and try to load up their
874 	 * disklabels, but that would cause us to access slices that
875 	 * the user may otherwise not intend us to access, or corrupted
876 	 * slices, etc.
877 	 *
878 	 * XXX if there are no opens on the slice we may want to re-read
879 	 * the disklabel anyway, even if we have one cached.
880 	 */
881 	slice = dkslice(dev);
882 	if (slice >= ssp->dss_nslices)
883 		return (ENXIO);
884 	sp = &ssp->dss_slices[slice];
885 	part = dkpart(dev);
886 
887 	if ((flags & DSO_NOLABELS) == 0 && sp->ds_label.opaque == NULL) {
888 		dev1 = dkmodslice(dkmodpart(dev, WHOLE_SLICE_PART), slice);
889 
890 		/*
891 		 * If opening a raw disk we do not try to
892 		 * read the disklabel now.  No interpretation of raw disks
893 		 * (e.g. like 'da0') ever occurs.  We will try to read the
894 		 * disklabel for a raw slice if asked to via DIOC* ioctls.
895 		 *
896 		 * Access to the label area is disallowed by default.  Note
897 		 * however that accesses via WHOLE_DISK_SLICE, and accesses
898 		 * via WHOLE_SLICE_PART for slices without valid disklabels,
899 		 * will allow writes and ignore the flag.
900 		 */
901 		set_ds_wlabel(ssp, slice, FALSE);
902 		dsreadandsetlabel(dev1, flags, ssp, sp, info);
903 	}
904 
905 	/*
906 	 * If opening a particular partition the disklabel must exist and
907 	 * the partition must be present in the label.
908 	 *
909 	 * If the partition is the special whole-disk-slice no partition
910 	 * table need exist.
911 	 */
912 	if (part != WHOLE_SLICE_PART && slice != WHOLE_DISK_SLICE) {
913 		if (sp->ds_label.opaque == NULL ||
914 		    part >= sp->ds_ops->op_getnumparts(sp->ds_label)) {
915 			return (EINVAL);
916 		}
917 	}
918 
919 	/*
920 	 * Do not allow special raw-extension partitions to be opened
921 	 * if the device doesn't support them.  Raw-extension partitions
922 	 * are typically used to handle CD tracks.
923 	 */
924 	if (slice == WHOLE_DISK_SLICE && part >= 128 &&
925 	    part != WHOLE_SLICE_PART) {
926 		if ((info->d_dsflags & DSO_RAWEXTENSIONS) == 0)
927 			return (EINVAL);
928 	}
929 
930 	/*
931 	 * Ok, we are open
932 	 */
933 	dssetmask(sp, part);
934 	return (0);
935 }
936 
937 /*
938  * Attempt to read the disklabel.  If successful, store it in sp->ds_label.
939  *
940  * If we cannot read the disklabel and DSO_COMPATLABEL is set, we construct
941  * a fake label covering the whole disk.
942  */
943 static
944 int
945 dsreadandsetlabel(cdev_t dev, u_int flags,
946 		  struct diskslices *ssp, struct diskslice *sp,
947 		  struct disk_info *info)
948 {
949 	disklabel_t lp;
950 	disklabel_ops_t ops;
951 	const char *msg;
952 	const char *sname;
953 	char partname[2];
954 	int slice = dkslice(dev);
955 
956 	/*
957 	 * Probe the disklabel
958 	 */
959 	lp.opaque = NULL;
960 	sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART, partname);
961 	ops = &disklabel32_ops;
962 	msg = ops->op_readdisklabel(dev, sp, &lp, info);
963 	if (msg && strcmp(msg, "no disk label") == 0) {
964 		ops = &disklabel64_ops;
965 		msg = disklabel64_ops.op_readdisklabel(dev, sp, &lp, info);
966 	}
967 
968 	/*
969 	 * If we failed and COMPATLABEL is set, create a dummy disklabel.
970 	 */
971 	if (msg != NULL && (flags & DSO_COMPATLABEL)) {
972 		msg = NULL;
973 		if (sp->ds_size >= 0x100000000ULL)
974 			ops = &disklabel64_ops;
975 		else
976 			ops = &disklabel32_ops;
977 		lp = ops->op_clone_label(info, sp);
978 	}
979 	if (msg != NULL) {
980 		if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
981 			log(LOG_WARNING, "%s: cannot find label (%s)\n",
982 			    sname, msg);
983 		if (lp.opaque)
984 			kfree(lp.opaque, M_DEVBUF);
985 	} else {
986 		set_ds_label(ssp, slice, lp, ops);
987 		set_ds_wlabel(ssp, slice, FALSE);
988 	}
989 	return (msg ? EINVAL : 0);
990 }
991 
992 int64_t
993 dssize(cdev_t dev, struct diskslices **sspp)
994 {
995 	disklabel_t lp;
996 	disklabel_ops_t ops;
997 	int part;
998 	int slice;
999 	struct diskslices *ssp;
1000 	u_int64_t start;
1001 	u_int64_t blocks;
1002 
1003 	slice = dkslice(dev);
1004 	part = dkpart(dev);
1005 	ssp = *sspp;
1006 	if (ssp == NULL || slice >= ssp->dss_nslices
1007 	    || !dschkmask(&ssp->dss_slices[slice], part)) {
1008 		if (dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred) != 0)
1009 			return (-1);
1010 		dev_dclose(dev, FREAD, S_IFCHR);
1011 		ssp = *sspp;
1012 	}
1013 	lp = ssp->dss_slices[slice].ds_label;
1014 	if (lp.opaque == NULL)
1015 		return (-1);
1016 	ops = ssp->dss_slices[slice].ds_ops;
1017 	if (ops->op_getpartbounds(ssp, lp, part, &start, &blocks))
1018 		return (-1);
1019 	return ((int64_t)blocks);
1020 }
1021 
1022 static void
1023 free_ds_label(struct diskslices *ssp, int slice)
1024 {
1025 	struct diskslice *sp;
1026 	disklabel_t lp;
1027 
1028 	sp = &ssp->dss_slices[slice];
1029 	lp = sp->ds_label;
1030 	if (lp.opaque != NULL) {
1031 		kfree(lp.opaque, M_DEVBUF);
1032 		lp.opaque = NULL;
1033 		set_ds_label(ssp, slice, lp, NULL);
1034 	}
1035 }
1036 
1037 static void
1038 set_ds_label(struct diskslices *ssp, int slice,
1039 	     disklabel_t lp, disklabel_ops_t ops)
1040 {
1041 	struct diskslice *sp = &ssp->dss_slices[slice];
1042 
1043 	sp->ds_label = lp;
1044 	sp->ds_ops = ops;
1045 	if (lp.opaque && slice != WHOLE_DISK_SLICE)
1046 		ops->op_adjust_label_reserved(ssp, slice, sp);
1047 	else
1048 		sp->ds_reserved = 0;
1049 }
1050 
1051 static void
1052 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
1053 {
1054 	ssp->dss_slices[slice].ds_wlabel = wlabel;
1055 }
1056 
1057