1 /* $OpenBSD: subr_disk.c,v 1.103 2010/05/03 15:27:28 jsing Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/stat.h> 48 #include <sys/syslog.h> 49 #include <sys/device.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/conf.h> 53 #include <sys/lock.h> 54 #include <sys/disk.h> 55 #include <sys/reboot.h> 56 #include <sys/dkio.h> 57 #include <sys/dkstat.h> /* XXX */ 58 #include <sys/proc.h> 59 #include <sys/vnode.h> 60 #include <uvm/uvm_extern.h> 61 62 #include <sys/socket.h> 63 #include <sys/socketvar.h> 64 65 #include <net/if.h> 66 67 #include <dev/rndvar.h> 68 #include <dev/cons.h> 69 70 /* 71 * A global list of all disks attached to the system. May grow or 72 * shrink over time. 73 */ 74 struct disklist_head disklist; /* TAILQ_HEAD */ 75 int disk_count; /* number of drives in global disklist */ 76 int disk_change; /* set if a disk has been attached/detached 77 * since last we looked at this variable. This 78 * is reset by hw_sysctl() 79 */ 80 81 /* softraid callback, do not use! */ 82 void (*softraid_disk_attach)(struct disk *, int); 83 84 /* 85 * Seek sort for disks. We depend on the driver which calls us using b_resid 86 * as the current cylinder number. 87 * 88 * The argument ap structure holds a b_actf activity chain pointer on which we 89 * keep two queues, sorted in ascending cylinder order. The first queue holds 90 * those requests which are positioned after the current cylinder (in the first 91 * request); the second holds requests which came in after their cylinder number 92 * was passed. Thus we implement a one way scan, retracting after reaching the 93 * end of the drive to the first request on the second queue, at which time it 94 * becomes the first queue. 95 * 96 * A one-way scan is natural because of the way UNIX read-ahead blocks are 97 * allocated. 98 */ 99 100 void 101 disksort(struct buf *ap, struct buf *bp) 102 { 103 struct buf *bq; 104 105 /* If the queue is empty, then it's easy. */ 106 if (ap->b_actf == NULL) { 107 bp->b_actf = NULL; 108 ap->b_actf = bp; 109 return; 110 } 111 112 /* 113 * If we lie after the first (currently active) request, then we 114 * must locate the second request list and add ourselves to it. 115 */ 116 bq = ap->b_actf; 117 if (bp->b_cylinder < bq->b_cylinder) { 118 while (bq->b_actf) { 119 /* 120 * Check for an ``inversion'' in the normally ascending 121 * cylinder numbers, indicating the start of the second 122 * request list. 123 */ 124 if (bq->b_actf->b_cylinder < bq->b_cylinder) { 125 /* 126 * Search the second request list for the first 127 * request at a larger cylinder number. We go 128 * before that; if there is no such request, we 129 * go at end. 130 */ 131 do { 132 if (bp->b_cylinder < 133 bq->b_actf->b_cylinder) 134 goto insert; 135 if (bp->b_cylinder == 136 bq->b_actf->b_cylinder && 137 bp->b_blkno < bq->b_actf->b_blkno) 138 goto insert; 139 bq = bq->b_actf; 140 } while (bq->b_actf); 141 goto insert; /* after last */ 142 } 143 bq = bq->b_actf; 144 } 145 /* 146 * No inversions... we will go after the last, and 147 * be the first request in the second request list. 148 */ 149 goto insert; 150 } 151 /* 152 * Request is at/after the current request... 153 * sort in the first request list. 154 */ 155 while (bq->b_actf) { 156 /* 157 * We want to go after the current request if there is an 158 * inversion after it (i.e. it is the end of the first 159 * request list), or if the next request is a larger cylinder 160 * than our request. 161 */ 162 if (bq->b_actf->b_cylinder < bq->b_cylinder || 163 bp->b_cylinder < bq->b_actf->b_cylinder || 164 (bp->b_cylinder == bq->b_actf->b_cylinder && 165 bp->b_blkno < bq->b_actf->b_blkno)) 166 goto insert; 167 bq = bq->b_actf; 168 } 169 /* 170 * Neither a second list nor a larger request... we go at the end of 171 * the first list, which is the same as the end of the whole schebang. 172 */ 173 insert: bp->b_actf = bq->b_actf; 174 bq->b_actf = bp; 175 } 176 177 /* 178 * Compute checksum for disk label. 179 */ 180 u_int 181 dkcksum(struct disklabel *lp) 182 { 183 u_int16_t *start, *end; 184 u_int16_t sum = 0; 185 186 start = (u_int16_t *)lp; 187 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 188 while (start < end) 189 sum ^= *start++; 190 return (sum); 191 } 192 193 int 194 initdisklabel(struct disklabel *lp) 195 { 196 int i; 197 198 /* minimal requirements for archetypal disk label */ 199 if (lp->d_secsize < DEV_BSIZE) 200 lp->d_secsize = DEV_BSIZE; 201 if (DL_GETDSIZE(lp) == 0) 202 DL_SETDSIZE(lp, MAXDISKSIZE); 203 if (lp->d_secpercyl == 0) 204 return (ERANGE); 205 lp->d_npartitions = MAXPARTITIONS; 206 for (i = 0; i < RAW_PART; i++) { 207 DL_SETPSIZE(&lp->d_partitions[i], 0); 208 DL_SETPOFFSET(&lp->d_partitions[i], 0); 209 } 210 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 211 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 212 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 213 DL_SETBSTART(lp, 0); 214 DL_SETBEND(lp, DL_GETDSIZE(lp)); 215 lp->d_version = 1; 216 lp->d_bbsize = 8192; 217 lp->d_sbsize = 64*1024; /* XXX ? */ 218 return (0); 219 } 220 221 /* 222 * Check an incoming block to make sure it is a disklabel, convert it to 223 * a newer version if needed, etc etc. 224 */ 225 int 226 checkdisklabel(void *rlp, struct disklabel *lp, 227 u_int64_t boundstart, u_int64_t boundend) 228 { 229 struct disklabel *dlp = rlp; 230 struct __partitionv0 *v0pp; 231 struct partition *pp; 232 daddr64_t disksize; 233 int error = 0; 234 int i; 235 236 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) 237 error = ENOENT; /* no disk label */ 238 else if (dlp->d_npartitions > MAXPARTITIONS) 239 error = E2BIG; /* too many partitions */ 240 else if (dlp->d_secpercyl == 0) 241 error = EINVAL; /* invalid label */ 242 else if (dlp->d_secsize == 0) 243 error = ENOSPC; /* disk too small */ 244 else if (dkcksum(dlp) != 0) 245 error = EINVAL; /* incorrect checksum */ 246 247 if (error) { 248 u_int16_t *start, *end, sum = 0; 249 250 /* If it is byte-swapped, attempt to convert it */ 251 if (swap32(dlp->d_magic) != DISKMAGIC || 252 swap32(dlp->d_magic2) != DISKMAGIC || 253 swap16(dlp->d_npartitions) > MAXPARTITIONS) 254 return (error); 255 256 /* 257 * Need a byte-swap aware dkcksum variant 258 * inlined, because dkcksum uses a sub-field 259 */ 260 start = (u_int16_t *)dlp; 261 end = (u_int16_t *)&dlp->d_partitions[ 262 swap16(dlp->d_npartitions)]; 263 while (start < end) 264 sum ^= *start++; 265 if (sum != 0) 266 return (error); 267 268 dlp->d_magic = swap32(dlp->d_magic); 269 dlp->d_type = swap16(dlp->d_type); 270 dlp->d_subtype = swap16(dlp->d_subtype); 271 272 /* d_typename and d_packname are strings */ 273 274 dlp->d_secsize = swap32(dlp->d_secsize); 275 dlp->d_nsectors = swap32(dlp->d_nsectors); 276 dlp->d_ntracks = swap32(dlp->d_ntracks); 277 dlp->d_ncylinders = swap32(dlp->d_ncylinders); 278 dlp->d_secpercyl = swap32(dlp->d_secpercyl); 279 dlp->d_secperunit = swap32(dlp->d_secperunit); 280 281 /* d_uid is a string */ 282 283 dlp->d_acylinders = swap32(dlp->d_acylinders); 284 285 dlp->d_flags = swap32(dlp->d_flags); 286 287 for (i = 0; i < NDDATA; i++) 288 dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]); 289 290 dlp->d_secperunith = swap16(dlp->d_secperunith); 291 dlp->d_version = swap16(dlp->d_version); 292 293 for (i = 0; i < NSPARE; i++) 294 dlp->d_spare[i] = swap32(dlp->d_spare[i]); 295 296 dlp->d_magic2 = swap32(dlp->d_magic2); 297 dlp->d_checksum = swap16(dlp->d_checksum); 298 299 dlp->d_npartitions = swap16(dlp->d_npartitions); 300 dlp->d_bbsize = swap32(dlp->d_bbsize); 301 dlp->d_sbsize = swap32(dlp->d_sbsize); 302 303 for (i = 0; i < MAXPARTITIONS; i++) { 304 pp = &dlp->d_partitions[i]; 305 pp->p_size = swap32(pp->p_size); 306 pp->p_offset = swap32(pp->p_offset); 307 if (dlp->d_version == 0) { 308 v0pp = (struct __partitionv0 *)pp; 309 v0pp->p_fsize = swap32(v0pp->p_fsize); 310 } else { 311 pp->p_offseth = swap16(pp->p_offseth); 312 pp->p_sizeh = swap16(pp->p_sizeh); 313 } 314 pp->p_cpg = swap16(pp->p_cpg); 315 } 316 317 dlp->d_checksum = 0; 318 dlp->d_checksum = dkcksum(dlp); 319 error = 0; 320 } 321 322 /* XXX should verify lots of other fields and whine a lot */ 323 324 if (error) 325 return (error); 326 327 /* Initial passed in lp contains the real disk size. */ 328 disksize = DL_GETDSIZE(lp); 329 330 if (lp != dlp) 331 *lp = *dlp; 332 333 if (lp->d_version == 0) { 334 lp->d_version = 1; 335 lp->d_secperunith = 0; 336 337 v0pp = (struct __partitionv0 *)lp->d_partitions; 338 pp = lp->d_partitions; 339 for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) { 340 pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp-> 341 p_fsize, v0pp->p_frag); 342 pp->p_offseth = 0; 343 pp->p_sizeh = 0; 344 } 345 } 346 347 #ifdef DEBUG 348 if (DL_GETDSIZE(lp) != disksize) 349 printf("on-disk disklabel has incorrect disksize (%lld)\n", 350 DL_GETDSIZE(lp)); 351 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize) 352 printf("on-disk disklabel RAW_PART has incorrect size (%lld)\n", 353 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 354 if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0) 355 printf("on-disk disklabel RAW_PART offset != 0 (%lld)\n", 356 DL_GETPOFFSET(&lp->d_partitions[RAW_PART])); 357 #endif 358 DL_SETDSIZE(lp, disksize); 359 DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize); 360 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 361 DL_SETBSTART(lp, boundstart); 362 DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp)); 363 364 lp->d_checksum = 0; 365 lp->d_checksum = dkcksum(lp); 366 return (0); 367 } 368 369 /* 370 * If dos partition table requested, attempt to load it and 371 * find disklabel inside a DOS partition. Return buffer 372 * for use in signalling errors if requested. 373 * 374 * We would like to check if each MBR has a valid BOOT_MAGIC, but 375 * we cannot because it doesn't always exist. So.. we assume the 376 * MBR is valid. 377 */ 378 int 379 readdoslabel(struct buf *bp, void (*strat)(struct buf *), 380 struct disklabel *lp, int *partoffp, int spoofonly) 381 { 382 u_int64_t dospartoff = 0, dospartend = DL_GETBEND(lp); 383 int i, ourpart = -1, wander = 1, n = 0, loop = 0, offset; 384 struct dos_partition dp[NDOSPART], *dp2; 385 daddr64_t part_blkno = DOSBBSECTOR; 386 u_int32_t extoff = 0; 387 int error; 388 389 if (lp->d_secpercyl == 0) 390 return (EINVAL); /* invalid label */ 391 if (lp->d_secsize == 0) 392 return (ENOSPC); /* disk too small */ 393 394 /* do DOS partitions in the process of getting disklabel? */ 395 396 /* 397 * Read dos partition table, follow extended partitions. 398 * Map the partitions to disklabel entries i-p 399 */ 400 while (wander && n < 8 && loop < 8) { 401 loop++; 402 wander = 0; 403 if (part_blkno < extoff) 404 part_blkno = extoff; 405 406 /* read boot record */ 407 bp->b_blkno = DL_BLKTOSEC(lp, part_blkno) * DL_BLKSPERSEC(lp); 408 offset = DL_BLKOFFSET(lp, part_blkno) + DOSPARTOFF; 409 bp->b_bcount = lp->d_secsize; 410 bp->b_flags = B_BUSY | B_READ | B_RAW; 411 (*strat)(bp); 412 error = biowait(bp); 413 if (error) { 414 /*wrong*/ if (partoffp) 415 /*wrong*/ *partoffp = -1; 416 return (error); 417 } 418 419 bcopy(bp->b_data + offset, dp, sizeof(dp)); 420 421 if (n == 0 && part_blkno == DOSBBSECTOR) { 422 u_int16_t fattest; 423 424 /* Check the end of sector marker. */ 425 fattest = ((bp->b_data[510] << 8) & 0xff00) | 426 (bp->b_data[511] & 0xff); 427 if (fattest != 0x55aa) 428 goto notfat; 429 } 430 431 if (ourpart == -1) { 432 /* Search for our MBR partition */ 433 for (dp2=dp, i=0; i < NDOSPART && ourpart == -1; 434 i++, dp2++) 435 if (letoh32(dp2->dp_size) && 436 dp2->dp_typ == DOSPTYP_OPENBSD) 437 ourpart = i; 438 if (ourpart == -1) 439 goto donot; 440 /* 441 * This is our MBR partition. need sector 442 * address for SCSI/IDE, cylinder for 443 * ESDI/ST506/RLL 444 */ 445 dp2 = &dp[ourpart]; 446 dospartoff = letoh32(dp2->dp_start) + part_blkno; 447 dospartend = dospartoff + letoh32(dp2->dp_size); 448 449 /* found our OpenBSD partition, finish up */ 450 if (partoffp) 451 goto notfat; 452 453 if (lp->d_ntracks == 0) 454 lp->d_ntracks = dp2->dp_ehd + 1; 455 if (lp->d_nsectors == 0) 456 lp->d_nsectors = DPSECT(dp2->dp_esect); 457 if (lp->d_secpercyl == 0) 458 lp->d_secpercyl = lp->d_ntracks * 459 lp->d_nsectors; 460 } 461 donot: 462 /* 463 * In case the disklabel read below fails, we want to 464 * provide a fake label in i-p. 465 */ 466 for (dp2=dp, i=0; i < NDOSPART && n < 8; i++, dp2++) { 467 struct partition *pp = &lp->d_partitions[8+n]; 468 u_int8_t fstype; 469 470 if (dp2->dp_typ == DOSPTYP_OPENBSD) 471 continue; 472 if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp)) 473 continue; 474 if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp)) 475 continue; 476 if (letoh32(dp2->dp_size) == 0) 477 continue; 478 479 switch (dp2->dp_typ) { 480 case DOSPTYP_UNUSED: 481 fstype = FS_UNUSED; 482 n++; 483 break; 484 485 case DOSPTYP_LINUX: 486 fstype = FS_EXT2FS; 487 n++; 488 break; 489 490 case DOSPTYP_NTFS: 491 fstype = FS_NTFS; 492 n++; 493 break; 494 495 case DOSPTYP_FAT12: 496 case DOSPTYP_FAT16S: 497 case DOSPTYP_FAT16B: 498 case DOSPTYP_FAT16L: 499 case DOSPTYP_FAT32: 500 case DOSPTYP_FAT32L: 501 fstype = FS_MSDOS; 502 n++; 503 break; 504 case DOSPTYP_EXTEND: 505 case DOSPTYP_EXTENDL: 506 part_blkno = letoh32(dp2->dp_start) + extoff; 507 if (!extoff) { 508 extoff = letoh32(dp2->dp_start); 509 part_blkno = 0; 510 } 511 wander = 1; 512 continue; 513 break; 514 default: 515 fstype = FS_OTHER; 516 n++; 517 break; 518 } 519 520 /* 521 * Don't set fstype/offset/size when just looking for 522 * the offset of the OpenBSD partition. It would 523 * invalidate the disklabel checksum! 524 */ 525 if (partoffp) 526 continue; 527 528 pp->p_fstype = fstype; 529 if (letoh32(dp2->dp_start)) 530 DL_SETPOFFSET(pp, 531 letoh32(dp2->dp_start) + part_blkno); 532 DL_SETPSIZE(pp, letoh32(dp2->dp_size)); 533 } 534 } 535 if (partoffp) 536 /* dospartoff has been set and we must not modify *lp. */ 537 goto notfat; 538 539 lp->d_npartitions = MAXPARTITIONS; 540 541 if (n == 0 && part_blkno == DOSBBSECTOR) { 542 u_int16_t fattest; 543 544 /* Check for a valid initial jmp instruction. */ 545 switch ((u_int8_t)bp->b_data[0]) { 546 case 0xeb: 547 /* 548 * Two-byte jmp instruction. The 2nd byte is the number 549 * of bytes to jmp and the 3rd byte must be a NOP. 550 */ 551 if ((u_int8_t)bp->b_data[2] != 0x90) 552 goto notfat; 553 break; 554 case 0xe9: 555 /* 556 * Three-byte jmp instruction. The next two bytes are a 557 * little-endian 16 bit value. 558 */ 559 break; 560 default: 561 goto notfat; 562 break; 563 } 564 565 /* Check for a valid bytes per sector value. */ 566 fattest = ((bp->b_data[12] << 8) & 0xff00) | 567 (bp->b_data[11] & 0xff); 568 if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0)) 569 goto notfat; 570 571 /* Looks like a FAT filesystem. Spoof 'i'. */ 572 DL_SETPSIZE(&lp->d_partitions['i' - 'a'], 573 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 574 DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0); 575 lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS; 576 } 577 notfat: 578 /* record the OpenBSD partition's placement for the caller */ 579 if (partoffp) 580 *partoffp = dospartoff; 581 else { 582 DL_SETBSTART(lp, dospartoff); 583 DL_SETBEND(lp, 584 dospartend < DL_GETDSIZE(lp) ? dospartend : DL_GETDSIZE(lp)); 585 } 586 587 /* don't read the on-disk label if we are in spoofed-only mode */ 588 if (spoofonly) 589 return (0); 590 591 bp->b_blkno = DL_BLKTOSEC(lp, dospartoff + DOS_LABELSECTOR) * 592 DL_BLKSPERSEC(lp); 593 offset = DL_BLKOFFSET(lp, dospartoff + DOS_LABELSECTOR); 594 bp->b_bcount = lp->d_secsize; 595 bp->b_flags = B_BUSY | B_READ | B_RAW; 596 (*strat)(bp); 597 if (biowait(bp)) 598 return (bp->b_error); 599 600 /* sub-MBR disklabels are always at a LABELOFFSET of 0 */ 601 return checkdisklabel(bp->b_data + offset, lp, dospartoff, dospartend); 602 } 603 604 /* 605 * Check new disk label for sensibility before setting it. 606 */ 607 int 608 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 609 { 610 struct partition *opp, *npp; 611 struct disk *dk; 612 u_int64_t uid; 613 int i; 614 615 /* sanity clause */ 616 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 617 (nlp->d_secsize % DEV_BSIZE) != 0) 618 return (EINVAL); 619 620 /* special case to allow disklabel to be invalidated */ 621 if (nlp->d_magic == 0xffffffff) { 622 *olp = *nlp; 623 return (0); 624 } 625 626 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 627 dkcksum(nlp) != 0) 628 return (EINVAL); 629 630 /* XXX missing check if other dos partitions will be overwritten */ 631 632 while (openmask != 0) { 633 i = ffs(openmask) - 1; 634 openmask &= ~(1 << i); 635 if (nlp->d_npartitions <= i) 636 return (EBUSY); 637 opp = &olp->d_partitions[i]; 638 npp = &nlp->d_partitions[i]; 639 if (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 640 DL_GETPSIZE(npp) < DL_GETPSIZE(opp)) 641 return (EBUSY); 642 /* 643 * Copy internally-set partition information 644 * if new label doesn't include it. XXX 645 */ 646 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 647 npp->p_fstype = opp->p_fstype; 648 npp->p_fragblock = opp->p_fragblock; 649 npp->p_cpg = opp->p_cpg; 650 } 651 } 652 653 /* Generate a UID if the disklabel does not already have one. */ 654 uid = 0; 655 if (bcmp(nlp->d_uid, &uid, sizeof(nlp->d_uid)) == 0) { 656 do { 657 arc4random_buf(nlp->d_uid, sizeof(nlp->d_uid)); 658 TAILQ_FOREACH(dk, &disklist, dk_link) 659 if (dk->dk_label && bcmp(dk->dk_label->d_uid, 660 nlp->d_uid, sizeof(nlp->d_uid)) == 0) 661 break; 662 } while (dk != NULL); 663 } 664 665 nlp->d_checksum = 0; 666 nlp->d_checksum = dkcksum(nlp); 667 *olp = *nlp; 668 return (0); 669 } 670 671 /* 672 * Determine the size of the transfer, and make sure it is within the 673 * boundaries of the partition. Adjust transfer if needed, and signal errors or 674 * early completion. 675 */ 676 int 677 bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 678 { 679 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 680 daddr64_t sz = howmany(bp->b_bcount, DEV_BSIZE); 681 682 /* Avoid division by zero, negative offsets and negative sizes. */ 683 if (lp->d_secpercyl == 0 || bp->b_blkno < 0 || sz < 0) 684 goto bad; 685 686 /* beyond partition? */ 687 if (bp->b_blkno + sz > DL_SECTOBLK(lp, DL_GETPSIZE(p))) { 688 sz = DL_SECTOBLK(lp, DL_GETPSIZE(p)) - bp->b_blkno; 689 if (sz == 0) { 690 /* If exactly at end of disk, return EOF. */ 691 bp->b_resid = bp->b_bcount; 692 return (-1); 693 } 694 if (sz < 0) 695 /* If past end of disk, return EINVAL. */ 696 goto bad; 697 698 /* Otherwise, truncate request. */ 699 bp->b_bcount = sz << DEV_BSHIFT; 700 } 701 702 /* calculate cylinder for disksort to order transfers with */ 703 bp->b_cylinder = (bp->b_blkno + DL_SECTOBLK(lp, DL_GETPOFFSET(p))) / 704 DL_SECTOBLK(lp, lp->d_secpercyl); 705 return (1); 706 707 bad: 708 bp->b_error = EINVAL; 709 bp->b_flags |= B_ERROR; 710 return (-1); 711 } 712 713 /* 714 * Disk error is the preface to plaintive error messages 715 * about failing disk transfers. It prints messages of the form 716 717 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 718 719 * if the offset of the error in the transfer and a disk label 720 * are both available. blkdone should be -1 if the position of the error 721 * is unknown; the disklabel pointer may be null from drivers that have not 722 * been converted to use them. The message is printed with printf 723 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 724 * The message should be completed (with at least a newline) with printf 725 * or addlog, respectively. There is no trailing space. 726 */ 727 void 728 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 729 struct disklabel *lp) 730 { 731 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 732 int (*pr)(const char *, ...); 733 char partname = 'a' + part; 734 daddr64_t sn; 735 736 if (pri != LOG_PRINTF) { 737 static const char fmt[] = ""; 738 log(pri, fmt); 739 pr = addlog; 740 } else 741 pr = printf; 742 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 743 bp->b_flags & B_READ ? "read" : "writ"); 744 sn = bp->b_blkno; 745 if (bp->b_bcount <= DEV_BSIZE) 746 (*pr)("%lld", sn); 747 else { 748 if (blkdone >= 0) { 749 sn += blkdone; 750 (*pr)("%lld of ", sn); 751 } 752 (*pr)("%lld-%lld", bp->b_blkno, 753 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 754 } 755 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 756 sn += DL_GETPOFFSET(&lp->d_partitions[part]); 757 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, sn, 758 sn / lp->d_secpercyl); 759 sn %= lp->d_secpercyl; 760 (*pr)(" tn %lld sn %lld)", sn / lp->d_nsectors, 761 sn % lp->d_nsectors); 762 } 763 } 764 765 /* 766 * Initialize the disklist. Called by main() before autoconfiguration. 767 */ 768 void 769 disk_init(void) 770 { 771 772 TAILQ_INIT(&disklist); 773 disk_count = disk_change = 0; 774 } 775 776 int 777 disk_construct(struct disk *diskp, char *lockname) 778 { 779 rw_init(&diskp->dk_lock, "dklk"); 780 mtx_init(&diskp->dk_mtx, IPL_BIO); 781 782 diskp->dk_flags |= DKF_CONSTRUCTED; 783 784 return (0); 785 } 786 787 /* 788 * Attach a disk. 789 */ 790 void 791 disk_attach(struct disk *diskp) 792 { 793 794 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 795 disk_construct(diskp, diskp->dk_name); 796 797 /* 798 * Allocate and initialize the disklabel structures. Note that 799 * it's not safe to sleep here, since we're probably going to be 800 * called during autoconfiguration. 801 */ 802 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 803 M_NOWAIT|M_ZERO); 804 if (diskp->dk_label == NULL) 805 panic("disk_attach: can't allocate storage for disklabel"); 806 807 /* 808 * Set the attached timestamp. 809 */ 810 microuptime(&diskp->dk_attachtime); 811 812 /* 813 * Link into the disklist. 814 */ 815 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 816 ++disk_count; 817 disk_change = 1; 818 819 if (softraid_disk_attach) 820 softraid_disk_attach(diskp, 1); 821 } 822 823 /* 824 * Detach a disk. 825 */ 826 void 827 disk_detach(struct disk *diskp) 828 { 829 830 if (softraid_disk_attach) 831 softraid_disk_attach(diskp, -1); 832 833 /* 834 * Free the space used by the disklabel structures. 835 */ 836 free(diskp->dk_label, M_DEVBUF); 837 838 /* 839 * Remove from the disklist. 840 */ 841 TAILQ_REMOVE(&disklist, diskp, dk_link); 842 disk_change = 1; 843 if (--disk_count < 0) 844 panic("disk_detach: disk_count < 0"); 845 } 846 847 /* 848 * Increment a disk's busy counter. If the counter is going from 849 * 0 to 1, set the timestamp. 850 */ 851 void 852 disk_busy(struct disk *diskp) 853 { 854 855 /* 856 * XXX We'd like to use something as accurate as microtime(), 857 * but that doesn't depend on the system TOD clock. 858 */ 859 mtx_enter(&diskp->dk_mtx); 860 if (diskp->dk_busy++ == 0) 861 microuptime(&diskp->dk_timestamp); 862 mtx_leave(&diskp->dk_mtx); 863 } 864 865 /* 866 * Decrement a disk's busy counter, increment the byte count, total busy 867 * time, and reset the timestamp. 868 */ 869 void 870 disk_unbusy(struct disk *diskp, long bcount, int read) 871 { 872 struct timeval dv_time, diff_time; 873 874 mtx_enter(&diskp->dk_mtx); 875 876 if (diskp->dk_busy-- == 0) 877 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 878 879 microuptime(&dv_time); 880 881 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 882 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 883 884 diskp->dk_timestamp = dv_time; 885 if (bcount > 0) { 886 if (read) { 887 diskp->dk_rbytes += bcount; 888 diskp->dk_rxfer++; 889 } else { 890 diskp->dk_wbytes += bcount; 891 diskp->dk_wxfer++; 892 } 893 } else 894 diskp->dk_seek++; 895 896 mtx_leave(&diskp->dk_mtx); 897 898 add_disk_randomness(bcount ^ diff_time.tv_usec); 899 } 900 901 int 902 disk_lock(struct disk *dk) 903 { 904 int error; 905 906 error = rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR); 907 908 return (error); 909 } 910 911 void 912 disk_unlock(struct disk *dk) 913 { 914 rw_exit(&dk->dk_lock); 915 } 916 917 int 918 dk_mountroot(void) 919 { 920 dev_t rawdev, rrootdev; 921 int part = DISKPART(rootdev); 922 int (*mountrootfn)(void); 923 struct disklabel dl; 924 struct vnode *vn; 925 int error; 926 927 rrootdev = blktochr(rootdev); 928 rawdev = MAKEDISKDEV(major(rrootdev), DISKUNIT(rootdev), RAW_PART); 929 #ifdef DEBUG 930 printf("rootdev=0x%x rrootdev=0x%x rawdev=0x%x\n", rootdev, 931 rrootdev, rawdev); 932 #endif 933 934 /* 935 * open device, ioctl for the disklabel, and close it. 936 */ 937 if (cdevvp(rawdev, &vn)) 938 panic("cannot obtain vnode for 0x%x/0x%x", rootdev, rrootdev); 939 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 940 if (error) 941 panic("cannot open disk, 0x%x/0x%x, error %d", 942 rootdev, rrootdev, error); 943 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&dl, FREAD, NOCRED, 0); 944 if (error) 945 panic("cannot read disk label, 0x%x/0x%x, error %d", 946 rootdev, rrootdev, error); 947 error = VOP_CLOSE(vn, FREAD, NOCRED, 0); 948 if (error) 949 panic("cannot close disk , 0x%x/0x%x, error %d", 950 rootdev, rrootdev, error); 951 vput(vn); 952 953 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 954 panic("root filesystem has size 0"); 955 switch (dl.d_partitions[part].p_fstype) { 956 #ifdef EXT2FS 957 case FS_EXT2FS: 958 { 959 extern int ext2fs_mountroot(void); 960 mountrootfn = ext2fs_mountroot; 961 } 962 break; 963 #endif 964 #ifdef FFS 965 case FS_BSDFFS: 966 { 967 extern int ffs_mountroot(void); 968 mountrootfn = ffs_mountroot; 969 } 970 break; 971 #endif 972 #ifdef CD9660 973 case FS_ISO9660: 974 { 975 extern int cd9660_mountroot(void); 976 mountrootfn = cd9660_mountroot; 977 } 978 break; 979 #endif 980 default: 981 #ifdef FFS 982 { 983 extern int ffs_mountroot(void); 984 985 printf("filesystem type %d not known.. assuming ffs\n", 986 dl.d_partitions[part].p_fstype); 987 mountrootfn = ffs_mountroot; 988 } 989 #else 990 panic("disk 0x%x/0x%x filesystem type %d not known", 991 rootdev, rrootdev, dl.d_partitions[part].p_fstype); 992 #endif 993 } 994 return (*mountrootfn)(); 995 } 996 997 struct device * 998 getdisk(char *str, int len, int defpart, dev_t *devp) 999 { 1000 struct device *dv; 1001 1002 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1003 printf("use one of: exit"); 1004 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1005 if (dv->dv_class == DV_DISK) 1006 printf(" %s[a-p]", dv->dv_xname); 1007 #if defined(NFSCLIENT) 1008 if (dv->dv_class == DV_IFNET) 1009 printf(" %s", dv->dv_xname); 1010 #endif 1011 } 1012 printf("\n"); 1013 } 1014 return (dv); 1015 } 1016 1017 struct device * 1018 parsedisk(char *str, int len, int defpart, dev_t *devp) 1019 { 1020 struct device *dv; 1021 int majdev, part = defpart; 1022 char c; 1023 1024 if (len == 0) 1025 return (NULL); 1026 c = str[len-1]; 1027 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1028 part = c - 'a'; 1029 len -= 1; 1030 } 1031 1032 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1033 if (dv->dv_class == DV_DISK && 1034 strncmp(str, dv->dv_xname, len) == 0 && 1035 dv->dv_xname[len] == '\0') { 1036 majdev = findblkmajor(dv); 1037 if (majdev < 0) 1038 panic("parsedisk"); 1039 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1040 break; 1041 } 1042 #if defined(NFSCLIENT) 1043 if (dv->dv_class == DV_IFNET && 1044 strncmp(str, dv->dv_xname, len) == 0 && 1045 dv->dv_xname[len] == '\0') { 1046 *devp = NODEV; 1047 break; 1048 } 1049 #endif 1050 } 1051 1052 return (dv); 1053 } 1054 1055 void 1056 setroot(struct device *bootdv, int part, int exitflags) 1057 { 1058 int majdev, unit, len, s; 1059 struct swdevt *swp; 1060 struct device *rootdv, *dv; 1061 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1062 struct ifnet *ifp = NULL; 1063 char buf[128]; 1064 #if defined(NFSCLIENT) 1065 extern char *nfsbootdevname; 1066 #endif 1067 1068 /* 1069 * If `swap generic' and we couldn't determine boot device, 1070 * ask the user. 1071 */ 1072 if (mountroot == NULL && bootdv == NULL) 1073 boothowto |= RB_ASKNAME; 1074 if (boothowto & RB_ASKNAME) { 1075 while (1) { 1076 printf("root device"); 1077 if (bootdv != NULL) { 1078 printf(" (default %s", bootdv->dv_xname); 1079 if (bootdv->dv_class == DV_DISK) 1080 printf("%c", 'a' + part); 1081 printf(")"); 1082 } 1083 printf(": "); 1084 s = splhigh(); 1085 cnpollc(TRUE); 1086 len = getsn(buf, sizeof(buf)); 1087 cnpollc(FALSE); 1088 splx(s); 1089 if (strcmp(buf, "exit") == 0) 1090 boot(exitflags); 1091 if (len == 0 && bootdv != NULL) { 1092 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1093 len = strlen(buf); 1094 } 1095 if (len > 0 && buf[len - 1] == '*') { 1096 buf[--len] = '\0'; 1097 dv = getdisk(buf, len, part, &nrootdev); 1098 if (dv != NULL) { 1099 rootdv = dv; 1100 nswapdev = nrootdev; 1101 goto gotswap; 1102 } 1103 } 1104 dv = getdisk(buf, len, part, &nrootdev); 1105 if (dv != NULL) { 1106 rootdv = dv; 1107 break; 1108 } 1109 } 1110 1111 if (rootdv->dv_class == DV_IFNET) 1112 goto gotswap; 1113 1114 /* try to build swap device out of new root device */ 1115 while (1) { 1116 printf("swap device"); 1117 if (rootdv != NULL) 1118 printf(" (default %s%s)", rootdv->dv_xname, 1119 rootdv->dv_class == DV_DISK ? "b" : ""); 1120 printf(": "); 1121 s = splhigh(); 1122 cnpollc(TRUE); 1123 len = getsn(buf, sizeof(buf)); 1124 cnpollc(FALSE); 1125 splx(s); 1126 if (strcmp(buf, "exit") == 0) 1127 boot(exitflags); 1128 if (len == 0 && rootdv != NULL) { 1129 switch (rootdv->dv_class) { 1130 case DV_IFNET: 1131 nswapdev = NODEV; 1132 break; 1133 case DV_DISK: 1134 nswapdev = MAKEDISKDEV(major(nrootdev), 1135 DISKUNIT(nrootdev), 1); 1136 if (nswapdev == nrootdev) 1137 continue; 1138 break; 1139 default: 1140 break; 1141 } 1142 break; 1143 } 1144 dv = getdisk(buf, len, 1, &nswapdev); 1145 if (dv) { 1146 if (dv->dv_class == DV_IFNET) 1147 nswapdev = NODEV; 1148 if (nswapdev == nrootdev) 1149 continue; 1150 break; 1151 } 1152 } 1153 gotswap: 1154 rootdev = nrootdev; 1155 dumpdev = nswapdev; 1156 swdevt[0].sw_dev = nswapdev; 1157 swdevt[1].sw_dev = NODEV; 1158 #if defined(NFSCLIENT) 1159 } else if (mountroot == nfs_mountroot) { 1160 rootdv = bootdv; 1161 rootdev = dumpdev = swapdev = NODEV; 1162 #endif 1163 } else if (mountroot == NULL && rootdev == NODEV) { 1164 /* 1165 * `swap generic' 1166 */ 1167 rootdv = bootdv; 1168 majdev = findblkmajor(rootdv); 1169 if (majdev >= 0) { 1170 /* 1171 * Root and swap are on the disk. 1172 * Assume swap is on partition b. 1173 */ 1174 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1175 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1176 } else { 1177 /* 1178 * Root and swap are on a net. 1179 */ 1180 nswapdev = NODEV; 1181 } 1182 dumpdev = nswapdev; 1183 swdevt[0].sw_dev = nswapdev; 1184 /* swdevt[1].sw_dev = NODEV; */ 1185 } else { 1186 /* Completely pre-configured, but we want rootdv .. */ 1187 majdev = major(rootdev); 1188 if (findblkname(majdev) == NULL) 1189 return; 1190 unit = DISKUNIT(rootdev); 1191 part = DISKPART(rootdev); 1192 snprintf(buf, sizeof buf, "%s%d%c", 1193 findblkname(majdev), unit, 'a' + part); 1194 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1195 if (rootdv == NULL) 1196 panic("root device (%s) not found", buf); 1197 } 1198 1199 if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET) 1200 ifp = ifunit(rootdv->dv_xname); 1201 else if (bootdv && bootdv->dv_class == DV_IFNET) 1202 ifp = ifunit(bootdv->dv_xname); 1203 1204 if (ifp) 1205 if_addgroup(ifp, "netboot"); 1206 1207 switch (rootdv->dv_class) { 1208 #if defined(NFSCLIENT) 1209 case DV_IFNET: 1210 mountroot = nfs_mountroot; 1211 nfsbootdevname = rootdv->dv_xname; 1212 return; 1213 #endif 1214 case DV_DISK: 1215 mountroot = dk_mountroot; 1216 part = DISKPART(rootdev); 1217 break; 1218 default: 1219 printf("can't figure root, hope your kernel is right\n"); 1220 return; 1221 } 1222 1223 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1224 1225 /* 1226 * Make the swap partition on the root drive the primary swap. 1227 */ 1228 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1229 if (major(rootdev) == major(swp->sw_dev) && 1230 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1231 temp = swdevt[0].sw_dev; 1232 swdevt[0].sw_dev = swp->sw_dev; 1233 swp->sw_dev = temp; 1234 break; 1235 } 1236 } 1237 if (swp->sw_dev != NODEV) { 1238 /* 1239 * If dumpdev was the same as the old primary swap device, 1240 * move it to the new primary swap device. 1241 */ 1242 if (temp == dumpdev) 1243 dumpdev = swdevt[0].sw_dev; 1244 } 1245 if (swdevt[0].sw_dev != NODEV) 1246 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1247 DISKUNIT(swdevt[0].sw_dev), 1248 'a' + DISKPART(swdevt[0].sw_dev)); 1249 if (dumpdev != NODEV) 1250 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1251 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1252 printf("\n"); 1253 } 1254 1255 extern struct nam2blk nam2blk[]; 1256 1257 int 1258 findblkmajor(struct device *dv) 1259 { 1260 char buf[16], *p; 1261 int i; 1262 1263 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1264 return (-1); 1265 for (p = buf; *p; p++) 1266 if (*p >= '0' && *p <= '9') 1267 *p = '\0'; 1268 1269 for (i = 0; nam2blk[i].name; i++) 1270 if (!strcmp(buf, nam2blk[i].name)) 1271 return (nam2blk[i].maj); 1272 return (-1); 1273 } 1274 1275 char * 1276 findblkname(int maj) 1277 { 1278 int i; 1279 1280 for (i = 0; nam2blk[i].name; i++) 1281 if (nam2blk[i].maj == maj) 1282 return (nam2blk[i].name); 1283 return (NULL); 1284 } 1285 1286 int 1287 disk_map(char *path, char *mappath, int size, int flags) 1288 { 1289 struct disk *dk, *mdk; 1290 u_char uid[8]; 1291 char c, part; 1292 int i; 1293 1294 /* 1295 * Attempt to map a request for a disklabel UID to the correct device. 1296 * We should be supplied with a disklabel UID which has the following 1297 * format: 1298 * 1299 * [disklabel uid] . [partition] 1300 * 1301 * Alternatively, if the DM_OPENPART flag is set the disklabel UID can 1302 * based passed on its own. 1303 */ 1304 1305 if (strchr(path, '/') != NULL) 1306 return -1; 1307 1308 /* Verify that the device name is properly formed. */ 1309 if (!((strlen(path) == 16 && (flags & DM_OPENPART)) || 1310 (strlen(path) == 18 && path[16] == '.'))) 1311 return -1; 1312 1313 /* Get partition. */ 1314 if (flags & DM_OPENPART) 1315 part = 'a' + RAW_PART; 1316 else 1317 part = path[17]; 1318 1319 if (part < 'a' || part >= 'a' + MAXPARTITIONS) 1320 return -1; 1321 1322 /* Derive label UID. */ 1323 bzero(uid, sizeof(uid)); 1324 for (i = 0; i < 16; i++) { 1325 c = path[i]; 1326 if (c >= '0' && c <= '9') 1327 c -= '0'; 1328 else if (c >= 'a' && c <= 'f') 1329 c -= ('a' - 10); 1330 else 1331 return -1; 1332 1333 uid[i / 2] <<= 4; 1334 uid[i / 2] |= c & 0xf; 1335 } 1336 1337 mdk = NULL; 1338 TAILQ_FOREACH(dk, &disklist, dk_link) { 1339 if (dk->dk_label && bcmp(dk->dk_label->d_uid, uid, 1340 sizeof(dk->dk_label->d_uid)) == 0) { 1341 /* Fail if there are duplicate UIDs! */ 1342 if (mdk != NULL) 1343 return -1; 1344 mdk = dk; 1345 } 1346 } 1347 1348 if (mdk == NULL || mdk->dk_name == NULL) 1349 return -1; 1350 1351 snprintf(mappath, size, "/dev/%s%s%c", 1352 (flags & DM_OPENBLCK) ? "" : "r", mdk->dk_name, part); 1353 1354 return 0; 1355 } 1356