1 /* $OpenBSD: subr_disk.c,v 1.227 2016/05/21 14:22:31 jsing Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/stat.h> 48 #include <sys/syslog.h> 49 #include <sys/device.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/conf.h> 53 #include <sys/lock.h> 54 #include <sys/disk.h> 55 #include <sys/reboot.h> 56 #include <sys/dkio.h> 57 #include <sys/vnode.h> 58 #include <sys/task.h> 59 #include <sys/stdint.h> 60 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 64 #include <net/if.h> 65 66 #include <dev/rndvar.h> 67 #include <dev/cons.h> 68 69 #include <lib/libz/zlib.h> 70 71 #include "softraid.h" 72 73 #ifdef DEBUG 74 #define DPRINTF(x...) printf(x) 75 #else 76 #define DPRINTF(x...) 77 #endif 78 79 /* 80 * A global list of all disks attached to the system. May grow or 81 * shrink over time. 82 */ 83 struct disklist_head disklist; /* TAILQ_HEAD */ 84 int disk_count; /* number of drives in global disklist */ 85 int disk_change; /* set if a disk has been attached/detached 86 * since last we looked at this variable. This 87 * is reset by hw_sysctl() 88 */ 89 90 #define DUID_SIZE 8 91 92 u_char bootduid[DUID_SIZE]; /* DUID of boot disk. */ 93 u_char rootduid[DUID_SIZE]; /* DUID of root disk. */ 94 95 /* softraid callback, do not use! */ 96 void (*softraid_disk_attach)(struct disk *, int); 97 98 void sr_map_root(void); 99 100 struct disk_attach_task { 101 struct task task; 102 struct disk *dk; 103 }; 104 105 void disk_attach_callback(void *); 106 107 int spoofgptlabel(struct buf *, void (*)(struct buf *), struct disklabel *); 108 109 int gpt_chk_mbr(struct dos_partition *, u_int64_t); 110 int gpt_chk_hdr(struct gpt_header *, struct disklabel *); 111 int gpt_chk_parts(struct gpt_header *, struct gpt_partition *); 112 int gpt_get_fstype(struct uuid *); 113 114 int duid_equal(u_char *, u_char *); 115 116 /* 117 * Compute checksum for disk label. 118 */ 119 u_int 120 dkcksum(struct disklabel *lp) 121 { 122 u_int16_t *start, *end; 123 u_int16_t sum = 0; 124 125 start = (u_int16_t *)lp; 126 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 127 while (start < end) 128 sum ^= *start++; 129 return (sum); 130 } 131 132 int 133 initdisklabel(struct disklabel *lp) 134 { 135 int i; 136 137 /* minimal requirements for archetypal disk label */ 138 if (lp->d_secsize < DEV_BSIZE) 139 lp->d_secsize = DEV_BSIZE; 140 if (DL_GETDSIZE(lp) == 0) 141 DL_SETDSIZE(lp, MAXDISKSIZE); 142 if (lp->d_secpercyl == 0) 143 return (ERANGE); 144 lp->d_npartitions = MAXPARTITIONS; 145 for (i = 0; i < RAW_PART; i++) { 146 DL_SETPSIZE(&lp->d_partitions[i], 0); 147 DL_SETPOFFSET(&lp->d_partitions[i], 0); 148 } 149 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 150 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 151 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 152 DL_SETBSTART(lp, 0); 153 DL_SETBEND(lp, DL_GETDSIZE(lp)); 154 lp->d_version = 1; 155 lp->d_bbsize = 8192; 156 lp->d_sbsize = 64*1024; /* XXX ? */ 157 return (0); 158 } 159 160 /* 161 * Check an incoming block to make sure it is a disklabel, convert it to 162 * a newer version if needed, etc etc. 163 */ 164 int 165 checkdisklabel(void *rlp, struct disklabel *lp, u_int64_t boundstart, 166 u_int64_t boundend) 167 { 168 struct disklabel *dlp = rlp; 169 struct __partitionv0 *v0pp; 170 struct partition *pp; 171 u_int64_t disksize; 172 int error = 0; 173 int i; 174 175 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) 176 error = ENOENT; /* no disk label */ 177 else if (dlp->d_npartitions > MAXPARTITIONS) 178 error = E2BIG; /* too many partitions */ 179 else if (dlp->d_secpercyl == 0) 180 error = EINVAL; /* invalid label */ 181 else if (dlp->d_secsize == 0) 182 error = ENOSPC; /* disk too small */ 183 else if (dkcksum(dlp) != 0) 184 error = EINVAL; /* incorrect checksum */ 185 186 if (error) { 187 u_int16_t *start, *end, sum = 0; 188 189 /* If it is byte-swapped, attempt to convert it */ 190 if (swap32(dlp->d_magic) != DISKMAGIC || 191 swap32(dlp->d_magic2) != DISKMAGIC || 192 swap16(dlp->d_npartitions) > MAXPARTITIONS) 193 return (error); 194 195 /* 196 * Need a byte-swap aware dkcksum variant 197 * inlined, because dkcksum uses a sub-field 198 */ 199 start = (u_int16_t *)dlp; 200 end = (u_int16_t *)&dlp->d_partitions[ 201 swap16(dlp->d_npartitions)]; 202 while (start < end) 203 sum ^= *start++; 204 if (sum != 0) 205 return (error); 206 207 dlp->d_magic = swap32(dlp->d_magic); 208 dlp->d_type = swap16(dlp->d_type); 209 210 /* d_typename and d_packname are strings */ 211 212 dlp->d_secsize = swap32(dlp->d_secsize); 213 dlp->d_nsectors = swap32(dlp->d_nsectors); 214 dlp->d_ntracks = swap32(dlp->d_ntracks); 215 dlp->d_ncylinders = swap32(dlp->d_ncylinders); 216 dlp->d_secpercyl = swap32(dlp->d_secpercyl); 217 dlp->d_secperunit = swap32(dlp->d_secperunit); 218 219 /* d_uid is a string */ 220 221 dlp->d_acylinders = swap32(dlp->d_acylinders); 222 223 dlp->d_flags = swap32(dlp->d_flags); 224 225 for (i = 0; i < NDDATA; i++) 226 dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]); 227 228 dlp->d_secperunith = swap16(dlp->d_secperunith); 229 dlp->d_version = swap16(dlp->d_version); 230 231 for (i = 0; i < NSPARE; i++) 232 dlp->d_spare[i] = swap32(dlp->d_spare[i]); 233 234 dlp->d_magic2 = swap32(dlp->d_magic2); 235 236 dlp->d_npartitions = swap16(dlp->d_npartitions); 237 dlp->d_bbsize = swap32(dlp->d_bbsize); 238 dlp->d_sbsize = swap32(dlp->d_sbsize); 239 240 for (i = 0; i < MAXPARTITIONS; i++) { 241 pp = &dlp->d_partitions[i]; 242 pp->p_size = swap32(pp->p_size); 243 pp->p_offset = swap32(pp->p_offset); 244 if (dlp->d_version == 0) { 245 v0pp = (struct __partitionv0 *)pp; 246 v0pp->p_fsize = swap32(v0pp->p_fsize); 247 } else { 248 pp->p_offseth = swap16(pp->p_offseth); 249 pp->p_sizeh = swap16(pp->p_sizeh); 250 } 251 pp->p_cpg = swap16(pp->p_cpg); 252 } 253 254 dlp->d_checksum = 0; 255 dlp->d_checksum = dkcksum(dlp); 256 error = 0; 257 } 258 259 /* XXX should verify lots of other fields and whine a lot */ 260 261 /* Initial passed in lp contains the real disk size. */ 262 disksize = DL_GETDSIZE(lp); 263 264 if (lp != dlp) 265 *lp = *dlp; 266 267 if (lp->d_version == 0) { 268 lp->d_version = 1; 269 lp->d_secperunith = 0; 270 271 v0pp = (struct __partitionv0 *)lp->d_partitions; 272 pp = lp->d_partitions; 273 for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) { 274 pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp-> 275 p_fsize, v0pp->p_frag); 276 pp->p_offseth = 0; 277 pp->p_sizeh = 0; 278 } 279 } 280 281 #ifdef DEBUG 282 if (DL_GETDSIZE(lp) != disksize) 283 printf("on-disk disklabel has incorrect disksize (%llu)\n", 284 DL_GETDSIZE(lp)); 285 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize) 286 printf("on-disk disklabel RAW_PART has incorrect size (%llu)\n", 287 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 288 if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0) 289 printf("on-disk disklabel RAW_PART offset != 0 (%llu)\n", 290 DL_GETPOFFSET(&lp->d_partitions[RAW_PART])); 291 #endif 292 DL_SETDSIZE(lp, disksize); 293 DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize); 294 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 295 DL_SETBSTART(lp, boundstart); 296 DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp)); 297 298 lp->d_checksum = 0; 299 lp->d_checksum = dkcksum(lp); 300 return (0); 301 } 302 303 /* 304 * Read a disk sector. 305 */ 306 int 307 readdisksector(struct buf *bp, void (*strat)(struct buf *), 308 struct disklabel *lp, u_int64_t sector) 309 { 310 bp->b_blkno = DL_SECTOBLK(lp, sector); 311 bp->b_bcount = lp->d_secsize; 312 bp->b_error = 0; 313 CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR); 314 SET(bp->b_flags, B_BUSY | B_READ | B_RAW); 315 316 (*strat)(bp); 317 318 return (biowait(bp)); 319 } 320 321 /* 322 * If dos partition table requested, attempt to load it and 323 * find disklabel inside a DOS partition. Return buffer 324 * for use in signalling errors if requested. 325 * 326 * We would like to check if each MBR has a valid BOOT_MAGIC, but 327 * we cannot because it doesn't always exist. So.. we assume the 328 * MBR is valid. 329 */ 330 int 331 readdoslabel(struct buf *bp, void (*strat)(struct buf *), 332 struct disklabel *lp, daddr_t *partoffp, int spoofonly) 333 { 334 struct disklabel *gptlp; 335 u_int64_t dospartoff = 0, dospartend = DL_GETBEND(lp); 336 int i, ourpart = -1, wander = 1, n = 0, loop = 0, offset; 337 struct dos_partition dp[NDOSPART], *dp2; 338 u_int64_t sector = DOSBBSECTOR; 339 u_int32_t extoff = 0; 340 int error; 341 342 if (lp->d_secpercyl == 0) 343 return (EINVAL); /* invalid label */ 344 if (lp->d_secsize == 0) 345 return (ENOSPC); /* disk too small */ 346 347 /* do DOS partitions in the process of getting disklabel? */ 348 349 /* 350 * Read dos partition table, follow extended partitions. 351 * Map the partitions to disklabel entries i-p 352 */ 353 while (wander && loop < DOS_MAXEBR) { 354 loop++; 355 wander = 0; 356 if (sector < extoff) 357 sector = extoff; 358 359 /* read MBR/EBR */ 360 error = readdisksector(bp, strat, lp, sector); 361 if (error) { 362 /*wrong*/ if (partoffp) 363 /*wrong*/ *partoffp = -1; 364 return (error); 365 } 366 367 bcopy(bp->b_data + DOSPARTOFF, dp, sizeof(dp)); 368 369 if (n == 0 && sector == DOSBBSECTOR) { 370 u_int16_t mbrtest; 371 372 /* Check the end of sector marker. */ 373 mbrtest = ((bp->b_data[510] << 8) & 0xff00) | 374 (bp->b_data[511] & 0xff); 375 if (mbrtest != 0x55aa) 376 goto notmbr; 377 378 if (gpt_chk_mbr(dp, DL_GETDSIZE(lp)) != 0) 379 goto notgpt; 380 381 gptlp = malloc(sizeof(struct disklabel), M_DEVBUF, 382 M_NOWAIT); 383 if (gptlp == NULL) 384 return (ENOMEM); 385 *gptlp = *lp; 386 error = spoofgptlabel(bp, strat, gptlp); 387 if (error == 0) { 388 dospartoff = DL_GETBSTART(gptlp); 389 dospartend = DL_GETBEND(gptlp); 390 if (partoffp) { 391 if (dospartoff == 0) 392 return (ENXIO); 393 else 394 goto notfat; 395 } 396 *lp = *gptlp; 397 free(gptlp, M_DEVBUF, 398 sizeof(struct disklabel)); 399 goto notfat; 400 } else { 401 free(gptlp, M_DEVBUF, 402 sizeof(struct disklabel)); 403 goto notmbr; 404 } 405 } 406 407 notgpt: 408 if (ourpart == -1) { 409 /* Search for our MBR partition */ 410 for (dp2=dp, i=0; i < NDOSPART && ourpart == -1; 411 i++, dp2++) 412 if (letoh32(dp2->dp_size) && 413 dp2->dp_typ == DOSPTYP_OPENBSD) 414 ourpart = i; 415 if (ourpart == -1) 416 goto donot; 417 /* 418 * This is our MBR partition. need sector 419 * address for SCSI/IDE, cylinder for 420 * ESDI/ST506/RLL 421 */ 422 dp2 = &dp[ourpart]; 423 dospartoff = letoh32(dp2->dp_start) + sector; 424 dospartend = dospartoff + letoh32(dp2->dp_size); 425 426 /* 427 * Record the OpenBSD partition's placement (in 428 * 512-byte blocks!) for the caller. No need to 429 * finish spoofing. 430 */ 431 if (partoffp) { 432 *partoffp = DL_SECTOBLK(lp, dospartoff); 433 return (0); 434 } 435 436 if (lp->d_ntracks == 0) 437 lp->d_ntracks = dp2->dp_ehd + 1; 438 if (lp->d_nsectors == 0) 439 lp->d_nsectors = DPSECT(dp2->dp_esect); 440 if (lp->d_secpercyl == 0) 441 lp->d_secpercyl = lp->d_ntracks * 442 lp->d_nsectors; 443 } 444 donot: 445 /* 446 * In case the disklabel read below fails, we want to 447 * provide a fake label in i-p. 448 */ 449 for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) { 450 struct partition *pp; 451 u_int8_t fstype; 452 453 if (dp2->dp_typ == DOSPTYP_OPENBSD || 454 dp2->dp_typ == DOSPTYP_EFI) 455 continue; 456 if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp)) 457 continue; 458 if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp)) 459 continue; 460 if (letoh32(dp2->dp_size) == 0) 461 continue; 462 463 switch (dp2->dp_typ) { 464 case DOSPTYP_UNUSED: 465 fstype = FS_UNUSED; 466 break; 467 468 case DOSPTYP_LINUX: 469 fstype = FS_EXT2FS; 470 break; 471 472 case DOSPTYP_NTFS: 473 fstype = FS_NTFS; 474 break; 475 476 case DOSPTYP_EFISYS: 477 case DOSPTYP_FAT12: 478 case DOSPTYP_FAT16S: 479 case DOSPTYP_FAT16B: 480 case DOSPTYP_FAT16L: 481 case DOSPTYP_FAT32: 482 case DOSPTYP_FAT32L: 483 fstype = FS_MSDOS; 484 break; 485 case DOSPTYP_EXTEND: 486 case DOSPTYP_EXTENDL: 487 sector = letoh32(dp2->dp_start) + extoff; 488 if (!extoff) { 489 extoff = letoh32(dp2->dp_start); 490 sector = 0; 491 } 492 wander = 1; 493 continue; 494 break; 495 default: 496 fstype = FS_OTHER; 497 break; 498 } 499 500 /* 501 * Don't set fstype/offset/size when just looking for 502 * the offset of the OpenBSD partition. It would 503 * invalidate the disklabel checksum! 504 * 505 * Don't try to spoof more than 8 partitions, i.e. 506 * 'i' -'p'. 507 */ 508 if (partoffp || n >= 8) 509 continue; 510 511 pp = &lp->d_partitions[8+n]; 512 n++; 513 pp->p_fstype = fstype; 514 if (letoh32(dp2->dp_start)) 515 DL_SETPOFFSET(pp, 516 letoh32(dp2->dp_start) + sector); 517 DL_SETPSIZE(pp, letoh32(dp2->dp_size)); 518 } 519 } 520 521 notmbr: 522 if (n == 0 && sector == DOSBBSECTOR && ourpart == -1) { 523 u_int16_t fattest; 524 525 /* Check for a valid initial jmp instruction. */ 526 switch ((u_int8_t)bp->b_data[0]) { 527 case 0xeb: 528 /* 529 * Two-byte jmp instruction. The 2nd byte is the number 530 * of bytes to jmp and the 3rd byte must be a NOP. 531 */ 532 if ((u_int8_t)bp->b_data[2] != 0x90) 533 goto notfat; 534 break; 535 case 0xe9: 536 /* 537 * Three-byte jmp instruction. The next two bytes are a 538 * little-endian 16 bit value. 539 */ 540 break; 541 default: 542 goto notfat; 543 break; 544 } 545 546 /* Check for a valid bytes per sector value. */ 547 fattest = ((bp->b_data[12] << 8) & 0xff00) | 548 (bp->b_data[11] & 0xff); 549 if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0)) 550 goto notfat; 551 552 if (partoffp) 553 return (ENXIO); /* No place for disklabel on FAT! */ 554 555 DL_SETPSIZE(&lp->d_partitions['i' - 'a'], 556 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 557 DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0); 558 lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS; 559 560 spoofonly = 1; /* No disklabel to read from disk. */ 561 } 562 563 notfat: 564 /* record the OpenBSD partition's placement for the caller */ 565 if (partoffp) 566 *partoffp = DL_SECTOBLK(lp, dospartoff); 567 else { 568 DL_SETBSTART(lp, dospartoff); 569 DL_SETBEND(lp, (dospartend < DL_GETDSIZE(lp)) ? dospartend : 570 DL_GETDSIZE(lp)); 571 } 572 573 /* don't read the on-disk label if we are in spoofed-only mode */ 574 if (spoofonly) 575 return (0); 576 577 error = readdisksector(bp, strat, lp, dospartoff + 578 DL_BLKTOSEC(lp, DOS_LABELSECTOR)); 579 if (error) 580 return (bp->b_error); 581 582 offset = DL_BLKOFFSET(lp, DOS_LABELSECTOR); 583 error = checkdisklabel(bp->b_data + offset, lp, 584 DL_GETBSTART((struct disklabel*)(bp->b_data+offset)), 585 DL_GETBEND((struct disklabel *)(bp->b_data+offset))); 586 587 return (error); 588 } 589 590 /* 591 * Returns 0 if the MBR with the provided partition array is a GPT protective 592 * MBR, and returns 1 otherwise. A GPT protective MBR would have one and only 593 * one MBR partition, an EFI partition that either covers the whole disk or as 594 * much of it as is possible with a 32bit size field. 595 * 596 * NOTE: MS always uses a size of UINT32_MAX for the EFI partition!** 597 */ 598 int 599 gpt_chk_mbr(struct dos_partition *dp, u_int64_t dsize) 600 { 601 struct dos_partition *dp2; 602 int efi, found, i; 603 u_int32_t psize; 604 605 found = efi = 0; 606 for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) { 607 if (dp2->dp_typ == DOSPTYP_UNUSED) 608 continue; 609 found++; 610 if (dp2->dp_typ != DOSPTYP_EFI) 611 continue; 612 psize = letoh32(dp2->dp_size); 613 if (psize == (dsize - 1) || 614 psize == UINT32_MAX) { 615 if (letoh32(dp2->dp_start) == 1) 616 efi++; 617 } 618 } 619 if (found == 1 && efi == 1) 620 return (0); 621 622 return (1); 623 } 624 625 int 626 gpt_chk_hdr(struct gpt_header *gh, struct disklabel *lp) 627 { 628 uint64_t ghpartlba; 629 uint64_t ghlbaend, ghlbastart; 630 uint32_t orig_gh_csum; 631 uint32_t ghsize, ghpartsize, ghpartspersec, ghpartnum; 632 633 if (letoh64(gh->gh_sig) != GPTSIGNATURE) 634 return (EINVAL); 635 636 if (letoh32(gh->gh_rev) != GPTREVISION) 637 return (EINVAL); 638 639 ghsize = letoh32(gh->gh_size); 640 ghpartsize = letoh32(gh->gh_part_size); 641 ghpartspersec = lp->d_secsize / ghpartsize; 642 ghpartnum = letoh32(gh->gh_part_num); 643 ghpartlba = letoh64(gh->gh_part_lba); 644 ghlbaend = letoh64(gh->gh_lba_end); 645 ghlbastart = letoh64(gh->gh_lba_start); 646 647 if (ghsize < GPTMINHDRSIZE || ghsize > sizeof(struct gpt_header)) 648 return (EINVAL); 649 650 orig_gh_csum = gh->gh_csum; 651 gh->gh_csum = 0; 652 gh->gh_csum = crc32(0, (unsigned char *)gh, ghsize); 653 654 if (orig_gh_csum != gh->gh_csum) 655 return (EINVAL); 656 657 if (ghlbastart >= DL_GETDSIZE(lp) || 658 ghlbaend >= DL_GETDSIZE(lp) || 659 ghpartlba >= DL_GETDSIZE(lp)) 660 return (EINVAL); 661 662 /* 663 * Size per partition entry shall be 128*(2**n) with n >= 0. 664 * We don't support partition entries larger than block size. 665 */ 666 if (ghpartsize % GPTMINPARTSIZE || ghpartsize > lp->d_secsize 667 || ghpartspersec == 0) { 668 DPRINTF("invalid partition size\n"); 669 return (EINVAL); 670 } 671 672 /* XXX: we don't support multiples of GPTMINPARTSIZE yet */ 673 if (ghpartsize != GPTMINPARTSIZE) { 674 DPRINTF("partition sizes larger than %d bytes are not " 675 "supported", GPTMINPARTSIZE); 676 return (EINVAL); 677 } 678 679 if (letoh64(gh->gh_lba_alt) >= DL_GETDSIZE(lp)) { 680 DPRINTF("alternate header's position is bogus\n"); 681 return (EINVAL); 682 } 683 684 return 0; 685 } 686 687 int 688 gpt_chk_parts(struct gpt_header *gh, struct gpt_partition *gp) 689 { 690 u_int32_t checksum; 691 checksum = crc32(0, (unsigned char *)gp, 692 letoh32(gh->gh_part_num) * letoh32(gh->gh_part_size)); 693 694 if (checksum != gh->gh_part_csum) 695 return (EINVAL); 696 697 return 0; 698 } 699 700 int 701 gpt_get_fstype(struct uuid *uuid_part) 702 { 703 static int init = 0; 704 static struct uuid uuid_openbsd, uuid_msdos, uuid_chromefs, 705 uuid_linux, uuid_hfs, uuid_unused, uuid_efi_system; 706 static const uint8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD; 707 static const uint8_t gpt_uuid_msdos[] = GPT_UUID_MSDOS; 708 static const uint8_t gpt_uuid_chromerootfs[] = GPT_UUID_CHROMEROOTFS; 709 static const uint8_t gpt_uuid_linux[] = GPT_UUID_LINUX; 710 static const uint8_t gpt_uuid_hfs[] = GPT_UUID_APPLE_HFS; 711 static const uint8_t gpt_uuid_unused[] = GPT_UUID_UNUSED; 712 static const uint8_t gpt_uuid_efi_system[] = GPT_UUID_EFI_SYSTEM; 713 714 if (init == 0) { 715 uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd); 716 uuid_dec_be(gpt_uuid_msdos, &uuid_msdos); 717 uuid_dec_be(gpt_uuid_chromerootfs, &uuid_chromefs); 718 uuid_dec_be(gpt_uuid_linux, &uuid_linux); 719 uuid_dec_be(gpt_uuid_hfs, &uuid_hfs); 720 uuid_dec_be(gpt_uuid_unused, &uuid_unused); 721 uuid_dec_be(gpt_uuid_efi_system, &uuid_efi_system); 722 init = 1; 723 } 724 725 if (!memcmp(uuid_part, &uuid_unused, sizeof(struct uuid))) 726 return FS_UNUSED; 727 else if (!memcmp(uuid_part, &uuid_openbsd, sizeof(struct uuid))) 728 return FS_BSDFFS; 729 else if (!memcmp(uuid_part, &uuid_msdos, sizeof(struct uuid))) 730 return FS_MSDOS; 731 else if (!memcmp(uuid_part, &uuid_chromefs, sizeof(struct uuid))) 732 return FS_EXT2FS; 733 else if (!memcmp(uuid_part, &uuid_linux, sizeof(struct uuid))) 734 return FS_EXT2FS; 735 else if (!memcmp(uuid_part, &uuid_hfs, sizeof(struct uuid))) 736 return FS_HFS; 737 else if (!memcmp(uuid_part, &uuid_efi_system, sizeof(struct uuid))) 738 return FS_MSDOS; 739 else 740 return FS_OTHER; 741 } 742 743 /* 744 * Spoof a disklabel based on the GPT information on the disk. 745 */ 746 int 747 spoofgptlabel(struct buf *bp, void (*strat)(struct buf *), 748 struct disklabel *lp) 749 { 750 static const u_int8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD; 751 struct gpt_header gh; 752 struct uuid uuid_part, uuid_openbsd; 753 struct gpt_partition *gp, *gp_tmp; 754 struct partition *pp; 755 size_t gpsz; 756 u_int64_t ghlbaend, ghlbastart, gptpartoff, gptpartend, sector; 757 u_int64_t start, end; 758 int i, altheader = 0, error, n; 759 uint32_t ghpartnum; 760 761 uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd); 762 763 for (sector = GPTSECTOR; ; sector = DL_GETDSIZE(lp)-1, altheader = 1) { 764 uint64_t ghpartlba; 765 uint32_t ghpartsize; 766 uint32_t ghpartspersec; 767 768 error = readdisksector(bp, strat, lp, sector); 769 if (error) { 770 DPRINTF("error reading from disk\n"); 771 return (error); 772 } 773 774 bcopy(bp->b_data, &gh, sizeof(gh)); 775 776 if (gpt_chk_hdr(&gh, lp)) { 777 if (altheader) { 778 DPRINTF("alternate header also broken\n"); 779 return (EINVAL); 780 } 781 continue; 782 } 783 784 ghpartsize = letoh32(gh.gh_part_size); 785 ghpartspersec = lp->d_secsize / ghpartsize; 786 ghpartnum = letoh32(gh.gh_part_num); 787 ghpartlba = letoh64(gh.gh_part_lba); 788 ghlbaend = letoh64(gh.gh_lba_end); 789 ghlbastart = letoh64(gh.gh_lba_start); 790 791 /* read GPT partition entry array */ 792 gp = mallocarray(ghpartnum, sizeof(struct gpt_partition), 793 M_DEVBUF, M_NOWAIT|M_ZERO); 794 if (gp == NULL) 795 return (ENOMEM); 796 gpsz = ghpartnum * sizeof(struct gpt_partition); 797 798 /* 799 * XXX: Fails if # of partition entries is not a multiple of 800 * ghpartspersec. 801 */ 802 sector = ghpartlba; 803 for (i = 0; i < ghpartnum / ghpartspersec; i++, sector++) { 804 error = readdisksector(bp, strat, lp, sector); 805 if (error) { 806 free(gp, M_DEVBUF, gpsz); 807 return (error); 808 } 809 810 bcopy(bp->b_data, gp + i * ghpartspersec, 811 ghpartspersec * sizeof(struct gpt_partition)); 812 } 813 814 if (gpt_chk_parts(&gh, gp)) { 815 free(gp, M_DEVBUF, gpsz); 816 if (altheader) { 817 DPRINTF("alternate partition entries are also " 818 "broken\n"); 819 return (EINVAL); 820 } 821 continue; 822 } 823 break; 824 } 825 826 /* Find OpenBSD partition and spoof others along the way. */ 827 n = 0; 828 gptpartoff = 0; 829 gptpartend = DL_GETBEND(lp); 830 for (gp_tmp = gp, i = 0; i < ghpartnum; gp_tmp++, i++) { 831 start = letoh64(gp_tmp->gp_lba_start); 832 end = letoh64(gp_tmp->gp_lba_end); 833 if (start > end || start < ghlbastart || end > ghlbaend) 834 continue; /* entry invalid */ 835 836 uuid_dec_le(&gp_tmp->gp_type, &uuid_part); 837 if (!memcmp(&uuid_part, &uuid_openbsd, sizeof(struct uuid))) { 838 if (gptpartoff == 0) { 839 gptpartoff = start; 840 gptpartend = end + 1; 841 } 842 continue; /* Do *NOT* spoof OpenBSD partitions! */ 843 } 844 845 /* 846 * Don't try to spoof more than 8 partitions, i.e. 847 * 'i' -'p'. 848 */ 849 if (n >= 8) 850 continue; 851 852 pp = &lp->d_partitions[8+n]; 853 n++; 854 pp->p_fstype = gpt_get_fstype(&uuid_part); 855 DL_SETPOFFSET(pp, start); 856 DL_SETPSIZE(pp, end - start + 1); 857 } 858 859 free(gp, M_DEVBUF, gpsz); 860 861 DL_SETBSTART(lp, gptpartoff); 862 DL_SETBEND(lp, (gptpartend < DL_GETDSIZE(lp)) ? gptpartend : 863 DL_GETDSIZE(lp)); 864 865 return (0); 866 } 867 868 /* 869 * Check new disk label for sensibility before setting it. 870 */ 871 int 872 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 873 { 874 struct partition *opp, *npp; 875 struct disk *dk; 876 int i; 877 878 /* sanity clause */ 879 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 880 (nlp->d_secsize % DEV_BSIZE) != 0) 881 return (EINVAL); 882 883 /* special case to allow disklabel to be invalidated */ 884 if (nlp->d_magic == 0xffffffff) { 885 *olp = *nlp; 886 return (0); 887 } 888 889 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 890 dkcksum(nlp) != 0) 891 return (EINVAL); 892 893 /* XXX missing check if other dos partitions will be overwritten */ 894 895 for (i = 0; i < MAXPARTITIONS; i++) { 896 opp = &olp->d_partitions[i]; 897 npp = &nlp->d_partitions[i]; 898 if ((openmask & (1 << i)) && 899 (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 900 DL_GETPSIZE(npp) < DL_GETPSIZE(opp))) 901 return (EBUSY); 902 /* 903 * Copy internally-set partition information 904 * if new label doesn't include it. XXX 905 */ 906 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 907 npp->p_fragblock = opp->p_fragblock; 908 npp->p_cpg = opp->p_cpg; 909 } 910 } 911 912 /* Generate a UID if the disklabel does not already have one. */ 913 if (duid_iszero(nlp->d_uid)) { 914 do { 915 arc4random_buf(nlp->d_uid, sizeof(nlp->d_uid)); 916 TAILQ_FOREACH(dk, &disklist, dk_link) 917 if (dk->dk_label && 918 duid_equal(dk->dk_label->d_uid, nlp->d_uid)) 919 break; 920 } while (dk != NULL || duid_iszero(nlp->d_uid)); 921 } 922 923 /* Preserve the disk size and RAW_PART values. */ 924 DL_SETDSIZE(nlp, DL_GETDSIZE(olp)); 925 npp = &nlp->d_partitions[RAW_PART]; 926 DL_SETPOFFSET(npp, 0); 927 DL_SETPSIZE(npp, DL_GETDSIZE(nlp)); 928 929 nlp->d_checksum = 0; 930 nlp->d_checksum = dkcksum(nlp); 931 *olp = *nlp; 932 933 disk_change = 1; 934 935 return (0); 936 } 937 938 /* 939 * Determine the size of the transfer, and make sure it is within the 940 * boundaries of the partition. Adjust transfer if needed, and signal errors or 941 * early completion. 942 */ 943 int 944 bounds_check_with_label(struct buf *bp, struct disklabel *lp) 945 { 946 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 947 daddr_t partblocks, sz; 948 949 /* Avoid division by zero, negative offsets, and negative sizes. */ 950 if (lp->d_secpercyl == 0 || bp->b_blkno < 0 || bp->b_bcount < 0) 951 goto bad; 952 953 /* Ensure transfer is a whole number of aligned sectors. */ 954 if ((bp->b_blkno % DL_BLKSPERSEC(lp)) != 0 || 955 (bp->b_bcount % lp->d_secsize) != 0) 956 goto bad; 957 958 /* Ensure transfer starts within partition boundary. */ 959 partblocks = DL_SECTOBLK(lp, DL_GETPSIZE(p)); 960 if (bp->b_blkno > partblocks) 961 goto bad; 962 963 /* If exactly at end of partition or null transfer, return EOF. */ 964 if (bp->b_blkno == partblocks || bp->b_bcount == 0) 965 goto done; 966 967 /* Truncate request if it extends past the end of the partition. */ 968 sz = bp->b_bcount >> DEV_BSHIFT; 969 if (sz > partblocks - bp->b_blkno) { 970 sz = partblocks - bp->b_blkno; 971 bp->b_bcount = sz << DEV_BSHIFT; 972 } 973 974 return (0); 975 976 bad: 977 bp->b_error = EINVAL; 978 bp->b_flags |= B_ERROR; 979 done: 980 bp->b_resid = bp->b_bcount; 981 return (-1); 982 } 983 984 /* 985 * Disk error is the preface to plaintive error messages 986 * about failing disk transfers. It prints messages of the form 987 988 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 989 990 * if the offset of the error in the transfer and a disk label 991 * are both available. blkdone should be -1 if the position of the error 992 * is unknown; the disklabel pointer may be null from drivers that have not 993 * been converted to use them. The message is printed with printf 994 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 995 * The message should be completed (with at least a newline) with printf 996 * or addlog, respectively. There is no trailing space. 997 */ 998 void 999 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 1000 struct disklabel *lp) 1001 { 1002 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 1003 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))); 1004 char partname = 'a' + part; 1005 daddr_t sn; 1006 1007 if (pri != LOG_PRINTF) { 1008 log(pri, "%s", ""); 1009 pr = addlog; 1010 } else 1011 pr = printf; 1012 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 1013 bp->b_flags & B_READ ? "read" : "writ"); 1014 sn = bp->b_blkno; 1015 if (bp->b_bcount <= DEV_BSIZE) 1016 (*pr)("%lld", (long long)sn); 1017 else { 1018 if (blkdone >= 0) { 1019 sn += blkdone; 1020 (*pr)("%lld of ", (long long)sn); 1021 } 1022 (*pr)("%lld-%lld", (long long)bp->b_blkno, 1023 (long long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE)); 1024 } 1025 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 1026 sn += DL_SECTOBLK(lp, DL_GETPOFFSET(&lp->d_partitions[part])); 1027 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, (long long)sn, 1028 (long long)(sn / DL_SECTOBLK(lp, lp->d_secpercyl))); 1029 sn %= DL_SECTOBLK(lp, lp->d_secpercyl); 1030 (*pr)(" tn %lld sn %lld)", 1031 (long long)(sn / DL_SECTOBLK(lp, lp->d_nsectors)), 1032 (long long)(sn % DL_SECTOBLK(lp, lp->d_nsectors))); 1033 } 1034 } 1035 1036 /* 1037 * Initialize the disklist. Called by main() before autoconfiguration. 1038 */ 1039 void 1040 disk_init(void) 1041 { 1042 1043 TAILQ_INIT(&disklist); 1044 disk_count = disk_change = 0; 1045 } 1046 1047 int 1048 disk_construct(struct disk *diskp) 1049 { 1050 rw_init(&diskp->dk_lock, "dklk"); 1051 mtx_init(&diskp->dk_mtx, IPL_BIO); 1052 1053 diskp->dk_flags |= DKF_CONSTRUCTED; 1054 1055 return (0); 1056 } 1057 1058 /* 1059 * Attach a disk. 1060 */ 1061 void 1062 disk_attach(struct device *dv, struct disk *diskp) 1063 { 1064 int majdev; 1065 1066 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 1067 disk_construct(diskp); 1068 1069 /* 1070 * Allocate and initialize the disklabel structures. Note that 1071 * it's not safe to sleep here, since we're probably going to be 1072 * called during autoconfiguration. 1073 */ 1074 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 1075 M_NOWAIT|M_ZERO); 1076 if (diskp->dk_label == NULL) 1077 panic("disk_attach: can't allocate storage for disklabel"); 1078 1079 /* 1080 * Set the attached timestamp. 1081 */ 1082 microuptime(&diskp->dk_attachtime); 1083 1084 /* 1085 * Link into the disklist. 1086 */ 1087 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 1088 ++disk_count; 1089 disk_change = 1; 1090 1091 /* 1092 * Store device structure and number for later use. 1093 */ 1094 diskp->dk_device = dv; 1095 diskp->dk_devno = NODEV; 1096 if (dv != NULL) { 1097 majdev = findblkmajor(dv); 1098 if (majdev >= 0) 1099 diskp->dk_devno = 1100 MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); 1101 1102 if (diskp->dk_devno != NODEV) { 1103 struct disk_attach_task *dat; 1104 1105 dat = malloc(sizeof(*dat), M_TEMP, M_WAITOK); 1106 1107 /* XXX: Assumes dk is part of the device softc. */ 1108 device_ref(dv); 1109 dat->dk = diskp; 1110 1111 task_set(&dat->task, disk_attach_callback, dat); 1112 task_add(systq, &dat->task); 1113 } 1114 } 1115 1116 if (softraid_disk_attach) 1117 softraid_disk_attach(diskp, 1); 1118 } 1119 1120 void 1121 disk_attach_callback(void *xdat) 1122 { 1123 struct disk_attach_task *dat = xdat; 1124 struct disk *dk = dat->dk; 1125 struct disklabel dl; 1126 char errbuf[100]; 1127 1128 free(dat, M_TEMP, sizeof(*dat)); 1129 1130 if (dk->dk_flags & (DKF_OPENED | DKF_NOLABELREAD)) 1131 goto done; 1132 1133 /* Read disklabel. */ 1134 if (disk_readlabel(&dl, dk->dk_devno, errbuf, sizeof(errbuf)) == NULL) { 1135 add_timer_randomness(dl.d_checksum); 1136 dk->dk_flags |= DKF_LABELVALID; 1137 } 1138 1139 done: 1140 dk->dk_flags |= DKF_OPENED; 1141 device_unref(dk->dk_device); 1142 wakeup(dk); 1143 } 1144 1145 /* 1146 * Detach a disk. 1147 */ 1148 void 1149 disk_detach(struct disk *diskp) 1150 { 1151 1152 if (softraid_disk_attach) 1153 softraid_disk_attach(diskp, -1); 1154 1155 /* 1156 * Free the space used by the disklabel structures. 1157 */ 1158 free(diskp->dk_label, M_DEVBUF, sizeof(*diskp->dk_label)); 1159 1160 /* 1161 * Remove from the disklist. 1162 */ 1163 TAILQ_REMOVE(&disklist, diskp, dk_link); 1164 disk_change = 1; 1165 if (--disk_count < 0) 1166 panic("disk_detach: disk_count < 0"); 1167 } 1168 1169 int 1170 disk_openpart(struct disk *dk, int part, int fmt, int haslabel) 1171 { 1172 KASSERT(part >= 0 && part < MAXPARTITIONS); 1173 1174 /* Unless opening the raw partition, check that the partition exists. */ 1175 if (part != RAW_PART && (!haslabel || 1176 part >= dk->dk_label->d_npartitions || 1177 dk->dk_label->d_partitions[part].p_fstype == FS_UNUSED)) 1178 return (ENXIO); 1179 1180 /* Ensure the partition doesn't get changed under our feet. */ 1181 switch (fmt) { 1182 case S_IFCHR: 1183 dk->dk_copenmask |= (1 << part); 1184 break; 1185 case S_IFBLK: 1186 dk->dk_bopenmask |= (1 << part); 1187 break; 1188 } 1189 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1190 1191 return (0); 1192 } 1193 1194 void 1195 disk_closepart(struct disk *dk, int part, int fmt) 1196 { 1197 KASSERT(part >= 0 && part < MAXPARTITIONS); 1198 1199 switch (fmt) { 1200 case S_IFCHR: 1201 dk->dk_copenmask &= ~(1 << part); 1202 break; 1203 case S_IFBLK: 1204 dk->dk_bopenmask &= ~(1 << part); 1205 break; 1206 } 1207 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1208 } 1209 1210 void 1211 disk_gone(int (*open)(dev_t, int, int, struct proc *), int unit) 1212 { 1213 int bmaj, cmaj, mn; 1214 1215 /* Locate the lowest minor number to be detached. */ 1216 mn = DISKMINOR(unit, 0); 1217 1218 for (bmaj = 0; bmaj < nblkdev; bmaj++) 1219 if (bdevsw[bmaj].d_open == open) 1220 vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK); 1221 for (cmaj = 0; cmaj < nchrdev; cmaj++) 1222 if (cdevsw[cmaj].d_open == open) 1223 vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR); 1224 } 1225 1226 /* 1227 * Increment a disk's busy counter. If the counter is going from 1228 * 0 to 1, set the timestamp. 1229 */ 1230 void 1231 disk_busy(struct disk *diskp) 1232 { 1233 1234 /* 1235 * XXX We'd like to use something as accurate as microtime(), 1236 * but that doesn't depend on the system TOD clock. 1237 */ 1238 mtx_enter(&diskp->dk_mtx); 1239 if (diskp->dk_busy++ == 0) 1240 microuptime(&diskp->dk_timestamp); 1241 mtx_leave(&diskp->dk_mtx); 1242 } 1243 1244 /* 1245 * Decrement a disk's busy counter, increment the byte count, total busy 1246 * time, and reset the timestamp. 1247 */ 1248 void 1249 disk_unbusy(struct disk *diskp, long bcount, int read) 1250 { 1251 struct timeval dv_time, diff_time; 1252 1253 mtx_enter(&diskp->dk_mtx); 1254 1255 if (diskp->dk_busy-- == 0) 1256 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 1257 1258 microuptime(&dv_time); 1259 1260 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 1261 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 1262 1263 diskp->dk_timestamp = dv_time; 1264 if (bcount > 0) { 1265 if (read) { 1266 diskp->dk_rbytes += bcount; 1267 diskp->dk_rxfer++; 1268 } else { 1269 diskp->dk_wbytes += bcount; 1270 diskp->dk_wxfer++; 1271 } 1272 } else 1273 diskp->dk_seek++; 1274 1275 mtx_leave(&diskp->dk_mtx); 1276 1277 add_disk_randomness(bcount ^ diff_time.tv_usec); 1278 } 1279 1280 int 1281 disk_lock(struct disk *dk) 1282 { 1283 return (rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR)); 1284 } 1285 1286 void 1287 disk_lock_nointr(struct disk *dk) 1288 { 1289 rw_enter_write(&dk->dk_lock); 1290 } 1291 1292 void 1293 disk_unlock(struct disk *dk) 1294 { 1295 rw_exit_write(&dk->dk_lock); 1296 } 1297 1298 int 1299 dk_mountroot(void) 1300 { 1301 char errbuf[100]; 1302 int part = DISKPART(rootdev); 1303 int (*mountrootfn)(void); 1304 struct disklabel dl; 1305 char *error; 1306 1307 error = disk_readlabel(&dl, rootdev, errbuf, sizeof(errbuf)); 1308 if (error) 1309 panic("%s", error); 1310 1311 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 1312 panic("root filesystem has size 0"); 1313 switch (dl.d_partitions[part].p_fstype) { 1314 #ifdef EXT2FS 1315 case FS_EXT2FS: 1316 { 1317 extern int ext2fs_mountroot(void); 1318 mountrootfn = ext2fs_mountroot; 1319 } 1320 break; 1321 #endif 1322 #ifdef FFS 1323 case FS_BSDFFS: 1324 { 1325 extern int ffs_mountroot(void); 1326 mountrootfn = ffs_mountroot; 1327 } 1328 break; 1329 #endif 1330 #ifdef CD9660 1331 case FS_ISO9660: 1332 { 1333 extern int cd9660_mountroot(void); 1334 mountrootfn = cd9660_mountroot; 1335 } 1336 break; 1337 #endif 1338 default: 1339 #ifdef FFS 1340 { 1341 extern int ffs_mountroot(void); 1342 1343 printf("filesystem type %d not known.. assuming ffs\n", 1344 dl.d_partitions[part].p_fstype); 1345 mountrootfn = ffs_mountroot; 1346 } 1347 #else 1348 panic("disk 0x%x filesystem type %d not known", 1349 rootdev, dl.d_partitions[part].p_fstype); 1350 #endif 1351 } 1352 return (*mountrootfn)(); 1353 } 1354 1355 struct device * 1356 getdisk(char *str, int len, int defpart, dev_t *devp) 1357 { 1358 struct device *dv; 1359 1360 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1361 printf("use one of: exit"); 1362 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1363 if (dv->dv_class == DV_DISK) 1364 printf(" %s[a-p]", dv->dv_xname); 1365 #if defined(NFSCLIENT) 1366 if (dv->dv_class == DV_IFNET) 1367 printf(" %s", dv->dv_xname); 1368 #endif 1369 } 1370 printf("\n"); 1371 } 1372 return (dv); 1373 } 1374 1375 struct device * 1376 parsedisk(char *str, int len, int defpart, dev_t *devp) 1377 { 1378 struct device *dv; 1379 int majdev, part = defpart; 1380 char c; 1381 1382 if (len == 0) 1383 return (NULL); 1384 c = str[len-1]; 1385 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1386 part = c - 'a'; 1387 len -= 1; 1388 } 1389 1390 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1391 if (dv->dv_class == DV_DISK && 1392 strncmp(str, dv->dv_xname, len) == 0 && 1393 dv->dv_xname[len] == '\0') { 1394 majdev = findblkmajor(dv); 1395 if (majdev < 0) 1396 return NULL; 1397 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1398 break; 1399 } 1400 #if defined(NFSCLIENT) 1401 if (dv->dv_class == DV_IFNET && 1402 strncmp(str, dv->dv_xname, len) == 0 && 1403 dv->dv_xname[len] == '\0') { 1404 *devp = NODEV; 1405 break; 1406 } 1407 #endif 1408 } 1409 1410 return (dv); 1411 } 1412 1413 void 1414 setroot(struct device *bootdv, int part, int exitflags) 1415 { 1416 int majdev, unit, len, s, slept = 0; 1417 struct swdevt *swp; 1418 struct device *rootdv, *dv; 1419 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1420 struct ifnet *ifp = NULL; 1421 struct disk *dk; 1422 char buf[128]; 1423 #if defined(NFSCLIENT) 1424 extern char *nfsbootdevname; 1425 #endif 1426 1427 /* Ensure that all disk attach callbacks have completed. */ 1428 do { 1429 TAILQ_FOREACH(dk, &disklist, dk_link) { 1430 if (dk->dk_devno != NODEV && 1431 (dk->dk_flags & DKF_OPENED) == 0) { 1432 tsleep(dk, 0, "dkopen", hz); 1433 slept++; 1434 break; 1435 } 1436 } 1437 } while (dk != NULL && slept < 5); 1438 1439 if (slept == 5) { 1440 printf("disklabels not read:"); 1441 TAILQ_FOREACH(dk, &disklist, dk_link) 1442 if (dk->dk_devno != NODEV && 1443 (dk->dk_flags & DKF_OPENED) == 0) 1444 printf(" %s", dk->dk_name); 1445 printf("\n"); 1446 } 1447 1448 if (duid_iszero(bootduid)) { 1449 /* Locate DUID for boot disk since it was not provided. */ 1450 TAILQ_FOREACH(dk, &disklist, dk_link) 1451 if (dk->dk_device == bootdv) 1452 break; 1453 if (dk && (dk->dk_flags & DKF_LABELVALID)) 1454 bcopy(dk->dk_label->d_uid, bootduid, sizeof(bootduid)); 1455 } else if (bootdv == NULL) { 1456 /* Locate boot disk based on the provided DUID. */ 1457 TAILQ_FOREACH(dk, &disklist, dk_link) 1458 if (duid_equal(dk->dk_label->d_uid, bootduid)) 1459 break; 1460 if (dk && (dk->dk_flags & DKF_LABELVALID)) 1461 bootdv = dk->dk_device; 1462 } 1463 bcopy(bootduid, rootduid, sizeof(rootduid)); 1464 1465 #if NSOFTRAID > 0 1466 sr_map_root(); 1467 #endif 1468 1469 /* 1470 * If `swap generic' and we couldn't determine boot device, 1471 * ask the user. 1472 */ 1473 dk = NULL; 1474 if (mountroot == NULL && bootdv == NULL) 1475 boothowto |= RB_ASKNAME; 1476 if (boothowto & RB_ASKNAME) { 1477 while (1) { 1478 printf("root device"); 1479 if (bootdv != NULL) { 1480 printf(" (default %s", bootdv->dv_xname); 1481 if (bootdv->dv_class == DV_DISK) 1482 printf("%c", 'a' + part); 1483 printf(")"); 1484 } 1485 printf(": "); 1486 s = splhigh(); 1487 cnpollc(1); 1488 len = getsn(buf, sizeof(buf)); 1489 cnpollc(0); 1490 splx(s); 1491 if (strcmp(buf, "exit") == 0) 1492 reboot(exitflags); 1493 if (len == 0 && bootdv != NULL) { 1494 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1495 len = strlen(buf); 1496 } 1497 if (len > 0 && buf[len - 1] == '*') { 1498 buf[--len] = '\0'; 1499 dv = getdisk(buf, len, part, &nrootdev); 1500 if (dv != NULL) { 1501 rootdv = dv; 1502 nswapdev = nrootdev; 1503 goto gotswap; 1504 } 1505 } 1506 dv = getdisk(buf, len, part, &nrootdev); 1507 if (dv != NULL) { 1508 rootdv = dv; 1509 break; 1510 } 1511 } 1512 1513 if (rootdv->dv_class == DV_IFNET) 1514 goto gotswap; 1515 1516 /* try to build swap device out of new root device */ 1517 while (1) { 1518 printf("swap device"); 1519 if (rootdv != NULL) 1520 printf(" (default %s%s)", rootdv->dv_xname, 1521 rootdv->dv_class == DV_DISK ? "b" : ""); 1522 printf(": "); 1523 s = splhigh(); 1524 cnpollc(1); 1525 len = getsn(buf, sizeof(buf)); 1526 cnpollc(0); 1527 splx(s); 1528 if (strcmp(buf, "exit") == 0) 1529 reboot(exitflags); 1530 if (len == 0 && rootdv != NULL) { 1531 switch (rootdv->dv_class) { 1532 case DV_IFNET: 1533 nswapdev = NODEV; 1534 break; 1535 case DV_DISK: 1536 nswapdev = MAKEDISKDEV(major(nrootdev), 1537 DISKUNIT(nrootdev), 1); 1538 if (nswapdev == nrootdev) 1539 continue; 1540 break; 1541 default: 1542 break; 1543 } 1544 break; 1545 } 1546 dv = getdisk(buf, len, 1, &nswapdev); 1547 if (dv) { 1548 if (dv->dv_class == DV_IFNET) 1549 nswapdev = NODEV; 1550 if (nswapdev == nrootdev) 1551 continue; 1552 break; 1553 } 1554 } 1555 gotswap: 1556 rootdev = nrootdev; 1557 dumpdev = nswapdev; 1558 swdevt[0].sw_dev = nswapdev; 1559 swdevt[1].sw_dev = NODEV; 1560 #if defined(NFSCLIENT) 1561 } else if (mountroot == nfs_mountroot) { 1562 rootdv = bootdv; 1563 rootdev = dumpdev = swapdev = NODEV; 1564 #endif 1565 } else if (mountroot == NULL && rootdev == NODEV) { 1566 /* 1567 * `swap generic' 1568 */ 1569 rootdv = bootdv; 1570 1571 if (bootdv->dv_class == DV_DISK) { 1572 if (!duid_iszero(rootduid)) { 1573 TAILQ_FOREACH(dk, &disklist, dk_link) 1574 if ((dk->dk_flags & DKF_LABELVALID) && 1575 dk->dk_label && duid_equal( 1576 dk->dk_label->d_uid, rootduid)) 1577 break; 1578 if (dk == NULL) 1579 panic("root device (%s) not found", 1580 duid_format(rootduid)); 1581 rootdv = dk->dk_device; 1582 } 1583 } 1584 1585 majdev = findblkmajor(rootdv); 1586 if (majdev >= 0) { 1587 /* 1588 * Root and swap are on the disk. 1589 * Assume swap is on partition b. 1590 */ 1591 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1592 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1593 } else { 1594 /* 1595 * Root and swap are on a net. 1596 */ 1597 nswapdev = NODEV; 1598 } 1599 dumpdev = nswapdev; 1600 swdevt[0].sw_dev = nswapdev; 1601 /* swdevt[1].sw_dev = NODEV; */ 1602 } else { 1603 /* Completely pre-configured, but we want rootdv .. */ 1604 majdev = major(rootdev); 1605 if (findblkname(majdev) == NULL) 1606 return; 1607 unit = DISKUNIT(rootdev); 1608 part = DISKPART(rootdev); 1609 snprintf(buf, sizeof buf, "%s%d%c", 1610 findblkname(majdev), unit, 'a' + part); 1611 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1612 if (rootdv == NULL) 1613 panic("root device (%s) not found", buf); 1614 } 1615 1616 if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET) 1617 ifp = ifunit(rootdv->dv_xname); 1618 else if (bootdv && bootdv->dv_class == DV_IFNET) 1619 ifp = ifunit(bootdv->dv_xname); 1620 1621 if (ifp) 1622 if_addgroup(ifp, "netboot"); 1623 1624 switch (rootdv->dv_class) { 1625 #if defined(NFSCLIENT) 1626 case DV_IFNET: 1627 mountroot = nfs_mountroot; 1628 nfsbootdevname = rootdv->dv_xname; 1629 return; 1630 #endif 1631 case DV_DISK: 1632 mountroot = dk_mountroot; 1633 part = DISKPART(rootdev); 1634 break; 1635 default: 1636 printf("can't figure root, hope your kernel is right\n"); 1637 return; 1638 } 1639 1640 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1641 1642 if (dk && dk->dk_device == rootdv) 1643 printf(" (%s.%c)", duid_format(rootduid), 'a' + part); 1644 1645 /* 1646 * Make the swap partition on the root drive the primary swap. 1647 */ 1648 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1649 if (major(rootdev) == major(swp->sw_dev) && 1650 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1651 temp = swdevt[0].sw_dev; 1652 swdevt[0].sw_dev = swp->sw_dev; 1653 swp->sw_dev = temp; 1654 break; 1655 } 1656 } 1657 if (swp->sw_dev != NODEV) { 1658 /* 1659 * If dumpdev was the same as the old primary swap device, 1660 * move it to the new primary swap device. 1661 */ 1662 if (temp == dumpdev) 1663 dumpdev = swdevt[0].sw_dev; 1664 } 1665 if (swdevt[0].sw_dev != NODEV) 1666 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1667 DISKUNIT(swdevt[0].sw_dev), 1668 'a' + DISKPART(swdevt[0].sw_dev)); 1669 if (dumpdev != NODEV) 1670 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1671 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1672 printf("\n"); 1673 } 1674 1675 extern struct nam2blk nam2blk[]; 1676 1677 int 1678 findblkmajor(struct device *dv) 1679 { 1680 char buf[16], *p; 1681 int i; 1682 1683 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1684 return (-1); 1685 for (p = buf; *p; p++) 1686 if (*p >= '0' && *p <= '9') 1687 *p = '\0'; 1688 1689 for (i = 0; nam2blk[i].name; i++) 1690 if (!strcmp(buf, nam2blk[i].name)) 1691 return (nam2blk[i].maj); 1692 return (-1); 1693 } 1694 1695 char * 1696 findblkname(int maj) 1697 { 1698 int i; 1699 1700 for (i = 0; nam2blk[i].name; i++) 1701 if (nam2blk[i].maj == maj) 1702 return (nam2blk[i].name); 1703 return (NULL); 1704 } 1705 1706 char * 1707 disk_readlabel(struct disklabel *dl, dev_t dev, char *errbuf, size_t errsize) 1708 { 1709 struct vnode *vn; 1710 dev_t chrdev, rawdev; 1711 int error; 1712 1713 chrdev = blktochr(dev); 1714 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(chrdev), RAW_PART); 1715 1716 #ifdef DEBUG 1717 printf("dev=0x%x chrdev=0x%x rawdev=0x%x\n", dev, chrdev, rawdev); 1718 #endif 1719 1720 if (cdevvp(rawdev, &vn)) { 1721 snprintf(errbuf, errsize, 1722 "cannot obtain vnode for 0x%x/0x%x", dev, rawdev); 1723 return (errbuf); 1724 } 1725 1726 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1727 if (error) { 1728 snprintf(errbuf, errsize, 1729 "cannot open disk, 0x%x/0x%x, error %d", 1730 dev, rawdev, error); 1731 goto done; 1732 } 1733 1734 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)dl, FREAD, NOCRED, curproc); 1735 if (error) { 1736 snprintf(errbuf, errsize, 1737 "cannot read disk label, 0x%x/0x%x, error %d", 1738 dev, rawdev, error); 1739 } 1740 done: 1741 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1742 vput(vn); 1743 if (error) 1744 return (errbuf); 1745 return (NULL); 1746 } 1747 1748 int 1749 disk_map(char *path, char *mappath, int size, int flags) 1750 { 1751 struct disk *dk, *mdk; 1752 u_char uid[8]; 1753 char c, part; 1754 int i; 1755 1756 /* 1757 * Attempt to map a request for a disklabel UID to the correct device. 1758 * We should be supplied with a disklabel UID which has the following 1759 * format: 1760 * 1761 * [disklabel uid] . [partition] 1762 * 1763 * Alternatively, if the DM_OPENPART flag is set the disklabel UID can 1764 * based passed on its own. 1765 */ 1766 1767 if (strchr(path, '/') != NULL) 1768 return -1; 1769 1770 /* Verify that the device name is properly formed. */ 1771 if (!((strlen(path) == 16 && (flags & DM_OPENPART)) || 1772 (strlen(path) == 18 && path[16] == '.'))) 1773 return -1; 1774 1775 /* Get partition. */ 1776 if (flags & DM_OPENPART) 1777 part = 'a' + RAW_PART; 1778 else 1779 part = path[17]; 1780 1781 if (part < 'a' || part >= 'a' + MAXPARTITIONS) 1782 return -1; 1783 1784 /* Derive label UID. */ 1785 memset(uid, 0, sizeof(uid)); 1786 for (i = 0; i < 16; i++) { 1787 c = path[i]; 1788 if (c >= '0' && c <= '9') 1789 c -= '0'; 1790 else if (c >= 'a' && c <= 'f') 1791 c -= ('a' - 10); 1792 else 1793 return -1; 1794 1795 uid[i / 2] <<= 4; 1796 uid[i / 2] |= c & 0xf; 1797 } 1798 1799 mdk = NULL; 1800 TAILQ_FOREACH(dk, &disklist, dk_link) { 1801 if ((dk->dk_flags & DKF_LABELVALID) && dk->dk_label && 1802 memcmp(dk->dk_label->d_uid, uid, 1803 sizeof(dk->dk_label->d_uid)) == 0) { 1804 /* Fail if there are duplicate UIDs! */ 1805 if (mdk != NULL) 1806 return -1; 1807 mdk = dk; 1808 } 1809 } 1810 1811 if (mdk == NULL || mdk->dk_name == NULL) 1812 return -1; 1813 1814 snprintf(mappath, size, "/dev/%s%s%c", 1815 (flags & DM_OPENBLCK) ? "" : "r", mdk->dk_name, part); 1816 1817 return 0; 1818 } 1819 1820 /* 1821 * Lookup a disk device and verify that it has completed attaching. 1822 */ 1823 struct device * 1824 disk_lookup(struct cfdriver *cd, int unit) 1825 { 1826 struct device *dv; 1827 struct disk *dk; 1828 1829 dv = device_lookup(cd, unit); 1830 if (dv == NULL) 1831 return (NULL); 1832 1833 TAILQ_FOREACH(dk, &disklist, dk_link) 1834 if (dk->dk_device == dv) 1835 break; 1836 1837 if (dk == NULL) { 1838 device_unref(dv); 1839 return (NULL); 1840 } 1841 1842 return (dv); 1843 } 1844 1845 int 1846 duid_equal(u_char *duid1, u_char *duid2) 1847 { 1848 return (memcmp(duid1, duid2, DUID_SIZE) == 0); 1849 } 1850 1851 int 1852 duid_iszero(u_char *duid) 1853 { 1854 u_char zeroduid[DUID_SIZE]; 1855 1856 memset(zeroduid, 0, sizeof(zeroduid)); 1857 1858 return (duid_equal(duid, zeroduid)); 1859 } 1860 1861 const char * 1862 duid_format(u_char *duid) 1863 { 1864 static char duid_str[17]; 1865 1866 snprintf(duid_str, sizeof(duid_str), 1867 "%02hx%02hx%02hx%02hx%02hx%02hx%02hx%02hx", 1868 duid[0], duid[1], duid[2], duid[3], 1869 duid[4], duid[5], duid[6], duid[7]); 1870 1871 return (duid_str); 1872 } 1873