1 /* $OpenBSD: subr_disk.c,v 1.223 2016/02/08 22:12:51 krw Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/stat.h> 48 #include <sys/syslog.h> 49 #include <sys/device.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/conf.h> 53 #include <sys/lock.h> 54 #include <sys/disk.h> 55 #include <sys/reboot.h> 56 #include <sys/dkio.h> 57 #include <sys/vnode.h> 58 #include <sys/task.h> 59 #include <sys/stdint.h> 60 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 64 #include <net/if.h> 65 66 #include <dev/rndvar.h> 67 #include <dev/cons.h> 68 69 #include <lib/libz/zlib.h> 70 71 #include "softraid.h" 72 73 #ifdef DEBUG 74 #define DPRINTF(x...) printf(x) 75 #else 76 #define DPRINTF(x...) 77 #endif 78 79 /* 80 * A global list of all disks attached to the system. May grow or 81 * shrink over time. 82 */ 83 struct disklist_head disklist; /* TAILQ_HEAD */ 84 int disk_count; /* number of drives in global disklist */ 85 int disk_change; /* set if a disk has been attached/detached 86 * since last we looked at this variable. This 87 * is reset by hw_sysctl() 88 */ 89 90 u_char bootduid[8]; /* DUID of boot disk. */ 91 u_char rootduid[8]; /* DUID of root disk. */ 92 93 /* softraid callback, do not use! */ 94 void (*softraid_disk_attach)(struct disk *, int); 95 96 void sr_map_root(void); 97 98 struct disk_attach_task { 99 struct task task; 100 struct disk *dk; 101 }; 102 103 void disk_attach_callback(void *); 104 105 int spoofgptlabel(struct buf *, void (*)(struct buf *), struct disklabel *); 106 107 int gpt_chk_mbr(struct dos_partition *, u_int64_t); 108 int gpt_chk_hdr(struct gpt_header *, struct disklabel *); 109 int gpt_chk_parts(struct gpt_header *, struct gpt_partition *); 110 int gpt_get_fstype(struct uuid *); 111 112 /* 113 * Compute checksum for disk label. 114 */ 115 u_int 116 dkcksum(struct disklabel *lp) 117 { 118 u_int16_t *start, *end; 119 u_int16_t sum = 0; 120 121 start = (u_int16_t *)lp; 122 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 123 while (start < end) 124 sum ^= *start++; 125 return (sum); 126 } 127 128 int 129 initdisklabel(struct disklabel *lp) 130 { 131 int i; 132 133 /* minimal requirements for archetypal disk label */ 134 if (lp->d_secsize < DEV_BSIZE) 135 lp->d_secsize = DEV_BSIZE; 136 if (DL_GETDSIZE(lp) == 0) 137 DL_SETDSIZE(lp, MAXDISKSIZE); 138 if (lp->d_secpercyl == 0) 139 return (ERANGE); 140 lp->d_npartitions = MAXPARTITIONS; 141 for (i = 0; i < RAW_PART; i++) { 142 DL_SETPSIZE(&lp->d_partitions[i], 0); 143 DL_SETPOFFSET(&lp->d_partitions[i], 0); 144 } 145 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 146 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 147 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 148 DL_SETBSTART(lp, 0); 149 DL_SETBEND(lp, DL_GETDSIZE(lp)); 150 lp->d_version = 1; 151 lp->d_bbsize = 8192; 152 lp->d_sbsize = 64*1024; /* XXX ? */ 153 return (0); 154 } 155 156 /* 157 * Check an incoming block to make sure it is a disklabel, convert it to 158 * a newer version if needed, etc etc. 159 */ 160 int 161 checkdisklabel(void *rlp, struct disklabel *lp, u_int64_t boundstart, 162 u_int64_t boundend) 163 { 164 struct disklabel *dlp = rlp; 165 struct __partitionv0 *v0pp; 166 struct partition *pp; 167 u_int64_t disksize; 168 int error = 0; 169 int i; 170 171 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) 172 error = ENOENT; /* no disk label */ 173 else if (dlp->d_npartitions > MAXPARTITIONS) 174 error = E2BIG; /* too many partitions */ 175 else if (dlp->d_secpercyl == 0) 176 error = EINVAL; /* invalid label */ 177 else if (dlp->d_secsize == 0) 178 error = ENOSPC; /* disk too small */ 179 else if (dkcksum(dlp) != 0) 180 error = EINVAL; /* incorrect checksum */ 181 182 if (error) { 183 u_int16_t *start, *end, sum = 0; 184 185 /* If it is byte-swapped, attempt to convert it */ 186 if (swap32(dlp->d_magic) != DISKMAGIC || 187 swap32(dlp->d_magic2) != DISKMAGIC || 188 swap16(dlp->d_npartitions) > MAXPARTITIONS) 189 return (error); 190 191 /* 192 * Need a byte-swap aware dkcksum variant 193 * inlined, because dkcksum uses a sub-field 194 */ 195 start = (u_int16_t *)dlp; 196 end = (u_int16_t *)&dlp->d_partitions[ 197 swap16(dlp->d_npartitions)]; 198 while (start < end) 199 sum ^= *start++; 200 if (sum != 0) 201 return (error); 202 203 dlp->d_magic = swap32(dlp->d_magic); 204 dlp->d_type = swap16(dlp->d_type); 205 206 /* d_typename and d_packname are strings */ 207 208 dlp->d_secsize = swap32(dlp->d_secsize); 209 dlp->d_nsectors = swap32(dlp->d_nsectors); 210 dlp->d_ntracks = swap32(dlp->d_ntracks); 211 dlp->d_ncylinders = swap32(dlp->d_ncylinders); 212 dlp->d_secpercyl = swap32(dlp->d_secpercyl); 213 dlp->d_secperunit = swap32(dlp->d_secperunit); 214 215 /* d_uid is a string */ 216 217 dlp->d_acylinders = swap32(dlp->d_acylinders); 218 219 dlp->d_flags = swap32(dlp->d_flags); 220 221 for (i = 0; i < NDDATA; i++) 222 dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]); 223 224 dlp->d_secperunith = swap16(dlp->d_secperunith); 225 dlp->d_version = swap16(dlp->d_version); 226 227 for (i = 0; i < NSPARE; i++) 228 dlp->d_spare[i] = swap32(dlp->d_spare[i]); 229 230 dlp->d_magic2 = swap32(dlp->d_magic2); 231 232 dlp->d_npartitions = swap16(dlp->d_npartitions); 233 dlp->d_bbsize = swap32(dlp->d_bbsize); 234 dlp->d_sbsize = swap32(dlp->d_sbsize); 235 236 for (i = 0; i < MAXPARTITIONS; i++) { 237 pp = &dlp->d_partitions[i]; 238 pp->p_size = swap32(pp->p_size); 239 pp->p_offset = swap32(pp->p_offset); 240 if (dlp->d_version == 0) { 241 v0pp = (struct __partitionv0 *)pp; 242 v0pp->p_fsize = swap32(v0pp->p_fsize); 243 } else { 244 pp->p_offseth = swap16(pp->p_offseth); 245 pp->p_sizeh = swap16(pp->p_sizeh); 246 } 247 pp->p_cpg = swap16(pp->p_cpg); 248 } 249 250 dlp->d_checksum = 0; 251 dlp->d_checksum = dkcksum(dlp); 252 error = 0; 253 } 254 255 /* XXX should verify lots of other fields and whine a lot */ 256 257 /* Initial passed in lp contains the real disk size. */ 258 disksize = DL_GETDSIZE(lp); 259 260 if (lp != dlp) 261 *lp = *dlp; 262 263 if (lp->d_version == 0) { 264 lp->d_version = 1; 265 lp->d_secperunith = 0; 266 267 v0pp = (struct __partitionv0 *)lp->d_partitions; 268 pp = lp->d_partitions; 269 for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) { 270 pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp-> 271 p_fsize, v0pp->p_frag); 272 pp->p_offseth = 0; 273 pp->p_sizeh = 0; 274 } 275 } 276 277 #ifdef DEBUG 278 if (DL_GETDSIZE(lp) != disksize) 279 printf("on-disk disklabel has incorrect disksize (%llu)\n", 280 DL_GETDSIZE(lp)); 281 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize) 282 printf("on-disk disklabel RAW_PART has incorrect size (%llu)\n", 283 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 284 if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0) 285 printf("on-disk disklabel RAW_PART offset != 0 (%llu)\n", 286 DL_GETPOFFSET(&lp->d_partitions[RAW_PART])); 287 #endif 288 DL_SETDSIZE(lp, disksize); 289 DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize); 290 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 291 DL_SETBSTART(lp, boundstart); 292 DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp)); 293 294 lp->d_checksum = 0; 295 lp->d_checksum = dkcksum(lp); 296 return (0); 297 } 298 299 /* 300 * Read a disk sector. 301 */ 302 int 303 readdisksector(struct buf *bp, void (*strat)(struct buf *), 304 struct disklabel *lp, u_int64_t sector) 305 { 306 bp->b_blkno = DL_SECTOBLK(lp, sector); 307 bp->b_bcount = lp->d_secsize; 308 bp->b_error = 0; 309 CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR); 310 SET(bp->b_flags, B_BUSY | B_READ | B_RAW); 311 312 (*strat)(bp); 313 314 return (biowait(bp)); 315 } 316 317 /* 318 * If dos partition table requested, attempt to load it and 319 * find disklabel inside a DOS partition. Return buffer 320 * for use in signalling errors if requested. 321 * 322 * We would like to check if each MBR has a valid BOOT_MAGIC, but 323 * we cannot because it doesn't always exist. So.. we assume the 324 * MBR is valid. 325 */ 326 int 327 readdoslabel(struct buf *bp, void (*strat)(struct buf *), 328 struct disklabel *lp, daddr_t *partoffp, int spoofonly) 329 { 330 struct disklabel *gptlp; 331 u_int64_t dospartoff = 0, dospartend = DL_GETBEND(lp); 332 int i, ourpart = -1, wander = 1, n = 0, loop = 0, offset; 333 struct dos_partition dp[NDOSPART], *dp2; 334 u_int64_t sector = DOSBBSECTOR; 335 u_int32_t extoff = 0; 336 int error; 337 338 if (lp->d_secpercyl == 0) 339 return (EINVAL); /* invalid label */ 340 if (lp->d_secsize == 0) 341 return (ENOSPC); /* disk too small */ 342 343 /* do DOS partitions in the process of getting disklabel? */ 344 345 /* 346 * Read dos partition table, follow extended partitions. 347 * Map the partitions to disklabel entries i-p 348 */ 349 while (wander && loop < DOS_MAXEBR) { 350 loop++; 351 wander = 0; 352 if (sector < extoff) 353 sector = extoff; 354 355 /* read MBR/EBR */ 356 error = readdisksector(bp, strat, lp, sector); 357 if (error) { 358 /*wrong*/ if (partoffp) 359 /*wrong*/ *partoffp = -1; 360 return (error); 361 } 362 363 bcopy(bp->b_data + DOSPARTOFF, dp, sizeof(dp)); 364 365 if (n == 0 && sector == DOSBBSECTOR) { 366 u_int16_t mbrtest; 367 368 /* Check the end of sector marker. */ 369 mbrtest = ((bp->b_data[510] << 8) & 0xff00) | 370 (bp->b_data[511] & 0xff); 371 if (mbrtest != 0x55aa) 372 goto notmbr; 373 374 if (gpt_chk_mbr(dp, DL_GETDSIZE(lp)) != 0) 375 goto notgpt; 376 377 gptlp = malloc(sizeof(struct disklabel), M_DEVBUF, 378 M_NOWAIT); 379 if (gptlp == NULL) 380 return (ENOMEM); 381 *gptlp = *lp; 382 error = spoofgptlabel(bp, strat, gptlp); 383 if (error == 0) { 384 dospartoff = DL_GETBSTART(gptlp); 385 dospartend = DL_GETBEND(gptlp); 386 if (partoffp) { 387 if (dospartoff == 0) 388 return (ENXIO); 389 else 390 goto notfat; 391 } 392 *lp = *gptlp; 393 free(gptlp, M_DEVBUF, 394 sizeof(struct disklabel)); 395 goto notfat; 396 } else { 397 free(gptlp, M_DEVBUF, 398 sizeof(struct disklabel)); 399 goto notmbr; 400 } 401 } 402 403 notgpt: 404 if (ourpart == -1) { 405 /* Search for our MBR partition */ 406 for (dp2=dp, i=0; i < NDOSPART && ourpart == -1; 407 i++, dp2++) 408 if (letoh32(dp2->dp_size) && 409 dp2->dp_typ == DOSPTYP_OPENBSD) 410 ourpart = i; 411 if (ourpart == -1) 412 goto donot; 413 /* 414 * This is our MBR partition. need sector 415 * address for SCSI/IDE, cylinder for 416 * ESDI/ST506/RLL 417 */ 418 dp2 = &dp[ourpart]; 419 dospartoff = letoh32(dp2->dp_start) + sector; 420 dospartend = dospartoff + letoh32(dp2->dp_size); 421 422 /* 423 * Record the OpenBSD partition's placement (in 424 * 512-byte blocks!) for the caller. No need to 425 * finish spoofing. 426 */ 427 if (partoffp) { 428 *partoffp = DL_SECTOBLK(lp, dospartoff); 429 return (0); 430 } 431 432 if (lp->d_ntracks == 0) 433 lp->d_ntracks = dp2->dp_ehd + 1; 434 if (lp->d_nsectors == 0) 435 lp->d_nsectors = DPSECT(dp2->dp_esect); 436 if (lp->d_secpercyl == 0) 437 lp->d_secpercyl = lp->d_ntracks * 438 lp->d_nsectors; 439 } 440 donot: 441 /* 442 * In case the disklabel read below fails, we want to 443 * provide a fake label in i-p. 444 */ 445 for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) { 446 struct partition *pp; 447 u_int8_t fstype; 448 449 if (dp2->dp_typ == DOSPTYP_OPENBSD || 450 dp2->dp_typ == DOSPTYP_EFI) 451 continue; 452 if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp)) 453 continue; 454 if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp)) 455 continue; 456 if (letoh32(dp2->dp_size) == 0) 457 continue; 458 459 switch (dp2->dp_typ) { 460 case DOSPTYP_UNUSED: 461 fstype = FS_UNUSED; 462 break; 463 464 case DOSPTYP_LINUX: 465 fstype = FS_EXT2FS; 466 break; 467 468 case DOSPTYP_NTFS: 469 fstype = FS_NTFS; 470 break; 471 472 case DOSPTYP_EFISYS: 473 case DOSPTYP_FAT12: 474 case DOSPTYP_FAT16S: 475 case DOSPTYP_FAT16B: 476 case DOSPTYP_FAT16L: 477 case DOSPTYP_FAT32: 478 case DOSPTYP_FAT32L: 479 fstype = FS_MSDOS; 480 break; 481 case DOSPTYP_EXTEND: 482 case DOSPTYP_EXTENDL: 483 sector = letoh32(dp2->dp_start) + extoff; 484 if (!extoff) { 485 extoff = letoh32(dp2->dp_start); 486 sector = 0; 487 } 488 wander = 1; 489 continue; 490 break; 491 default: 492 fstype = FS_OTHER; 493 break; 494 } 495 496 /* 497 * Don't set fstype/offset/size when just looking for 498 * the offset of the OpenBSD partition. It would 499 * invalidate the disklabel checksum! 500 * 501 * Don't try to spoof more than 8 partitions, i.e. 502 * 'i' -'p'. 503 */ 504 if (partoffp || n >= 8) 505 continue; 506 507 pp = &lp->d_partitions[8+n]; 508 n++; 509 pp->p_fstype = fstype; 510 if (letoh32(dp2->dp_start)) 511 DL_SETPOFFSET(pp, 512 letoh32(dp2->dp_start) + sector); 513 DL_SETPSIZE(pp, letoh32(dp2->dp_size)); 514 } 515 } 516 517 notmbr: 518 if (n == 0 && sector == DOSBBSECTOR && ourpart == -1) { 519 u_int16_t fattest; 520 521 /* Check for a valid initial jmp instruction. */ 522 switch ((u_int8_t)bp->b_data[0]) { 523 case 0xeb: 524 /* 525 * Two-byte jmp instruction. The 2nd byte is the number 526 * of bytes to jmp and the 3rd byte must be a NOP. 527 */ 528 if ((u_int8_t)bp->b_data[2] != 0x90) 529 goto notfat; 530 break; 531 case 0xe9: 532 /* 533 * Three-byte jmp instruction. The next two bytes are a 534 * little-endian 16 bit value. 535 */ 536 break; 537 default: 538 goto notfat; 539 break; 540 } 541 542 /* Check for a valid bytes per sector value. */ 543 fattest = ((bp->b_data[12] << 8) & 0xff00) | 544 (bp->b_data[11] & 0xff); 545 if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0)) 546 goto notfat; 547 548 if (partoffp) 549 return (ENXIO); /* No place for disklabel on FAT! */ 550 551 DL_SETPSIZE(&lp->d_partitions['i' - 'a'], 552 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 553 DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0); 554 lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS; 555 556 spoofonly = 1; /* No disklabel to read from disk. */ 557 } 558 559 notfat: 560 /* record the OpenBSD partition's placement for the caller */ 561 if (partoffp) 562 *partoffp = DL_SECTOBLK(lp, dospartoff); 563 else { 564 DL_SETBSTART(lp, dospartoff); 565 DL_SETBEND(lp, (dospartend < DL_GETDSIZE(lp)) ? dospartend : 566 DL_GETDSIZE(lp)); 567 } 568 569 /* don't read the on-disk label if we are in spoofed-only mode */ 570 if (spoofonly) 571 return (0); 572 573 error = readdisksector(bp, strat, lp, dospartoff + 574 DL_BLKTOSEC(lp, DOS_LABELSECTOR)); 575 if (error) 576 return (bp->b_error); 577 578 offset = DL_BLKOFFSET(lp, DOS_LABELSECTOR); 579 error = checkdisklabel(bp->b_data + offset, lp, 580 DL_GETBSTART((struct disklabel*)(bp->b_data+offset)), 581 DL_GETBEND((struct disklabel *)(bp->b_data+offset))); 582 583 return (error); 584 } 585 586 /* 587 * Returns 0 if the MBR with the provided partition array is a GPT protective 588 * MBR, and returns 1 otherwise. A GPT protective MBR would have one and only 589 * one MBR partition, an EFI partition that either covers the whole disk or as 590 * much of it as is possible with a 32bit size field. 591 * 592 * NOTE: MS always uses a size of UINT32_MAX for the EFI partition!** 593 */ 594 int 595 gpt_chk_mbr(struct dos_partition *dp, u_int64_t dsize) 596 { 597 struct dos_partition *dp2; 598 int efi, found, i; 599 u_int32_t psize; 600 601 found = efi = 0; 602 for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) { 603 if (dp2->dp_typ == DOSPTYP_UNUSED) 604 continue; 605 found++; 606 if (dp2->dp_typ != DOSPTYP_EFI) 607 continue; 608 psize = letoh32(dp2->dp_size); 609 if (psize == (dsize - 1) || 610 psize == UINT32_MAX) { 611 if (letoh32(dp2->dp_start) == 1) 612 efi++; 613 } 614 } 615 if (found == 1 && efi == 1) 616 return (0); 617 618 return (1); 619 } 620 621 int 622 gpt_chk_hdr(struct gpt_header *gh, struct disklabel *lp) 623 { 624 uint64_t ghpartlba; 625 uint64_t ghlbaend, ghlbastart; 626 uint32_t orig_gh_csum; 627 uint32_t ghsize, ghpartsize, ghpartspersec, ghpartnum; 628 629 if (letoh64(gh->gh_sig) != GPTSIGNATURE) 630 return (EINVAL); 631 632 if (letoh32(gh->gh_rev) != GPTREVISION) 633 return (EINVAL); 634 635 ghsize = letoh32(gh->gh_size); 636 ghpartsize = letoh32(gh->gh_part_size); 637 ghpartspersec = lp->d_secsize / ghpartsize; 638 ghpartnum = letoh32(gh->gh_part_num); 639 ghpartlba = letoh64(gh->gh_part_lba); 640 ghlbaend = letoh64(gh->gh_lba_end); 641 ghlbastart = letoh64(gh->gh_lba_start); 642 643 if (ghsize < GPTMINHDRSIZE || ghsize > sizeof(struct gpt_header)) 644 return (EINVAL); 645 646 orig_gh_csum = gh->gh_csum; 647 gh->gh_csum = 0; 648 gh->gh_csum = crc32(0, (unsigned char *)gh, ghsize); 649 650 if (orig_gh_csum != gh->gh_csum) 651 return (EINVAL); 652 653 if (ghlbastart >= DL_GETDSIZE(lp) || 654 ghlbaend >= DL_GETDSIZE(lp) || 655 ghpartlba >= DL_GETDSIZE(lp)) 656 return (EINVAL); 657 658 /* 659 * Size per partition entry shall be 128*(2**n) with n >= 0. 660 * We don't support partition entries larger than block size. 661 */ 662 if (ghpartsize % GPTMINPARTSIZE || ghpartsize > lp->d_secsize 663 || ghpartspersec == 0) { 664 DPRINTF("invalid partition size\n"); 665 return (EINVAL); 666 } 667 668 /* XXX: we don't support multiples of GPTMINPARTSIZE yet */ 669 if (ghpartsize != GPTMINPARTSIZE) { 670 DPRINTF("partition sizes larger than %d bytes are not " 671 "supported", GPTMINPARTSIZE); 672 return (EINVAL); 673 } 674 675 if (letoh64(gh->gh_lba_alt) >= DL_GETDSIZE(lp)) { 676 DPRINTF("alternate header's position is bogus\n"); 677 return (EINVAL); 678 } 679 680 return 0; 681 } 682 683 int 684 gpt_chk_parts(struct gpt_header *gh, struct gpt_partition *gp) 685 { 686 u_int32_t checksum; 687 checksum = crc32(0, (unsigned char *)gp, 688 letoh32(gh->gh_part_num) * letoh32(gh->gh_part_size)); 689 690 if (checksum != gh->gh_part_csum) 691 return (EINVAL); 692 693 return 0; 694 } 695 696 int 697 gpt_get_fstype(struct uuid *uuid_part) 698 { 699 static int init = 0; 700 static struct uuid uuid_openbsd, uuid_msdos, uuid_chromefs, 701 uuid_linux, uuid_hfs, uuid_unused, uuid_efi_system; 702 static const uint8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD; 703 static const uint8_t gpt_uuid_msdos[] = GPT_UUID_MSDOS; 704 static const uint8_t gpt_uuid_chromerootfs[] = GPT_UUID_CHROMEROOTFS; 705 static const uint8_t gpt_uuid_linux[] = GPT_UUID_LINUX; 706 static const uint8_t gpt_uuid_hfs[] = GPT_UUID_APPLE_HFS; 707 static const uint8_t gpt_uuid_unused[] = GPT_UUID_UNUSED; 708 static const uint8_t gpt_uuid_efi_system[] = GPT_UUID_EFI_SYSTEM; 709 710 if (init == 0) { 711 uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd); 712 uuid_dec_be(gpt_uuid_msdos, &uuid_msdos); 713 uuid_dec_be(gpt_uuid_chromerootfs, &uuid_chromefs); 714 uuid_dec_be(gpt_uuid_linux, &uuid_linux); 715 uuid_dec_be(gpt_uuid_hfs, &uuid_hfs); 716 uuid_dec_be(gpt_uuid_unused, &uuid_unused); 717 uuid_dec_be(gpt_uuid_efi_system, &uuid_efi_system); 718 init = 1; 719 } 720 721 if (!memcmp(uuid_part, &uuid_unused, sizeof(struct uuid))) 722 return FS_UNUSED; 723 else if (!memcmp(uuid_part, &uuid_openbsd, sizeof(struct uuid))) 724 return FS_BSDFFS; 725 else if (!memcmp(uuid_part, &uuid_msdos, sizeof(struct uuid))) 726 return FS_MSDOS; 727 else if (!memcmp(uuid_part, &uuid_chromefs, sizeof(struct uuid))) 728 return FS_EXT2FS; 729 else if (!memcmp(uuid_part, &uuid_linux, sizeof(struct uuid))) 730 return FS_EXT2FS; 731 else if (!memcmp(uuid_part, &uuid_hfs, sizeof(struct uuid))) 732 return FS_HFS; 733 else if (!memcmp(uuid_part, &uuid_efi_system, sizeof(struct uuid))) 734 return FS_MSDOS; 735 else 736 return FS_OTHER; 737 } 738 739 /* 740 * Spoof a disklabel based on the GPT information on the disk. 741 */ 742 int 743 spoofgptlabel(struct buf *bp, void (*strat)(struct buf *), 744 struct disklabel *lp) 745 { 746 static const u_int8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD; 747 struct gpt_header gh; 748 struct uuid uuid_part, uuid_openbsd; 749 struct gpt_partition *gp, *gp_tmp; 750 struct partition *pp; 751 size_t gpsz; 752 u_int64_t ghlbaend, ghlbastart, gptpartoff, gptpartend, sector; 753 u_int64_t start, end; 754 int i, altheader = 0, error, n; 755 uint32_t ghpartnum; 756 757 uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd); 758 759 for (sector = GPTSECTOR; ; sector = DL_GETDSIZE(lp)-1, altheader = 1) { 760 uint64_t ghpartlba; 761 uint32_t ghpartsize; 762 uint32_t ghpartspersec; 763 764 error = readdisksector(bp, strat, lp, sector); 765 if (error) { 766 DPRINTF("error reading from disk\n"); 767 return (error); 768 } 769 770 bcopy(bp->b_data, &gh, sizeof(gh)); 771 772 if (gpt_chk_hdr(&gh, lp)) { 773 if (altheader) { 774 DPRINTF("alternate header also broken\n"); 775 return (EINVAL); 776 } 777 continue; 778 } 779 780 ghpartsize = letoh32(gh.gh_part_size); 781 ghpartspersec = lp->d_secsize / ghpartsize; 782 ghpartnum = letoh32(gh.gh_part_num); 783 ghpartlba = letoh64(gh.gh_part_lba); 784 ghlbaend = letoh64(gh.gh_lba_end); 785 ghlbastart = letoh64(gh.gh_lba_start); 786 787 /* read GPT partition entry array */ 788 gp = mallocarray(ghpartnum, sizeof(struct gpt_partition), 789 M_DEVBUF, M_NOWAIT|M_ZERO); 790 if (gp == NULL) 791 return (ENOMEM); 792 gpsz = ghpartnum * sizeof(struct gpt_partition); 793 794 /* 795 * XXX: Fails if # of partition entries is not a multiple of 796 * ghpartspersec. 797 */ 798 sector = ghpartlba; 799 for (i = 0; i < ghpartnum / ghpartspersec; i++, sector++) { 800 error = readdisksector(bp, strat, lp, sector); 801 if (error) { 802 free(gp, M_DEVBUF, gpsz); 803 return (error); 804 } 805 806 bcopy(bp->b_data, gp + i * ghpartspersec, 807 ghpartspersec * sizeof(struct gpt_partition)); 808 } 809 810 if (gpt_chk_parts(&gh, gp)) { 811 free(gp, M_DEVBUF, gpsz); 812 if (altheader) { 813 DPRINTF("alternate partition entries are also " 814 "broken\n"); 815 return (EINVAL); 816 } 817 continue; 818 } 819 break; 820 } 821 822 /* Find OpenBSD partition and spoof others along the way. */ 823 n = 0; 824 gptpartoff = 0; 825 gptpartend = DL_GETBEND(lp); 826 for (gp_tmp = gp, i = 0; i < ghpartnum; gp_tmp++, i++) { 827 start = letoh64(gp_tmp->gp_lba_start); 828 end = letoh64(gp_tmp->gp_lba_end); 829 if (start > end || start < ghlbastart || end > ghlbaend) 830 continue; /* entry invalid */ 831 832 uuid_dec_le(&gp_tmp->gp_type, &uuid_part); 833 if (!memcmp(&uuid_part, &uuid_openbsd, sizeof(struct uuid))) { 834 if (gptpartoff == 0) { 835 gptpartoff = start; 836 gptpartend = end + 1; 837 } 838 continue; /* Do *NOT* spoof OpenBSD partitions! */ 839 } 840 841 /* 842 * Don't try to spoof more than 8 partitions, i.e. 843 * 'i' -'p'. 844 */ 845 if (n >= 8) 846 continue; 847 848 pp = &lp->d_partitions[8+n]; 849 n++; 850 pp->p_fstype = gpt_get_fstype(&uuid_part); 851 DL_SETPOFFSET(pp, start); 852 DL_SETPSIZE(pp, end - start + 1); 853 } 854 855 free(gp, M_DEVBUF, gpsz); 856 857 DL_SETBSTART(lp, gptpartoff); 858 DL_SETBEND(lp, (gptpartend < DL_GETDSIZE(lp)) ? gptpartend : 859 DL_GETDSIZE(lp)); 860 861 return (0); 862 } 863 864 /* 865 * Check new disk label for sensibility before setting it. 866 */ 867 int 868 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 869 { 870 struct partition *opp, *npp; 871 struct disk *dk; 872 u_int64_t uid; 873 int i; 874 875 /* sanity clause */ 876 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 877 (nlp->d_secsize % DEV_BSIZE) != 0) 878 return (EINVAL); 879 880 /* special case to allow disklabel to be invalidated */ 881 if (nlp->d_magic == 0xffffffff) { 882 *olp = *nlp; 883 return (0); 884 } 885 886 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 887 dkcksum(nlp) != 0) 888 return (EINVAL); 889 890 /* XXX missing check if other dos partitions will be overwritten */ 891 892 for (i = 0; i < MAXPARTITIONS; i++) { 893 opp = &olp->d_partitions[i]; 894 npp = &nlp->d_partitions[i]; 895 if ((openmask & (1 << i)) && 896 (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 897 DL_GETPSIZE(npp) < DL_GETPSIZE(opp))) 898 return (EBUSY); 899 /* 900 * Copy internally-set partition information 901 * if new label doesn't include it. XXX 902 */ 903 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 904 npp->p_fragblock = opp->p_fragblock; 905 npp->p_cpg = opp->p_cpg; 906 } 907 } 908 909 /* Generate a UID if the disklabel does not already have one. */ 910 uid = 0; 911 if (memcmp(nlp->d_uid, &uid, sizeof(nlp->d_uid)) == 0) { 912 do { 913 arc4random_buf(nlp->d_uid, sizeof(nlp->d_uid)); 914 TAILQ_FOREACH(dk, &disklist, dk_link) 915 if (dk->dk_label && memcmp(dk->dk_label->d_uid, 916 nlp->d_uid, sizeof(nlp->d_uid)) == 0) 917 break; 918 } while (dk != NULL && 919 memcmp(nlp->d_uid, &uid, sizeof(nlp->d_uid)) == 0); 920 } 921 922 /* Preserve the disk size and RAW_PART values. */ 923 DL_SETDSIZE(nlp, DL_GETDSIZE(olp)); 924 npp = &nlp->d_partitions[RAW_PART]; 925 DL_SETPOFFSET(npp, 0); 926 DL_SETPSIZE(npp, DL_GETDSIZE(nlp)); 927 928 nlp->d_checksum = 0; 929 nlp->d_checksum = dkcksum(nlp); 930 *olp = *nlp; 931 932 disk_change = 1; 933 934 return (0); 935 } 936 937 /* 938 * Determine the size of the transfer, and make sure it is within the 939 * boundaries of the partition. Adjust transfer if needed, and signal errors or 940 * early completion. 941 */ 942 int 943 bounds_check_with_label(struct buf *bp, struct disklabel *lp) 944 { 945 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 946 daddr_t partblocks, sz; 947 948 /* Avoid division by zero, negative offsets, and negative sizes. */ 949 if (lp->d_secpercyl == 0 || bp->b_blkno < 0 || bp->b_bcount < 0) 950 goto bad; 951 952 /* Ensure transfer is a whole number of aligned sectors. */ 953 if ((bp->b_blkno % DL_BLKSPERSEC(lp)) != 0 || 954 (bp->b_bcount % lp->d_secsize) != 0) 955 goto bad; 956 957 /* Ensure transfer starts within partition boundary. */ 958 partblocks = DL_SECTOBLK(lp, DL_GETPSIZE(p)); 959 if (bp->b_blkno > partblocks) 960 goto bad; 961 962 /* If exactly at end of partition or null transfer, return EOF. */ 963 if (bp->b_blkno == partblocks || bp->b_bcount == 0) 964 goto done; 965 966 /* Truncate request if it extends past the end of the partition. */ 967 sz = bp->b_bcount >> DEV_BSHIFT; 968 if (sz > partblocks - bp->b_blkno) { 969 sz = partblocks - bp->b_blkno; 970 bp->b_bcount = sz << DEV_BSHIFT; 971 } 972 973 return (0); 974 975 bad: 976 bp->b_error = EINVAL; 977 bp->b_flags |= B_ERROR; 978 done: 979 bp->b_resid = bp->b_bcount; 980 return (-1); 981 } 982 983 /* 984 * Disk error is the preface to plaintive error messages 985 * about failing disk transfers. It prints messages of the form 986 987 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 988 989 * if the offset of the error in the transfer and a disk label 990 * are both available. blkdone should be -1 if the position of the error 991 * is unknown; the disklabel pointer may be null from drivers that have not 992 * been converted to use them. The message is printed with printf 993 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 994 * The message should be completed (with at least a newline) with printf 995 * or addlog, respectively. There is no trailing space. 996 */ 997 void 998 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 999 struct disklabel *lp) 1000 { 1001 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 1002 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))); 1003 char partname = 'a' + part; 1004 daddr_t sn; 1005 1006 if (pri != LOG_PRINTF) { 1007 log(pri, "%s", ""); 1008 pr = addlog; 1009 } else 1010 pr = printf; 1011 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 1012 bp->b_flags & B_READ ? "read" : "writ"); 1013 sn = bp->b_blkno; 1014 if (bp->b_bcount <= DEV_BSIZE) 1015 (*pr)("%lld", (long long)sn); 1016 else { 1017 if (blkdone >= 0) { 1018 sn += blkdone; 1019 (*pr)("%lld of ", (long long)sn); 1020 } 1021 (*pr)("%lld-%lld", (long long)bp->b_blkno, 1022 (long long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE)); 1023 } 1024 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 1025 sn += DL_SECTOBLK(lp, DL_GETPOFFSET(&lp->d_partitions[part])); 1026 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, (long long)sn, 1027 (long long)(sn / DL_SECTOBLK(lp, lp->d_secpercyl))); 1028 sn %= DL_SECTOBLK(lp, lp->d_secpercyl); 1029 (*pr)(" tn %lld sn %lld)", 1030 (long long)(sn / DL_SECTOBLK(lp, lp->d_nsectors)), 1031 (long long)(sn % DL_SECTOBLK(lp, lp->d_nsectors))); 1032 } 1033 } 1034 1035 /* 1036 * Initialize the disklist. Called by main() before autoconfiguration. 1037 */ 1038 void 1039 disk_init(void) 1040 { 1041 1042 TAILQ_INIT(&disklist); 1043 disk_count = disk_change = 0; 1044 } 1045 1046 int 1047 disk_construct(struct disk *diskp) 1048 { 1049 rw_init(&diskp->dk_lock, "dklk"); 1050 mtx_init(&diskp->dk_mtx, IPL_BIO); 1051 1052 diskp->dk_flags |= DKF_CONSTRUCTED; 1053 1054 return (0); 1055 } 1056 1057 /* 1058 * Attach a disk. 1059 */ 1060 void 1061 disk_attach(struct device *dv, struct disk *diskp) 1062 { 1063 int majdev; 1064 1065 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 1066 disk_construct(diskp); 1067 1068 /* 1069 * Allocate and initialize the disklabel structures. Note that 1070 * it's not safe to sleep here, since we're probably going to be 1071 * called during autoconfiguration. 1072 */ 1073 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 1074 M_NOWAIT|M_ZERO); 1075 if (diskp->dk_label == NULL) 1076 panic("disk_attach: can't allocate storage for disklabel"); 1077 1078 /* 1079 * Set the attached timestamp. 1080 */ 1081 microuptime(&diskp->dk_attachtime); 1082 1083 /* 1084 * Link into the disklist. 1085 */ 1086 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 1087 ++disk_count; 1088 disk_change = 1; 1089 1090 /* 1091 * Store device structure and number for later use. 1092 */ 1093 diskp->dk_device = dv; 1094 diskp->dk_devno = NODEV; 1095 if (dv != NULL) { 1096 majdev = findblkmajor(dv); 1097 if (majdev >= 0) 1098 diskp->dk_devno = 1099 MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); 1100 1101 if (diskp->dk_devno != NODEV) { 1102 struct disk_attach_task *dat; 1103 1104 dat = malloc(sizeof(*dat), M_TEMP, M_WAITOK); 1105 1106 /* XXX: Assumes dk is part of the device softc. */ 1107 device_ref(dv); 1108 dat->dk = diskp; 1109 1110 task_set(&dat->task, disk_attach_callback, dat); 1111 task_add(systq, &dat->task); 1112 } 1113 } 1114 1115 if (softraid_disk_attach) 1116 softraid_disk_attach(diskp, 1); 1117 } 1118 1119 void 1120 disk_attach_callback(void *xdat) 1121 { 1122 struct disk_attach_task *dat = xdat; 1123 struct disk *dk = dat->dk; 1124 char errbuf[100]; 1125 struct disklabel dl; 1126 dev_t dev; 1127 1128 free(dat, M_TEMP, sizeof(*dat)); 1129 1130 if (dk->dk_flags & (DKF_OPENED | DKF_NOLABELREAD)) 1131 goto done; 1132 1133 /* Read disklabel. */ 1134 dev = dk->dk_devno; 1135 if (disk_readlabel(&dl, dk->dk_devno, errbuf, sizeof(errbuf)) == NULL) { 1136 add_timer_randomness(dl.d_checksum); 1137 dk->dk_flags |= DKF_LABELVALID; 1138 } 1139 1140 done: 1141 dk->dk_flags |= DKF_OPENED; 1142 device_unref(dk->dk_device); 1143 wakeup(dk); 1144 } 1145 1146 /* 1147 * Detach a disk. 1148 */ 1149 void 1150 disk_detach(struct disk *diskp) 1151 { 1152 1153 if (softraid_disk_attach) 1154 softraid_disk_attach(diskp, -1); 1155 1156 /* 1157 * Free the space used by the disklabel structures. 1158 */ 1159 free(diskp->dk_label, M_DEVBUF, sizeof(*diskp->dk_label)); 1160 1161 /* 1162 * Remove from the disklist. 1163 */ 1164 TAILQ_REMOVE(&disklist, diskp, dk_link); 1165 disk_change = 1; 1166 if (--disk_count < 0) 1167 panic("disk_detach: disk_count < 0"); 1168 } 1169 1170 int 1171 disk_openpart(struct disk *dk, int part, int fmt, int haslabel) 1172 { 1173 KASSERT(part >= 0 && part < MAXPARTITIONS); 1174 1175 /* Unless opening the raw partition, check that the partition exists. */ 1176 if (part != RAW_PART && (!haslabel || 1177 part >= dk->dk_label->d_npartitions || 1178 dk->dk_label->d_partitions[part].p_fstype == FS_UNUSED)) 1179 return (ENXIO); 1180 1181 /* Ensure the partition doesn't get changed under our feet. */ 1182 switch (fmt) { 1183 case S_IFCHR: 1184 dk->dk_copenmask |= (1 << part); 1185 break; 1186 case S_IFBLK: 1187 dk->dk_bopenmask |= (1 << part); 1188 break; 1189 } 1190 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1191 1192 return (0); 1193 } 1194 1195 void 1196 disk_closepart(struct disk *dk, int part, int fmt) 1197 { 1198 KASSERT(part >= 0 && part < MAXPARTITIONS); 1199 1200 switch (fmt) { 1201 case S_IFCHR: 1202 dk->dk_copenmask &= ~(1 << part); 1203 break; 1204 case S_IFBLK: 1205 dk->dk_bopenmask &= ~(1 << part); 1206 break; 1207 } 1208 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1209 } 1210 1211 void 1212 disk_gone(int (*open)(dev_t, int, int, struct proc *), int unit) 1213 { 1214 int bmaj, cmaj, mn; 1215 1216 /* Locate the lowest minor number to be detached. */ 1217 mn = DISKMINOR(unit, 0); 1218 1219 for (bmaj = 0; bmaj < nblkdev; bmaj++) 1220 if (bdevsw[bmaj].d_open == open) 1221 vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK); 1222 for (cmaj = 0; cmaj < nchrdev; cmaj++) 1223 if (cdevsw[cmaj].d_open == open) 1224 vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR); 1225 } 1226 1227 /* 1228 * Increment a disk's busy counter. If the counter is going from 1229 * 0 to 1, set the timestamp. 1230 */ 1231 void 1232 disk_busy(struct disk *diskp) 1233 { 1234 1235 /* 1236 * XXX We'd like to use something as accurate as microtime(), 1237 * but that doesn't depend on the system TOD clock. 1238 */ 1239 mtx_enter(&diskp->dk_mtx); 1240 if (diskp->dk_busy++ == 0) 1241 microuptime(&diskp->dk_timestamp); 1242 mtx_leave(&diskp->dk_mtx); 1243 } 1244 1245 /* 1246 * Decrement a disk's busy counter, increment the byte count, total busy 1247 * time, and reset the timestamp. 1248 */ 1249 void 1250 disk_unbusy(struct disk *diskp, long bcount, int read) 1251 { 1252 struct timeval dv_time, diff_time; 1253 1254 mtx_enter(&diskp->dk_mtx); 1255 1256 if (diskp->dk_busy-- == 0) 1257 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 1258 1259 microuptime(&dv_time); 1260 1261 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 1262 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 1263 1264 diskp->dk_timestamp = dv_time; 1265 if (bcount > 0) { 1266 if (read) { 1267 diskp->dk_rbytes += bcount; 1268 diskp->dk_rxfer++; 1269 } else { 1270 diskp->dk_wbytes += bcount; 1271 diskp->dk_wxfer++; 1272 } 1273 } else 1274 diskp->dk_seek++; 1275 1276 mtx_leave(&diskp->dk_mtx); 1277 1278 add_disk_randomness(bcount ^ diff_time.tv_usec); 1279 } 1280 1281 int 1282 disk_lock(struct disk *dk) 1283 { 1284 return (rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR)); 1285 } 1286 1287 void 1288 disk_lock_nointr(struct disk *dk) 1289 { 1290 rw_enter_write(&dk->dk_lock); 1291 } 1292 1293 void 1294 disk_unlock(struct disk *dk) 1295 { 1296 rw_exit_write(&dk->dk_lock); 1297 } 1298 1299 int 1300 dk_mountroot(void) 1301 { 1302 char errbuf[100]; 1303 int part = DISKPART(rootdev); 1304 int (*mountrootfn)(void); 1305 struct disklabel dl; 1306 char *error; 1307 1308 error = disk_readlabel(&dl, rootdev, errbuf, sizeof(errbuf)); 1309 if (error) 1310 panic("%s", error); 1311 1312 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 1313 panic("root filesystem has size 0"); 1314 switch (dl.d_partitions[part].p_fstype) { 1315 #ifdef EXT2FS 1316 case FS_EXT2FS: 1317 { 1318 extern int ext2fs_mountroot(void); 1319 mountrootfn = ext2fs_mountroot; 1320 } 1321 break; 1322 #endif 1323 #ifdef FFS 1324 case FS_BSDFFS: 1325 { 1326 extern int ffs_mountroot(void); 1327 mountrootfn = ffs_mountroot; 1328 } 1329 break; 1330 #endif 1331 #ifdef CD9660 1332 case FS_ISO9660: 1333 { 1334 extern int cd9660_mountroot(void); 1335 mountrootfn = cd9660_mountroot; 1336 } 1337 break; 1338 #endif 1339 default: 1340 #ifdef FFS 1341 { 1342 extern int ffs_mountroot(void); 1343 1344 printf("filesystem type %d not known.. assuming ffs\n", 1345 dl.d_partitions[part].p_fstype); 1346 mountrootfn = ffs_mountroot; 1347 } 1348 #else 1349 panic("disk 0x%x filesystem type %d not known", 1350 rootdev, dl.d_partitions[part].p_fstype); 1351 #endif 1352 } 1353 return (*mountrootfn)(); 1354 } 1355 1356 struct device * 1357 getdisk(char *str, int len, int defpart, dev_t *devp) 1358 { 1359 struct device *dv; 1360 1361 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1362 printf("use one of: exit"); 1363 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1364 if (dv->dv_class == DV_DISK) 1365 printf(" %s[a-p]", dv->dv_xname); 1366 #if defined(NFSCLIENT) 1367 if (dv->dv_class == DV_IFNET) 1368 printf(" %s", dv->dv_xname); 1369 #endif 1370 } 1371 printf("\n"); 1372 } 1373 return (dv); 1374 } 1375 1376 struct device * 1377 parsedisk(char *str, int len, int defpart, dev_t *devp) 1378 { 1379 struct device *dv; 1380 int majdev, part = defpart; 1381 char c; 1382 1383 if (len == 0) 1384 return (NULL); 1385 c = str[len-1]; 1386 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1387 part = c - 'a'; 1388 len -= 1; 1389 } 1390 1391 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1392 if (dv->dv_class == DV_DISK && 1393 strncmp(str, dv->dv_xname, len) == 0 && 1394 dv->dv_xname[len] == '\0') { 1395 majdev = findblkmajor(dv); 1396 if (majdev < 0) 1397 return NULL; 1398 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1399 break; 1400 } 1401 #if defined(NFSCLIENT) 1402 if (dv->dv_class == DV_IFNET && 1403 strncmp(str, dv->dv_xname, len) == 0 && 1404 dv->dv_xname[len] == '\0') { 1405 *devp = NODEV; 1406 break; 1407 } 1408 #endif 1409 } 1410 1411 return (dv); 1412 } 1413 1414 void 1415 setroot(struct device *bootdv, int part, int exitflags) 1416 { 1417 int majdev, unit, len, s, slept = 0; 1418 struct swdevt *swp; 1419 struct device *rootdv, *dv; 1420 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1421 struct ifnet *ifp = NULL; 1422 struct disk *dk; 1423 u_char duid[8]; 1424 char buf[128]; 1425 #if defined(NFSCLIENT) 1426 extern char *nfsbootdevname; 1427 #endif 1428 1429 /* Ensure that all disk attach callbacks have completed. */ 1430 do { 1431 TAILQ_FOREACH(dk, &disklist, dk_link) { 1432 if (dk->dk_devno != NODEV && 1433 (dk->dk_flags & DKF_OPENED) == 0) { 1434 tsleep(dk, 0, "dkopen", hz); 1435 slept++; 1436 break; 1437 } 1438 } 1439 } while (dk != NULL && slept < 5); 1440 1441 if (slept == 5) { 1442 printf("disklabels not read:"); 1443 TAILQ_FOREACH(dk, &disklist, dk_link) 1444 if (dk->dk_devno != NODEV && 1445 (dk->dk_flags & DKF_OPENED) == 0) 1446 printf(" %s", dk->dk_name); 1447 printf("\n"); 1448 } 1449 1450 /* Locate DUID for boot disk if not already provided. */ 1451 memset(duid, 0, sizeof(duid)); 1452 if (memcmp(bootduid, duid, sizeof(bootduid)) == 0) { 1453 TAILQ_FOREACH(dk, &disklist, dk_link) 1454 if (dk->dk_device == bootdv) 1455 break; 1456 if (dk && (dk->dk_flags & DKF_LABELVALID)) 1457 bcopy(dk->dk_label->d_uid, bootduid, sizeof(bootduid)); 1458 } 1459 bcopy(bootduid, rootduid, sizeof(rootduid)); 1460 1461 #if NSOFTRAID > 0 1462 sr_map_root(); 1463 #endif 1464 1465 /* 1466 * If `swap generic' and we couldn't determine boot device, 1467 * ask the user. 1468 */ 1469 dk = NULL; 1470 if (mountroot == NULL && bootdv == NULL) 1471 boothowto |= RB_ASKNAME; 1472 if (boothowto & RB_ASKNAME) { 1473 while (1) { 1474 printf("root device"); 1475 if (bootdv != NULL) { 1476 printf(" (default %s", bootdv->dv_xname); 1477 if (bootdv->dv_class == DV_DISK) 1478 printf("%c", 'a' + part); 1479 printf(")"); 1480 } 1481 printf(": "); 1482 s = splhigh(); 1483 cnpollc(1); 1484 len = getsn(buf, sizeof(buf)); 1485 cnpollc(0); 1486 splx(s); 1487 if (strcmp(buf, "exit") == 0) 1488 reboot(exitflags); 1489 if (len == 0 && bootdv != NULL) { 1490 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1491 len = strlen(buf); 1492 } 1493 if (len > 0 && buf[len - 1] == '*') { 1494 buf[--len] = '\0'; 1495 dv = getdisk(buf, len, part, &nrootdev); 1496 if (dv != NULL) { 1497 rootdv = dv; 1498 nswapdev = nrootdev; 1499 goto gotswap; 1500 } 1501 } 1502 dv = getdisk(buf, len, part, &nrootdev); 1503 if (dv != NULL) { 1504 rootdv = dv; 1505 break; 1506 } 1507 } 1508 1509 if (rootdv->dv_class == DV_IFNET) 1510 goto gotswap; 1511 1512 /* try to build swap device out of new root device */ 1513 while (1) { 1514 printf("swap device"); 1515 if (rootdv != NULL) 1516 printf(" (default %s%s)", rootdv->dv_xname, 1517 rootdv->dv_class == DV_DISK ? "b" : ""); 1518 printf(": "); 1519 s = splhigh(); 1520 cnpollc(1); 1521 len = getsn(buf, sizeof(buf)); 1522 cnpollc(0); 1523 splx(s); 1524 if (strcmp(buf, "exit") == 0) 1525 reboot(exitflags); 1526 if (len == 0 && rootdv != NULL) { 1527 switch (rootdv->dv_class) { 1528 case DV_IFNET: 1529 nswapdev = NODEV; 1530 break; 1531 case DV_DISK: 1532 nswapdev = MAKEDISKDEV(major(nrootdev), 1533 DISKUNIT(nrootdev), 1); 1534 if (nswapdev == nrootdev) 1535 continue; 1536 break; 1537 default: 1538 break; 1539 } 1540 break; 1541 } 1542 dv = getdisk(buf, len, 1, &nswapdev); 1543 if (dv) { 1544 if (dv->dv_class == DV_IFNET) 1545 nswapdev = NODEV; 1546 if (nswapdev == nrootdev) 1547 continue; 1548 break; 1549 } 1550 } 1551 gotswap: 1552 rootdev = nrootdev; 1553 dumpdev = nswapdev; 1554 swdevt[0].sw_dev = nswapdev; 1555 swdevt[1].sw_dev = NODEV; 1556 #if defined(NFSCLIENT) 1557 } else if (mountroot == nfs_mountroot) { 1558 rootdv = bootdv; 1559 rootdev = dumpdev = swapdev = NODEV; 1560 #endif 1561 } else if (mountroot == NULL && rootdev == NODEV) { 1562 /* 1563 * `swap generic' 1564 */ 1565 rootdv = bootdv; 1566 1567 if (bootdv->dv_class == DV_DISK) { 1568 memset(&duid, 0, sizeof(duid)); 1569 if (memcmp(rootduid, &duid, sizeof(rootduid)) != 0) { 1570 TAILQ_FOREACH(dk, &disklist, dk_link) 1571 if ((dk->dk_flags & DKF_LABELVALID) && 1572 dk->dk_label && memcmp(dk->dk_label->d_uid, 1573 &rootduid, sizeof(rootduid)) == 0) 1574 break; 1575 if (dk == NULL) 1576 panic("root device (%02hx%02hx%02hx%02hx" 1577 "%02hx%02hx%02hx%02hx) not found", 1578 rootduid[0], rootduid[1], rootduid[2], 1579 rootduid[3], rootduid[4], rootduid[5], 1580 rootduid[6], rootduid[7]); 1581 rootdv = dk->dk_device; 1582 } 1583 } 1584 1585 majdev = findblkmajor(rootdv); 1586 if (majdev >= 0) { 1587 /* 1588 * Root and swap are on the disk. 1589 * Assume swap is on partition b. 1590 */ 1591 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1592 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1593 } else { 1594 /* 1595 * Root and swap are on a net. 1596 */ 1597 nswapdev = NODEV; 1598 } 1599 dumpdev = nswapdev; 1600 swdevt[0].sw_dev = nswapdev; 1601 /* swdevt[1].sw_dev = NODEV; */ 1602 } else { 1603 /* Completely pre-configured, but we want rootdv .. */ 1604 majdev = major(rootdev); 1605 if (findblkname(majdev) == NULL) 1606 return; 1607 unit = DISKUNIT(rootdev); 1608 part = DISKPART(rootdev); 1609 snprintf(buf, sizeof buf, "%s%d%c", 1610 findblkname(majdev), unit, 'a' + part); 1611 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1612 if (rootdv == NULL) 1613 panic("root device (%s) not found", buf); 1614 } 1615 1616 if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET) 1617 ifp = ifunit(rootdv->dv_xname); 1618 else if (bootdv && bootdv->dv_class == DV_IFNET) 1619 ifp = ifunit(bootdv->dv_xname); 1620 1621 if (ifp) 1622 if_addgroup(ifp, "netboot"); 1623 1624 switch (rootdv->dv_class) { 1625 #if defined(NFSCLIENT) 1626 case DV_IFNET: 1627 mountroot = nfs_mountroot; 1628 nfsbootdevname = rootdv->dv_xname; 1629 return; 1630 #endif 1631 case DV_DISK: 1632 mountroot = dk_mountroot; 1633 part = DISKPART(rootdev); 1634 break; 1635 default: 1636 printf("can't figure root, hope your kernel is right\n"); 1637 return; 1638 } 1639 1640 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1641 1642 if (dk && dk->dk_device == rootdv) 1643 printf(" (%02hx%02hx%02hx%02hx%02hx%02hx%02hx%02hx.%c)", 1644 rootduid[0], rootduid[1], rootduid[2], rootduid[3], 1645 rootduid[4], rootduid[5], rootduid[6], rootduid[7], 1646 'a' + part); 1647 1648 /* 1649 * Make the swap partition on the root drive the primary swap. 1650 */ 1651 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1652 if (major(rootdev) == major(swp->sw_dev) && 1653 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1654 temp = swdevt[0].sw_dev; 1655 swdevt[0].sw_dev = swp->sw_dev; 1656 swp->sw_dev = temp; 1657 break; 1658 } 1659 } 1660 if (swp->sw_dev != NODEV) { 1661 /* 1662 * If dumpdev was the same as the old primary swap device, 1663 * move it to the new primary swap device. 1664 */ 1665 if (temp == dumpdev) 1666 dumpdev = swdevt[0].sw_dev; 1667 } 1668 if (swdevt[0].sw_dev != NODEV) 1669 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1670 DISKUNIT(swdevt[0].sw_dev), 1671 'a' + DISKPART(swdevt[0].sw_dev)); 1672 if (dumpdev != NODEV) 1673 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1674 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1675 printf("\n"); 1676 } 1677 1678 extern struct nam2blk nam2blk[]; 1679 1680 int 1681 findblkmajor(struct device *dv) 1682 { 1683 char buf[16], *p; 1684 int i; 1685 1686 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1687 return (-1); 1688 for (p = buf; *p; p++) 1689 if (*p >= '0' && *p <= '9') 1690 *p = '\0'; 1691 1692 for (i = 0; nam2blk[i].name; i++) 1693 if (!strcmp(buf, nam2blk[i].name)) 1694 return (nam2blk[i].maj); 1695 return (-1); 1696 } 1697 1698 char * 1699 findblkname(int maj) 1700 { 1701 int i; 1702 1703 for (i = 0; nam2blk[i].name; i++) 1704 if (nam2blk[i].maj == maj) 1705 return (nam2blk[i].name); 1706 return (NULL); 1707 } 1708 1709 char * 1710 disk_readlabel(struct disklabel *dl, dev_t dev, char *errbuf, size_t errsize) 1711 { 1712 struct vnode *vn; 1713 dev_t chrdev, rawdev; 1714 int error; 1715 1716 chrdev = blktochr(dev); 1717 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(chrdev), RAW_PART); 1718 1719 #ifdef DEBUG 1720 printf("dev=0x%x chrdev=0x%x rawdev=0x%x\n", dev, chrdev, rawdev); 1721 #endif 1722 1723 if (cdevvp(rawdev, &vn)) { 1724 snprintf(errbuf, errsize, 1725 "cannot obtain vnode for 0x%x/0x%x", dev, rawdev); 1726 return (errbuf); 1727 } 1728 1729 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1730 if (error) { 1731 snprintf(errbuf, errsize, 1732 "cannot open disk, 0x%x/0x%x, error %d", 1733 dev, rawdev, error); 1734 goto done; 1735 } 1736 1737 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)dl, FREAD, NOCRED, curproc); 1738 if (error) { 1739 snprintf(errbuf, errsize, 1740 "cannot read disk label, 0x%x/0x%x, error %d", 1741 dev, rawdev, error); 1742 } 1743 done: 1744 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1745 vput(vn); 1746 if (error) 1747 return (errbuf); 1748 return (NULL); 1749 } 1750 1751 int 1752 disk_map(char *path, char *mappath, int size, int flags) 1753 { 1754 struct disk *dk, *mdk; 1755 u_char uid[8]; 1756 char c, part; 1757 int i; 1758 1759 /* 1760 * Attempt to map a request for a disklabel UID to the correct device. 1761 * We should be supplied with a disklabel UID which has the following 1762 * format: 1763 * 1764 * [disklabel uid] . [partition] 1765 * 1766 * Alternatively, if the DM_OPENPART flag is set the disklabel UID can 1767 * based passed on its own. 1768 */ 1769 1770 if (strchr(path, '/') != NULL) 1771 return -1; 1772 1773 /* Verify that the device name is properly formed. */ 1774 if (!((strlen(path) == 16 && (flags & DM_OPENPART)) || 1775 (strlen(path) == 18 && path[16] == '.'))) 1776 return -1; 1777 1778 /* Get partition. */ 1779 if (flags & DM_OPENPART) 1780 part = 'a' + RAW_PART; 1781 else 1782 part = path[17]; 1783 1784 if (part < 'a' || part >= 'a' + MAXPARTITIONS) 1785 return -1; 1786 1787 /* Derive label UID. */ 1788 memset(uid, 0, sizeof(uid)); 1789 for (i = 0; i < 16; i++) { 1790 c = path[i]; 1791 if (c >= '0' && c <= '9') 1792 c -= '0'; 1793 else if (c >= 'a' && c <= 'f') 1794 c -= ('a' - 10); 1795 else 1796 return -1; 1797 1798 uid[i / 2] <<= 4; 1799 uid[i / 2] |= c & 0xf; 1800 } 1801 1802 mdk = NULL; 1803 TAILQ_FOREACH(dk, &disklist, dk_link) { 1804 if ((dk->dk_flags & DKF_LABELVALID) && dk->dk_label && 1805 memcmp(dk->dk_label->d_uid, uid, 1806 sizeof(dk->dk_label->d_uid)) == 0) { 1807 /* Fail if there are duplicate UIDs! */ 1808 if (mdk != NULL) 1809 return -1; 1810 mdk = dk; 1811 } 1812 } 1813 1814 if (mdk == NULL || mdk->dk_name == NULL) 1815 return -1; 1816 1817 snprintf(mappath, size, "/dev/%s%s%c", 1818 (flags & DM_OPENBLCK) ? "" : "r", mdk->dk_name, part); 1819 1820 return 0; 1821 } 1822 1823 /* 1824 * Lookup a disk device and verify that it has completed attaching. 1825 */ 1826 struct device * 1827 disk_lookup(struct cfdriver *cd, int unit) 1828 { 1829 struct device *dv; 1830 struct disk *dk; 1831 1832 dv = device_lookup(cd, unit); 1833 if (dv == NULL) 1834 return (NULL); 1835 1836 TAILQ_FOREACH(dk, &disklist, dk_link) 1837 if (dk->dk_device == dv) 1838 break; 1839 1840 if (dk == NULL) { 1841 device_unref(dv); 1842 return (NULL); 1843 } 1844 1845 return (dv); 1846 } 1847