1 /* $OpenBSD: subr_disk.c,v 1.248 2022/01/02 17:26:14 krw Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/stat.h> 48 #include <sys/syslog.h> 49 #include <sys/device.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/conf.h> 53 #include <sys/lock.h> 54 #include <sys/disk.h> 55 #include <sys/reboot.h> 56 #include <sys/dkio.h> 57 #include <sys/vnode.h> 58 #include <sys/task.h> 59 #include <sys/stdint.h> 60 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 64 #include <net/if.h> 65 66 #include <dev/cons.h> 67 68 #include <lib/libz/zlib.h> 69 70 #include "softraid.h" 71 72 #ifdef DEBUG 73 #define DPRINTF(x...) printf(x) 74 #else 75 #define DPRINTF(x...) 76 #endif 77 78 /* 79 * A global list of all disks attached to the system. May grow or 80 * shrink over time. 81 */ 82 struct disklist_head disklist; /* TAILQ_HEAD */ 83 int disk_count; /* number of drives in global disklist */ 84 int disk_change; /* set if a disk has been attached/detached 85 * since last we looked at this variable. This 86 * is reset by hw_sysctl() 87 */ 88 89 #define DUID_SIZE 8 90 91 u_char bootduid[DUID_SIZE]; /* DUID of boot disk. */ 92 u_char rootduid[DUID_SIZE]; /* DUID of root disk. */ 93 94 /* softraid callback, do not use! 
*/ 95 void (*softraid_disk_attach)(struct disk *, int); 96 97 void sr_map_root(void); 98 99 struct disk_attach_task { 100 struct task task; 101 struct disk *dk; 102 }; 103 104 void disk_attach_callback(void *); 105 106 int spoofgptlabel(struct buf *, void (*)(struct buf *), struct disklabel *); 107 108 int gpt_chk_mbr(struct dos_partition *, uint64_t); 109 int gpt_chk_hdr(struct gpt_header *, struct disklabel *); 110 int gpt_chk_parts(struct gpt_header *, struct gpt_partition *); 111 int gpt_get_fstype(struct uuid *); 112 113 int duid_equal(u_char *, u_char *); 114 115 /* 116 * Compute checksum for disk label. 117 */ 118 u_int 119 dkcksum(struct disklabel *lp) 120 { 121 u_int16_t *start, *end; 122 u_int16_t sum = 0; 123 124 start = (u_int16_t *)lp; 125 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 126 while (start < end) 127 sum ^= *start++; 128 return (sum); 129 } 130 131 int 132 initdisklabel(struct disklabel *lp) 133 { 134 int i; 135 136 /* minimal requirements for archetypal disk label */ 137 if (lp->d_secsize < DEV_BSIZE) 138 lp->d_secsize = DEV_BSIZE; 139 if (DL_GETDSIZE(lp) == 0) 140 DL_SETDSIZE(lp, MAXDISKSIZE); 141 if (lp->d_secpercyl == 0) 142 return (ERANGE); 143 lp->d_npartitions = MAXPARTITIONS; 144 for (i = 0; i < RAW_PART; i++) { 145 DL_SETPSIZE(&lp->d_partitions[i], 0); 146 DL_SETPOFFSET(&lp->d_partitions[i], 0); 147 } 148 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 149 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 150 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 151 DL_SETBSTART(lp, 0); 152 DL_SETBEND(lp, DL_GETDSIZE(lp)); 153 lp->d_version = 1; 154 lp->d_bbsize = 8192; 155 lp->d_sbsize = 64*1024; /* XXX ? */ 156 return (0); 157 } 158 159 /* 160 * Check an incoming block to make sure it is a disklabel, convert it to 161 * a newer version if needed, etc etc. 
*/
/*
 * rlp points at the candidate label block; if it turns out to be a
 * byte-swapped label it is converted in place.  lp initially carries the
 * real disk size and receives the accepted label.  boundstart/boundend
 * become the label's bounds, with the end clamped to the disk size.
 *
 * Returns 0 on success, otherwise the errno chosen by the first failed
 * check (ENOENT, E2BIG, EINVAL or ENOSPC).
 */
int
checkdisklabel(void *rlp, struct disklabel *lp, u_int64_t boundstart,
    u_int64_t boundend)
{
	struct disklabel *dlp = rlp;
	struct __partitionv0 *v0pp;
	struct partition *pp;
	u_int64_t disksize;
	int error = 0;
	int i;

	if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC)
		error = ENOENT;	/* no disk label */
	else if (dlp->d_npartitions > MAXPARTITIONS)
		error = E2BIG;	/* too many partitions */
	else if (dlp->d_secpercyl == 0)
		error = EINVAL;	/* invalid label */
	else if (dlp->d_secsize == 0)
		error = ENOSPC;	/* disk too small */
	else if (dkcksum(dlp) != 0)
		error = EINVAL;	/* incorrect checksum */

	if (error) {
		u_int16_t *start, *end, sum = 0;

		/* If it is byte-swapped, attempt to convert it */
		if (swap32(dlp->d_magic) != DISKMAGIC ||
		    swap32(dlp->d_magic2) != DISKMAGIC ||
		    swap16(dlp->d_npartitions) > MAXPARTITIONS)
			return (error);

		/*
		 * Need a byte-swap aware dkcksum variant
		 * inlined, because dkcksum uses a sub-field
		 */
		start = (u_int16_t *)dlp;
		end = (u_int16_t *)&dlp->d_partitions[
		    swap16(dlp->d_npartitions)];
		while (start < end)
			sum ^= *start++;
		if (sum != 0)
			return (error);

		/* Checksum holds: swap every multi-byte field in place. */
		dlp->d_magic = swap32(dlp->d_magic);
		dlp->d_type = swap16(dlp->d_type);

		/* d_typename and d_packname are strings */

		dlp->d_secsize = swap32(dlp->d_secsize);
		dlp->d_nsectors = swap32(dlp->d_nsectors);
		dlp->d_ntracks = swap32(dlp->d_ntracks);
		dlp->d_ncylinders = swap32(dlp->d_ncylinders);
		dlp->d_secpercyl = swap32(dlp->d_secpercyl);
		dlp->d_secperunit = swap32(dlp->d_secperunit);

		/* d_uid is a string */

		dlp->d_acylinders = swap32(dlp->d_acylinders);

		dlp->d_flags = swap32(dlp->d_flags);

		for (i = 0; i < NDDATA; i++)
			dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]);

		dlp->d_secperunith = swap16(dlp->d_secperunith);
		dlp->d_version = swap16(dlp->d_version);

		for (i = 0; i < NSPARE; i++)
			dlp->d_spare[i] = swap32(dlp->d_spare[i]);

		dlp->d_magic2 = swap32(dlp->d_magic2);

		dlp->d_npartitions = swap16(dlp->d_npartitions);
		dlp->d_bbsize = swap32(dlp->d_bbsize);
		dlp->d_sbsize = swap32(dlp->d_sbsize);

		/*
		 * d_version was swapped above, so it can now select which
		 * partition layout (v0 or current) the entries use.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			pp = &dlp->d_partitions[i];
			pp->p_size = swap32(pp->p_size);
			pp->p_offset = swap32(pp->p_offset);
			if (dlp->d_version == 0) {
				v0pp = (struct __partitionv0 *)pp;
				v0pp->p_fsize = swap32(v0pp->p_fsize);
			} else {
				pp->p_offseth = swap16(pp->p_offseth);
				pp->p_sizeh = swap16(pp->p_sizeh);
			}
			pp->p_cpg = swap16(pp->p_cpg);
		}

		/* Recompute the checksum over the converted label. */
		dlp->d_checksum = 0;
		dlp->d_checksum = dkcksum(dlp);
		error = 0;
	}

	/* XXX should verify lots of other fields and whine a lot */

	/* Initial passed in lp contains the real disk size. */
	disksize = DL_GETDSIZE(lp);

	if (lp != dlp)
		*lp = *dlp;

	/* Upgrade a version 0 label to the version 1 partition layout. */
	if (lp->d_version == 0) {
		lp->d_version = 1;
		lp->d_secperunith = 0;

		v0pp = (struct __partitionv0 *)lp->d_partitions;
		pp = lp->d_partitions;
		for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) {
			pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp->
			    p_fsize, v0pp->p_frag);
			pp->p_offseth = 0;
			pp->p_sizeh = 0;
		}
	}

#ifdef DEBUG
	if (DL_GETDSIZE(lp) != disksize)
		printf("on-disk disklabel has incorrect disksize (%llu)\n",
		    DL_GETDSIZE(lp));
	if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize)
		printf("on-disk disklabel RAW_PART has incorrect size (%llu)\n",
		    DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
	if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0)
		printf("on-disk disklabel RAW_PART offset != 0 (%llu)\n",
		    DL_GETPOFFSET(&lp->d_partitions[RAW_PART]));
#endif
	/* The saved real disk size overrides whatever the label claimed. */
	DL_SETDSIZE(lp, disksize);
	DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize);
	DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
	DL_SETBSTART(lp, boundstart);
	DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp));

	lp->d_checksum = 0;
	lp->d_checksum = dkcksum(lp);
	return (0);
}

/*
 * Read a disk sector.
 *
 * Sets bp up as a raw read of one d_secsize-byte sector at the given
 * sector number, hands it to strat and returns the result of biowait().
 */
int
readdisksector(struct buf *bp, void (*strat)(struct buf *),
    struct disklabel *lp, u_int64_t sector)
{
	bp->b_blkno = DL_SECTOBLK(lp, sector);
	bp->b_bcount = lp->d_secsize;
	bp->b_error = 0;
	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR);
	SET(bp->b_flags, B_BUSY | B_READ | B_RAW);

	(*strat)(bp);

	return (biowait(bp));
}

/*
 * If dos partition table requested, attempt to load it and
 * find disklabel inside a DOS partition. Return buffer
 * for use in signalling errors if requested.
 *
 * We would like to check if each MBR has a valid BOOT_MAGIC, but
 * we cannot because it doesn't always exist. So.. we assume the
 * MBR is valid.
 */
int
readdoslabel(struct buf *bp, void (*strat)(struct buf *),
    struct disklabel *lp, daddr_t *partoffp, int spoofonly)
{
	struct dos_partition dp[NDOSPART], *dp2;
	struct disklabel *gptlp;
	u_int64_t dospartoff = 0, dospartend = DL_GETBEND(lp);
	u_int64_t sector = DOSBBSECTOR;
	u_int32_t extoff = 0;
	int ourpart = -1, wander = 1, n = 0, loop = 0;
	int efi, error, i, offset;

	if (lp->d_secpercyl == 0)
		return (EINVAL);	/* invalid label */
	if (lp->d_secsize == 0)
		return (ENOSPC);	/* disk too small */

	/* do DOS partitions in the process of getting disklabel? */

	/*
	 * Read dos partition table, follow extended partitions.
* Map the partitions to disklabel entries i-p
	 */
	while (wander && loop < DOS_MAXEBR) {
		loop++;
		wander = 0;
		/*
		 * The first extended partition found leaves sector at 0;
		 * raise it to the extended partition's base here.
		 */
		if (sector < extoff)
			sector = extoff;

		/* read MBR/EBR */
		error = readdisksector(bp, strat, lp, sector);
		if (error) {
/*wrong*/		if (partoffp)
/*wrong*/			*partoffp = -1;
			return (error);
		}

		bcopy(bp->b_data + DOSPARTOFF, dp, sizeof(dp));

		/* Only the very first sector read is examined for a GPT. */
		if (n == 0 && sector == DOSBBSECTOR) {
			u_int16_t mbrtest;

			/* Check the end of sector marker. */
			mbrtest = ((bp->b_data[510] << 8) & 0xff00) |
			    (bp->b_data[511] & 0xff);
			if (mbrtest != 0x55aa)
				goto notmbr;

			efi = gpt_chk_mbr(dp, DL_GETDSIZE(lp));
			if (efi == -1)
				goto notgpt;

			/* Spoof into a scratch copy so lp survives a failure. */
			gptlp = malloc(sizeof(struct disklabel), M_DEVBUF,
			    M_NOWAIT);
			if (gptlp == NULL)
				return (ENOMEM);
			*gptlp = *lp;
			error = spoofgptlabel(bp, strat, gptlp);
			if (error == 0) {
				dospartoff = DL_GETBSTART(gptlp);
				dospartend = DL_GETBEND(gptlp);
				/* Offset-only callers must not see lp change. */
				if (partoffp == NULL)
					*lp = *gptlp;
				free(gptlp, M_DEVBUF,
				    sizeof(struct disklabel));
				if (partoffp && dospartoff == 0)
					return (ENXIO);
				goto notfat;
			} else {
				free(gptlp, M_DEVBUF,
				    sizeof(struct disklabel));
				goto notmbr;
			}
		}

notgpt:
		if (ourpart == -1) {
			/* Search for our MBR partition */
			for (dp2=dp, i=0; i < NDOSPART && ourpart == -1;
			    i++, dp2++)
				if (letoh32(dp2->dp_size) &&
				    dp2->dp_typ == DOSPTYP_OPENBSD)
					ourpart = i;
			if (ourpart == -1)
				goto donot;
			/*
			 * This is our MBR partition. need sector
			 * address for SCSI/IDE, cylinder for
			 * ESDI/ST506/RLL
			 */
			dp2 = &dp[ourpart];
			dospartoff = letoh32(dp2->dp_start) + sector;
			dospartend = dospartoff + letoh32(dp2->dp_size);

			/*
			 * Record the OpenBSD partition's placement (in
			 * 512-byte blocks!) for the caller. No need to
			 * finish spoofing.
			 */
			if (partoffp) {
				*partoffp = DL_SECTOBLK(lp, dospartoff);
				return (0);
			}

			/* Borrow geometry from the MBR entry where lp has none. */
			if (lp->d_ntracks == 0)
				lp->d_ntracks = dp2->dp_ehd + 1;
			if (lp->d_nsectors == 0)
				lp->d_nsectors = DPSECT(dp2->dp_esect);
			if (lp->d_secpercyl == 0)
				lp->d_secpercyl = lp->d_ntracks *
				    lp->d_nsectors;
		}
donot:
		/*
		 * In case the disklabel read below fails, we want to
		 * provide a fake label in i-p.
		 */
		for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) {
			struct partition *pp;
			u_int8_t fstype;

			if (dp2->dp_typ == DOSPTYP_OPENBSD ||
			    dp2->dp_typ == DOSPTYP_EFI)
				continue;
			/* Skip entries that are empty or exceed the disk size. */
			if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp))
				continue;
			if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp))
				continue;
			if (letoh32(dp2->dp_size) == 0)
				continue;

			switch (dp2->dp_typ) {
			case DOSPTYP_UNUSED:
				fstype = FS_UNUSED;
				break;

			case DOSPTYP_LINUX:
				fstype = FS_EXT2FS;
				break;

			case DOSPTYP_NTFS:
				fstype = FS_NTFS;
				break;

			case DOSPTYP_EFISYS:
			case DOSPTYP_FAT12:
			case DOSPTYP_FAT16S:
			case DOSPTYP_FAT16B:
			case DOSPTYP_FAT16L:
			case DOSPTYP_FAT32:
			case DOSPTYP_FAT32L:
				fstype = FS_MSDOS;
				break;
			case DOSPTYP_EXTEND:
			case DOSPTYP_EXTENDL:
				/*
				 * Queue the next EBR: its start is taken
				 * relative to the first extended partition
				 * (extoff), and the outer loop runs again.
				 */
				sector = letoh32(dp2->dp_start) + extoff;
				if (!extoff) {
					extoff = letoh32(dp2->dp_start);
					sector = 0;
				}
				wander = 1;
				continue;
				break;
			default:
				fstype = FS_OTHER;
				break;
			}

			/*
			 * Don't set fstype/offset/size when just looking for
			 * the offset of the OpenBSD partition. It would
			 * invalidate the disklabel checksum!
			 *
			 * Don't try to spoof more than 8 partitions, i.e.
			 * 'i' -'p'.
504 */ 505 if (partoffp || n >= 8) 506 continue; 507 508 pp = &lp->d_partitions[8+n]; 509 n++; 510 pp->p_fstype = fstype; 511 if (letoh32(dp2->dp_start)) 512 DL_SETPOFFSET(pp, 513 letoh32(dp2->dp_start) + sector); 514 DL_SETPSIZE(pp, letoh32(dp2->dp_size)); 515 } 516 } 517 518 notmbr: 519 if (n == 0 && sector == DOSBBSECTOR && ourpart == -1) { 520 u_int16_t fattest; 521 522 /* Check for a valid initial jmp instruction. */ 523 switch ((u_int8_t)bp->b_data[0]) { 524 case 0xeb: 525 /* 526 * Two-byte jmp instruction. The 2nd byte is the number 527 * of bytes to jmp and the 3rd byte must be a NOP. 528 */ 529 if ((u_int8_t)bp->b_data[2] != 0x90) 530 goto notfat; 531 break; 532 case 0xe9: 533 /* 534 * Three-byte jmp instruction. The next two bytes are a 535 * little-endian 16 bit value. 536 */ 537 break; 538 default: 539 goto notfat; 540 break; 541 } 542 543 /* Check for a valid bytes per sector value. */ 544 fattest = ((bp->b_data[12] << 8) & 0xff00) | 545 (bp->b_data[11] & 0xff); 546 if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0)) 547 goto notfat; 548 549 if (partoffp) 550 return (ENXIO); /* No place for disklabel on FAT! */ 551 552 DL_SETPSIZE(&lp->d_partitions['i' - 'a'], 553 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 554 DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0); 555 lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS; 556 557 spoofonly = 1; /* No disklabel to read from disk. */ 558 } 559 560 notfat: 561 /* record the OpenBSD partition's placement for the caller */ 562 if (partoffp) 563 *partoffp = DL_SECTOBLK(lp, dospartoff); 564 else { 565 DL_SETBSTART(lp, dospartoff); 566 DL_SETBEND(lp, (dospartend < DL_GETDSIZE(lp)) ? 
dospartend : 567 DL_GETDSIZE(lp)); 568 } 569 570 /* don't read the on-disk label if we are in spoofed-only mode */ 571 if (spoofonly) 572 return (0); 573 574 error = readdisksector(bp, strat, lp, dospartoff + 575 DL_BLKTOSEC(lp, DOS_LABELSECTOR)); 576 if (error) 577 return (bp->b_error); 578 579 offset = DL_BLKOFFSET(lp, DOS_LABELSECTOR); 580 error = checkdisklabel(bp->b_data + offset, lp, 581 DL_GETBSTART((struct disklabel*)(bp->b_data+offset)), 582 DL_GETBEND((struct disklabel *)(bp->b_data+offset))); 583 584 return (error); 585 } 586 587 /* 588 * Return the index into dp[] of the EFI GPT (0xEE) partition, or -1 if no such 589 * partition exists. 590 * 591 * Copied into sbin/fdisk/mbr.c. 592 */ 593 int 594 gpt_chk_mbr(struct dos_partition *dp, uint64_t dsize) 595 { 596 struct dos_partition *dp2; 597 int efi, eficnt, found, i; 598 uint32_t psize; 599 600 found = efi = eficnt = 0; 601 for (dp2 = dp, i = 0; i < NDOSPART; i++, dp2++) { 602 if (dp2->dp_typ == DOSPTYP_UNUSED) 603 continue; 604 found++; 605 if (dp2->dp_typ != DOSPTYP_EFI) 606 continue; 607 if (letoh32(dp2->dp_start) != GPTSECTOR) 608 continue; 609 psize = letoh32(dp2->dp_size); 610 if (psize <= (dsize - GPTSECTOR) || psize == UINT32_MAX) { 611 efi = i; 612 eficnt++; 613 } 614 } 615 if (found == 1 && eficnt == 1) 616 return (efi); 617 618 return (-1); 619 } 620 621 int 622 gpt_chk_hdr(struct gpt_header *gh, struct disklabel *lp) 623 { 624 uint64_t ghpartlba; 625 uint64_t ghlbaend, ghlbastart; 626 uint32_t gh_csum; 627 uint32_t ghsize, ghpartsize, ghpartspersec; 628 629 if (letoh64(gh->gh_sig) != GPTSIGNATURE) 630 return (EINVAL); 631 632 if (letoh32(gh->gh_rev) != GPTREVISION) 633 return (EINVAL); 634 635 ghsize = letoh32(gh->gh_size); 636 ghpartsize = letoh32(gh->gh_part_size); 637 ghpartspersec = lp->d_secsize / ghpartsize; 638 ghpartlba = letoh64(gh->gh_part_lba); 639 ghlbaend = letoh64(gh->gh_lba_end); 640 ghlbastart = letoh64(gh->gh_lba_start); 641 642 if (ghsize < GPTMINHDRSIZE || ghsize > 
sizeof(struct gpt_header)) 643 return (EINVAL); 644 645 gh_csum = gh->gh_csum; 646 gh->gh_csum = 0; 647 gh->gh_csum = htole32(crc32(0, (unsigned char *)gh, ghsize)); 648 649 if (gh_csum != gh->gh_csum) 650 return (EINVAL); 651 652 if (ghlbastart >= DL_GETDSIZE(lp) || 653 ghpartlba >= DL_GETDSIZE(lp)) 654 return (EINVAL); 655 656 /* 657 * Size per partition entry shall be 128*(2**n) with n >= 0. 658 * We don't support partition entries larger than block size. 659 */ 660 if (ghpartsize % GPTMINPARTSIZE || ghpartsize > lp->d_secsize 661 || ghpartspersec == 0) { 662 DPRINTF("invalid partition size\n"); 663 return (EINVAL); 664 } 665 666 /* XXX: we don't support multiples of GPTMINPARTSIZE yet */ 667 if (ghpartsize != GPTMINPARTSIZE) { 668 DPRINTF("partition sizes larger than %d bytes are not " 669 "supported", GPTMINPARTSIZE); 670 return (EINVAL); 671 } 672 673 return 0; 674 } 675 676 int 677 gpt_chk_parts(struct gpt_header *gh, struct gpt_partition *gp) 678 { 679 u_int32_t gh_part_csum; 680 681 gh_part_csum = htole32(crc32(0, (unsigned char *)gp, 682 letoh32(gh->gh_part_num) * letoh32(gh->gh_part_size))); 683 684 if (gh_part_csum != gh->gh_part_csum) 685 return (EINVAL); 686 687 return 0; 688 } 689 690 int 691 gpt_get_fstype(struct uuid *uuid_part) 692 { 693 static int init = 0; 694 static struct uuid uuid_openbsd, uuid_msdos, uuid_chromefs, 695 uuid_linux, uuid_hfs, uuid_unused, uuid_efi_system, uuid_bios_boot; 696 static const uint8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD; 697 static const uint8_t gpt_uuid_msdos[] = GPT_UUID_MSDOS; 698 static const uint8_t gpt_uuid_chromerootfs[] = GPT_UUID_CHROMEROOTFS; 699 static const uint8_t gpt_uuid_linux[] = GPT_UUID_LINUX; 700 static const uint8_t gpt_uuid_hfs[] = GPT_UUID_APPLE_HFS; 701 static const uint8_t gpt_uuid_unused[] = GPT_UUID_UNUSED; 702 static const uint8_t gpt_uuid_efi_system[] = GPT_UUID_EFI_SYSTEM; 703 static const uint8_t gpt_uuid_bios_boot[] = GPT_UUID_BIOS_BOOT; 704 705 if (init == 0) { 706 
uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd); 707 uuid_dec_be(gpt_uuid_msdos, &uuid_msdos); 708 uuid_dec_be(gpt_uuid_chromerootfs, &uuid_chromefs); 709 uuid_dec_be(gpt_uuid_linux, &uuid_linux); 710 uuid_dec_be(gpt_uuid_hfs, &uuid_hfs); 711 uuid_dec_be(gpt_uuid_unused, &uuid_unused); 712 uuid_dec_be(gpt_uuid_efi_system, &uuid_efi_system); 713 uuid_dec_be(gpt_uuid_bios_boot, &uuid_bios_boot); 714 init = 1; 715 } 716 717 if (!memcmp(uuid_part, &uuid_unused, sizeof(struct uuid))) 718 return FS_UNUSED; 719 else if (!memcmp(uuid_part, &uuid_openbsd, sizeof(struct uuid))) 720 return FS_BSDFFS; 721 else if (!memcmp(uuid_part, &uuid_msdos, sizeof(struct uuid))) 722 return FS_MSDOS; 723 else if (!memcmp(uuid_part, &uuid_chromefs, sizeof(struct uuid))) 724 return FS_EXT2FS; 725 else if (!memcmp(uuid_part, &uuid_linux, sizeof(struct uuid))) 726 return FS_EXT2FS; 727 else if (!memcmp(uuid_part, &uuid_hfs, sizeof(struct uuid))) 728 return FS_HFS; 729 else if (!memcmp(uuid_part, &uuid_efi_system, sizeof(struct uuid))) 730 return FS_MSDOS; 731 else if (!memcmp(uuid_part, &uuid_bios_boot, sizeof(struct uuid))) 732 return FS_BOOT; 733 else 734 return FS_OTHER; 735 } 736 737 /* 738 * Spoof a disklabel based on the GPT information on the disk. 
739 */ 740 int 741 spoofgptlabel(struct buf *bp, void (*strat)(struct buf *), 742 struct disklabel *lp) 743 { 744 static const u_int8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD; 745 struct gpt_header gh; 746 struct uuid uuid_part, uuid_openbsd; 747 struct gpt_partition *gp; 748 struct partition *pp; 749 size_t gpsz; 750 u_int64_t ghlbaend, ghlbastart, sector; 751 u_int64_t start, end; 752 int i, error, found, n; 753 uint32_t ghpartnum; 754 755 uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd); 756 757 for (sector = GPTSECTOR; ; sector = DL_GETDSIZE(lp) - 1) { 758 uint64_t ghpartlba; 759 uint32_t ghpartsize; 760 uint32_t ghpartspersec; 761 762 error = readdisksector(bp, strat, lp, sector); 763 if (error) { 764 DPRINTF("error reading from disk\n"); 765 return (error); 766 } 767 768 bcopy(bp->b_data, &gh, sizeof(gh)); 769 770 if (gpt_chk_hdr(&gh, lp)) { 771 if (sector != GPTSECTOR) { 772 DPRINTF("alternate header also broken\n"); 773 return (EINVAL); 774 } 775 continue; 776 } 777 778 ghpartsize = letoh32(gh.gh_part_size); 779 ghpartspersec = lp->d_secsize / ghpartsize; 780 ghpartnum = letoh32(gh.gh_part_num); 781 ghpartlba = letoh64(gh.gh_part_lba); 782 ghlbaend = letoh64(gh.gh_lba_end); 783 ghlbastart = letoh64(gh.gh_lba_start); 784 785 /* read GPT partition entry array */ 786 gp = mallocarray(ghpartnum, sizeof(struct gpt_partition), 787 M_DEVBUF, M_NOWAIT|M_ZERO); 788 if (gp == NULL) 789 return (ENOMEM); 790 gpsz = ghpartnum * sizeof(struct gpt_partition); 791 792 /* 793 * XXX: Fails if # of partition entries is not a multiple of 794 * ghpartspersec. 
795 */ 796 for (i = 0; i < ghpartnum / ghpartspersec; i++) { 797 error = readdisksector(bp, strat, lp, ghpartlba + i); 798 if (error) { 799 free(gp, M_DEVBUF, gpsz); 800 return (error); 801 } 802 803 bcopy(bp->b_data, gp + i * ghpartspersec, 804 ghpartspersec * sizeof(struct gpt_partition)); 805 } 806 807 if (gpt_chk_parts(&gh, gp)) { 808 free(gp, M_DEVBUF, gpsz); 809 if (letoh64(gh.gh_lba_self) != GPTSECTOR) { 810 DPRINTF("alternate partition entries are also " 811 "broken\n"); 812 return (EINVAL); 813 } 814 continue; 815 } 816 break; 817 } 818 819 /* Find OpenBSD partition and spoof others along the way. */ 820 DL_SETBSTART(lp, ghlbastart); 821 DL_SETBEND(lp, ghlbaend + 1); 822 found = 0; 823 n = 'i' - 'a'; /* Start spoofing at 'i', a.k.a. 8. */ 824 for (i = 0; i < ghpartnum; i++) { 825 start = letoh64(gp[i].gp_lba_start); 826 end = letoh64(gp[i].gp_lba_end); 827 if (start > end || start < ghlbastart || end > ghlbaend) 828 continue; /* entry invalid */ 829 830 uuid_dec_le(&gp[i].gp_type, &uuid_part); 831 if (memcmp(&uuid_part, &uuid_openbsd, sizeof(struct uuid)) == 0) { 832 if (found == 0) { 833 found = 1; 834 DL_SETBSTART(lp, start); 835 DL_SETBEND(lp, end + 1); 836 } 837 } else if (n < MAXPARTITIONS) { 838 pp = &lp->d_partitions[n]; 839 n++; 840 pp->p_fstype = gpt_get_fstype(&uuid_part); 841 DL_SETPOFFSET(pp, start); 842 DL_SETPSIZE(pp, end - start + 1); 843 } 844 } 845 846 free(gp, M_DEVBUF, gpsz); 847 848 return (0); 849 } 850 851 /* 852 * Check new disk label for sensibility before setting it. 
853 */ 854 int 855 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 856 { 857 struct partition *opp, *npp; 858 struct disk *dk; 859 int i; 860 861 /* sanity clause */ 862 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 863 (nlp->d_secsize % DEV_BSIZE) != 0) 864 return (EINVAL); 865 866 /* special case to allow disklabel to be invalidated */ 867 if (nlp->d_magic == 0xffffffff) { 868 *olp = *nlp; 869 return (0); 870 } 871 872 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 873 dkcksum(nlp) != 0) 874 return (EINVAL); 875 876 /* XXX missing check if other dos partitions will be overwritten */ 877 878 for (i = 0; i < MAXPARTITIONS; i++) { 879 opp = &olp->d_partitions[i]; 880 npp = &nlp->d_partitions[i]; 881 if ((openmask & (1 << i)) && 882 (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 883 DL_GETPSIZE(npp) < DL_GETPSIZE(opp))) 884 return (EBUSY); 885 /* 886 * Copy internally-set partition information 887 * if new label doesn't include it. XXX 888 */ 889 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 890 npp->p_fragblock = opp->p_fragblock; 891 npp->p_cpg = opp->p_cpg; 892 } 893 } 894 895 /* Generate a UID if the disklabel does not already have one. */ 896 if (duid_iszero(nlp->d_uid)) { 897 do { 898 arc4random_buf(nlp->d_uid, sizeof(nlp->d_uid)); 899 TAILQ_FOREACH(dk, &disklist, dk_link) 900 if (dk->dk_label && 901 duid_equal(dk->dk_label->d_uid, nlp->d_uid)) 902 break; 903 } while (dk != NULL || duid_iszero(nlp->d_uid)); 904 } 905 906 /* Preserve the disk size and RAW_PART values. */ 907 DL_SETDSIZE(nlp, DL_GETDSIZE(olp)); 908 npp = &nlp->d_partitions[RAW_PART]; 909 DL_SETPOFFSET(npp, 0); 910 DL_SETPSIZE(npp, DL_GETDSIZE(nlp)); 911 912 nlp->d_checksum = 0; 913 nlp->d_checksum = dkcksum(nlp); 914 *olp = *nlp; 915 916 disk_change = 1; 917 918 return (0); 919 } 920 921 /* 922 * Determine the size of the transfer, and make sure it is within the 923 * boundaries of the partition. 
Adjust transfer if needed, and signal errors or 924 * early completion. 925 */ 926 int 927 bounds_check_with_label(struct buf *bp, struct disklabel *lp) 928 { 929 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 930 daddr_t partblocks, sz; 931 932 /* Avoid division by zero, negative offsets, and negative sizes. */ 933 if (lp->d_secpercyl == 0 || bp->b_blkno < 0 || bp->b_bcount < 0) 934 goto bad; 935 936 /* Ensure transfer is a whole number of aligned sectors. */ 937 if ((bp->b_blkno % DL_BLKSPERSEC(lp)) != 0 || 938 (bp->b_bcount % lp->d_secsize) != 0) 939 goto bad; 940 941 /* Ensure transfer starts within partition boundary. */ 942 partblocks = DL_SECTOBLK(lp, DL_GETPSIZE(p)); 943 if (bp->b_blkno > partblocks) 944 goto bad; 945 946 /* If exactly at end of partition or null transfer, return EOF. */ 947 if (bp->b_blkno == partblocks || bp->b_bcount == 0) 948 goto done; 949 950 /* Truncate request if it extends past the end of the partition. */ 951 sz = bp->b_bcount >> DEV_BSHIFT; 952 if (sz > partblocks - bp->b_blkno) { 953 sz = partblocks - bp->b_blkno; 954 bp->b_bcount = sz << DEV_BSHIFT; 955 } 956 957 return (0); 958 959 bad: 960 bp->b_error = EINVAL; 961 bp->b_flags |= B_ERROR; 962 done: 963 bp->b_resid = bp->b_bcount; 964 return (-1); 965 } 966 967 /* 968 * Disk error is the preface to plaintive error messages 969 * about failing disk transfers. It prints messages of the form 970 971 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 972 973 * if the offset of the error in the transfer and a disk label 974 * are both available. blkdone should be -1 if the position of the error 975 * is unknown; the disklabel pointer may be null from drivers that have not 976 * been converted to use them. The message is printed with printf 977 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 978 * The message should be completed (with at least a newline) with printf 979 * or addlog, respectively. 
There is no trailing space. 980 */ 981 void 982 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 983 struct disklabel *lp) 984 { 985 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 986 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))); 987 char partname = 'a' + part; 988 daddr_t sn; 989 990 if (pri != LOG_PRINTF) { 991 log(pri, "%s", ""); 992 pr = addlog; 993 } else 994 pr = printf; 995 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 996 bp->b_flags & B_READ ? "read" : "writ"); 997 sn = bp->b_blkno; 998 if (bp->b_bcount <= DEV_BSIZE) 999 (*pr)("%lld", (long long)sn); 1000 else { 1001 if (blkdone >= 0) { 1002 sn += blkdone; 1003 (*pr)("%lld of ", (long long)sn); 1004 } 1005 (*pr)("%lld-%lld", (long long)bp->b_blkno, 1006 (long long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE)); 1007 } 1008 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 1009 sn += DL_SECTOBLK(lp, DL_GETPOFFSET(&lp->d_partitions[part])); 1010 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, (long long)sn, 1011 (long long)(sn / DL_SECTOBLK(lp, lp->d_secpercyl))); 1012 sn %= DL_SECTOBLK(lp, lp->d_secpercyl); 1013 (*pr)(" tn %lld sn %lld)", 1014 (long long)(sn / DL_SECTOBLK(lp, lp->d_nsectors)), 1015 (long long)(sn % DL_SECTOBLK(lp, lp->d_nsectors))); 1016 } 1017 } 1018 1019 /* 1020 * Initialize the disklist. Called by main() before autoconfiguration. 1021 */ 1022 void 1023 disk_init(void) 1024 { 1025 1026 TAILQ_INIT(&disklist); 1027 disk_count = disk_change = 0; 1028 } 1029 1030 int 1031 disk_construct(struct disk *diskp) 1032 { 1033 rw_init_flags(&diskp->dk_lock, "dklk", RWL_IS_VNODE); 1034 mtx_init(&diskp->dk_mtx, IPL_BIO); 1035 1036 diskp->dk_flags |= DKF_CONSTRUCTED; 1037 1038 return (0); 1039 } 1040 1041 /* 1042 * Attach a disk. 
1043 */ 1044 void 1045 disk_attach(struct device *dv, struct disk *diskp) 1046 { 1047 int majdev; 1048 1049 KERNEL_ASSERT_LOCKED(); 1050 1051 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 1052 disk_construct(diskp); 1053 1054 /* 1055 * Allocate and initialize the disklabel structures. Note that 1056 * it's not safe to sleep here, since we're probably going to be 1057 * called during autoconfiguration. 1058 */ 1059 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 1060 M_NOWAIT|M_ZERO); 1061 if (diskp->dk_label == NULL) 1062 panic("disk_attach: can't allocate storage for disklabel"); 1063 1064 /* 1065 * Set the attached timestamp. 1066 */ 1067 microuptime(&diskp->dk_attachtime); 1068 1069 /* 1070 * Link into the disklist. 1071 */ 1072 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 1073 ++disk_count; 1074 disk_change = 1; 1075 1076 /* 1077 * Store device structure and number for later use. 1078 */ 1079 diskp->dk_device = dv; 1080 diskp->dk_devno = NODEV; 1081 if (dv != NULL) { 1082 majdev = findblkmajor(dv); 1083 if (majdev >= 0) 1084 diskp->dk_devno = 1085 MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); 1086 1087 if (diskp->dk_devno != NODEV) { 1088 struct disk_attach_task *dat; 1089 1090 dat = malloc(sizeof(*dat), M_TEMP, M_WAITOK); 1091 1092 /* XXX: Assumes dk is part of the device softc. */ 1093 device_ref(dv); 1094 dat->dk = diskp; 1095 1096 task_set(&dat->task, disk_attach_callback, dat); 1097 task_add(systq, &dat->task); 1098 } 1099 } 1100 1101 if (softraid_disk_attach) 1102 softraid_disk_attach(diskp, 1); 1103 } 1104 1105 void 1106 disk_attach_callback(void *xdat) 1107 { 1108 struct disk_attach_task *dat = xdat; 1109 struct disk *dk = dat->dk; 1110 struct disklabel dl; 1111 char errbuf[100]; 1112 1113 free(dat, M_TEMP, sizeof(*dat)); 1114 1115 if (dk->dk_flags & (DKF_OPENED | DKF_NOLABELREAD)) 1116 goto done; 1117 1118 /* Read disklabel. 
*/ 1119 if (disk_readlabel(&dl, dk->dk_devno, errbuf, sizeof(errbuf)) == NULL) { 1120 enqueue_randomness(dl.d_checksum); 1121 dk->dk_flags |= DKF_LABELVALID; 1122 } 1123 1124 done: 1125 dk->dk_flags |= DKF_OPENED; 1126 device_unref(dk->dk_device); 1127 wakeup(dk); 1128 } 1129 1130 /* 1131 * Detach a disk. 1132 */ 1133 void 1134 disk_detach(struct disk *diskp) 1135 { 1136 KERNEL_ASSERT_LOCKED(); 1137 1138 if (softraid_disk_attach) 1139 softraid_disk_attach(diskp, -1); 1140 1141 /* 1142 * Free the space used by the disklabel structures. 1143 */ 1144 free(diskp->dk_label, M_DEVBUF, sizeof(*diskp->dk_label)); 1145 1146 /* 1147 * Remove from the disklist. 1148 */ 1149 TAILQ_REMOVE(&disklist, diskp, dk_link); 1150 disk_change = 1; 1151 if (--disk_count < 0) 1152 panic("disk_detach: disk_count < 0"); 1153 } 1154 1155 int 1156 disk_openpart(struct disk *dk, int part, int fmt, int haslabel) 1157 { 1158 KASSERT(part >= 0 && part < MAXPARTITIONS); 1159 1160 /* Unless opening the raw partition, check that the partition exists. */ 1161 if (part != RAW_PART && (!haslabel || 1162 part >= dk->dk_label->d_npartitions || 1163 dk->dk_label->d_partitions[part].p_fstype == FS_UNUSED)) 1164 return (ENXIO); 1165 1166 /* Ensure the partition doesn't get changed under our feet. 
*/ 1167 switch (fmt) { 1168 case S_IFCHR: 1169 dk->dk_copenmask |= (1 << part); 1170 break; 1171 case S_IFBLK: 1172 dk->dk_bopenmask |= (1 << part); 1173 break; 1174 } 1175 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1176 1177 return (0); 1178 } 1179 1180 void 1181 disk_closepart(struct disk *dk, int part, int fmt) 1182 { 1183 KASSERT(part >= 0 && part < MAXPARTITIONS); 1184 1185 switch (fmt) { 1186 case S_IFCHR: 1187 dk->dk_copenmask &= ~(1 << part); 1188 break; 1189 case S_IFBLK: 1190 dk->dk_bopenmask &= ~(1 << part); 1191 break; 1192 } 1193 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1194 } 1195 1196 void 1197 disk_gone(int (*open)(dev_t, int, int, struct proc *), int unit) 1198 { 1199 int bmaj, cmaj, mn; 1200 1201 /* Locate the lowest minor number to be detached. */ 1202 mn = DISKMINOR(unit, 0); 1203 1204 for (bmaj = 0; bmaj < nblkdev; bmaj++) 1205 if (bdevsw[bmaj].d_open == open) 1206 vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK); 1207 for (cmaj = 0; cmaj < nchrdev; cmaj++) 1208 if (cdevsw[cmaj].d_open == open) 1209 vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR); 1210 } 1211 1212 /* 1213 * Increment a disk's busy counter. If the counter is going from 1214 * 0 to 1, set the timestamp. 1215 */ 1216 void 1217 disk_busy(struct disk *diskp) 1218 { 1219 1220 /* 1221 * XXX We'd like to use something as accurate as microtime(), 1222 * but that doesn't depend on the system TOD clock. 1223 */ 1224 mtx_enter(&diskp->dk_mtx); 1225 if (diskp->dk_busy++ == 0) 1226 microuptime(&diskp->dk_timestamp); 1227 mtx_leave(&diskp->dk_mtx); 1228 } 1229 1230 /* 1231 * Decrement a disk's busy counter, increment the byte count, total busy 1232 * time, and reset the timestamp. 
1233 */ 1234 void 1235 disk_unbusy(struct disk *diskp, long bcount, daddr_t blkno, int read) 1236 { 1237 struct timeval dv_time, diff_time; 1238 1239 mtx_enter(&diskp->dk_mtx); 1240 1241 if (diskp->dk_busy-- == 0) 1242 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 1243 1244 microuptime(&dv_time); 1245 1246 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 1247 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 1248 1249 diskp->dk_timestamp = dv_time; 1250 if (bcount > 0) { 1251 if (read) { 1252 diskp->dk_rbytes += bcount; 1253 diskp->dk_rxfer++; 1254 } else { 1255 diskp->dk_wbytes += bcount; 1256 diskp->dk_wxfer++; 1257 } 1258 } else 1259 diskp->dk_seek++; 1260 1261 mtx_leave(&diskp->dk_mtx); 1262 1263 enqueue_randomness(bcount ^ diff_time.tv_usec ^ 1264 (blkno >> 32) ^ (blkno & 0xffffffff)); 1265 } 1266 1267 int 1268 disk_lock(struct disk *dk) 1269 { 1270 return (rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR)); 1271 } 1272 1273 void 1274 disk_lock_nointr(struct disk *dk) 1275 { 1276 rw_enter_write(&dk->dk_lock); 1277 } 1278 1279 void 1280 disk_unlock(struct disk *dk) 1281 { 1282 rw_exit_write(&dk->dk_lock); 1283 } 1284 1285 int 1286 dk_mountroot(void) 1287 { 1288 char errbuf[100]; 1289 int part = DISKPART(rootdev); 1290 int (*mountrootfn)(void); 1291 struct disklabel dl; 1292 char *error; 1293 1294 error = disk_readlabel(&dl, rootdev, errbuf, sizeof(errbuf)); 1295 if (error) 1296 panic("%s", error); 1297 1298 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 1299 panic("root filesystem has size 0"); 1300 switch (dl.d_partitions[part].p_fstype) { 1301 #ifdef EXT2FS 1302 case FS_EXT2FS: 1303 { 1304 extern int ext2fs_mountroot(void); 1305 mountrootfn = ext2fs_mountroot; 1306 } 1307 break; 1308 #endif 1309 #ifdef FFS 1310 case FS_BSDFFS: 1311 { 1312 extern int ffs_mountroot(void); 1313 mountrootfn = ffs_mountroot; 1314 } 1315 break; 1316 #endif 1317 #ifdef CD9660 1318 case FS_ISO9660: 1319 { 1320 extern int cd9660_mountroot(void); 1321 mountrootfn = 
cd9660_mountroot; 1322 } 1323 break; 1324 #endif 1325 default: 1326 #ifdef FFS 1327 { 1328 extern int ffs_mountroot(void); 1329 1330 printf("filesystem type %d not known.. assuming ffs\n", 1331 dl.d_partitions[part].p_fstype); 1332 mountrootfn = ffs_mountroot; 1333 } 1334 #else 1335 panic("disk 0x%x filesystem type %d not known", 1336 rootdev, dl.d_partitions[part].p_fstype); 1337 #endif 1338 } 1339 return (*mountrootfn)(); 1340 } 1341 1342 struct device * 1343 getdisk(char *str, int len, int defpart, dev_t *devp) 1344 { 1345 struct device *dv; 1346 1347 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1348 printf("use one of: exit"); 1349 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1350 if (dv->dv_class == DV_DISK) 1351 printf(" %s[a-p]", dv->dv_xname); 1352 #if defined(NFSCLIENT) 1353 if (dv->dv_class == DV_IFNET) 1354 printf(" %s", dv->dv_xname); 1355 #endif 1356 } 1357 printf("\n"); 1358 } 1359 return (dv); 1360 } 1361 1362 struct device * 1363 parsedisk(char *str, int len, int defpart, dev_t *devp) 1364 { 1365 struct device *dv; 1366 int majdev, part = defpart; 1367 char c; 1368 1369 if (len == 0) 1370 return (NULL); 1371 c = str[len-1]; 1372 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1373 part = c - 'a'; 1374 len -= 1; 1375 } 1376 1377 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1378 if (dv->dv_class == DV_DISK && 1379 strncmp(str, dv->dv_xname, len) == 0 && 1380 dv->dv_xname[len] == '\0') { 1381 majdev = findblkmajor(dv); 1382 if (majdev < 0) 1383 return NULL; 1384 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1385 break; 1386 } 1387 #if defined(NFSCLIENT) 1388 if (dv->dv_class == DV_IFNET && 1389 strncmp(str, dv->dv_xname, len) == 0 && 1390 dv->dv_xname[len] == '\0') { 1391 *devp = NODEV; 1392 break; 1393 } 1394 #endif 1395 } 1396 1397 return (dv); 1398 } 1399 1400 void 1401 setroot(struct device *bootdv, int part, int exitflags) 1402 { 1403 int majdev, unit, len, s, slept = 0; 1404 struct swdevt *swp; 1405 struct device *rootdv, *dv; 1406 dev_t nrootdev, 
nswapdev = NODEV, temp = NODEV; 1407 struct ifnet *ifp = NULL; 1408 struct disk *dk; 1409 char buf[128]; 1410 #if defined(NFSCLIENT) 1411 extern char *nfsbootdevname; 1412 #endif 1413 1414 /* Ensure that all disk attach callbacks have completed. */ 1415 do { 1416 TAILQ_FOREACH(dk, &disklist, dk_link) { 1417 if (dk->dk_devno != NODEV && 1418 (dk->dk_flags & DKF_OPENED) == 0) { 1419 tsleep_nsec(dk, 0, "dkopen", SEC_TO_NSEC(1)); 1420 slept++; 1421 break; 1422 } 1423 } 1424 } while (dk != NULL && slept < 5); 1425 1426 if (slept == 5) { 1427 printf("disklabels not read:"); 1428 TAILQ_FOREACH(dk, &disklist, dk_link) 1429 if (dk->dk_devno != NODEV && 1430 (dk->dk_flags & DKF_OPENED) == 0) 1431 printf(" %s", dk->dk_name); 1432 printf("\n"); 1433 } 1434 1435 if (duid_iszero(bootduid)) { 1436 /* Locate DUID for boot disk since it was not provided. */ 1437 TAILQ_FOREACH(dk, &disklist, dk_link) 1438 if (dk->dk_device == bootdv) 1439 break; 1440 if (dk && (dk->dk_flags & DKF_LABELVALID)) 1441 bcopy(dk->dk_label->d_uid, bootduid, sizeof(bootduid)); 1442 } else if (bootdv == NULL) { 1443 /* Locate boot disk based on the provided DUID. */ 1444 TAILQ_FOREACH(dk, &disklist, dk_link) 1445 if (duid_equal(dk->dk_label->d_uid, bootduid)) 1446 break; 1447 if (dk && (dk->dk_flags & DKF_LABELVALID)) 1448 bootdv = dk->dk_device; 1449 } 1450 bcopy(bootduid, rootduid, sizeof(rootduid)); 1451 1452 #if NSOFTRAID > 0 1453 sr_map_root(); 1454 #endif 1455 1456 /* 1457 * If `swap generic' and we couldn't determine boot device, 1458 * ask the user. 
1459 */ 1460 dk = NULL; 1461 if (mountroot == NULL && bootdv == NULL) 1462 boothowto |= RB_ASKNAME; 1463 if (boothowto & RB_ASKNAME) { 1464 while (1) { 1465 printf("root device"); 1466 if (bootdv != NULL) { 1467 printf(" (default %s", bootdv->dv_xname); 1468 if (bootdv->dv_class == DV_DISK) 1469 printf("%c", 'a' + part); 1470 printf(")"); 1471 } 1472 printf(": "); 1473 s = splhigh(); 1474 cnpollc(1); 1475 len = getsn(buf, sizeof(buf)); 1476 cnpollc(0); 1477 splx(s); 1478 if (strcmp(buf, "exit") == 0) 1479 reboot(exitflags); 1480 if (len == 0 && bootdv != NULL) { 1481 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1482 len = strlen(buf); 1483 } 1484 if (len > 0 && buf[len - 1] == '*') { 1485 buf[--len] = '\0'; 1486 dv = getdisk(buf, len, part, &nrootdev); 1487 if (dv != NULL) { 1488 rootdv = dv; 1489 nswapdev = nrootdev; 1490 goto gotswap; 1491 } 1492 } 1493 dv = getdisk(buf, len, part, &nrootdev); 1494 if (dv != NULL) { 1495 rootdv = dv; 1496 break; 1497 } 1498 } 1499 1500 if (rootdv->dv_class == DV_IFNET) 1501 goto gotswap; 1502 1503 /* try to build swap device out of new root device */ 1504 while (1) { 1505 printf("swap device"); 1506 if (rootdv != NULL) 1507 printf(" (default %s%s)", rootdv->dv_xname, 1508 rootdv->dv_class == DV_DISK ? 
"b" : ""); 1509 printf(": "); 1510 s = splhigh(); 1511 cnpollc(1); 1512 len = getsn(buf, sizeof(buf)); 1513 cnpollc(0); 1514 splx(s); 1515 if (strcmp(buf, "exit") == 0) 1516 reboot(exitflags); 1517 if (len == 0 && rootdv != NULL) { 1518 switch (rootdv->dv_class) { 1519 case DV_IFNET: 1520 nswapdev = NODEV; 1521 break; 1522 case DV_DISK: 1523 nswapdev = MAKEDISKDEV(major(nrootdev), 1524 DISKUNIT(nrootdev), 1); 1525 if (nswapdev == nrootdev) 1526 continue; 1527 break; 1528 default: 1529 break; 1530 } 1531 break; 1532 } 1533 dv = getdisk(buf, len, 1, &nswapdev); 1534 if (dv) { 1535 if (dv->dv_class == DV_IFNET) 1536 nswapdev = NODEV; 1537 if (nswapdev == nrootdev) 1538 continue; 1539 break; 1540 } 1541 } 1542 gotswap: 1543 rootdev = nrootdev; 1544 dumpdev = nswapdev; 1545 swdevt[0].sw_dev = nswapdev; 1546 swdevt[1].sw_dev = NODEV; 1547 #if defined(NFSCLIENT) 1548 } else if (mountroot == nfs_mountroot) { 1549 rootdv = bootdv; 1550 rootdev = dumpdev = swapdev = NODEV; 1551 #endif 1552 } else if (mountroot == NULL && rootdev == NODEV) { 1553 /* 1554 * `swap generic' 1555 */ 1556 rootdv = bootdv; 1557 1558 if (bootdv->dv_class == DV_DISK) { 1559 if (!duid_iszero(rootduid)) { 1560 TAILQ_FOREACH(dk, &disklist, dk_link) 1561 if ((dk->dk_flags & DKF_LABELVALID) && 1562 dk->dk_label && duid_equal( 1563 dk->dk_label->d_uid, rootduid)) 1564 break; 1565 if (dk == NULL) 1566 panic("root device (%s) not found", 1567 duid_format(rootduid)); 1568 rootdv = dk->dk_device; 1569 } 1570 } 1571 1572 majdev = findblkmajor(rootdv); 1573 if (majdev >= 0) { 1574 /* 1575 * Root and swap are on the disk. 1576 * Assume swap is on partition b. 1577 */ 1578 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1579 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1580 } else { 1581 /* 1582 * Root and swap are on a net. 
1583 */ 1584 nswapdev = NODEV; 1585 } 1586 dumpdev = nswapdev; 1587 swdevt[0].sw_dev = nswapdev; 1588 /* swdevt[1].sw_dev = NODEV; */ 1589 } else { 1590 /* Completely pre-configured, but we want rootdv .. */ 1591 majdev = major(rootdev); 1592 if (findblkname(majdev) == NULL) 1593 return; 1594 unit = DISKUNIT(rootdev); 1595 part = DISKPART(rootdev); 1596 snprintf(buf, sizeof buf, "%s%d%c", 1597 findblkname(majdev), unit, 'a' + part); 1598 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1599 if (rootdv == NULL) 1600 panic("root device (%s) not found", buf); 1601 } 1602 1603 if (bootdv != NULL && bootdv->dv_class == DV_IFNET) 1604 ifp = if_unit(bootdv->dv_xname); 1605 1606 if (ifp) { 1607 if_addgroup(ifp, "netboot"); 1608 if_put(ifp); 1609 } 1610 1611 switch (rootdv->dv_class) { 1612 #if defined(NFSCLIENT) 1613 case DV_IFNET: 1614 mountroot = nfs_mountroot; 1615 nfsbootdevname = rootdv->dv_xname; 1616 return; 1617 #endif 1618 case DV_DISK: 1619 mountroot = dk_mountroot; 1620 part = DISKPART(rootdev); 1621 break; 1622 default: 1623 printf("can't figure root, hope your kernel is right\n"); 1624 return; 1625 } 1626 1627 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1628 1629 if (dk && dk->dk_device == rootdv) 1630 printf(" (%s.%c)", duid_format(rootduid), 'a' + part); 1631 1632 /* 1633 * Make the swap partition on the root drive the primary swap. 1634 */ 1635 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1636 if (major(rootdev) == major(swp->sw_dev) && 1637 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1638 temp = swdevt[0].sw_dev; 1639 swdevt[0].sw_dev = swp->sw_dev; 1640 swp->sw_dev = temp; 1641 break; 1642 } 1643 } 1644 if (swp->sw_dev != NODEV) { 1645 /* 1646 * If dumpdev was the same as the old primary swap device, 1647 * move it to the new primary swap device. 
1648 */ 1649 if (temp == dumpdev) 1650 dumpdev = swdevt[0].sw_dev; 1651 } 1652 if (swdevt[0].sw_dev != NODEV) 1653 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1654 DISKUNIT(swdevt[0].sw_dev), 1655 'a' + DISKPART(swdevt[0].sw_dev)); 1656 if (dumpdev != NODEV) 1657 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1658 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1659 printf("\n"); 1660 } 1661 1662 extern struct nam2blk nam2blk[]; 1663 1664 int 1665 findblkmajor(struct device *dv) 1666 { 1667 char buf[16], *p; 1668 int i; 1669 1670 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1671 return (-1); 1672 for (p = buf; *p; p++) 1673 if (*p >= '0' && *p <= '9') 1674 *p = '\0'; 1675 1676 for (i = 0; nam2blk[i].name; i++) 1677 if (!strcmp(buf, nam2blk[i].name)) 1678 return (nam2blk[i].maj); 1679 return (-1); 1680 } 1681 1682 char * 1683 findblkname(int maj) 1684 { 1685 int i; 1686 1687 for (i = 0; nam2blk[i].name; i++) 1688 if (nam2blk[i].maj == maj) 1689 return (nam2blk[i].name); 1690 return (NULL); 1691 } 1692 1693 char * 1694 disk_readlabel(struct disklabel *dl, dev_t dev, char *errbuf, size_t errsize) 1695 { 1696 struct vnode *vn; 1697 dev_t chrdev, rawdev; 1698 int error; 1699 1700 chrdev = blktochr(dev); 1701 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(chrdev), RAW_PART); 1702 1703 #ifdef DEBUG 1704 printf("dev=0x%x chrdev=0x%x rawdev=0x%x\n", dev, chrdev, rawdev); 1705 #endif 1706 1707 if (cdevvp(rawdev, &vn)) { 1708 snprintf(errbuf, errsize, 1709 "cannot obtain vnode for 0x%x/0x%x", dev, rawdev); 1710 return (errbuf); 1711 } 1712 1713 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1714 if (error) { 1715 snprintf(errbuf, errsize, 1716 "cannot open disk, 0x%x/0x%x, error %d", 1717 dev, rawdev, error); 1718 goto done; 1719 } 1720 1721 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)dl, FREAD, NOCRED, curproc); 1722 if (error) { 1723 snprintf(errbuf, errsize, 1724 "cannot read disk label, 0x%x/0x%x, error %d", 1725 dev, rawdev, error); 1726 
} 1727 done: 1728 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1729 vput(vn); 1730 if (error) 1731 return (errbuf); 1732 return (NULL); 1733 } 1734 1735 int 1736 disk_map(char *path, char *mappath, int size, int flags) 1737 { 1738 struct disk *dk, *mdk; 1739 u_char uid[8]; 1740 char c, part; 1741 int i; 1742 1743 /* 1744 * Attempt to map a request for a disklabel UID to the correct device. 1745 * We should be supplied with a disklabel UID which has the following 1746 * format: 1747 * 1748 * [disklabel uid] . [partition] 1749 * 1750 * Alternatively, if the DM_OPENPART flag is set the disklabel UID can 1751 * based passed on its own. 1752 */ 1753 1754 if (strchr(path, '/') != NULL) 1755 return -1; 1756 1757 /* Verify that the device name is properly formed. */ 1758 if (!((strlen(path) == 16 && (flags & DM_OPENPART)) || 1759 (strlen(path) == 18 && path[16] == '.'))) 1760 return -1; 1761 1762 /* Get partition. */ 1763 if (flags & DM_OPENPART) 1764 part = 'a' + RAW_PART; 1765 else 1766 part = path[17]; 1767 1768 if (part < 'a' || part >= 'a' + MAXPARTITIONS) 1769 return -1; 1770 1771 /* Derive label UID. */ 1772 memset(uid, 0, sizeof(uid)); 1773 for (i = 0; i < 16; i++) { 1774 c = path[i]; 1775 if (c >= '0' && c <= '9') 1776 c -= '0'; 1777 else if (c >= 'a' && c <= 'f') 1778 c -= ('a' - 10); 1779 else 1780 return -1; 1781 1782 uid[i / 2] <<= 4; 1783 uid[i / 2] |= c & 0xf; 1784 } 1785 1786 mdk = NULL; 1787 TAILQ_FOREACH(dk, &disklist, dk_link) { 1788 if ((dk->dk_flags & DKF_LABELVALID) && dk->dk_label && 1789 memcmp(dk->dk_label->d_uid, uid, 1790 sizeof(dk->dk_label->d_uid)) == 0) { 1791 /* Fail if there are duplicate UIDs! */ 1792 if (mdk != NULL) 1793 return -1; 1794 mdk = dk; 1795 } 1796 } 1797 1798 if (mdk == NULL || mdk->dk_name == NULL) 1799 return -1; 1800 1801 snprintf(mappath, size, "/dev/%s%s%c", 1802 (flags & DM_OPENBLCK) ? "" : "r", mdk->dk_name, part); 1803 1804 return 0; 1805 } 1806 1807 /* 1808 * Lookup a disk device and verify that it has completed attaching. 
1809 */ 1810 struct device * 1811 disk_lookup(struct cfdriver *cd, int unit) 1812 { 1813 struct device *dv; 1814 struct disk *dk; 1815 1816 dv = device_lookup(cd, unit); 1817 if (dv == NULL) 1818 return (NULL); 1819 1820 TAILQ_FOREACH(dk, &disklist, dk_link) 1821 if (dk->dk_device == dv) 1822 break; 1823 1824 if (dk == NULL) { 1825 device_unref(dv); 1826 return (NULL); 1827 } 1828 1829 return (dv); 1830 } 1831 1832 int 1833 duid_equal(u_char *duid1, u_char *duid2) 1834 { 1835 return (memcmp(duid1, duid2, DUID_SIZE) == 0); 1836 } 1837 1838 int 1839 duid_iszero(u_char *duid) 1840 { 1841 u_char zeroduid[DUID_SIZE]; 1842 1843 memset(zeroduid, 0, sizeof(zeroduid)); 1844 1845 return (duid_equal(duid, zeroduid)); 1846 } 1847 1848 const char * 1849 duid_format(u_char *duid) 1850 { 1851 static char duid_str[17]; 1852 1853 KERNEL_ASSERT_LOCKED(); 1854 1855 snprintf(duid_str, sizeof(duid_str), 1856 "%02x%02x%02x%02x%02x%02x%02x%02x", 1857 duid[0], duid[1], duid[2], duid[3], 1858 duid[4], duid[5], duid[6], duid[7]); 1859 1860 return (duid_str); 1861 } 1862