1 /* $OpenBSD: subr_disk.c,v 1.265 2022/10/11 23:39:08 krw Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/malloc.h> 44 #include <sys/fcntl.h> 45 #include <sys/buf.h> 46 #include <sys/stat.h> 47 #include <sys/syslog.h> 48 #include <sys/device.h> 49 #include <sys/time.h> 50 #include <sys/disklabel.h> 51 #include <sys/conf.h> 52 #include <sys/disk.h> 53 #include <sys/reboot.h> 54 #include <sys/dkio.h> 55 #include <sys/vnode.h> 56 #include <sys/task.h> 57 #include <sys/stdint.h> 58 59 #include <sys/socket.h> 60 61 #include <net/if.h> 62 63 #include <dev/cons.h> 64 65 #include <lib/libz/zlib.h> 66 67 #include "softraid.h" 68 69 #ifdef DEBUG 70 #define DPRINTF(x...) printf(x) 71 #else 72 #define DPRINTF(x...) 73 #endif 74 75 /* 76 * A global list of all disks attached to the system. May grow or 77 * shrink over time. 78 */ 79 struct disklist_head disklist; /* TAILQ_HEAD */ 80 int disk_count; /* number of drives in global disklist */ 81 int disk_change; /* set if a disk has been attached/detached 82 * since last we looked at this variable. This 83 * is reset by hw_sysctl() 84 */ 85 86 #define DUID_SIZE 8 87 88 u_char bootduid[DUID_SIZE]; /* DUID of boot disk. */ 89 u_char rootduid[DUID_SIZE]; /* DUID of root disk. */ 90 91 struct device *rootdv; 92 93 /* softraid callback, do not use! */ 94 void (*softraid_disk_attach)(struct disk *, int); 95 96 void sr_map_root(void); 97 98 struct disk_attach_task { 99 struct task task; 100 struct disk *dk; 101 }; 102 103 void disk_attach_callback(void *); 104 105 int spoofgpt(struct buf *, void (*)(struct buf *), const uint8_t *, 106 struct disklabel *, daddr_t *); 107 void spoofmbr(struct buf *, void (*)(struct buf *), const uint8_t *, 108 struct disklabel *, daddr_t *); 109 void spooffat(const uint8_t *, struct disklabel *, daddr_t *); 110 111 int gpt_chk_mbr(struct dos_partition *, uint64_t); 112 int gpt_get_hdr(struct buf *, void (*)(struct buf *), struct disklabel *, 113 uint64_t, struct gpt_header *); 114 int gpt_get_parts(struct buf *, void (*)(struct buf *), 115 struct disklabel *, const struct gpt_header *, struct gpt_partition **); 116 int gpt_get_fstype(struct uuid *); 117 118 int duid_equal(u_char *, u_char *); 119 120 /* 121 * Compute checksum for disk label. 122 */ 123 u_int 124 dkcksum(struct disklabel *lp) 125 { 126 u_int16_t *start, *end; 127 u_int16_t sum = 0; 128 129 start = (u_int16_t *)lp; 130 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 131 while (start < end) 132 sum ^= *start++; 133 return (sum); 134 } 135 136 int 137 initdisklabel(struct disklabel *lp) 138 { 139 int i; 140 141 /* minimal requirements for archetypal disk label */ 142 if (lp->d_secsize < DEV_BSIZE) 143 lp->d_secsize = DEV_BSIZE; 144 if (DL_GETDSIZE(lp) == 0) 145 DL_SETDSIZE(lp, MAXDISKSIZE); 146 if (lp->d_secpercyl == 0) 147 return (ERANGE); 148 lp->d_npartitions = MAXPARTITIONS; 149 for (i = 0; i < RAW_PART; i++) { 150 DL_SETPSIZE(&lp->d_partitions[i], 0); 151 DL_SETPOFFSET(&lp->d_partitions[i], 0); 152 } 153 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 154 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 155 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 156 DL_SETBSTART(lp, 0); 157 DL_SETBEND(lp, DL_GETDSIZE(lp)); 158 lp->d_version = 1; 159 return (0); 160 } 161 162 /* 163 * Check an incoming block to make sure it is a disklabel, convert it to 164 * a newer version if needed, etc etc. 165 */ 166 int 167 checkdisklabel(dev_t dev, void *rlp, struct disklabel *lp, u_int64_t boundstart, 168 u_int64_t boundend) 169 { 170 struct disklabel *dlp = rlp; 171 struct __partitionv0 *v0pp; 172 struct partition *pp; 173 const char *blkname; 174 u_int64_t disksize; 175 int error = 0; 176 int i; 177 178 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) 179 error = ENOENT; /* no disk label */ 180 else if (dlp->d_npartitions > MAXPARTITIONS) 181 error = E2BIG; /* too many partitions */ 182 else if (dlp->d_secpercyl == 0) 183 error = EINVAL; /* invalid label */ 184 else if (dlp->d_secsize == 0) 185 error = ENOSPC; /* disk too small */ 186 else if (dkcksum(dlp) != 0) 187 error = EINVAL; /* incorrect checksum */ 188 189 if (error) { 190 u_int16_t *start, *end, sum = 0; 191 192 /* If it is byte-swapped, attempt to convert it */ 193 if (swap32(dlp->d_magic) != DISKMAGIC || 194 swap32(dlp->d_magic2) != DISKMAGIC || 195 swap16(dlp->d_npartitions) > MAXPARTITIONS) 196 return (error); 197 198 /* 199 * Need a byte-swap aware dkcksum variant 200 * inlined, because dkcksum uses a sub-field 201 */ 202 start = (u_int16_t *)dlp; 203 end = (u_int16_t *)&dlp->d_partitions[ 204 swap16(dlp->d_npartitions)]; 205 while (start < end) 206 sum ^= *start++; 207 if (sum != 0) 208 return (error); 209 210 dlp->d_magic = swap32(dlp->d_magic); 211 dlp->d_type = swap16(dlp->d_type); 212 213 /* d_typename and d_packname are strings */ 214 215 dlp->d_secsize = swap32(dlp->d_secsize); 216 dlp->d_nsectors = swap32(dlp->d_nsectors); 217 dlp->d_ntracks = swap32(dlp->d_ntracks); 218 dlp->d_ncylinders = swap32(dlp->d_ncylinders); 219 dlp->d_secpercyl = swap32(dlp->d_secpercyl); 220 dlp->d_secperunit = swap32(dlp->d_secperunit); 221 222 /* d_uid is a string */ 223 224 dlp->d_acylinders = swap32(dlp->d_acylinders); 225 226 dlp->d_flags = swap32(dlp->d_flags); 227 228 for (i = 0; i < NDDATA; i++) 229 dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]); 230 231 dlp->d_secperunith = swap16(dlp->d_secperunith); 232 dlp->d_version = swap16(dlp->d_version); 233 234 for (i = 0; i < NSPARE; i++) 235 dlp->d_spare[i] = swap32(dlp->d_spare[i]); 236 237 dlp->d_magic2 = swap32(dlp->d_magic2); 238 239 dlp->d_npartitions = swap16(dlp->d_npartitions); 240 241 for (i = 0; i < MAXPARTITIONS; i++) { 242 pp = &dlp->d_partitions[i]; 243 pp->p_size = swap32(pp->p_size); 244 pp->p_offset = swap32(pp->p_offset); 245 if (dlp->d_version == 0) { 246 v0pp = (struct __partitionv0 *)pp; 247 v0pp->p_fsize = swap32(v0pp->p_fsize); 248 } else { 249 pp->p_offseth = swap16(pp->p_offseth); 250 pp->p_sizeh = swap16(pp->p_sizeh); 251 } 252 pp->p_cpg = swap16(pp->p_cpg); 253 } 254 255 dlp->d_checksum = 0; 256 dlp->d_checksum = dkcksum(dlp); 257 error = 0; 258 } 259 260 /* XXX should verify lots of other fields and whine a lot */ 261 262 /* Initial passed in lp contains the real disk size. */ 263 disksize = DL_GETDSIZE(lp); 264 265 if (lp != dlp) 266 *lp = *dlp; 267 268 if (lp->d_version == 0) { 269 blkname = findblkname(major(dev)); 270 if (blkname == NULL) 271 blkname = findblkname(major(chrtoblk(dev))); 272 printf("%s%d has legacy label, please rewrite using " 273 "disklabel(8)\n", blkname, DISKUNIT(dev)); 274 275 lp->d_version = 1; 276 lp->d_secperunith = 0; 277 278 v0pp = (struct __partitionv0 *)lp->d_partitions; 279 pp = lp->d_partitions; 280 for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) { 281 pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp-> 282 p_fsize, v0pp->p_frag); 283 pp->p_offseth = 0; 284 pp->p_sizeh = 0; 285 } 286 } 287 288 #ifdef DEBUG 289 if (DL_GETDSIZE(lp) != disksize) 290 printf("on-disk disklabel has incorrect disksize (%llu)\n", 291 DL_GETDSIZE(lp)); 292 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize) 293 printf("on-disk disklabel RAW_PART has incorrect size (%llu)\n", 294 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 295 if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0) 296 printf("on-disk disklabel RAW_PART offset != 0 (%llu)\n", 297 DL_GETPOFFSET(&lp->d_partitions[RAW_PART])); 298 #endif 299 DL_SETDSIZE(lp, disksize); 300 DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize); 301 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 302 DL_SETBSTART(lp, boundstart); 303 DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp)); 304 305 lp->d_checksum = 0; 306 lp->d_checksum = dkcksum(lp); 307 return (0); 308 } 309 310 /* 311 * Read a disk sector. 312 */ 313 int 314 readdisksector(struct buf *bp, void (*strat)(struct buf *), 315 struct disklabel *lp, u_int64_t sector) 316 { 317 bp->b_blkno = DL_SECTOBLK(lp, sector); 318 bp->b_bcount = lp->d_secsize; 319 bp->b_error = 0; 320 CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR); 321 SET(bp->b_flags, B_BUSY | B_READ | B_RAW); 322 323 (*strat)(bp); 324 325 return (biowait(bp)); 326 } 327 328 int 329 readdoslabel(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, 330 daddr_t *partoffp, int spoofonly) 331 { 332 uint8_t dosbb[DEV_BSIZE]; 333 struct disklabel nlp; 334 struct disklabel *rlp; 335 daddr_t partoff; 336 int error; 337 338 #ifdef DEBUG 339 char devname[32]; 340 const char *blkname; 341 342 blkname = findblkname(major(bp->b_dev)); 343 if (blkname == NULL) 344 blkname = findblkname(major(chrtoblk(bp->b_dev))); 345 if (blkname == NULL) 346 snprintf(devname, sizeof(devname), "<%d, %d>", major(bp->b_dev), 347 minor(bp->b_dev)); 348 else 349 snprintf(devname, sizeof(devname), "%s%d", blkname, 350 DISKUNIT(bp->b_dev)); 351 352 printf("readdoslabel enter: %s, spoofonly %d, partoffp %sNULL\n", 353 devname, spoofonly, (partoffp == NULL) ? "" : "not "); 354 #endif /* DEBUG */ 355 356 error = readdisksector(bp, strat, lp, DOSBBSECTOR); 357 if (error) { 358 DPRINTF("readdoslabel return: %s, %d -- lp unchanged, " 359 "DOSBBSECTOR read error\n", devname, error); 360 return error; 361 } 362 memcpy(dosbb, bp->b_data, sizeof(dosbb)); 363 364 nlp = *lp; 365 memset(nlp.d_partitions, 0, sizeof(nlp.d_partitions)); 366 nlp.d_partitions[RAW_PART] = lp->d_partitions[RAW_PART]; 367 nlp.d_magic = 0; 368 369 error = spoofgpt(bp, strat, dosbb, &nlp, &partoff); 370 if (error) 371 return error; 372 if (nlp.d_magic != DISKMAGIC) 373 spoofmbr(bp, strat, dosbb, &nlp, &partoff); 374 if (nlp.d_magic != DISKMAGIC) 375 spooffat(dosbb, &nlp, &partoff); 376 if (nlp.d_magic != DISKMAGIC) { 377 DPRINTF("readdoslabel: N/A -- label partition @ " 378 "daddr_t 0 (default)\n"); 379 partoff = 0; 380 } 381 382 if (partoffp != NULL) { 383 /* 384 * If a non-zero value is returned writedisklabel() exits with 385 * EIO. If 0 is returned the label sector is read from disk and 386 * lp is copied into it. So leave lp alone! 387 */ 388 if (partoff == -1) { 389 DPRINTF("readdoslabel return: %s, ENXIO, lp " 390 "unchanged, *partoffp unchanged\n", devname); 391 return ENXIO; 392 } 393 *partoffp = partoff; 394 DPRINTF("readdoslabel return: %s, 0, lp unchanged, " 395 "*partoffp set to %lld\n", devname, *partoffp); 396 return 0; 397 } 398 399 nlp.d_magic = lp->d_magic; 400 *lp = nlp; 401 lp->d_checksum = 0; 402 lp->d_checksum = dkcksum(lp); 403 404 if (spoofonly || partoff == -1) { 405 DPRINTF("readdoslabel return: %s, 0, lp spoofed\n", 406 devname); 407 return 0; 408 } 409 410 partoff += DOS_LABELSECTOR; 411 error = readdisksector(bp, strat, lp, DL_BLKTOSEC(lp, partoff)); 412 if (error) { 413 DPRINTF("readdoslabel return: %s, %d, lp read failed\n", 414 devname, error); 415 return bp->b_error; 416 } 417 418 rlp = (struct disklabel *)(bp->b_data + DL_BLKOFFSET(lp, partoff)); 419 error = checkdisklabel(bp->b_dev, rlp, lp, DL_GETBSTART(rlp), 420 DL_GETBEND(rlp)); 421 422 DPRINTF("readdoslabel return: %s, %d, checkdisklabel() of daddr_t " 423 "%lld %s\n", devname, error, partoff, error ? "failed" : "ok"); 424 425 return error; 426 } 427 428 /* 429 * Return the index into dp[] of the EFI GPT (0xEE) partition, or -1 if no such 430 * partition exists. 431 * 432 * Copied into sbin/fdisk/mbr.c. 433 */ 434 int 435 gpt_chk_mbr(struct dos_partition *dp, uint64_t dsize) 436 { 437 struct dos_partition *dp2; 438 int efi, eficnt, found, i; 439 uint32_t psize; 440 441 found = efi = eficnt = 0; 442 for (dp2 = dp, i = 0; i < NDOSPART; i++, dp2++) { 443 if (dp2->dp_typ == DOSPTYP_UNUSED) 444 continue; 445 found++; 446 if (dp2->dp_typ != DOSPTYP_EFI) 447 continue; 448 if (letoh32(dp2->dp_start) != GPTSECTOR) 449 continue; 450 psize = letoh32(dp2->dp_size); 451 if (psize <= (dsize - GPTSECTOR) || psize == UINT32_MAX) { 452 efi = i; 453 eficnt++; 454 } 455 } 456 if (found == 1 && eficnt == 1) 457 return (efi); 458 459 return (-1); 460 } 461 462 int 463 gpt_get_hdr(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, 464 uint64_t sector, struct gpt_header *gh) 465 { 466 struct gpt_header ngh; 467 int error; 468 uint64_t lbaend, lbastart; 469 uint32_t csum; 470 uint32_t size, partsize; 471 472 473 error = readdisksector(bp, strat, lp, sector); 474 if (error) 475 return error; 476 477 memcpy(&ngh, bp->b_data, sizeof(ngh)); 478 479 size = letoh32(ngh.gh_size); 480 partsize = letoh32(ngh.gh_part_size); 481 lbaend = letoh64(ngh.gh_lba_end); 482 lbastart = letoh64(ngh.gh_lba_start); 483 484 csum = ngh.gh_csum; 485 ngh.gh_csum = 0; 486 ngh.gh_csum = htole32(crc32(0, (unsigned char *)&ngh, GPTMINHDRSIZE)); 487 488 if (letoh64(ngh.gh_sig) == GPTSIGNATURE && 489 letoh32(ngh.gh_rev) == GPTREVISION && 490 size == GPTMINHDRSIZE && lbastart <= lbaend && 491 partsize == GPTMINPARTSIZE && lp->d_secsize % partsize == 0 && 492 csum == ngh.gh_csum) 493 *gh = ngh; 494 else 495 memset(gh, 0, sizeof(*gh)); 496 497 return 0; 498 } 499 500 int 501 gpt_get_parts(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, 502 const struct gpt_header *gh, struct gpt_partition **gp) 503 { 504 uint8_t *ngp; 505 int error, i; 506 uint64_t bytes, partlba, sectors; 507 uint32_t partnum, partsize, partcsum; 508 509 partlba = letoh64(gh->gh_part_lba); 510 partnum = letoh32(gh->gh_part_num); 511 partsize = letoh32(gh->gh_part_size); 512 513 sectors = ((uint64_t)partnum * partsize + lp->d_secsize - 1) / 514 lp->d_secsize; 515 516 ngp = mallocarray(sectors, lp->d_secsize, M_DEVBUF, M_NOWAIT | M_ZERO); 517 if (ngp == NULL) { 518 *gp = NULL; 519 return ENOMEM; 520 } 521 bytes = sectors * lp->d_secsize; 522 523 for (i = 0; i < sectors; i++) { 524 error = readdisksector(bp, strat, lp, partlba + i); 525 if (error) { 526 free(ngp, M_DEVBUF, bytes); 527 *gp = NULL; 528 return error; 529 } 530 memcpy(ngp + i * lp->d_secsize, bp->b_data, lp->d_secsize); 531 } 532 533 partcsum = htole32(crc32(0, ngp, partnum * partsize)); 534 if (partcsum != gh->gh_part_csum) { 535 DPRINTF("invalid %s GPT partition array @ %llu\n", 536 (letoh64(gh->gh_lba_self) == GPTSECTOR) ? "Primary" : 537 "Secondary", partlba); 538 free(ngp, M_DEVBUF, bytes); 539 *gp = NULL; 540 } else { 541 *gp = (struct gpt_partition *)ngp; 542 } 543 544 return 0; 545 } 546 547 int 548 gpt_get_fstype(struct uuid *uuid_part) 549 { 550 static int init = 0; 551 static struct uuid uuid_openbsd, uuid_msdos, uuid_chromefs, 552 uuid_linux, uuid_hfs, uuid_unused, uuid_efi_system, uuid_bios_boot; 553 static const uint8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD; 554 static const uint8_t gpt_uuid_msdos[] = GPT_UUID_MSDOS; 555 static const uint8_t gpt_uuid_chromerootfs[] = GPT_UUID_CHROMEROOTFS; 556 static const uint8_t gpt_uuid_linux[] = GPT_UUID_LINUX; 557 static const uint8_t gpt_uuid_hfs[] = GPT_UUID_APPLE_HFS; 558 static const uint8_t gpt_uuid_unused[] = GPT_UUID_UNUSED; 559 static const uint8_t gpt_uuid_efi_system[] = GPT_UUID_EFI_SYSTEM; 560 static const uint8_t gpt_uuid_bios_boot[] = GPT_UUID_BIOS_BOOT; 561 562 if (init == 0) { 563 uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd); 564 uuid_dec_be(gpt_uuid_msdos, &uuid_msdos); 565 uuid_dec_be(gpt_uuid_chromerootfs, &uuid_chromefs); 566 uuid_dec_be(gpt_uuid_linux, &uuid_linux); 567 uuid_dec_be(gpt_uuid_hfs, &uuid_hfs); 568 uuid_dec_be(gpt_uuid_unused, &uuid_unused); 569 uuid_dec_be(gpt_uuid_efi_system, &uuid_efi_system); 570 uuid_dec_be(gpt_uuid_bios_boot, &uuid_bios_boot); 571 init = 1; 572 } 573 574 if (!memcmp(uuid_part, &uuid_unused, sizeof(struct uuid))) 575 return FS_UNUSED; 576 else if (!memcmp(uuid_part, &uuid_openbsd, sizeof(struct uuid))) 577 return FS_BSDFFS; 578 else if (!memcmp(uuid_part, &uuid_msdos, sizeof(struct uuid))) 579 return FS_MSDOS; 580 else if (!memcmp(uuid_part, &uuid_chromefs, sizeof(struct uuid))) 581 return FS_EXT2FS; 582 else if (!memcmp(uuid_part, &uuid_linux, sizeof(struct uuid))) 583 return FS_EXT2FS; 584 else if (!memcmp(uuid_part, &uuid_hfs, sizeof(struct uuid))) 585 return FS_HFS; 586 else if (!memcmp(uuid_part, &uuid_efi_system, sizeof(struct uuid))) 587 return FS_MSDOS; 588 else if (!memcmp(uuid_part, &uuid_bios_boot, sizeof(struct uuid))) 589 return FS_BOOT; 590 else 591 return FS_OTHER; 592 } 593 594 int 595 spoofgpt(struct buf *bp, void (*strat)(struct buf *), const uint8_t *dosbb, 596 struct disklabel *lp, daddr_t *partoffp) 597 { 598 struct dos_partition dp[NDOSPART]; 599 struct gpt_header gh; 600 struct uuid gptype; 601 struct gpt_partition *gp; 602 struct partition *pp; 603 uint64_t lbaend, lbastart, labelsec; 604 uint64_t gpbytes, end, start; 605 daddr_t partoff; 606 unsigned int i, n; 607 int error, fstype, obsdfound; 608 uint32_t partnum; 609 uint16_t sig; 610 611 gp = NULL; 612 gpbytes = 0; 613 614 memcpy(dp, dosbb + DOSPARTOFF, sizeof(dp)); 615 memcpy(&sig, dosbb + DOSMBR_SIGNATURE_OFF, sizeof(sig)); 616 617 if (letoh16(sig) != DOSMBR_SIGNATURE || 618 gpt_chk_mbr(dp, DL_GETDSIZE(lp)) == -1) 619 return 0; 620 621 error = gpt_get_hdr(bp, strat, lp, GPTSECTOR, &gh); 622 if (error == 0 && letoh64(gh.gh_sig) == GPTSIGNATURE) 623 error = gpt_get_parts(bp, strat, lp, &gh, &gp); 624 625 if (error || letoh64(gh.gh_sig) != GPTSIGNATURE || gp == NULL) { 626 error = gpt_get_hdr(bp, strat, lp, DL_GETDSIZE(lp) - 1, &gh); 627 if (error == 0 && letoh64(gh.gh_sig) == GPTSIGNATURE) 628 error = gpt_get_parts(bp, strat, lp, &gh, &gp); 629 } 630 631 if (error) 632 return error; 633 if (gp == NULL) 634 return ENXIO; 635 636 lbastart = letoh64(gh.gh_lba_start); 637 lbaend = letoh64(gh.gh_lba_end); 638 partnum = letoh32(gh.gh_part_num); 639 640 n = 'i' - 'a'; /* Start spoofing at 'i', a.k.a. 8. */ 641 642 DL_SETBSTART(lp, lbastart); 643 DL_SETBEND(lp, lbaend + 1); 644 partoff = DL_SECTOBLK(lp, lbastart); 645 obsdfound = 0; 646 for (i = 0; i < partnum; i++) { 647 if (letoh64(gp[i].gp_attrs) & GPTPARTATTR_REQUIRED) { 648 DPRINTF("spoofgpt: Skipping partition %u (REQUIRED)\n", 649 i); 650 continue; 651 } 652 653 start = letoh64(gp[i].gp_lba_start); 654 if (start > lbaend || start < lbastart) 655 continue; 656 657 end = letoh64(gp[i].gp_lba_end); 658 if (start > end) 659 continue; 660 661 uuid_dec_le(&gp[i].gp_type, &gptype); 662 fstype = gpt_get_fstype(&gptype); 663 if (obsdfound && fstype == FS_BSDFFS) 664 continue; 665 666 if (fstype == FS_BSDFFS) { 667 obsdfound = 1; 668 partoff = DL_SECTOBLK(lp, start); 669 labelsec = DL_BLKTOSEC(lp, partoff + DOS_LABELSECTOR); 670 if (labelsec > ((end < lbaend) ? end : lbaend)) 671 partoff = -1; 672 DL_SETBSTART(lp, start); 673 DL_SETBEND(lp, end + 1); 674 continue; 675 } 676 677 if (partoff != -1) { 678 labelsec = DL_BLKTOSEC(lp, partoff + DOS_LABELSECTOR); 679 if (labelsec >= start && labelsec <= end) 680 partoff = -1; 681 } 682 683 if (n < MAXPARTITIONS && end <= lbaend) { 684 pp = &lp->d_partitions[n]; 685 n++; 686 pp->p_fstype = fstype; 687 DL_SETPOFFSET(pp, start); 688 DL_SETPSIZE(pp, end - start + 1); 689 } 690 } 691 692 lp->d_magic = DISKMAGIC; 693 *partoffp = partoff; 694 free(gp, M_DEVBUF, gpbytes); 695 696 #ifdef DEBUG 697 printf("readdoslabel: GPT -- "); 698 if (partoff == -1) 699 printf("no label partition\n"); 700 else if (obsdfound == 0) 701 printf("label partition @ daddr_t %lld (free space)\n", partoff); 702 else 703 printf("label partition @ daddr_t %lld (A6)\n", partoff); 704 #endif /* DEBUG */ 705 706 return 0; 707 } 708 709 void 710 spoofmbr(struct buf *bp, void (*strat)(struct buf *), const uint8_t *dosbb, 711 struct disklabel *lp, daddr_t *partoffp) 712 { 713 struct dos_partition dp[NDOSPART]; 714 struct partition *pp; 715 uint64_t sector = DOSBBSECTOR; 716 uint64_t start, end; 717 daddr_t labeloff, partoff; 718 unsigned int i, n, parts; 719 int wander = 1, ebr = 0; 720 int error, obsdfound; 721 uint32_t extoff = 0; 722 uint16_t sig; 723 uint8_t fstype; 724 725 memcpy(&sig, dosbb + DOSMBR_SIGNATURE_OFF, sizeof(sig)); 726 if (letoh16(sig) != DOSMBR_SIGNATURE) 727 return; 728 memcpy(dp, dosbb + DOSPARTOFF, sizeof(dp)); 729 730 obsdfound = 0; 731 partoff = 0; 732 parts = 0; 733 n = 'i' - 'a'; 734 while (wander && ebr < DOS_MAXEBR) { 735 ebr++; 736 wander = 0; 737 if (sector < extoff) 738 sector = extoff; 739 740 error = 0; 741 if (sector != DOSBBSECTOR) { 742 error = readdisksector(bp, strat, lp, sector); 743 if (error) 744 break; 745 memcpy(&sig, bp->b_data + DOSMBR_SIGNATURE_OFF, 746 sizeof(sig)); 747 if (letoh16(sig) != DOSMBR_SIGNATURE) 748 break; 749 memcpy(dp, bp->b_data + DOSPARTOFF, sizeof(dp)); 750 } 751 752 for (i = 0; i < NDOSPART; i++) { 753 if (letoh32(dp[i].dp_size) == 0) 754 continue; 755 if (obsdfound && dp[i].dp_typ == DOSPTYP_OPENBSD) 756 continue; 757 758 if (dp[i].dp_typ != DOSPTYP_OPENBSD) { 759 if (letoh32(dp[i].dp_start) > DL_GETDSIZE(lp)) 760 continue; 761 if (letoh32(dp[i].dp_size) > DL_GETDSIZE(lp)) 762 continue; 763 } 764 765 start = sector + letoh32(dp[i].dp_start); 766 end = start + letoh32(dp[i].dp_size); 767 768 parts++; 769 if (obsdfound == 0) { 770 labeloff = partoff + DOS_LABELSECTOR; 771 if (labeloff >= DL_SECTOBLK(lp, start) && 772 labeloff < DL_SECTOBLK(lp, end)) 773 partoff = -1; 774 } 775 776 switch (dp[i].dp_typ) { 777 case DOSPTYP_OPENBSD: 778 obsdfound = 1; 779 partoff = DL_SECTOBLK(lp, start); 780 labeloff = partoff + DOS_LABELSECTOR; 781 if (labeloff >= DL_SECTOBLK(lp, end)) 782 partoff = -1; 783 DL_SETBSTART(lp, start); 784 DL_SETBEND(lp, end); 785 continue; 786 case DOSPTYP_EFI: 787 continue; 788 case DOSPTYP_EXTEND: 789 case DOSPTYP_EXTENDL: 790 sector = start + extoff; 791 if (extoff == 0) { 792 extoff = start; 793 sector = 0; 794 } 795 wander = 1; 796 continue; 797 798 case DOSPTYP_UNUSED: 799 fstype = FS_UNUSED; 800 break; 801 case DOSPTYP_LINUX: 802 fstype = FS_EXT2FS; 803 break; 804 case DOSPTYP_NTFS: 805 fstype = FS_NTFS; 806 break; 807 case DOSPTYP_EFISYS: 808 case DOSPTYP_FAT12: 809 case DOSPTYP_FAT16S: 810 case DOSPTYP_FAT16B: 811 case DOSPTYP_FAT16L: 812 case DOSPTYP_FAT32: 813 case DOSPTYP_FAT32L: 814 fstype = FS_MSDOS; 815 break; 816 default: 817 fstype = FS_OTHER; 818 break; 819 } 820 821 if (n < MAXPARTITIONS) { 822 pp = &lp->d_partitions[n++]; 823 pp->p_fstype = fstype; 824 if (start) 825 DL_SETPOFFSET(pp, start); 826 DL_SETPSIZE(pp, end - start); 827 } 828 } 829 } 830 831 if (parts > 0) { 832 lp->d_magic = DISKMAGIC; 833 *partoffp = partoff; 834 #ifdef DEBUG 835 printf("readdoslabel: MBR -- "); 836 if (partoff == -1) 837 printf("no label partition\n"); 838 else if (obsdfound == 0) 839 printf("label partition @ daddr_t %lld (free space)\n", partoff); 840 else 841 printf("label partition @ daddr_t %lld (A6)\n", partoff); 842 #endif /* DEBUG */ 843 } 844 } 845 846 void 847 spooffat(const uint8_t *dosbb, struct disklabel *lp, daddr_t *partoffp) 848 { 849 uint16_t secsize; 850 851 #define VALID_JMP(_p) (((_p)[0] == 0xeb && (_p)[2] == 0x90) || (_p)[0] == 0xe9) 852 #define VALID_FAT(_p) ((_p)[16] == 1 || (_p)[16] == 2) 853 #define VALID_SEC(_s) ((_s) >= DEV_BSIZE && (_s) <= 4096 && ((_s) % 512 == 0)) 854 855 memcpy(&secsize, dosbb + 11, sizeof(secsize)); 856 secsize = letoh16(secsize); 857 858 if (VALID_JMP(dosbb) && VALID_SEC(secsize) && VALID_FAT(dosbb)) { 859 lp->d_partitions['i' - 'a'] = lp->d_partitions[RAW_PART]; 860 lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS; 861 *partoffp = -1; 862 lp->d_magic = DISKMAGIC; 863 DPRINTF("readdoslabel: FAT -- no label partition\n"); 864 } 865 } 866 867 /* 868 * Check new disk label for sensibility before setting it. 869 */ 870 int 871 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 872 { 873 struct partition *opp, *npp; 874 struct disk *dk; 875 int i; 876 877 /* sanity clause */ 878 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 879 (nlp->d_secsize % DEV_BSIZE) != 0) 880 return (EINVAL); 881 882 /* special case to allow disklabel to be invalidated */ 883 if (nlp->d_magic == 0xffffffff) { 884 *olp = *nlp; 885 return (0); 886 } 887 888 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 889 dkcksum(nlp) != 0) 890 return (EINVAL); 891 892 /* XXX missing check if other dos partitions will be overwritten */ 893 894 for (i = 0; i < MAXPARTITIONS; i++) { 895 opp = &olp->d_partitions[i]; 896 npp = &nlp->d_partitions[i]; 897 if ((openmask & (1 << i)) && 898 (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 899 DL_GETPSIZE(npp) < DL_GETPSIZE(opp))) 900 return (EBUSY); 901 /* 902 * Copy internally-set partition information 903 * if new label doesn't include it. XXX 904 */ 905 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 906 npp->p_fragblock = opp->p_fragblock; 907 npp->p_cpg = opp->p_cpg; 908 } 909 } 910 911 /* Generate a UID if the disklabel does not already have one. */ 912 if (duid_iszero(nlp->d_uid)) { 913 do { 914 arc4random_buf(nlp->d_uid, sizeof(nlp->d_uid)); 915 TAILQ_FOREACH(dk, &disklist, dk_link) 916 if (dk->dk_label && 917 duid_equal(dk->dk_label->d_uid, nlp->d_uid)) 918 break; 919 } while (dk != NULL || duid_iszero(nlp->d_uid)); 920 } 921 922 /* Preserve the disk size and RAW_PART values. */ 923 DL_SETDSIZE(nlp, DL_GETDSIZE(olp)); 924 npp = &nlp->d_partitions[RAW_PART]; 925 DL_SETPOFFSET(npp, 0); 926 DL_SETPSIZE(npp, DL_GETDSIZE(nlp)); 927 928 nlp->d_checksum = 0; 929 nlp->d_checksum = dkcksum(nlp); 930 *olp = *nlp; 931 932 disk_change = 1; 933 934 return (0); 935 } 936 937 /* 938 * Determine the size of the transfer, and make sure it is within the 939 * boundaries of the partition. Adjust transfer if needed, and signal errors or 940 * early completion. 941 */ 942 int 943 bounds_check_with_label(struct buf *bp, struct disklabel *lp) 944 { 945 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 946 daddr_t partblocks, sz; 947 948 /* Avoid division by zero, negative offsets, and negative sizes. */ 949 if (lp->d_secpercyl == 0 || bp->b_blkno < 0 || bp->b_bcount < 0) 950 goto bad; 951 952 /* Ensure transfer is a whole number of aligned sectors. */ 953 if ((bp->b_blkno % DL_BLKSPERSEC(lp)) != 0 || 954 (bp->b_bcount % lp->d_secsize) != 0) 955 goto bad; 956 957 /* Ensure transfer starts within partition boundary. */ 958 partblocks = DL_SECTOBLK(lp, DL_GETPSIZE(p)); 959 if (bp->b_blkno > partblocks) 960 goto bad; 961 962 /* If exactly at end of partition or null transfer, return EOF. */ 963 if (bp->b_blkno == partblocks || bp->b_bcount == 0) 964 goto done; 965 966 /* Truncate request if it extends past the end of the partition. */ 967 sz = bp->b_bcount >> DEV_BSHIFT; 968 if (sz > partblocks - bp->b_blkno) { 969 sz = partblocks - bp->b_blkno; 970 bp->b_bcount = sz << DEV_BSHIFT; 971 } 972 973 return (0); 974 975 bad: 976 bp->b_error = EINVAL; 977 bp->b_flags |= B_ERROR; 978 done: 979 bp->b_resid = bp->b_bcount; 980 return (-1); 981 } 982 983 /* 984 * Disk error is the preface to plaintive error messages 985 * about failing disk transfers. It prints messages of the form 986 987 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 988 989 * if the offset of the error in the transfer and a disk label 990 * are both available. blkdone should be -1 if the position of the error 991 * is unknown; the disklabel pointer may be null from drivers that have not 992 * been converted to use them. The message is printed with printf 993 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 994 * The message should be completed (with at least a newline) with printf 995 * or addlog, respectively. There is no trailing space. 996 */ 997 void 998 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 999 struct disklabel *lp) 1000 { 1001 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 1002 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))); 1003 char partname = 'a' + part; 1004 daddr_t sn; 1005 1006 if (pri != LOG_PRINTF) { 1007 log(pri, "%s", ""); 1008 pr = addlog; 1009 } else 1010 pr = printf; 1011 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 1012 bp->b_flags & B_READ ? "read" : "writ"); 1013 sn = bp->b_blkno; 1014 if (bp->b_bcount <= DEV_BSIZE) 1015 (*pr)("%lld", (long long)sn); 1016 else { 1017 if (blkdone >= 0) { 1018 sn += blkdone; 1019 (*pr)("%lld of ", (long long)sn); 1020 } 1021 (*pr)("%lld-%lld", (long long)bp->b_blkno, 1022 (long long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE)); 1023 } 1024 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 1025 sn += DL_SECTOBLK(lp, DL_GETPOFFSET(&lp->d_partitions[part])); 1026 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, (long long)sn, 1027 (long long)(sn / DL_SECTOBLK(lp, lp->d_secpercyl))); 1028 sn %= DL_SECTOBLK(lp, lp->d_secpercyl); 1029 (*pr)(" tn %lld sn %lld)", 1030 (long long)(sn / DL_SECTOBLK(lp, lp->d_nsectors)), 1031 (long long)(sn % DL_SECTOBLK(lp, lp->d_nsectors))); 1032 } 1033 } 1034 1035 /* 1036 * Initialize the disklist. Called by main() before autoconfiguration. 1037 */ 1038 void 1039 disk_init(void) 1040 { 1041 1042 TAILQ_INIT(&disklist); 1043 disk_count = disk_change = 0; 1044 } 1045 1046 int 1047 disk_construct(struct disk *diskp) 1048 { 1049 rw_init_flags(&diskp->dk_lock, "dklk", RWL_IS_VNODE); 1050 mtx_init(&diskp->dk_mtx, IPL_BIO); 1051 1052 diskp->dk_flags |= DKF_CONSTRUCTED; 1053 1054 return (0); 1055 } 1056 1057 /* 1058 * Attach a disk. 1059 */ 1060 void 1061 disk_attach(struct device *dv, struct disk *diskp) 1062 { 1063 int majdev; 1064 1065 KERNEL_ASSERT_LOCKED(); 1066 1067 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 1068 disk_construct(diskp); 1069 1070 /* 1071 * Allocate and initialize the disklabel structures. Note that 1072 * it's not safe to sleep here, since we're probably going to be 1073 * called during autoconfiguration. 1074 */ 1075 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 1076 M_NOWAIT|M_ZERO); 1077 if (diskp->dk_label == NULL) 1078 panic("disk_attach: can't allocate storage for disklabel"); 1079 1080 /* 1081 * Set the attached timestamp. 1082 */ 1083 microuptime(&diskp->dk_attachtime); 1084 1085 /* 1086 * Link into the disklist. 1087 */ 1088 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 1089 ++disk_count; 1090 disk_change = 1; 1091 1092 /* 1093 * Store device structure and number for later use. 1094 */ 1095 diskp->dk_device = dv; 1096 diskp->dk_devno = NODEV; 1097 if (dv != NULL) { 1098 majdev = findblkmajor(dv); 1099 if (majdev >= 0) 1100 diskp->dk_devno = 1101 MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); 1102 1103 if (diskp->dk_devno != NODEV) { 1104 struct disk_attach_task *dat; 1105 1106 dat = malloc(sizeof(*dat), M_TEMP, M_WAITOK); 1107 1108 /* XXX: Assumes dk is part of the device softc. */ 1109 device_ref(dv); 1110 dat->dk = diskp; 1111 1112 task_set(&dat->task, disk_attach_callback, dat); 1113 task_add(systq, &dat->task); 1114 } 1115 } 1116 1117 if (softraid_disk_attach) 1118 softraid_disk_attach(diskp, 1); 1119 } 1120 1121 void 1122 disk_attach_callback(void *xdat) 1123 { 1124 struct disk_attach_task *dat = xdat; 1125 struct disk *dk = dat->dk; 1126 struct disklabel dl; 1127 char errbuf[100]; 1128 1129 free(dat, M_TEMP, sizeof(*dat)); 1130 1131 if (dk->dk_flags & (DKF_OPENED | DKF_NOLABELREAD)) 1132 goto done; 1133 1134 /* Read disklabel. */ 1135 if (disk_readlabel(&dl, dk->dk_devno, errbuf, sizeof(errbuf)) == NULL) { 1136 enqueue_randomness(dl.d_checksum); 1137 } 1138 1139 done: 1140 dk->dk_flags |= DKF_OPENED; 1141 device_unref(dk->dk_device); 1142 wakeup(dk); 1143 } 1144 1145 /* 1146 * Detach a disk. 1147 */ 1148 void 1149 disk_detach(struct disk *diskp) 1150 { 1151 KERNEL_ASSERT_LOCKED(); 1152 1153 if (softraid_disk_attach) 1154 softraid_disk_attach(diskp, -1); 1155 1156 /* 1157 * Free the space used by the disklabel structures. 1158 */ 1159 free(diskp->dk_label, M_DEVBUF, sizeof(*diskp->dk_label)); 1160 1161 /* 1162 * Remove from the disklist. 1163 */ 1164 TAILQ_REMOVE(&disklist, diskp, dk_link); 1165 disk_change = 1; 1166 if (--disk_count < 0) 1167 panic("disk_detach: disk_count < 0"); 1168 } 1169 1170 int 1171 disk_openpart(struct disk *dk, int part, int fmt, int haslabel) 1172 { 1173 KASSERT(part >= 0 && part < MAXPARTITIONS); 1174 1175 /* Unless opening the raw partition, check that the partition exists. */ 1176 if (part != RAW_PART && (!haslabel || 1177 part >= dk->dk_label->d_npartitions || 1178 dk->dk_label->d_partitions[part].p_fstype == FS_UNUSED)) 1179 return (ENXIO); 1180 1181 /* Ensure the partition doesn't get changed under our feet. */ 1182 switch (fmt) { 1183 case S_IFCHR: 1184 dk->dk_copenmask |= (1 << part); 1185 break; 1186 case S_IFBLK: 1187 dk->dk_bopenmask |= (1 << part); 1188 break; 1189 } 1190 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1191 1192 return (0); 1193 } 1194 1195 void 1196 disk_closepart(struct disk *dk, int part, int fmt) 1197 { 1198 KASSERT(part >= 0 && part < MAXPARTITIONS); 1199 1200 switch (fmt) { 1201 case S_IFCHR: 1202 dk->dk_copenmask &= ~(1 << part); 1203 break; 1204 case S_IFBLK: 1205 dk->dk_bopenmask &= ~(1 << part); 1206 break; 1207 } 1208 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1209 } 1210 1211 void 1212 disk_gone(int (*open)(dev_t, int, int, struct proc *), int unit) 1213 { 1214 int bmaj, cmaj, mn; 1215 1216 /* Locate the lowest minor number to be detached. */ 1217 mn = DISKMINOR(unit, 0); 1218 1219 for (bmaj = 0; bmaj < nblkdev; bmaj++) 1220 if (bdevsw[bmaj].d_open == open) 1221 vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK); 1222 for (cmaj = 0; cmaj < nchrdev; cmaj++) 1223 if (cdevsw[cmaj].d_open == open) 1224 vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR); 1225 } 1226 1227 /* 1228 * Increment a disk's busy counter. If the counter is going from 1229 * 0 to 1, set the timestamp. 1230 */ 1231 void 1232 disk_busy(struct disk *diskp) 1233 { 1234 1235 /* 1236 * XXX We'd like to use something as accurate as microtime(), 1237 * but that doesn't depend on the system TOD clock. 1238 */ 1239 mtx_enter(&diskp->dk_mtx); 1240 if (diskp->dk_busy++ == 0) 1241 microuptime(&diskp->dk_timestamp); 1242 mtx_leave(&diskp->dk_mtx); 1243 } 1244 1245 /* 1246 * Decrement a disk's busy counter, increment the byte count, total busy 1247 * time, and reset the timestamp. 1248 */ 1249 void 1250 disk_unbusy(struct disk *diskp, long bcount, daddr_t blkno, int read) 1251 { 1252 struct timeval dv_time, diff_time; 1253 1254 mtx_enter(&diskp->dk_mtx); 1255 1256 if (diskp->dk_busy-- == 0) 1257 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 1258 1259 microuptime(&dv_time); 1260 1261 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 1262 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 1263 1264 diskp->dk_timestamp = dv_time; 1265 if (bcount > 0) { 1266 if (read) { 1267 diskp->dk_rbytes += bcount; 1268 diskp->dk_rxfer++; 1269 } else { 1270 diskp->dk_wbytes += bcount; 1271 diskp->dk_wxfer++; 1272 } 1273 } else 1274 diskp->dk_seek++; 1275 1276 mtx_leave(&diskp->dk_mtx); 1277 1278 enqueue_randomness(bcount ^ diff_time.tv_usec ^ 1279 (blkno >> 32) ^ (blkno & 0xffffffff)); 1280 } 1281 1282 int 1283 disk_lock(struct disk *dk) 1284 { 1285 return (rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR)); 1286 } 1287 1288 void 1289 disk_lock_nointr(struct disk *dk) 1290 { 1291 rw_enter_write(&dk->dk_lock); 1292 } 1293 1294 void 1295 disk_unlock(struct disk *dk) 1296 { 1297 rw_exit_write(&dk->dk_lock); 1298 } 1299 1300 int 1301 dk_mountroot(void) 1302 { 1303 char errbuf[100]; 1304 int part = DISKPART(rootdev); 1305 int (*mountrootfn)(void); 1306 struct disklabel dl; 1307 char *error; 1308 1309 error = disk_readlabel(&dl, rootdev, errbuf, sizeof(errbuf)); 1310 if (error) 1311 panic("%s", error); 1312 1313 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 1314 panic("root filesystem has size 0"); 1315 switch (dl.d_partitions[part].p_fstype) { 1316 #ifdef EXT2FS 1317 case FS_EXT2FS: 1318 { 1319 extern int ext2fs_mountroot(void); 1320 mountrootfn = ext2fs_mountroot; 1321 } 1322 break; 1323 #endif 1324 #ifdef FFS 1325 case FS_BSDFFS: 1326 { 1327 extern int ffs_mountroot(void); 1328 mountrootfn = ffs_mountroot; 1329 } 1330 break; 1331 #endif 1332 #ifdef CD9660 1333 case FS_ISO9660: 1334 { 1335 extern int cd9660_mountroot(void); 1336 mountrootfn = cd9660_mountroot; 1337 } 1338 break; 1339 #endif 1340 default: 1341 #ifdef FFS 1342 { 1343 extern int ffs_mountroot(void); 1344 1345 printf("filesystem type %d not known.. assuming ffs\n", 1346 dl.d_partitions[part].p_fstype); 1347 mountrootfn = ffs_mountroot; 1348 } 1349 #else 1350 panic("disk 0x%x filesystem type %d not known", 1351 rootdev, dl.d_partitions[part].p_fstype); 1352 #endif 1353 } 1354 return (*mountrootfn)(); 1355 } 1356 1357 struct device * 1358 getdisk(char *str, int len, int defpart, dev_t *devp) 1359 { 1360 struct device *dv; 1361 1362 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1363 printf("use one of: exit"); 1364 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1365 if (dv->dv_class == DV_DISK) 1366 printf(" %s[a-p]", dv->dv_xname); 1367 #if defined(NFSCLIENT) 1368 if (dv->dv_class == DV_IFNET) 1369 printf(" %s", dv->dv_xname); 1370 #endif 1371 } 1372 printf("\n"); 1373 } 1374 return (dv); 1375 } 1376 1377 struct device * 1378 parsedisk(char *str, int len, int defpart, dev_t *devp) 1379 { 1380 struct device *dv; 1381 int majdev, part = defpart; 1382 char c; 1383 1384 if (len == 0) 1385 return (NULL); 1386 c = str[len-1]; 1387 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1388 part = c - 'a'; 1389 len -= 1; 1390 } 1391 1392 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1393 if (dv->dv_class == DV_DISK && 1394 strncmp(str, dv->dv_xname, len) == 0 && 1395 dv->dv_xname[len] == '\0') { 1396 majdev = findblkmajor(dv); 1397 if (majdev < 0) 1398 return NULL; 1399 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1400 break; 1401 } 1402 #if defined(NFSCLIENT) 1403 if (dv->dv_class == DV_IFNET && 1404 strncmp(str, dv->dv_xname, len) == 0 && 1405 dv->dv_xname[len] == '\0') { 1406 *devp = NODEV; 1407 break; 1408 } 1409 #endif 1410 } 1411 1412 return (dv); 1413 } 1414 1415 void 1416 setroot(struct device *bootdv, int part, int exitflags) 1417 { 1418 int majdev, unit, len, s, slept = 0; 1419 struct swdevt *swp; 1420 struct device *dv; 1421 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1422 struct ifnet *ifp = NULL; 1423 struct disk *dk; 1424 char buf[128]; 1425 #if defined(NFSCLIENT) 1426 extern char *nfsbootdevname; 1427 #endif 1428 1429 /* Ensure that all disk attach callbacks have completed. */ 1430 do { 1431 TAILQ_FOREACH(dk, &disklist, dk_link) { 1432 if (dk->dk_devno != NODEV && 1433 (dk->dk_flags & DKF_OPENED) == 0) { 1434 tsleep_nsec(dk, 0, "dkopen", SEC_TO_NSEC(1)); 1435 slept++; 1436 break; 1437 } 1438 } 1439 } while (dk != NULL && slept < 5); 1440 1441 if (slept == 5) { 1442 printf("disklabels not read:"); 1443 TAILQ_FOREACH(dk, &disklist, dk_link) 1444 if (dk->dk_devno != NODEV && 1445 (dk->dk_flags & DKF_OPENED) == 0) 1446 printf(" %s", dk->dk_name); 1447 printf("\n"); 1448 } 1449 1450 if (duid_iszero(bootduid)) { 1451 /* Locate DUID for boot disk since it was not provided. */ 1452 TAILQ_FOREACH(dk, &disklist, dk_link) 1453 if (dk->dk_device == bootdv) 1454 break; 1455 if (dk) 1456 bcopy(dk->dk_label->d_uid, bootduid, sizeof(bootduid)); 1457 } else if (bootdv == NULL) { 1458 /* Locate boot disk based on the provided DUID. */ 1459 TAILQ_FOREACH(dk, &disklist, dk_link) 1460 if (duid_equal(dk->dk_label->d_uid, bootduid)) 1461 break; 1462 if (dk) 1463 bootdv = dk->dk_device; 1464 } 1465 bcopy(bootduid, rootduid, sizeof(rootduid)); 1466 1467 #if NSOFTRAID > 0 1468 sr_map_root(); 1469 #endif 1470 1471 /* 1472 * If `swap generic' and we couldn't determine boot device, 1473 * ask the user. 1474 */ 1475 dk = NULL; 1476 if (mountroot == NULL && bootdv == NULL) 1477 boothowto |= RB_ASKNAME; 1478 if (boothowto & RB_ASKNAME) { 1479 while (1) { 1480 printf("root device"); 1481 if (bootdv != NULL) { 1482 printf(" (default %s", bootdv->dv_xname); 1483 if (bootdv->dv_class == DV_DISK) 1484 printf("%c", 'a' + part); 1485 printf(")"); 1486 } 1487 printf(": "); 1488 s = splhigh(); 1489 cnpollc(1); 1490 len = getsn(buf, sizeof(buf)); 1491 cnpollc(0); 1492 splx(s); 1493 if (strcmp(buf, "exit") == 0) 1494 reboot(exitflags); 1495 if (len == 0 && bootdv != NULL) { 1496 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1497 len = strlen(buf); 1498 } 1499 if (len > 0 && buf[len - 1] == '*') { 1500 buf[--len] = '\0'; 1501 dv = getdisk(buf, len, part, &nrootdev); 1502 if (dv != NULL) { 1503 rootdv = dv; 1504 nswapdev = nrootdev; 1505 goto gotswap; 1506 } 1507 } 1508 dv = getdisk(buf, len, part, &nrootdev); 1509 if (dv != NULL) { 1510 rootdv = dv; 1511 break; 1512 } 1513 } 1514 1515 if (rootdv->dv_class == DV_IFNET) 1516 goto gotswap; 1517 1518 /* try to build swap device out of new root device */ 1519 while (1) { 1520 printf("swap device"); 1521 if (rootdv != NULL) 1522 printf(" (default %s%s)", rootdv->dv_xname, 1523 rootdv->dv_class == DV_DISK ? "b" : ""); 1524 printf(": "); 1525 s = splhigh(); 1526 cnpollc(1); 1527 len = getsn(buf, sizeof(buf)); 1528 cnpollc(0); 1529 splx(s); 1530 if (strcmp(buf, "exit") == 0) 1531 reboot(exitflags); 1532 if (len == 0 && rootdv != NULL) { 1533 switch (rootdv->dv_class) { 1534 case DV_IFNET: 1535 nswapdev = NODEV; 1536 break; 1537 case DV_DISK: 1538 nswapdev = MAKEDISKDEV(major(nrootdev), 1539 DISKUNIT(nrootdev), 1); 1540 if (nswapdev == nrootdev) 1541 continue; 1542 break; 1543 default: 1544 break; 1545 } 1546 break; 1547 } 1548 dv = getdisk(buf, len, 1, &nswapdev); 1549 if (dv) { 1550 if (dv->dv_class == DV_IFNET) 1551 nswapdev = NODEV; 1552 if (nswapdev == nrootdev) 1553 continue; 1554 break; 1555 } 1556 } 1557 gotswap: 1558 rootdev = nrootdev; 1559 dumpdev = nswapdev; 1560 swdevt[0].sw_dev = nswapdev; 1561 swdevt[1].sw_dev = NODEV; 1562 #if defined(NFSCLIENT) 1563 } else if (mountroot == nfs_mountroot) { 1564 rootdv = bootdv; 1565 rootdev = dumpdev = swapdev = NODEV; 1566 #endif 1567 } else if (mountroot == NULL && rootdev == NODEV) { 1568 /* 1569 * `swap generic' 1570 */ 1571 rootdv = bootdv; 1572 1573 if (bootdv->dv_class == DV_DISK) { 1574 if (!duid_iszero(rootduid)) { 1575 TAILQ_FOREACH(dk, &disklist, dk_link) 1576 if (dk->dk_label && duid_equal( 1577 dk->dk_label->d_uid, rootduid)) 1578 break; 1579 if (dk == NULL) 1580 panic("root device (%s) not found", 1581 duid_format(rootduid)); 1582 rootdv = dk->dk_device; 1583 } 1584 } 1585 1586 majdev = findblkmajor(rootdv); 1587 if (majdev >= 0) { 1588 /* 1589 * Root and swap are on the disk. 1590 * Assume swap is on partition b. 1591 */ 1592 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1593 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1594 } else { 1595 /* 1596 * Root and swap are on a net. 1597 */ 1598 nswapdev = NODEV; 1599 } 1600 dumpdev = nswapdev; 1601 swdevt[0].sw_dev = nswapdev; 1602 /* swdevt[1].sw_dev = NODEV; */ 1603 } else { 1604 /* Completely pre-configured, but we want rootdv .. */ 1605 majdev = major(rootdev); 1606 if (findblkname(majdev) == NULL) 1607 return; 1608 unit = DISKUNIT(rootdev); 1609 part = DISKPART(rootdev); 1610 snprintf(buf, sizeof buf, "%s%d%c", 1611 findblkname(majdev), unit, 'a' + part); 1612 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1613 if (rootdv == NULL) 1614 panic("root device (%s) not found", buf); 1615 } 1616 1617 if (bootdv != NULL && bootdv->dv_class == DV_IFNET) 1618 ifp = if_unit(bootdv->dv_xname); 1619 1620 if (ifp) { 1621 if_addgroup(ifp, "netboot"); 1622 if_put(ifp); 1623 } 1624 1625 switch (rootdv->dv_class) { 1626 #if defined(NFSCLIENT) 1627 case DV_IFNET: 1628 mountroot = nfs_mountroot; 1629 nfsbootdevname = rootdv->dv_xname; 1630 return; 1631 #endif 1632 case DV_DISK: 1633 mountroot = dk_mountroot; 1634 part = DISKPART(rootdev); 1635 break; 1636 default: 1637 printf("can't figure root, hope your kernel is right\n"); 1638 return; 1639 } 1640 1641 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1642 1643 if (dk && dk->dk_device == rootdv) 1644 printf(" (%s.%c)", duid_format(rootduid), 'a' + part); 1645 1646 /* 1647 * Make the swap partition on the root drive the primary swap. 1648 */ 1649 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1650 if (major(rootdev) == major(swp->sw_dev) && 1651 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1652 temp = swdevt[0].sw_dev; 1653 swdevt[0].sw_dev = swp->sw_dev; 1654 swp->sw_dev = temp; 1655 break; 1656 } 1657 } 1658 if (swp->sw_dev != NODEV) { 1659 /* 1660 * If dumpdev was the same as the old primary swap device, 1661 * move it to the new primary swap device. 1662 */ 1663 if (temp == dumpdev) 1664 dumpdev = swdevt[0].sw_dev; 1665 } 1666 if (swdevt[0].sw_dev != NODEV) 1667 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1668 DISKUNIT(swdevt[0].sw_dev), 1669 'a' + DISKPART(swdevt[0].sw_dev)); 1670 if (dumpdev != NODEV) 1671 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1672 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1673 printf("\n"); 1674 } 1675 1676 extern const struct nam2blk nam2blk[]; 1677 1678 int 1679 findblkmajor(struct device *dv) 1680 { 1681 char buf[16], *p; 1682 int i; 1683 1684 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1685 return (-1); 1686 for (p = buf; *p; p++) 1687 if (*p >= '0' && *p <= '9') 1688 *p = '\0'; 1689 1690 for (i = 0; nam2blk[i].name; i++) 1691 if (!strcmp(buf, nam2blk[i].name)) 1692 return (nam2blk[i].maj); 1693 return (-1); 1694 } 1695 1696 char * 1697 findblkname(int maj) 1698 { 1699 int i; 1700 1701 for (i = 0; nam2blk[i].name; i++) 1702 if (nam2blk[i].maj == maj) 1703 return (nam2blk[i].name); 1704 return (NULL); 1705 } 1706 1707 char * 1708 disk_readlabel(struct disklabel *dl, dev_t dev, char *errbuf, size_t errsize) 1709 { 1710 struct vnode *vn; 1711 dev_t chrdev, rawdev; 1712 int error; 1713 1714 chrdev = blktochr(dev); 1715 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(chrdev), RAW_PART); 1716 1717 #ifdef DEBUG 1718 printf("dev=0x%x chrdev=0x%x rawdev=0x%x\n", dev, chrdev, rawdev); 1719 #endif 1720 1721 if (cdevvp(rawdev, &vn)) { 1722 snprintf(errbuf, errsize, 1723 "cannot obtain vnode for 0x%x/0x%x", dev, rawdev); 1724 return (errbuf); 1725 } 1726 1727 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1728 if (error) { 1729 snprintf(errbuf, errsize, 1730 "cannot open disk, 0x%x/0x%x, error %d", 1731 dev, rawdev, error); 1732 goto done; 1733 } 1734 1735 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)dl, FREAD, NOCRED, curproc); 1736 if (error) { 1737 snprintf(errbuf, errsize, 1738 "cannot read disk label, 0x%x/0x%x, error %d", 1739 dev, rawdev, error); 1740 } 1741 done: 1742 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1743 vput(vn); 1744 if (error) 1745 return (errbuf); 1746 return (NULL); 1747 } 1748 1749 int 1750 disk_map(char *path, char *mappath, int size, int flags) 1751 { 1752 struct disk *dk, *mdk; 1753 u_char uid[8]; 1754 char c, part; 1755 int i; 1756 1757 /* 1758 * Attempt to map a request for a disklabel UID to the correct device. 1759 * We should be supplied with a disklabel UID which has the following 1760 * format: 1761 * 1762 * [disklabel uid] . [partition] 1763 * 1764 * Alternatively, if the DM_OPENPART flag is set the disklabel UID can 1765 * based passed on its own. 1766 */ 1767 1768 if (strchr(path, '/') != NULL) 1769 return -1; 1770 1771 /* Verify that the device name is properly formed. */ 1772 if (!((strlen(path) == 16 && (flags & DM_OPENPART)) || 1773 (strlen(path) == 18 && path[16] == '.'))) 1774 return -1; 1775 1776 /* Get partition. */ 1777 if (flags & DM_OPENPART) 1778 part = 'a' + RAW_PART; 1779 else 1780 part = path[17]; 1781 1782 if (part < 'a' || part >= 'a' + MAXPARTITIONS) 1783 return -1; 1784 1785 /* Derive label UID. */ 1786 memset(uid, 0, sizeof(uid)); 1787 for (i = 0; i < 16; i++) { 1788 c = path[i]; 1789 if (c >= '0' && c <= '9') 1790 c -= '0'; 1791 else if (c >= 'a' && c <= 'f') 1792 c -= ('a' - 10); 1793 else 1794 return -1; 1795 1796 uid[i / 2] <<= 4; 1797 uid[i / 2] |= c & 0xf; 1798 } 1799 1800 mdk = NULL; 1801 TAILQ_FOREACH(dk, &disklist, dk_link) { 1802 if (dk->dk_label && 1803 !duid_iszero(dk->dk_label->d_uid) && 1804 memcmp(dk->dk_label->d_uid, uid, 1805 sizeof(dk->dk_label->d_uid)) == 0) { 1806 /* Fail if there are duplicate UIDs! */ 1807 if (mdk != NULL) 1808 return -1; 1809 mdk = dk; 1810 } 1811 } 1812 1813 if (mdk == NULL || mdk->dk_name == NULL) 1814 return -1; 1815 1816 snprintf(mappath, size, "/dev/%s%s%c", 1817 (flags & DM_OPENBLCK) ? "" : "r", mdk->dk_name, part); 1818 1819 return 0; 1820 } 1821 1822 /* 1823 * Lookup a disk device and verify that it has completed attaching. 1824 */ 1825 struct device * 1826 disk_lookup(struct cfdriver *cd, int unit) 1827 { 1828 struct device *dv; 1829 struct disk *dk; 1830 1831 dv = device_lookup(cd, unit); 1832 if (dv == NULL) 1833 return (NULL); 1834 1835 TAILQ_FOREACH(dk, &disklist, dk_link) 1836 if (dk->dk_device == dv) 1837 break; 1838 1839 if (dk == NULL) { 1840 device_unref(dv); 1841 return (NULL); 1842 } 1843 1844 return (dv); 1845 } 1846 1847 int 1848 duid_equal(u_char *duid1, u_char *duid2) 1849 { 1850 return (memcmp(duid1, duid2, DUID_SIZE) == 0); 1851 } 1852 1853 int 1854 duid_iszero(u_char *duid) 1855 { 1856 u_char zeroduid[DUID_SIZE]; 1857 1858 memset(zeroduid, 0, sizeof(zeroduid)); 1859 1860 return (duid_equal(duid, zeroduid)); 1861 } 1862 1863 const char * 1864 duid_format(u_char *duid) 1865 { 1866 static char duid_str[17]; 1867 1868 KERNEL_ASSERT_LOCKED(); 1869 1870 snprintf(duid_str, sizeof(duid_str), 1871 "%02x%02x%02x%02x%02x%02x%02x%02x", 1872 duid[0], duid[1], duid[2], duid[3], 1873 duid[4], duid[5], duid[6], duid[7]); 1874 1875 return (duid_str); 1876 } 1877