1 /* $OpenBSD: subr_disk.c,v 1.273 2024/10/30 06:16:27 jsg Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/malloc.h> 44 #include <sys/fcntl.h> 45 #include <sys/buf.h> 46 #include <sys/stat.h> 47 #include <sys/syslog.h> 48 #include <sys/device.h> 49 #include <sys/time.h> 50 #include <sys/disklabel.h> 51 #include <sys/conf.h> 52 #include <sys/disk.h> 53 #include <sys/reboot.h> 54 #include <sys/dkio.h> 55 #include <sys/vnode.h> 56 #include <sys/task.h> 57 #include <sys/stdint.h> 58 59 #include <sys/socket.h> 60 61 #include <net/if.h> 62 63 #include <dev/cons.h> 64 65 #include <lib/libz/zlib.h> 66 67 #include "softraid.h" 68 69 #ifdef DEBUG 70 #define DPRINTF(x...) printf(x) 71 #else 72 #define DPRINTF(x...) 73 #endif 74 75 /* 76 * A global list of all disks attached to the system. May grow or 77 * shrink over time. 78 */ 79 struct disklist_head disklist; /* TAILQ_HEAD */ 80 int disk_count; /* number of drives in global disklist */ 81 int disk_change; /* set if a disk has been attached/detached 82 * since last we looked at this variable. This 83 * is reset by hw_sysctl() 84 */ 85 86 #define DUID_SIZE 8 87 88 u_char bootduid[DUID_SIZE]; /* DUID of boot disk. */ 89 u_char rootduid[DUID_SIZE]; /* DUID of root disk. */ 90 91 struct device *rootdv; 92 93 /* softraid callback, do not use! */ 94 void (*softraid_disk_attach)(struct disk *, int); 95 96 void sr_map_root(void); 97 98 struct disk_attach_task { 99 struct task task; 100 struct disk *dk; 101 }; 102 103 void disk_attach_callback(void *); 104 105 int spoofgpt(struct buf *, void (*)(struct buf *), const uint8_t *, 106 struct disklabel *, daddr_t *); 107 void spoofmbr(struct buf *, void (*)(struct buf *), const uint8_t *, 108 struct disklabel *, daddr_t *); 109 void spooffat(const uint8_t *, struct disklabel *, daddr_t *); 110 111 int gpt_chk_mbr(struct dos_partition *, uint64_t); 112 int gpt_get_hdr(struct buf *, void (*)(struct buf *), struct disklabel *, 113 uint64_t, struct gpt_header *); 114 int gpt_get_parts(struct buf *, void (*)(struct buf *), 115 struct disklabel *, const struct gpt_header *, struct gpt_partition **); 116 int gpt_get_fstype(const struct uuid *); 117 int mbr_get_fstype(const uint8_t); 118 119 int duid_equal(u_char *, u_char *); 120 121 /* 122 * Compute checksum for disk label. 123 */ 124 u_int 125 dkcksum(struct disklabel *lp) 126 { 127 u_int16_t *start, *end; 128 u_int16_t sum = 0; 129 130 start = (u_int16_t *)lp; 131 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 132 while (start < end) 133 sum ^= *start++; 134 return (sum); 135 } 136 137 int 138 initdisklabel(struct disklabel *lp) 139 { 140 int i; 141 142 /* minimal requirements for archetypal disk label */ 143 if (lp->d_secsize < DEV_BSIZE) 144 lp->d_secsize = DEV_BSIZE; 145 if (DL_GETDSIZE(lp) == 0) 146 DL_SETDSIZE(lp, MAXDISKSIZE); 147 if (lp->d_secpercyl == 0) 148 return (ERANGE); 149 lp->d_npartitions = MAXPARTITIONS; 150 for (i = 0; i < RAW_PART; i++) { 151 DL_SETPSIZE(&lp->d_partitions[i], 0); 152 DL_SETPOFFSET(&lp->d_partitions[i], 0); 153 } 154 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 155 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 156 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 157 DL_SETBSTART(lp, 0); 158 DL_SETBEND(lp, DL_GETDSIZE(lp)); 159 lp->d_version = 1; 160 return (0); 161 } 162 163 /* 164 * Check an incoming block to make sure it is a disklabel, convert it to 165 * a newer version if needed, etc etc. 166 */ 167 int 168 checkdisklabel(dev_t dev, void *rlp, struct disklabel *lp, u_int64_t boundstart, 169 u_int64_t boundend) 170 { 171 struct disklabel *dlp = rlp; 172 struct __partitionv0 *v0pp; 173 struct partition *pp; 174 const char *blkname; 175 u_int64_t disksize; 176 int error = 0; 177 int i; 178 179 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) 180 error = ENOENT; /* no disk label */ 181 else if (dlp->d_npartitions > MAXPARTITIONS) 182 error = E2BIG; /* too many partitions */ 183 else if (dlp->d_secpercyl == 0) 184 error = EINVAL; /* invalid label */ 185 else if (dlp->d_secsize == 0) 186 error = ENOSPC; /* disk too small */ 187 else if (dkcksum(dlp) != 0) 188 error = EINVAL; /* incorrect checksum */ 189 190 if (error) { 191 u_int16_t *start, *end, sum = 0; 192 193 /* If it is byte-swapped, attempt to convert it */ 194 if (swap32(dlp->d_magic) != DISKMAGIC || 195 swap32(dlp->d_magic2) != DISKMAGIC || 196 swap16(dlp->d_npartitions) > MAXPARTITIONS) 197 return (error); 198 199 /* 200 * Need a byte-swap aware dkcksum variant 201 * inlined, because dkcksum uses a sub-field 202 */ 203 start = (u_int16_t *)dlp; 204 end = (u_int16_t *)&dlp->d_partitions[ 205 swap16(dlp->d_npartitions)]; 206 while (start < end) 207 sum ^= *start++; 208 if (sum != 0) 209 return (error); 210 211 dlp->d_magic = swap32(dlp->d_magic); 212 dlp->d_type = swap16(dlp->d_type); 213 214 /* d_typename and d_packname are strings */ 215 216 dlp->d_secsize = swap32(dlp->d_secsize); 217 dlp->d_nsectors = swap32(dlp->d_nsectors); 218 dlp->d_ntracks = swap32(dlp->d_ntracks); 219 dlp->d_ncylinders = swap32(dlp->d_ncylinders); 220 dlp->d_secpercyl = swap32(dlp->d_secpercyl); 221 dlp->d_secperunit = swap32(dlp->d_secperunit); 222 223 /* d_uid is a string */ 224 225 dlp->d_acylinders = swap32(dlp->d_acylinders); 226 227 dlp->d_flags = swap32(dlp->d_flags); 228 229 dlp->d_secperunith = swap16(dlp->d_secperunith); 230 dlp->d_version = swap16(dlp->d_version); 231 232 for (i = 0; i < NSPARE; i++) 233 dlp->d_spare[i] = swap32(dlp->d_spare[i]); 234 235 dlp->d_magic2 = swap32(dlp->d_magic2); 236 237 dlp->d_npartitions = swap16(dlp->d_npartitions); 238 239 for (i = 0; i < MAXPARTITIONS; i++) { 240 pp = &dlp->d_partitions[i]; 241 pp->p_size = swap32(pp->p_size); 242 pp->p_offset = swap32(pp->p_offset); 243 if (dlp->d_version == 0) { 244 v0pp = (struct __partitionv0 *)pp; 245 v0pp->p_fsize = swap32(v0pp->p_fsize); 246 } else { 247 pp->p_offseth = swap16(pp->p_offseth); 248 pp->p_sizeh = swap16(pp->p_sizeh); 249 } 250 pp->p_cpg = swap16(pp->p_cpg); 251 } 252 253 dlp->d_checksum = 0; 254 dlp->d_checksum = dkcksum(dlp); 255 error = 0; 256 } 257 258 /* XXX should verify lots of other fields and whine a lot */ 259 260 /* Initial passed in lp contains the real disk size. */ 261 disksize = DL_GETDSIZE(lp); 262 263 if (lp != dlp) 264 *lp = *dlp; 265 266 if (lp->d_version == 0) { 267 blkname = findblkname(major(dev)); 268 if (blkname == NULL) 269 blkname = findblkname(major(chrtoblk(dev))); 270 printf("%s%d has legacy label, please rewrite using " 271 "disklabel(8)\n", blkname, DISKUNIT(dev)); 272 273 lp->d_version = 1; 274 lp->d_secperunith = 0; 275 276 v0pp = (struct __partitionv0 *)lp->d_partitions; 277 pp = lp->d_partitions; 278 for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) { 279 pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp-> 280 p_fsize, v0pp->p_frag); 281 pp->p_offseth = 0; 282 pp->p_sizeh = 0; 283 } 284 } 285 286 #ifdef DEBUG 287 if (DL_GETDSIZE(lp) != disksize) 288 printf("on-disk disklabel has incorrect disksize (%llu)\n", 289 DL_GETDSIZE(lp)); 290 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize) 291 printf("on-disk disklabel RAW_PART has incorrect size (%llu)\n", 292 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 293 if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0) 294 printf("on-disk disklabel RAW_PART offset != 0 (%llu)\n", 295 DL_GETPOFFSET(&lp->d_partitions[RAW_PART])); 296 #endif 297 DL_SETDSIZE(lp, disksize); 298 DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize); 299 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 300 DL_SETBSTART(lp, boundstart); 301 DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp)); 302 303 lp->d_checksum = 0; 304 lp->d_checksum = dkcksum(lp); 305 return (0); 306 } 307 308 /* 309 * Read a disk sector. 310 */ 311 int 312 readdisksector(struct buf *bp, void (*strat)(struct buf *), 313 struct disklabel *lp, u_int64_t sector) 314 { 315 bp->b_blkno = DL_SECTOBLK(lp, sector); 316 bp->b_bcount = lp->d_secsize; 317 bp->b_error = 0; 318 CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR); 319 SET(bp->b_flags, B_BUSY | B_READ | B_RAW); 320 321 (*strat)(bp); 322 323 return (biowait(bp)); 324 } 325 326 int 327 readdoslabel(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, 328 daddr_t *partoffp, int spoofonly) 329 { 330 uint8_t dosbb[DEV_BSIZE]; 331 struct disklabel nlp; 332 struct disklabel *rlp; 333 daddr_t partoff; 334 int error; 335 336 #ifdef DEBUG 337 char devname[32]; 338 const char *blkname; 339 340 blkname = findblkname(major(bp->b_dev)); 341 if (blkname == NULL) 342 blkname = findblkname(major(chrtoblk(bp->b_dev))); 343 if (blkname == NULL) 344 snprintf(devname, sizeof(devname), "<%d, %d>", major(bp->b_dev), 345 minor(bp->b_dev)); 346 else 347 snprintf(devname, sizeof(devname), "%s%d", blkname, 348 DISKUNIT(bp->b_dev)); 349 350 printf("readdoslabel enter: %s, spoofonly %d, partoffp %sNULL\n", 351 devname, spoofonly, (partoffp == NULL) ? "" : "not "); 352 #endif /* DEBUG */ 353 354 error = readdisksector(bp, strat, lp, DOSBBSECTOR); 355 if (error) { 356 DPRINTF("readdoslabel return: %s, %d -- lp unchanged, " 357 "DOSBBSECTOR read error\n", devname, error); 358 return error; 359 } 360 memcpy(dosbb, bp->b_data, sizeof(dosbb)); 361 362 nlp = *lp; 363 memset(nlp.d_partitions, 0, sizeof(nlp.d_partitions)); 364 nlp.d_partitions[RAW_PART] = lp->d_partitions[RAW_PART]; 365 nlp.d_magic = 0; 366 367 error = spoofgpt(bp, strat, dosbb, &nlp, &partoff); 368 if (error) 369 return error; 370 if (nlp.d_magic != DISKMAGIC) 371 spoofmbr(bp, strat, dosbb, &nlp, &partoff); 372 if (nlp.d_magic != DISKMAGIC) 373 spooffat(dosbb, &nlp, &partoff); 374 if (nlp.d_magic != DISKMAGIC) { 375 DPRINTF("readdoslabel: N/A -- label partition @ " 376 "daddr_t 0 (default)\n"); 377 partoff = 0; 378 } 379 380 if (partoffp != NULL) { 381 /* 382 * If a non-zero value is returned writedisklabel() exits with 383 * EIO. If 0 is returned the label sector is read from disk and 384 * lp is copied into it. So leave lp alone! 385 */ 386 if (partoff == -1) { 387 DPRINTF("readdoslabel return: %s, ENXIO, lp " 388 "unchanged, *partoffp unchanged\n", devname); 389 return ENXIO; 390 } 391 *partoffp = partoff; 392 DPRINTF("readdoslabel return: %s, 0, lp unchanged, " 393 "*partoffp set to %lld\n", devname, *partoffp); 394 return 0; 395 } 396 397 nlp.d_magic = lp->d_magic; 398 *lp = nlp; 399 lp->d_checksum = 0; 400 lp->d_checksum = dkcksum(lp); 401 402 if (spoofonly || partoff == -1) { 403 DPRINTF("readdoslabel return: %s, 0, lp spoofed\n", 404 devname); 405 return 0; 406 } 407 408 partoff += DOS_LABELSECTOR; 409 error = readdisksector(bp, strat, lp, DL_BLKTOSEC(lp, partoff)); 410 if (error) { 411 DPRINTF("readdoslabel return: %s, %d, lp read failed\n", 412 devname, error); 413 return bp->b_error; 414 } 415 416 rlp = (struct disklabel *)(bp->b_data + DL_BLKOFFSET(lp, partoff)); 417 error = checkdisklabel(bp->b_dev, rlp, lp, DL_GETBSTART(rlp), 418 DL_GETBEND(rlp)); 419 420 DPRINTF("readdoslabel return: %s, %d, checkdisklabel() of daddr_t " 421 "%lld %s\n", devname, error, partoff, error ? "failed" : "ok"); 422 423 return error; 424 } 425 426 /* 427 * Return the index into dp[] of the EFI GPT (0xEE) partition, or -1 if no such 428 * partition exists. 429 * 430 * Copied into sbin/fdisk/mbr.c. 431 */ 432 int 433 gpt_chk_mbr(struct dos_partition *dp, uint64_t dsize) 434 { 435 struct dos_partition *dp2; 436 int efi, eficnt, found, i; 437 uint32_t psize; 438 439 found = efi = eficnt = 0; 440 for (dp2 = dp, i = 0; i < NDOSPART; i++, dp2++) { 441 if (dp2->dp_typ == DOSPTYP_UNUSED) 442 continue; 443 found++; 444 if (dp2->dp_typ != DOSPTYP_EFI) 445 continue; 446 if (letoh32(dp2->dp_start) != GPTSECTOR) 447 continue; 448 psize = letoh32(dp2->dp_size); 449 if (psize <= (dsize - GPTSECTOR) || psize == UINT32_MAX) { 450 efi = i; 451 eficnt++; 452 } 453 } 454 if (found == 1 && eficnt == 1) 455 return (efi); 456 457 return (-1); 458 } 459 460 int 461 gpt_get_hdr(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, 462 uint64_t sector, struct gpt_header *gh) 463 { 464 struct gpt_header ngh; 465 int error; 466 uint64_t lbaend, lbastart; 467 uint32_t csum; 468 uint32_t size, partsize; 469 470 471 error = readdisksector(bp, strat, lp, sector); 472 if (error) 473 return error; 474 475 memcpy(&ngh, bp->b_data, sizeof(ngh)); 476 477 size = letoh32(ngh.gh_size); 478 partsize = letoh32(ngh.gh_part_size); 479 lbaend = letoh64(ngh.gh_lba_end); 480 lbastart = letoh64(ngh.gh_lba_start); 481 482 csum = ngh.gh_csum; 483 ngh.gh_csum = 0; 484 ngh.gh_csum = htole32(crc32(0, (unsigned char *)&ngh, GPTMINHDRSIZE)); 485 486 if (letoh64(ngh.gh_sig) == GPTSIGNATURE && 487 letoh32(ngh.gh_rev) == GPTREVISION && 488 size == GPTMINHDRSIZE && lbastart <= lbaend && 489 partsize == GPTMINPARTSIZE && lp->d_secsize % partsize == 0 && 490 csum == ngh.gh_csum) 491 *gh = ngh; 492 else 493 memset(gh, 0, sizeof(*gh)); 494 495 return 0; 496 } 497 498 int 499 gpt_get_parts(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, 500 const struct gpt_header *gh, struct gpt_partition **gp) 501 { 502 uint8_t *ngp; 503 int error, i; 504 uint64_t bytes, partlba, sectors; 505 uint32_t partnum, partsize, partcsum; 506 507 partlba = letoh64(gh->gh_part_lba); 508 partnum = letoh32(gh->gh_part_num); 509 partsize = letoh32(gh->gh_part_size); 510 511 sectors = ((uint64_t)partnum * partsize + lp->d_secsize - 1) / 512 lp->d_secsize; 513 514 ngp = mallocarray(sectors, lp->d_secsize, M_DEVBUF, M_NOWAIT | M_ZERO); 515 if (ngp == NULL) { 516 *gp = NULL; 517 return ENOMEM; 518 } 519 bytes = sectors * lp->d_secsize; 520 521 for (i = 0; i < sectors; i++) { 522 error = readdisksector(bp, strat, lp, partlba + i); 523 if (error) { 524 free(ngp, M_DEVBUF, bytes); 525 *gp = NULL; 526 return error; 527 } 528 memcpy(ngp + i * lp->d_secsize, bp->b_data, lp->d_secsize); 529 } 530 531 partcsum = htole32(crc32(0, ngp, partnum * partsize)); 532 if (partcsum != gh->gh_part_csum) { 533 DPRINTF("invalid %s GPT partition array @ %llu\n", 534 (letoh64(gh->gh_lba_self) == GPTSECTOR) ? "Primary" : 535 "Secondary", partlba); 536 free(ngp, M_DEVBUF, bytes); 537 *gp = NULL; 538 } else { 539 *gp = (struct gpt_partition *)ngp; 540 } 541 542 return 0; 543 } 544 545 int 546 gpt_get_fstype(const struct uuid *uuid_part) 547 { 548 static int init = 0; 549 static struct uuid uuid_openbsd, uuid_msdos, uuid_chromefs, 550 uuid_linux, uuid_hfs, uuid_unused, uuid_efi_system, uuid_bios_boot; 551 static const uint8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD; 552 static const uint8_t gpt_uuid_msdos[] = GPT_UUID_MSDOS; 553 static const uint8_t gpt_uuid_chromerootfs[] = GPT_UUID_CHROMEROOTFS; 554 static const uint8_t gpt_uuid_linux[] = GPT_UUID_LINUX; 555 static const uint8_t gpt_uuid_hfs[] = GPT_UUID_APPLE_HFS; 556 static const uint8_t gpt_uuid_unused[] = GPT_UUID_UNUSED; 557 static const uint8_t gpt_uuid_efi_system[] = GPT_UUID_EFI_SYSTEM; 558 static const uint8_t gpt_uuid_bios_boot[] = GPT_UUID_BIOS_BOOT; 559 560 if (init == 0) { 561 uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd); 562 uuid_dec_be(gpt_uuid_msdos, &uuid_msdos); 563 uuid_dec_be(gpt_uuid_chromerootfs, &uuid_chromefs); 564 uuid_dec_be(gpt_uuid_linux, &uuid_linux); 565 uuid_dec_be(gpt_uuid_hfs, &uuid_hfs); 566 uuid_dec_be(gpt_uuid_unused, &uuid_unused); 567 uuid_dec_be(gpt_uuid_efi_system, &uuid_efi_system); 568 uuid_dec_be(gpt_uuid_bios_boot, &uuid_bios_boot); 569 init = 1; 570 } 571 572 if (!memcmp(uuid_part, &uuid_unused, sizeof(struct uuid))) 573 return FS_UNUSED; 574 else if (!memcmp(uuid_part, &uuid_openbsd, sizeof(struct uuid))) 575 return FS_BSDFFS; 576 else if (!memcmp(uuid_part, &uuid_msdos, sizeof(struct uuid))) 577 return FS_MSDOS; 578 else if (!memcmp(uuid_part, &uuid_chromefs, sizeof(struct uuid))) 579 return FS_EXT2FS; 580 else if (!memcmp(uuid_part, &uuid_linux, sizeof(struct uuid))) 581 return FS_EXT2FS; 582 else if (!memcmp(uuid_part, &uuid_hfs, sizeof(struct uuid))) 583 return FS_HFS; 584 else if (!memcmp(uuid_part, &uuid_efi_system, sizeof(struct uuid))) 585 return FS_MSDOS; 586 else if (!memcmp(uuid_part, &uuid_bios_boot, sizeof(struct uuid))) 587 return FS_BOOT; 588 else 589 return FS_OTHER; 590 } 591 592 int 593 spoofgpt(struct buf *bp, void (*strat)(struct buf *), const uint8_t *dosbb, 594 struct disklabel *lp, daddr_t *partoffp) 595 { 596 struct dos_partition dp[NDOSPART]; 597 struct gpt_header gh; 598 struct uuid gptype; 599 struct gpt_partition *gp; 600 struct partition *pp; 601 uint64_t lbaend, lbastart, labelsec; 602 uint64_t gpbytes, end, start; 603 daddr_t partoff; 604 unsigned int i, n; 605 int error, fstype, obsdfound; 606 uint32_t partnum; 607 uint16_t sig; 608 609 gp = NULL; 610 gpbytes = 0; 611 612 memcpy(dp, dosbb + DOSPARTOFF, sizeof(dp)); 613 memcpy(&sig, dosbb + DOSMBR_SIGNATURE_OFF, sizeof(sig)); 614 615 if (letoh16(sig) != DOSMBR_SIGNATURE || 616 gpt_chk_mbr(dp, DL_GETDSIZE(lp)) == -1) 617 return 0; 618 619 error = gpt_get_hdr(bp, strat, lp, GPTSECTOR, &gh); 620 if (error == 0 && letoh64(gh.gh_sig) == GPTSIGNATURE) 621 error = gpt_get_parts(bp, strat, lp, &gh, &gp); 622 623 if (error || letoh64(gh.gh_sig) != GPTSIGNATURE || gp == NULL) { 624 error = gpt_get_hdr(bp, strat, lp, DL_GETDSIZE(lp) - 1, &gh); 625 if (error == 0 && letoh64(gh.gh_sig) == GPTSIGNATURE) 626 error = gpt_get_parts(bp, strat, lp, &gh, &gp); 627 } 628 629 if (error) 630 return error; 631 if (gp == NULL) 632 return ENXIO; 633 634 lbastart = letoh64(gh.gh_lba_start); 635 lbaend = letoh64(gh.gh_lba_end); 636 partnum = letoh32(gh.gh_part_num); 637 638 n = 'i' - 'a'; /* Start spoofing at 'i', a.k.a. 8. */ 639 640 DL_SETBSTART(lp, lbastart); 641 DL_SETBEND(lp, lbaend + 1); 642 partoff = DL_SECTOBLK(lp, lbastart); 643 obsdfound = 0; 644 for (i = 0; i < partnum; i++) { 645 if (letoh64(gp[i].gp_attrs) & GPTPARTATTR_REQUIRED) { 646 DPRINTF("spoofgpt: Skipping partition %u (REQUIRED)\n", 647 i); 648 continue; 649 } 650 651 start = letoh64(gp[i].gp_lba_start); 652 if (start > lbaend || start < lbastart) 653 continue; 654 655 end = letoh64(gp[i].gp_lba_end); 656 if (start > end) 657 continue; 658 659 uuid_dec_le(&gp[i].gp_type, &gptype); 660 fstype = gpt_get_fstype(&gptype); 661 if (obsdfound && fstype == FS_BSDFFS) 662 continue; 663 664 if (fstype == FS_BSDFFS) { 665 obsdfound = 1; 666 partoff = DL_SECTOBLK(lp, start); 667 labelsec = DL_BLKTOSEC(lp, partoff + DOS_LABELSECTOR); 668 if (labelsec > ((end < lbaend) ? end : lbaend)) 669 partoff = -1; 670 DL_SETBSTART(lp, start); 671 DL_SETBEND(lp, end + 1); 672 continue; 673 } 674 675 if (partoff != -1) { 676 labelsec = DL_BLKTOSEC(lp, partoff + DOS_LABELSECTOR); 677 if (labelsec >= start && labelsec <= end) 678 partoff = -1; 679 } 680 681 if (n < MAXPARTITIONS && end <= lbaend) { 682 pp = &lp->d_partitions[n]; 683 n++; 684 pp->p_fstype = fstype; 685 DL_SETPOFFSET(pp, start); 686 DL_SETPSIZE(pp, end - start + 1); 687 } 688 } 689 690 lp->d_magic = DISKMAGIC; 691 *partoffp = partoff; 692 free(gp, M_DEVBUF, gpbytes); 693 694 #ifdef DEBUG 695 printf("readdoslabel: GPT -- "); 696 if (partoff == -1) 697 printf("no label partition\n"); 698 else if (obsdfound == 0) 699 printf("label partition @ daddr_t %lld (free space)\n", partoff); 700 else 701 printf("label partition @ daddr_t %lld (A6)\n", partoff); 702 #endif /* DEBUG */ 703 704 return 0; 705 } 706 707 int 708 mbr_get_fstype(const uint8_t dp_typ) 709 { 710 switch (dp_typ) { 711 case DOSPTYP_OPENBSD: 712 return FS_BSDFFS; 713 case DOSPTYP_UNUSED: 714 return FS_UNUSED; 715 case DOSPTYP_LINUX: 716 return FS_EXT2FS; 717 case DOSPTYP_NTFS: 718 return FS_NTFS; 719 case DOSPTYP_EFISYS: 720 case DOSPTYP_FAT12: 721 case DOSPTYP_FAT16S: 722 case DOSPTYP_FAT16B: 723 case DOSPTYP_FAT16L: 724 case DOSPTYP_FAT32: 725 case DOSPTYP_FAT32L: 726 return FS_MSDOS; 727 case DOSPTYP_EFI: 728 case DOSPTYP_EXTEND: 729 case DOSPTYP_EXTENDL: 730 default: 731 return FS_OTHER; 732 } 733 } 734 735 void 736 spoofmbr(struct buf *bp, void (*strat)(struct buf *), const uint8_t *dosbb, 737 struct disklabel *lp, daddr_t *partoffp) 738 { 739 struct dos_partition dp[NDOSPART]; 740 struct partition *pp; 741 uint64_t sector = DOSBBSECTOR; 742 uint64_t start, end; 743 daddr_t labeloff, partoff; 744 unsigned int i, n, parts; 745 int wander = 1, ebr = 0; 746 int error, obsdfound; 747 uint32_t extoff = 0; 748 uint16_t sig; 749 uint8_t fstype; 750 751 memcpy(&sig, dosbb + DOSMBR_SIGNATURE_OFF, sizeof(sig)); 752 if (letoh16(sig) != DOSMBR_SIGNATURE) 753 return; 754 memcpy(dp, dosbb + DOSPARTOFF, sizeof(dp)); 755 756 obsdfound = 0; 757 partoff = 0; 758 parts = 0; 759 n = 'i' - 'a'; 760 while (wander && ebr < DOS_MAXEBR) { 761 ebr++; 762 wander = 0; 763 if (sector < extoff) 764 sector = extoff; 765 766 error = 0; 767 if (sector != DOSBBSECTOR) { 768 error = readdisksector(bp, strat, lp, sector); 769 if (error) 770 break; 771 memcpy(&sig, bp->b_data + DOSMBR_SIGNATURE_OFF, 772 sizeof(sig)); 773 if (letoh16(sig) != DOSMBR_SIGNATURE) 774 break; 775 memcpy(dp, bp->b_data + DOSPARTOFF, sizeof(dp)); 776 } 777 778 for (i = 0; i < NDOSPART; i++) { 779 if (letoh32(dp[i].dp_size) == 0) 780 continue; 781 if (obsdfound && dp[i].dp_typ == DOSPTYP_OPENBSD) 782 continue; 783 784 if (dp[i].dp_typ != DOSPTYP_OPENBSD) { 785 if (letoh32(dp[i].dp_start) > DL_GETDSIZE(lp)) 786 continue; 787 if (letoh32(dp[i].dp_size) > DL_GETDSIZE(lp)) 788 continue; 789 } 790 791 start = sector + letoh32(dp[i].dp_start); 792 end = start + letoh32(dp[i].dp_size); 793 794 parts++; 795 if (obsdfound == 0) { 796 labeloff = partoff + DOS_LABELSECTOR; 797 if (labeloff >= DL_SECTOBLK(lp, start) && 798 labeloff < DL_SECTOBLK(lp, end)) 799 partoff = -1; 800 } 801 802 switch (dp[i].dp_typ) { 803 case DOSPTYP_OPENBSD: 804 obsdfound = 1; 805 partoff = DL_SECTOBLK(lp, start); 806 labeloff = partoff + DOS_LABELSECTOR; 807 if (labeloff >= DL_SECTOBLK(lp, end)) 808 partoff = -1; 809 DL_SETBSTART(lp, start); 810 DL_SETBEND(lp, end); 811 continue; 812 case DOSPTYP_EFI: 813 continue; 814 case DOSPTYP_EXTEND: 815 case DOSPTYP_EXTENDL: 816 sector = start + extoff; 817 if (extoff == 0) { 818 extoff = start; 819 sector = 0; 820 } 821 wander = 1; 822 continue; 823 default: 824 break; 825 } 826 827 fstype = mbr_get_fstype(dp[i].dp_typ); 828 if (n < MAXPARTITIONS) { 829 pp = &lp->d_partitions[n++]; 830 pp->p_fstype = fstype; 831 if (start) 832 DL_SETPOFFSET(pp, start); 833 DL_SETPSIZE(pp, end - start); 834 } 835 } 836 } 837 838 if (parts > 0) { 839 lp->d_magic = DISKMAGIC; 840 *partoffp = partoff; 841 #ifdef DEBUG 842 printf("readdoslabel: MBR -- "); 843 if (partoff == -1) 844 printf("no label partition\n"); 845 else if (obsdfound == 0) 846 printf("label partition @ daddr_t %lld (free space)\n", partoff); 847 else 848 printf("label partition @ daddr_t %lld (A6)\n", partoff); 849 #endif /* DEBUG */ 850 } 851 } 852 853 void 854 spooffat(const uint8_t *dosbb, struct disklabel *lp, daddr_t *partoffp) 855 { 856 uint16_t secsize; 857 858 #define VALID_JMP(_p) (((_p)[0] == 0xeb && (_p)[2] == 0x90) || (_p)[0] == 0xe9) 859 #define VALID_FAT(_p) ((_p)[16] == 1 || (_p)[16] == 2) 860 #define VALID_SEC(_s) ((_s) >= DEV_BSIZE && (_s) <= 4096 && ((_s) % 512 == 0)) 861 862 memcpy(&secsize, dosbb + 11, sizeof(secsize)); 863 secsize = letoh16(secsize); 864 865 if (VALID_JMP(dosbb) && VALID_SEC(secsize) && VALID_FAT(dosbb)) { 866 lp->d_partitions['i' - 'a'] = lp->d_partitions[RAW_PART]; 867 lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS; 868 *partoffp = -1; 869 lp->d_magic = DISKMAGIC; 870 DPRINTF("readdoslabel: FAT -- no label partition\n"); 871 } 872 } 873 874 /* 875 * Check new disk label for sensibility before setting it. 876 */ 877 int 878 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 879 { 880 struct partition *opp, *npp; 881 struct disk *dk; 882 int i; 883 884 /* sanity clause */ 885 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 886 (nlp->d_secsize % DEV_BSIZE) != 0) 887 return (EINVAL); 888 889 /* special case to allow disklabel to be invalidated */ 890 if (nlp->d_magic == 0xffffffff) { 891 *olp = *nlp; 892 return (0); 893 } 894 895 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 896 dkcksum(nlp) != 0) 897 return (EINVAL); 898 899 /* XXX missing check if other dos partitions will be overwritten */ 900 901 for (i = 0; i < MAXPARTITIONS; i++) { 902 opp = &olp->d_partitions[i]; 903 npp = &nlp->d_partitions[i]; 904 if ((openmask & (1 << i)) && 905 (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 906 DL_GETPSIZE(npp) < DL_GETPSIZE(opp))) 907 return (EBUSY); 908 /* 909 * Copy internally-set partition information 910 * if new label doesn't include it. XXX 911 */ 912 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 913 npp->p_fragblock = opp->p_fragblock; 914 npp->p_cpg = opp->p_cpg; 915 } 916 } 917 918 /* Generate a UID if the disklabel does not already have one. */ 919 if (duid_iszero(nlp->d_uid)) { 920 do { 921 arc4random_buf(nlp->d_uid, sizeof(nlp->d_uid)); 922 TAILQ_FOREACH(dk, &disklist, dk_link) 923 if (dk->dk_label && 924 duid_equal(dk->dk_label->d_uid, nlp->d_uid)) 925 break; 926 } while (dk != NULL || duid_iszero(nlp->d_uid)); 927 } 928 929 /* Preserve the disk size and RAW_PART values. */ 930 DL_SETDSIZE(nlp, DL_GETDSIZE(olp)); 931 npp = &nlp->d_partitions[RAW_PART]; 932 DL_SETPOFFSET(npp, 0); 933 DL_SETPSIZE(npp, DL_GETDSIZE(nlp)); 934 935 nlp->d_checksum = 0; 936 nlp->d_checksum = dkcksum(nlp); 937 *olp = *nlp; 938 939 disk_change = 1; 940 941 return (0); 942 } 943 944 /* 945 * Determine the size of the transfer, and make sure it is within the 946 * boundaries of the partition. Adjust transfer if needed, and signal errors or 947 * early completion. 948 */ 949 int 950 bounds_check_with_label(struct buf *bp, struct disklabel *lp) 951 { 952 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 953 daddr_t partblocks, sz; 954 955 /* Avoid division by zero, negative offsets, and negative sizes. */ 956 if (lp->d_secpercyl == 0 || bp->b_blkno < 0 || bp->b_bcount < 0) 957 goto bad; 958 959 /* Ensure transfer is a whole number of aligned sectors. */ 960 if ((bp->b_blkno % DL_BLKSPERSEC(lp)) != 0 || 961 (bp->b_bcount % lp->d_secsize) != 0) 962 goto bad; 963 964 /* Ensure transfer starts within partition boundary. */ 965 partblocks = DL_SECTOBLK(lp, DL_GETPSIZE(p)); 966 if (bp->b_blkno > partblocks) 967 goto bad; 968 969 /* If exactly at end of partition or null transfer, return EOF. */ 970 if (bp->b_blkno == partblocks || bp->b_bcount == 0) 971 goto done; 972 973 /* Truncate request if it extends past the end of the partition. */ 974 sz = bp->b_bcount >> DEV_BSHIFT; 975 if (sz > partblocks - bp->b_blkno) { 976 sz = partblocks - bp->b_blkno; 977 bp->b_bcount = sz << DEV_BSHIFT; 978 } 979 980 return (0); 981 982 bad: 983 bp->b_error = EINVAL; 984 bp->b_flags |= B_ERROR; 985 done: 986 bp->b_resid = bp->b_bcount; 987 return (-1); 988 } 989 990 /* 991 * Disk error is the preface to plaintive error messages 992 * about failing disk transfers. It prints messages of the form 993 994 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 995 996 * if the offset of the error in the transfer and a disk label 997 * are both available. blkdone should be -1 if the position of the error 998 * is unknown; the disklabel pointer may be null from drivers that have not 999 * been converted to use them. The message is printed with printf 1000 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 1001 * The message should be completed (with at least a newline) with printf 1002 * or addlog, respectively. There is no trailing space. 1003 */ 1004 void 1005 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 1006 struct disklabel *lp) 1007 { 1008 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 1009 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))); 1010 char partname = 'a' + part; 1011 daddr_t sn; 1012 1013 if (pri != LOG_PRINTF) { 1014 log(pri, "%s", ""); 1015 pr = addlog; 1016 } else 1017 pr = printf; 1018 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 1019 bp->b_flags & B_READ ? "read" : "writ"); 1020 sn = bp->b_blkno; 1021 if (bp->b_bcount <= DEV_BSIZE) 1022 (*pr)("%lld", (long long)sn); 1023 else { 1024 if (blkdone >= 0) { 1025 sn += blkdone; 1026 (*pr)("%lld of ", (long long)sn); 1027 } 1028 (*pr)("%lld-%lld", (long long)bp->b_blkno, 1029 (long long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE)); 1030 } 1031 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 1032 sn += DL_SECTOBLK(lp, DL_GETPOFFSET(&lp->d_partitions[part])); 1033 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, (long long)sn, 1034 (long long)(sn / DL_SECTOBLK(lp, lp->d_secpercyl))); 1035 sn %= DL_SECTOBLK(lp, lp->d_secpercyl); 1036 (*pr)(" tn %lld sn %lld)", 1037 (long long)(sn / DL_SECTOBLK(lp, lp->d_nsectors)), 1038 (long long)(sn % DL_SECTOBLK(lp, lp->d_nsectors))); 1039 } 1040 } 1041 1042 /* 1043 * Initialize the disklist. Called by main() before autoconfiguration. 1044 */ 1045 void 1046 disk_init(void) 1047 { 1048 1049 TAILQ_INIT(&disklist); 1050 disk_count = disk_change = 0; 1051 } 1052 1053 int 1054 disk_construct(struct disk *diskp) 1055 { 1056 rw_init_flags(&diskp->dk_lock, "dklk", RWL_IS_VNODE); 1057 mtx_init(&diskp->dk_mtx, IPL_BIO); 1058 1059 diskp->dk_flags |= DKF_CONSTRUCTED; 1060 1061 return (0); 1062 } 1063 1064 /* 1065 * Attach a disk. 1066 */ 1067 void 1068 disk_attach(struct device *dv, struct disk *diskp) 1069 { 1070 int majdev; 1071 1072 KERNEL_ASSERT_LOCKED(); 1073 1074 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 1075 disk_construct(diskp); 1076 1077 /* 1078 * Allocate and initialize the disklabel structures. Note that 1079 * it's not safe to sleep here, since we're probably going to be 1080 * called during autoconfiguration. 1081 */ 1082 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 1083 M_NOWAIT|M_ZERO); 1084 if (diskp->dk_label == NULL) 1085 panic("disk_attach: can't allocate storage for disklabel"); 1086 1087 /* 1088 * Set the attached timestamp. 1089 */ 1090 microuptime(&diskp->dk_attachtime); 1091 1092 /* 1093 * Link into the disklist. 1094 */ 1095 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 1096 ++disk_count; 1097 disk_change = 1; 1098 1099 /* 1100 * Store device structure and number for later use. 1101 */ 1102 diskp->dk_device = dv; 1103 diskp->dk_devno = NODEV; 1104 if (dv != NULL) { 1105 majdev = findblkmajor(dv); 1106 if (majdev >= 0) 1107 diskp->dk_devno = 1108 MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); 1109 1110 if (diskp->dk_devno != NODEV) { 1111 struct disk_attach_task *dat; 1112 1113 dat = malloc(sizeof(*dat), M_TEMP, M_WAITOK); 1114 1115 /* XXX: Assumes dk is part of the device softc. */ 1116 device_ref(dv); 1117 dat->dk = diskp; 1118 1119 task_set(&dat->task, disk_attach_callback, dat); 1120 task_add(systq, &dat->task); 1121 } 1122 } 1123 1124 if (softraid_disk_attach) 1125 softraid_disk_attach(diskp, 1); 1126 } 1127 1128 void 1129 disk_attach_callback(void *xdat) 1130 { 1131 struct disk_attach_task *dat = xdat; 1132 struct disk *dk = dat->dk; 1133 struct disklabel dl; 1134 char errbuf[100]; 1135 1136 free(dat, M_TEMP, sizeof(*dat)); 1137 1138 if (dk->dk_flags & (DKF_OPENED | DKF_NOLABELREAD)) 1139 goto done; 1140 1141 /* Read disklabel. */ 1142 if (disk_readlabel(&dl, dk->dk_devno, errbuf, sizeof(errbuf)) == NULL) { 1143 enqueue_randomness(dl.d_checksum); 1144 } 1145 1146 done: 1147 dk->dk_flags |= DKF_OPENED; 1148 device_unref(dk->dk_device); 1149 wakeup(dk); 1150 } 1151 1152 /* 1153 * Detach a disk. 1154 */ 1155 void 1156 disk_detach(struct disk *diskp) 1157 { 1158 KERNEL_ASSERT_LOCKED(); 1159 1160 if (softraid_disk_attach) 1161 softraid_disk_attach(diskp, -1); 1162 1163 /* 1164 * Free the space used by the disklabel structures. 1165 */ 1166 free(diskp->dk_label, M_DEVBUF, sizeof(*diskp->dk_label)); 1167 1168 /* 1169 * Remove from the disklist. 1170 */ 1171 TAILQ_REMOVE(&disklist, diskp, dk_link); 1172 disk_change = 1; 1173 if (--disk_count < 0) 1174 panic("disk_detach: disk_count < 0"); 1175 } 1176 1177 int 1178 disk_openpart(struct disk *dk, int part, int fmt, int haslabel) 1179 { 1180 KASSERT(part >= 0 && part < MAXPARTITIONS); 1181 1182 /* Unless opening the raw partition, check that the partition exists. */ 1183 if (part != RAW_PART && (!haslabel || 1184 part >= dk->dk_label->d_npartitions || 1185 dk->dk_label->d_partitions[part].p_fstype == FS_UNUSED)) 1186 return (ENXIO); 1187 1188 /* Ensure the partition doesn't get changed under our feet. */ 1189 switch (fmt) { 1190 case S_IFCHR: 1191 dk->dk_copenmask |= (1 << part); 1192 break; 1193 case S_IFBLK: 1194 dk->dk_bopenmask |= (1 << part); 1195 break; 1196 } 1197 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1198 1199 return (0); 1200 } 1201 1202 void 1203 disk_closepart(struct disk *dk, int part, int fmt) 1204 { 1205 KASSERT(part >= 0 && part < MAXPARTITIONS); 1206 1207 switch (fmt) { 1208 case S_IFCHR: 1209 dk->dk_copenmask &= ~(1 << part); 1210 break; 1211 case S_IFBLK: 1212 dk->dk_bopenmask &= ~(1 << part); 1213 break; 1214 } 1215 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1216 } 1217 1218 void 1219 disk_gone(int (*open)(dev_t, int, int, struct proc *), int unit) 1220 { 1221 int bmaj, cmaj, mn; 1222 1223 /* Locate the lowest minor number to be detached. */ 1224 mn = DISKMINOR(unit, 0); 1225 1226 for (bmaj = 0; bmaj < nblkdev; bmaj++) 1227 if (bdevsw[bmaj].d_open == open) 1228 vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK); 1229 for (cmaj = 0; cmaj < nchrdev; cmaj++) 1230 if (cdevsw[cmaj].d_open == open) 1231 vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR); 1232 } 1233 1234 /* 1235 * Increment a disk's busy counter. If the counter is going from 1236 * 0 to 1, set the timestamp. 1237 */ 1238 void 1239 disk_busy(struct disk *diskp) 1240 { 1241 1242 /* 1243 * XXX We'd like to use something as accurate as microtime(), 1244 * but that doesn't depend on the system TOD clock. 1245 */ 1246 mtx_enter(&diskp->dk_mtx); 1247 if (diskp->dk_busy++ == 0) 1248 microuptime(&diskp->dk_timestamp); 1249 mtx_leave(&diskp->dk_mtx); 1250 } 1251 1252 /* 1253 * Decrement a disk's busy counter, increment the byte count, total busy 1254 * time, and reset the timestamp. 1255 */ 1256 void 1257 disk_unbusy(struct disk *diskp, long bcount, daddr_t blkno, int read) 1258 { 1259 struct timeval dv_time, diff_time; 1260 1261 mtx_enter(&diskp->dk_mtx); 1262 1263 if (diskp->dk_busy-- == 0) 1264 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 1265 1266 microuptime(&dv_time); 1267 1268 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 1269 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 1270 1271 diskp->dk_timestamp = dv_time; 1272 if (bcount > 0) { 1273 if (read) { 1274 diskp->dk_rbytes += bcount; 1275 diskp->dk_rxfer++; 1276 } else { 1277 diskp->dk_wbytes += bcount; 1278 diskp->dk_wxfer++; 1279 } 1280 } else 1281 diskp->dk_seek++; 1282 1283 mtx_leave(&diskp->dk_mtx); 1284 1285 enqueue_randomness(bcount ^ diff_time.tv_usec ^ 1286 (blkno >> 32) ^ (blkno & 0xffffffff)); 1287 } 1288 1289 int 1290 disk_lock(struct disk *dk) 1291 { 1292 return (rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR)); 1293 } 1294 1295 void 1296 disk_lock_nointr(struct disk *dk) 1297 { 1298 rw_enter_write(&dk->dk_lock); 1299 } 1300 1301 void 1302 disk_unlock(struct disk *dk) 1303 { 1304 rw_exit_write(&dk->dk_lock); 1305 } 1306 1307 int 1308 dk_mountroot(void) 1309 { 1310 char errbuf[100]; 1311 int part = DISKPART(rootdev); 1312 int (*mountrootfn)(void); 1313 struct disklabel dl; 1314 char *error; 1315 1316 error = disk_readlabel(&dl, rootdev, errbuf, sizeof(errbuf)); 1317 if (error) 1318 panic("%s", error); 1319 1320 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 1321 panic("root filesystem has size 0"); 1322 switch (dl.d_partitions[part].p_fstype) { 1323 #ifdef EXT2FS 1324 case FS_EXT2FS: 1325 { 1326 extern int ext2fs_mountroot(void); 1327 mountrootfn = ext2fs_mountroot; 1328 } 1329 break; 1330 #endif 1331 #ifdef FFS 1332 case FS_BSDFFS: 1333 { 1334 extern int ffs_mountroot(void); 1335 mountrootfn = ffs_mountroot; 1336 } 1337 break; 1338 #endif 1339 #ifdef CD9660 1340 case FS_ISO9660: 1341 { 1342 extern int cd9660_mountroot(void); 1343 mountrootfn = cd9660_mountroot; 1344 } 1345 break; 1346 #endif 1347 default: 1348 #ifdef FFS 1349 { 1350 extern int ffs_mountroot(void); 1351 1352 printf("filesystem type %d not known.. assuming ffs\n", 1353 dl.d_partitions[part].p_fstype); 1354 mountrootfn = ffs_mountroot; 1355 } 1356 #else 1357 panic("disk 0x%x filesystem type %d not known", 1358 rootdev, dl.d_partitions[part].p_fstype); 1359 #endif 1360 } 1361 return (*mountrootfn)(); 1362 } 1363 1364 struct device * 1365 getdisk(char *str, int len, int defpart, dev_t *devp) 1366 { 1367 struct device *dv; 1368 1369 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1370 printf("use one of: exit"); 1371 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1372 if (dv->dv_class == DV_DISK) 1373 printf(" %s[a-p]", dv->dv_xname); 1374 #if defined(NFSCLIENT) 1375 if (dv->dv_class == DV_IFNET) 1376 printf(" %s", dv->dv_xname); 1377 #endif 1378 } 1379 printf("\n"); 1380 } 1381 return (dv); 1382 } 1383 1384 struct device * 1385 parsedisk(char *str, int len, int defpart, dev_t *devp) 1386 { 1387 struct device *dv; 1388 int majdev, part = defpart; 1389 char c; 1390 1391 if (len == 0) 1392 return (NULL); 1393 c = str[len-1]; 1394 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1395 part = c - 'a'; 1396 len -= 1; 1397 } 1398 1399 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1400 if (dv->dv_class == DV_DISK && 1401 strncmp(str, dv->dv_xname, len) == 0 && 1402 dv->dv_xname[len] == '\0') { 1403 majdev = findblkmajor(dv); 1404 if (majdev < 0) 1405 return NULL; 1406 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1407 break; 1408 } 1409 #if defined(NFSCLIENT) 1410 if (dv->dv_class == DV_IFNET && 1411 strncmp(str, dv->dv_xname, len) == 0 && 1412 dv->dv_xname[len] == '\0') { 1413 *devp = NODEV; 1414 break; 1415 } 1416 #endif 1417 } 1418 1419 return (dv); 1420 } 1421 1422 void 1423 setroot(struct device *bootdv, int part, int exitflags) 1424 { 1425 int majdev, unit, len, s, slept = 0; 1426 dev_t *swp; 1427 struct device *dv; 1428 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1429 struct ifnet *ifp = NULL; 1430 struct disk *dk; 1431 char buf[128]; 1432 #if defined(NFSCLIENT) 1433 extern char *nfsbootdevname; 1434 #endif 1435 1436 /* Ensure that all disk attach callbacks have completed. */ 1437 do { 1438 TAILQ_FOREACH(dk, &disklist, dk_link) { 1439 if (dk->dk_devno != NODEV && 1440 (dk->dk_flags & DKF_OPENED) == 0) { 1441 tsleep_nsec(dk, 0, "dkopen", SEC_TO_NSEC(1)); 1442 slept++; 1443 break; 1444 } 1445 } 1446 } while (dk != NULL && slept < 5); 1447 1448 if (slept == 5) { 1449 printf("disklabels not read:"); 1450 TAILQ_FOREACH(dk, &disklist, dk_link) 1451 if (dk->dk_devno != NODEV && 1452 (dk->dk_flags & DKF_OPENED) == 0) 1453 printf(" %s", dk->dk_name); 1454 printf("\n"); 1455 } 1456 1457 if (duid_iszero(bootduid)) { 1458 /* Locate DUID for boot disk since it was not provided. */ 1459 TAILQ_FOREACH(dk, &disklist, dk_link) 1460 if (dk->dk_device == bootdv) 1461 break; 1462 if (dk) 1463 bcopy(dk->dk_label->d_uid, bootduid, sizeof(bootduid)); 1464 } else if (bootdv == NULL) { 1465 /* Locate boot disk based on the provided DUID. */ 1466 TAILQ_FOREACH(dk, &disklist, dk_link) 1467 if (duid_equal(dk->dk_label->d_uid, bootduid)) 1468 break; 1469 if (dk) 1470 bootdv = dk->dk_device; 1471 } 1472 bcopy(bootduid, rootduid, sizeof(rootduid)); 1473 1474 #if NSOFTRAID > 0 1475 sr_map_root(); 1476 #endif 1477 1478 /* 1479 * If `swap generic' and we couldn't determine boot device, 1480 * ask the user. 1481 */ 1482 dk = NULL; 1483 if (mountroot == NULL && bootdv == NULL) 1484 boothowto |= RB_ASKNAME; 1485 if (boothowto & RB_ASKNAME) { 1486 while (1) { 1487 printf("root device"); 1488 if (bootdv != NULL) { 1489 printf(" (default %s", bootdv->dv_xname); 1490 if (bootdv->dv_class == DV_DISK) 1491 printf("%c", 'a' + part); 1492 printf(")"); 1493 } 1494 printf(": "); 1495 s = splhigh(); 1496 cnpollc(1); 1497 len = getsn(buf, sizeof(buf)); 1498 cnpollc(0); 1499 splx(s); 1500 if (strcmp(buf, "exit") == 0) 1501 reboot(exitflags); 1502 if (len == 0 && bootdv != NULL) { 1503 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1504 len = strlen(buf); 1505 } 1506 if (len > 0 && buf[len - 1] == '*') { 1507 buf[--len] = '\0'; 1508 dv = getdisk(buf, len, part, &nrootdev); 1509 if (dv != NULL) { 1510 rootdv = dv; 1511 nswapdev = nrootdev; 1512 goto gotswap; 1513 } 1514 } 1515 dv = getdisk(buf, len, part, &nrootdev); 1516 if (dv != NULL) { 1517 rootdv = dv; 1518 break; 1519 } 1520 } 1521 1522 if (rootdv->dv_class == DV_IFNET) 1523 goto gotswap; 1524 1525 /* try to build swap device out of new root device */ 1526 while (1) { 1527 printf("swap device"); 1528 if (rootdv != NULL) 1529 printf(" (default %s%s)", rootdv->dv_xname, 1530 rootdv->dv_class == DV_DISK ? "b" : ""); 1531 printf(": "); 1532 s = splhigh(); 1533 cnpollc(1); 1534 len = getsn(buf, sizeof(buf)); 1535 cnpollc(0); 1536 splx(s); 1537 if (strcmp(buf, "exit") == 0) 1538 reboot(exitflags); 1539 if (len == 0 && rootdv != NULL) { 1540 switch (rootdv->dv_class) { 1541 case DV_IFNET: 1542 nswapdev = NODEV; 1543 break; 1544 case DV_DISK: 1545 nswapdev = MAKEDISKDEV(major(nrootdev), 1546 DISKUNIT(nrootdev), 1); 1547 if (nswapdev == nrootdev) 1548 continue; 1549 break; 1550 default: 1551 break; 1552 } 1553 break; 1554 } 1555 dv = getdisk(buf, len, 1, &nswapdev); 1556 if (dv) { 1557 if (dv->dv_class == DV_IFNET) 1558 nswapdev = NODEV; 1559 if (nswapdev == nrootdev) 1560 continue; 1561 break; 1562 } 1563 } 1564 gotswap: 1565 rootdev = nrootdev; 1566 dumpdev = nswapdev; 1567 swdevt[0] = nswapdev; 1568 swdevt[1] = NODEV; 1569 #if defined(NFSCLIENT) 1570 } else if (mountroot == nfs_mountroot) { 1571 rootdv = bootdv; 1572 rootdev = dumpdev = swapdev = NODEV; 1573 #endif 1574 } else if (mountroot == NULL && rootdev == NODEV) { 1575 /* 1576 * `swap generic' 1577 */ 1578 rootdv = bootdv; 1579 1580 if (bootdv->dv_class == DV_DISK) { 1581 if (!duid_iszero(rootduid)) { 1582 TAILQ_FOREACH(dk, &disklist, dk_link) 1583 if (dk->dk_label && duid_equal( 1584 dk->dk_label->d_uid, rootduid)) 1585 break; 1586 if (dk == NULL) 1587 panic("root device (%s) not found", 1588 duid_format(rootduid)); 1589 rootdv = dk->dk_device; 1590 } 1591 } 1592 1593 majdev = findblkmajor(rootdv); 1594 if (majdev >= 0) { 1595 /* 1596 * Root and swap are on the disk. 1597 * Assume swap is on partition b. 1598 */ 1599 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1600 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1601 } else { 1602 /* 1603 * Root and swap are on a net. 1604 */ 1605 nswapdev = NODEV; 1606 } 1607 dumpdev = nswapdev; 1608 swdevt[0] = nswapdev; 1609 /* swdevt[1] = NODEV; */ 1610 } else { 1611 /* Completely pre-configured, but we want rootdv .. */ 1612 majdev = major(rootdev); 1613 if (findblkname(majdev) == NULL) 1614 return; 1615 unit = DISKUNIT(rootdev); 1616 part = DISKPART(rootdev); 1617 snprintf(buf, sizeof buf, "%s%d%c", 1618 findblkname(majdev), unit, 'a' + part); 1619 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1620 if (rootdv == NULL) 1621 panic("root device (%s) not found", buf); 1622 } 1623 1624 if (bootdv != NULL && bootdv->dv_class == DV_IFNET) 1625 ifp = if_unit(bootdv->dv_xname); 1626 1627 if (ifp) { 1628 if_addgroup(ifp, "netboot"); 1629 if_put(ifp); 1630 } 1631 1632 switch (rootdv->dv_class) { 1633 #if defined(NFSCLIENT) 1634 case DV_IFNET: 1635 mountroot = nfs_mountroot; 1636 nfsbootdevname = rootdv->dv_xname; 1637 return; 1638 #endif 1639 case DV_DISK: 1640 mountroot = dk_mountroot; 1641 part = DISKPART(rootdev); 1642 break; 1643 default: 1644 printf("can't figure root, hope your kernel is right\n"); 1645 return; 1646 } 1647 1648 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1649 1650 if (dk && dk->dk_device == rootdv) 1651 printf(" (%s.%c)", duid_format(rootduid), 'a' + part); 1652 1653 /* 1654 * Make the swap partition on the root drive the primary swap. 1655 */ 1656 for (swp = swdevt; *swp != NODEV; swp++) { 1657 if (major(rootdev) == major(*swp) && 1658 DISKUNIT(rootdev) == DISKUNIT(*swp)) { 1659 temp = swdevt[0]; 1660 swdevt[0] = *swp; 1661 *swp = temp; 1662 break; 1663 } 1664 } 1665 if (*swp != NODEV) { 1666 /* 1667 * If dumpdev was the same as the old primary swap device, 1668 * move it to the new primary swap device. 1669 */ 1670 if (temp == dumpdev) 1671 dumpdev = swdevt[0]; 1672 } 1673 if (swdevt[0] != NODEV) 1674 printf(" swap on %s%d%c", findblkname(major(swdevt[0])), 1675 DISKUNIT(swdevt[0]), 1676 'a' + DISKPART(swdevt[0])); 1677 if (dumpdev != NODEV) 1678 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1679 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1680 printf("\n"); 1681 } 1682 1683 extern const struct nam2blk nam2blk[]; 1684 1685 int 1686 findblkmajor(struct device *dv) 1687 { 1688 char buf[16], *p; 1689 int i; 1690 1691 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1692 return (-1); 1693 for (p = buf; *p; p++) 1694 if (*p >= '0' && *p <= '9') 1695 *p = '\0'; 1696 1697 for (i = 0; nam2blk[i].name; i++) 1698 if (!strcmp(buf, nam2blk[i].name)) 1699 return (nam2blk[i].maj); 1700 return (-1); 1701 } 1702 1703 char * 1704 findblkname(int maj) 1705 { 1706 int i; 1707 1708 for (i = 0; nam2blk[i].name; i++) 1709 if (nam2blk[i].maj == maj) 1710 return (nam2blk[i].name); 1711 return (NULL); 1712 } 1713 1714 char * 1715 disk_readlabel(struct disklabel *dl, dev_t dev, char *errbuf, size_t errsize) 1716 { 1717 struct vnode *vn; 1718 dev_t chrdev, rawdev; 1719 int error; 1720 1721 chrdev = blktochr(dev); 1722 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(chrdev), RAW_PART); 1723 1724 #ifdef DEBUG 1725 printf("dev=0x%x chrdev=0x%x rawdev=0x%x\n", dev, chrdev, rawdev); 1726 #endif 1727 1728 if (cdevvp(rawdev, &vn)) { 1729 snprintf(errbuf, errsize, 1730 "cannot obtain vnode for 0x%x/0x%x", dev, rawdev); 1731 return (errbuf); 1732 } 1733 1734 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1735 if (error) { 1736 snprintf(errbuf, errsize, 1737 "cannot open disk, 0x%x/0x%x, error %d", 1738 dev, rawdev, error); 1739 goto done; 1740 } 1741 1742 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)dl, FREAD, NOCRED, curproc); 1743 if (error) { 1744 snprintf(errbuf, errsize, 1745 "cannot read disk label, 0x%x/0x%x, error %d", 1746 dev, rawdev, error); 1747 } 1748 done: 1749 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1750 vput(vn); 1751 if (error) 1752 return (errbuf); 1753 return (NULL); 1754 } 1755 1756 int 1757 disk_map(const char *path, char *mappath, int size, int flags) 1758 { 1759 struct disk *dk, *mdk; 1760 u_char uid[8]; 1761 char c, part; 1762 int i; 1763 1764 /* 1765 * Attempt to map a request for a disklabel UID to the correct device. 1766 * We should be supplied with a disklabel UID which has the following 1767 * format: 1768 * 1769 * [disklabel uid] . [partition] 1770 * 1771 * Alternatively, if the DM_OPENPART flag is set the disklabel UID can 1772 * based passed on its own. 1773 */ 1774 1775 if (strchr(path, '/') != NULL) 1776 return -1; 1777 1778 /* Verify that the device name is properly formed. */ 1779 if (!((strlen(path) == 16 && (flags & DM_OPENPART)) || 1780 (strlen(path) == 18 && path[16] == '.'))) 1781 return -1; 1782 1783 /* Get partition. */ 1784 if (flags & DM_OPENPART) 1785 part = 'a' + RAW_PART; 1786 else 1787 part = path[17]; 1788 1789 if (part < 'a' || part >= 'a' + MAXPARTITIONS) 1790 return -1; 1791 1792 /* Derive label UID. */ 1793 memset(uid, 0, sizeof(uid)); 1794 for (i = 0; i < 16; i++) { 1795 c = path[i]; 1796 if (c >= '0' && c <= '9') 1797 c -= '0'; 1798 else if (c >= 'a' && c <= 'f') 1799 c -= ('a' - 10); 1800 else 1801 return -1; 1802 1803 uid[i / 2] <<= 4; 1804 uid[i / 2] |= c & 0xf; 1805 } 1806 1807 mdk = NULL; 1808 TAILQ_FOREACH(dk, &disklist, dk_link) { 1809 if (dk->dk_label && memcmp(dk->dk_label->d_uid, uid, 1810 sizeof(dk->dk_label->d_uid)) == 0) { 1811 /* Fail if there are duplicate UIDs! */ 1812 if (mdk != NULL) 1813 return -1; 1814 mdk = dk; 1815 } 1816 } 1817 1818 if (mdk == NULL || mdk->dk_name == NULL) 1819 return -1; 1820 1821 snprintf(mappath, size, "/dev/%s%s%c", 1822 (flags & DM_OPENBLCK) ? "" : "r", mdk->dk_name, part); 1823 1824 return 0; 1825 } 1826 1827 /* 1828 * Lookup a disk device and verify that it has completed attaching. 1829 */ 1830 struct device * 1831 disk_lookup(struct cfdriver *cd, int unit) 1832 { 1833 struct device *dv; 1834 struct disk *dk; 1835 1836 dv = device_lookup(cd, unit); 1837 if (dv == NULL) 1838 return (NULL); 1839 1840 TAILQ_FOREACH(dk, &disklist, dk_link) 1841 if (dk->dk_device == dv) 1842 break; 1843 1844 if (dk == NULL) { 1845 device_unref(dv); 1846 return (NULL); 1847 } 1848 1849 return (dv); 1850 } 1851 1852 int 1853 duid_equal(u_char *duid1, u_char *duid2) 1854 { 1855 return (memcmp(duid1, duid2, DUID_SIZE) == 0); 1856 } 1857 1858 int 1859 duid_iszero(u_char *duid) 1860 { 1861 u_char zeroduid[DUID_SIZE]; 1862 1863 memset(zeroduid, 0, sizeof(zeroduid)); 1864 1865 return (duid_equal(duid, zeroduid)); 1866 } 1867 1868 const char * 1869 duid_format(u_char *duid) 1870 { 1871 static char duid_str[17]; 1872 1873 KERNEL_ASSERT_LOCKED(); 1874 1875 snprintf(duid_str, sizeof(duid_str), 1876 "%02x%02x%02x%02x%02x%02x%02x%02x", 1877 duid[0], duid[1], duid[2], duid[3], 1878 duid[4], duid[5], duid[6], duid[7]); 1879 1880 return (duid_str); 1881 } 1882