1 /* $OpenBSD: subr_disk.c,v 1.173 2014/11/03 21:00:27 tedu Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/stat.h> 48 #include <sys/syslog.h> 49 #include <sys/device.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/conf.h> 53 #include <sys/lock.h> 54 #include <sys/disk.h> 55 #include <sys/reboot.h> 56 #include <sys/dkio.h> 57 #include <sys/vnode.h> 58 #include <sys/workq.h> 59 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 63 #include <net/if.h> 64 65 #include <dev/rndvar.h> 66 #include <dev/cons.h> 67 68 #include <lib/libz/zlib.h> 69 70 #include "softraid.h" 71 72 #ifdef DEBUG 73 #define DPRINTF(x...) printf(x) 74 #else 75 #define DPRINTF(x...) 76 #endif 77 78 /* 79 * A global list of all disks attached to the system. May grow or 80 * shrink over time. 81 */ 82 struct disklist_head disklist; /* TAILQ_HEAD */ 83 int disk_count; /* number of drives in global disklist */ 84 int disk_change; /* set if a disk has been attached/detached 85 * since last we looked at this variable. This 86 * is reset by hw_sysctl() 87 */ 88 89 u_char bootduid[8]; /* DUID of boot disk. */ 90 u_char rootduid[8]; /* DUID of root disk. */ 91 92 /* softraid callback, do not use! 
*/ 93 void (*softraid_disk_attach)(struct disk *, int); 94 95 void sr_map_root(void); 96 97 void disk_attach_callback(void *, void *); 98 99 /* 100 * Compute checksum for disk label. 101 */ 102 u_int 103 dkcksum(struct disklabel *lp) 104 { 105 u_int16_t *start, *end; 106 u_int16_t sum = 0; 107 108 start = (u_int16_t *)lp; 109 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 110 while (start < end) 111 sum ^= *start++; 112 return (sum); 113 } 114 115 int 116 initdisklabel(struct disklabel *lp) 117 { 118 int i; 119 120 /* minimal requirements for archetypal disk label */ 121 if (lp->d_secsize < DEV_BSIZE) 122 lp->d_secsize = DEV_BSIZE; 123 if (DL_GETDSIZE(lp) == 0) 124 DL_SETDSIZE(lp, MAXDISKSIZE); 125 if (lp->d_secpercyl == 0) 126 return (ERANGE); 127 lp->d_npartitions = MAXPARTITIONS; 128 for (i = 0; i < RAW_PART; i++) { 129 DL_SETPSIZE(&lp->d_partitions[i], 0); 130 DL_SETPOFFSET(&lp->d_partitions[i], 0); 131 } 132 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 133 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 134 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 135 DL_SETBSTART(lp, 0); 136 DL_SETBEND(lp, DL_GETDSIZE(lp)); 137 lp->d_version = 1; 138 lp->d_bbsize = 8192; 139 lp->d_sbsize = 64*1024; /* XXX ? */ 140 return (0); 141 } 142 143 /* 144 * Check an incoming block to make sure it is a disklabel, convert it to 145 * a newer version if needed, etc etc. 
 */
/*
 * rlp        candidate on-disk label; byte-swapped in place when it turns
 *            out to be a valid label of the opposite endianness.
 * lp         destination label.  On entry its disk size (DL_GETDSIZE) is
 *            taken to be the real disk size and is written back over
 *            whatever the on-disk label claims.
 * boundstart, boundend
 *            OpenBSD area bounds stored in the result; boundend is clamped
 *            to the disk size.
 *
 * Returns 0 on success, or ENOENT / E2BIG / EINVAL / ENOSPC describing why
 * the native-endian interpretation was rejected (the same value is
 * returned when the byte-swapped interpretation is rejected too).
 */
int
checkdisklabel(void *rlp, struct disklabel *lp, u_int64_t boundstart,
    u_int64_t boundend)
{
	struct disklabel *dlp = rlp;
	struct __partitionv0 *v0pp;
	struct partition *pp;
	u_int64_t disksize;
	int error = 0;
	int i;

	if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC)
		error = ENOENT;	/* no disk label */
	else if (dlp->d_npartitions > MAXPARTITIONS)
		error = E2BIG;	/* too many partitions */
	else if (dlp->d_secpercyl == 0)
		error = EINVAL;	/* invalid label */
	else if (dlp->d_secsize == 0)
		error = ENOSPC;	/* disk too small */
	else if (dkcksum(dlp) != 0)
		error = EINVAL;	/* incorrect checksum */

	if (error) {
		u_int16_t *start, *end, sum = 0;

		/* If it is byte-swapped, attempt to convert it */
		if (swap32(dlp->d_magic) != DISKMAGIC ||
		    swap32(dlp->d_magic2) != DISKMAGIC ||
		    swap16(dlp->d_npartitions) > MAXPARTITIONS)
			return (error);

		/*
		 * Need a byte-swap aware dkcksum variant
		 * inlined, because dkcksum uses a sub-field
		 */
		start = (u_int16_t *)dlp;
		end = (u_int16_t *)&dlp->d_partitions[
		    swap16(dlp->d_npartitions)];
		while (start < end)
			sum ^= *start++;
		if (sum != 0)
			return (error);

		dlp->d_magic = swap32(dlp->d_magic);
		dlp->d_type = swap16(dlp->d_type);
		dlp->d_subtype = swap16(dlp->d_subtype);

		/* d_typename and d_packname are strings */

		dlp->d_secsize = swap32(dlp->d_secsize);
		dlp->d_nsectors = swap32(dlp->d_nsectors);
		dlp->d_ntracks = swap32(dlp->d_ntracks);
		dlp->d_ncylinders = swap32(dlp->d_ncylinders);
		dlp->d_secpercyl = swap32(dlp->d_secpercyl);
		dlp->d_secperunit = swap32(dlp->d_secperunit);

		/* d_uid is a string */

		dlp->d_acylinders = swap32(dlp->d_acylinders);

		dlp->d_flags = swap32(dlp->d_flags);

		for (i = 0; i < NDDATA; i++)
			dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]);

		dlp->d_secperunith = swap16(dlp->d_secperunith);
		dlp->d_version = swap16(dlp->d_version);

		for (i = 0; i < NSPARE; i++)
			dlp->d_spare[i] = swap32(dlp->d_spare[i]);

		dlp->d_magic2 = swap32(dlp->d_magic2);

		dlp->d_npartitions = swap16(dlp->d_npartitions);
		dlp->d_bbsize = swap32(dlp->d_bbsize);
		dlp->d_sbsize = swap32(dlp->d_sbsize);

		/*
		 * d_version was swapped above, so it selects the right
		 * per-partition layout here.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			pp = &dlp->d_partitions[i];
			pp->p_size = swap32(pp->p_size);
			pp->p_offset = swap32(pp->p_offset);
			if (dlp->d_version == 0) {
				v0pp = (struct __partitionv0 *)pp;
				v0pp->p_fsize = swap32(v0pp->p_fsize);
			} else {
				pp->p_offseth = swap16(pp->p_offseth);
				pp->p_sizeh = swap16(pp->p_sizeh);
			}
			pp->p_cpg = swap16(pp->p_cpg);
		}

		/* Re-seal the converted label in native byte order. */
		dlp->d_checksum = 0;
		dlp->d_checksum = dkcksum(dlp);
		error = 0;
	}

	/* XXX should verify lots of other fields and whine a lot */

	/* Initial passed in lp contains the real disk size. */
	disksize = DL_GETDSIZE(lp);

	if (lp != dlp)
		*lp = *dlp;

	/* Upgrade a version 0 label to version 1 in place. */
	if (lp->d_version == 0) {
		lp->d_version = 1;
		lp->d_secperunith = 0;

		v0pp = (struct __partitionv0 *)lp->d_partitions;
		pp = lp->d_partitions;
		for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) {
			pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp->
			    p_fsize, v0pp->p_frag);
			pp->p_offseth = 0;
			pp->p_sizeh = 0;
		}
	}

#ifdef DEBUG
	if (DL_GETDSIZE(lp) != disksize)
		printf("on-disk disklabel has incorrect disksize (%llu)\n",
		    DL_GETDSIZE(lp));
	if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize)
		printf("on-disk disklabel RAW_PART has incorrect size (%llu)\n",
		    DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
	if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0)
		printf("on-disk disklabel RAW_PART offset != 0 (%llu)\n",
		    DL_GETPOFFSET(&lp->d_partitions[RAW_PART]));
#endif
	/* The caller's geometry always wins over what the label claims. */
	DL_SETDSIZE(lp, disksize);
	DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize);
	DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
	DL_SETBSTART(lp, boundstart);
	DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp));

	lp->d_checksum = 0;
	lp->d_checksum = dkcksum(lp);
	return (0);
}

/*
 * If dos partition table requested, attempt to load it and
 * find disklabel inside a DOS partition. Return buffer
 * for use in signalling errors if requested.
 *
 * We would like to check if each MBR has a valid BOOT_MAGIC, but
 * we cannot because it doesn't always exist. So.. we assume the
 * MBR is valid.
 *
 * bp         scratch buffer used for every read issued here.
 * strat      strategy routine the reads are submitted through.
 * lp         label to spoof into and, unless spoofonly is set, to fill
 *            from the on-disk label; d_secpercyl and d_secsize must be
 *            non-zero on entry.
 * partoffp   when non-NULL, only the start of the OpenBSD area (in
 *            512-byte blocks) is reported through it and *lp is not
 *            modified.
 * spoofonly  when non-zero, the on-disk label is not read.
 *
 * Returns 0, EINVAL/ENOSPC for an unusable incoming label, an I/O error,
 * ENXIO when an offset is requested on a FAT-formatted disk, or the
 * result of checkdisklabel().
 */
int
readdoslabel(struct buf *bp, void (*strat)(struct buf *),
    struct disklabel *lp, daddr_t *partoffp, int spoofonly)
{
	u_int64_t dospartoff = 0, dospartend = DL_GETBEND(lp);
	int i, ourpart = -1, wander = 1, n = 0, loop = 0, offset;
	struct dos_partition dp[NDOSPART], *dp2;
	daddr_t part_blkno = DOSBBSECTOR;
	u_int32_t extoff = 0;
	int error;

	if (lp->d_secpercyl == 0)
		return (EINVAL);	/* invalid label */
	if (lp->d_secsize == 0)
		return (ENOSPC);	/* disk too small */

	/* do DOS partitions in the process of getting disklabel? */

	/*
	 * Read dos partition table, follow extended partitions.
	 * Map the partitions to disklabel entries i-p
	 */
	while (wander && loop < DOS_MAXEBR) {
		loop++;
		wander = 0;
		if (part_blkno < extoff)
			part_blkno = extoff;

		/* read MBR/EBR */
		bp->b_blkno = DL_SECTOBLK(lp, part_blkno);
		bp->b_bcount = lp->d_secsize;
		bp->b_error = 0; /* B_ERROR and b_error may have stale data. */
		CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR);
		SET(bp->b_flags, B_BUSY | B_READ | B_RAW);
		(*strat)(bp);
		error = biowait(bp);
		if (error) {
/*wrong*/		if (partoffp)
/*wrong*/			*partoffp = -1;
			return (error);
		}

		bcopy(bp->b_data + DOSPARTOFF, dp, sizeof(dp));

		/* Only the very first table read is checked for the marker. */
		if (n == 0 && part_blkno == DOSBBSECTOR) {
			u_int16_t mbrtest;

			/* Check the end of sector marker. */
			mbrtest = ((bp->b_data[510] << 8) & 0xff00) |
			    (bp->b_data[511] & 0xff);
			if (mbrtest != 0x55aa)
				goto notmbr;
		}

		if (ourpart == -1) {
			/* Search for our MBR partition */
			for (dp2=dp, i=0; i < NDOSPART && ourpart == -1;
			    i++, dp2++)
				if (letoh32(dp2->dp_size) &&
				    dp2->dp_typ == DOSPTYP_OPENBSD)
					ourpart = i;
			if (ourpart == -1)
				goto donot;
			/*
			 * This is our MBR partition. need sector
			 * address for SCSI/IDE, cylinder for
			 * ESDI/ST506/RLL
			 */
			dp2 = &dp[ourpart];
			dospartoff = letoh32(dp2->dp_start) + part_blkno;
			dospartend = dospartoff + letoh32(dp2->dp_size);

			/*
			 * Record the OpenBSD partition's placement (in
			 * 512-byte blocks!) for the caller. No need to
			 * finish spoofing.
			 */
			if (partoffp) {
				*partoffp = DL_SECTOBLK(lp, dospartoff);
				return (0);
			}

			if (lp->d_ntracks == 0)
				lp->d_ntracks = dp2->dp_ehd + 1;
			if (lp->d_nsectors == 0)
				lp->d_nsectors = DPSECT(dp2->dp_esect);
			if (lp->d_secpercyl == 0)
				lp->d_secpercyl = lp->d_ntracks *
				    lp->d_nsectors;
		}
donot:
		/*
		 * In case the disklabel read below fails, we want to
		 * provide a fake label in i-p.
		 */
		for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) {
			struct partition *pp;
			u_int8_t fstype;

			if (dp2->dp_typ == DOSPTYP_OPENBSD ||
			    dp2->dp_typ == DOSPTYP_EFI)
				continue;
			if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp))
				continue;
			if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp))
				continue;
			if (letoh32(dp2->dp_size) == 0)
				continue;

			switch (dp2->dp_typ) {
			case DOSPTYP_UNUSED:
				fstype = FS_UNUSED;
				break;

			case DOSPTYP_LINUX:
				fstype = FS_EXT2FS;
				break;

			case DOSPTYP_NTFS:
				fstype = FS_NTFS;
				break;

			case DOSPTYP_EFISYS:
			case DOSPTYP_FAT12:
			case DOSPTYP_FAT16S:
			case DOSPTYP_FAT16B:
			case DOSPTYP_FAT16L:
			case DOSPTYP_FAT32:
			case DOSPTYP_FAT32L:
				fstype = FS_MSDOS;
				break;
			case DOSPTYP_EXTEND:
			case DOSPTYP_EXTENDL:
				/*
				 * Queue the next table to read.  The first
				 * extended entry seen sets extoff; later
				 * ones are offset from it.
				 */
				part_blkno = letoh32(dp2->dp_start) + extoff;
				if (!extoff) {
					extoff = letoh32(dp2->dp_start);
					part_blkno = 0;
				}
				wander = 1;
				continue;
				break;
			default:
				fstype = FS_OTHER;
				break;
			}

			/*
			 * Don't set fstype/offset/size when just looking for
			 * the offset of the OpenBSD partition. It would
			 * invalidate the disklabel checksum!
			 *
			 * Don't try to spoof more than 8 partitions, i.e.
			 * 'i' -'p'.
			 */
			if (partoffp || n >= 8)
				continue;

			pp = &lp->d_partitions[8+n];
			n++;
			pp->p_fstype = fstype;
			if (letoh32(dp2->dp_start))
				DL_SETPOFFSET(pp,
				    letoh32(dp2->dp_start) + part_blkno);
			DL_SETPSIZE(pp, letoh32(dp2->dp_size));
		}
	}

notmbr:
	if (partoffp == NULL)
		/* Must not modify *lp when partoffp is set. */
		lp->d_npartitions = MAXPARTITIONS;

	/*
	 * Nothing was spoofed, no OpenBSD partition was found and bp still
	 * holds the first sector: see whether it looks like a FAT boot
	 * sector.
	 */
	if (n == 0 && part_blkno == DOSBBSECTOR && ourpart == -1) {
		u_int16_t fattest;

		/* Check for a valid initial jmp instruction. */
		switch ((u_int8_t)bp->b_data[0]) {
		case 0xeb:
			/*
			 * Two-byte jmp instruction. The 2nd byte is the number
			 * of bytes to jmp and the 3rd byte must be a NOP.
			 */
			if ((u_int8_t)bp->b_data[2] != 0x90)
				goto notfat;
			break;
		case 0xe9:
			/*
			 * Three-byte jmp instruction. The next two bytes are a
			 * little-endian 16 bit value.
			 */
			break;
		default:
			goto notfat;
			break;
		}

		/* Check for a valid bytes per sector value. */
		fattest = ((bp->b_data[12] << 8) & 0xff00) |
		    (bp->b_data[11] & 0xff);
		if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0))
			goto notfat;

		if (partoffp)
			return (ENXIO);	/* No place for disklabel on FAT! */

		DL_SETPSIZE(&lp->d_partitions['i' - 'a'],
		    DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
		DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0);
		lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS;

		spoofonly = 1;	/* No disklabel to read from disk. */
	}

notfat:
	/* record the OpenBSD partition's placement for the caller */
	if (partoffp)
		*partoffp = DL_SECTOBLK(lp, dospartoff);
	else {
		DL_SETBSTART(lp, dospartoff);
		DL_SETBEND(lp, (dospartend < DL_GETDSIZE(lp)) ? dospartend :
		    DL_GETDSIZE(lp));
	}

	/* don't read the on-disk label if we are in spoofed-only mode */
	if (spoofonly)
		return (0);

	/*
	 * Read the sector holding the label; offset is the label's byte
	 * position inside that sector.
	 */
	bp->b_blkno = DL_BLKTOSEC(lp, DL_SECTOBLK(lp, dospartoff) +
	    DOS_LABELSECTOR) * DL_BLKSPERSEC(lp);
	offset = DL_BLKOFFSET(lp, DL_SECTOBLK(lp, dospartoff) +
	    DOS_LABELSECTOR);
	bp->b_bcount = lp->d_secsize;
	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
	SET(bp->b_flags, B_BUSY | B_READ | B_RAW);
	(*strat)(bp);
	if (biowait(bp))
		return (bp->b_error);


	/* The bounds handed on are the ones recorded in the on-disk label. */
	error = checkdisklabel(bp->b_data + offset, lp,
	    DL_GETBSTART((struct disklabel*)(bp->b_data+offset)),
	    DL_GETBEND((struct disklabel *)(bp->b_data+offset)));

	return (error);
}

#ifdef GPT

int gpt_chk_hdr(struct gpt_header *);
int gpt_chk_parts(struct gpt_header *, struct gpt_partition *);
int get_fstype(struct uuid *);

/*
 * Verify the GPT header checksum.  gh_csum is zeroed, the CRC32 of the
 * first gh_size bytes of *gh is computed and stored back into gh_csum,
 * and that value is compared with the one found on entry.
 *
 * Returns 0 on a match, EINVAL otherwise.
 *
 * NOTE(review): gh_size is used as found (no letoh32) and is not bounded
 * here, so the caller has to make sure it does not exceed the object
 * gh points to.
 */
int
gpt_chk_hdr(struct gpt_header *gh)
{
	u_int32_t orig_gh_csum = gh->gh_csum;
	gh->gh_csum = 0;
	gh->gh_csum = crc32(0, (unsigned char *)gh, gh->gh_size);

	if (orig_gh_csum != gh->gh_csum)
		return (EINVAL);

	return 0;
}

/*
 * Verify the partition entry array: CRC32 over
 * gh_part_num * gh_part_size bytes starting at gp must equal
 * gh_part_csum.  Returns 0 on a match, EINVAL otherwise.
 */
int
gpt_chk_parts(struct gpt_header *gh, struct gpt_partition *gp)
{
	u_int32_t checksum;
	checksum = crc32(0, (unsigned char *)gp,
	    gh->gh_part_num * gh->gh_part_size);

	if (checksum != gh->gh_part_csum)
		return (EINVAL);

	return 0;
}

/*
 * Map a decoded GPT partition type UUID to a disklabel FS_* type.
 * The reference UUIDs are decoded once, on the first call, and kept in
 * function-static storage.  Unknown types map to FS_OTHER.
 */
int
get_fstype(struct uuid *uuid_part)
{
	static int init = 0;
	static struct uuid uuid_openbsd, uuid_msdos, uuid_chromefs,
	    uuid_linux, uuid_hfs, uuid_unused;
	static const uint8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD;
	static const uint8_t gpt_uuid_msdos[] = GPT_UUID_MSDOS;
	static const uint8_t gpt_uuid_chromerootfs[] = GPT_UUID_CHROMEROOTFS;
	static const uint8_t gpt_uuid_linux[] = GPT_UUID_LINUX;
	static const uint8_t gpt_uuid_hfs[] = GPT_UUID_APPLE_HFS;
	static const uint8_t gpt_uuid_unused[] = GPT_UUID_UNUSED;

	if (init == 0) {
		uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd);
		uuid_dec_be(gpt_uuid_msdos, &uuid_msdos);
		uuid_dec_be(gpt_uuid_chromerootfs, &uuid_chromefs);
		uuid_dec_be(gpt_uuid_linux, &uuid_linux);
		uuid_dec_be(gpt_uuid_hfs, &uuid_hfs);
		uuid_dec_be(gpt_uuid_unused, &uuid_unused);
		init = 1;
	}

	if (!memcmp(uuid_part, &uuid_unused, sizeof(struct uuid)))
		return FS_UNUSED;
	else if (!memcmp(uuid_part, &uuid_openbsd, sizeof(struct uuid)))
		return FS_BSDFFS;
	else if (!memcmp(uuid_part, &uuid_msdos, sizeof(struct uuid)))
		return FS_MSDOS;
	else if (!memcmp(uuid_part, &uuid_chromefs, sizeof(struct uuid)))
		return FS_EXT2FS;
	else if (!memcmp(uuid_part, &uuid_linux, sizeof(struct uuid)))
		return FS_EXT2FS;
	else if (!memcmp(uuid_part, &uuid_hfs, sizeof(struct uuid)))
		return FS_HFS;
	else
		return FS_OTHER;
}

/*
 * If gpt partition table requested, attempt to load it and
 * find disklabel inside a GPT partition. Return buffer
 * for use in signalling errors if requested.
617 * 618 * XXX: readgptlabel() is based on readdoslabel(), so they should be merged 619 */ 620 int 621 readgptlabel(struct buf *bp, void (*strat)(struct buf *), 622 struct disklabel *lp, daddr_t *partoffp, int spoofonly) 623 { 624 struct gpt_header gh; 625 struct gpt_partition *gp, *gp_tmp; 626 size_t gpsz; 627 struct uuid uuid_part, uuid_openbsd; 628 struct partition *pp; 629 630 daddr_t part_blkno; 631 u_int64_t gptpartoff = 0, gptpartend = DL_GETBEND(lp); 632 int i, altheader = 0, error, n=0, ourpart = -1, offset; 633 634 static const u_int8_t gpt_uuid_openbsd[] = GPT_UUID_OPENBSD; 635 u_int8_t fstype; 636 637 uuid_dec_be(gpt_uuid_openbsd, &uuid_openbsd); 638 639 if (lp->d_secpercyl == 0) 640 return (EINVAL); /* invalid label */ 641 if (lp->d_secsize == 0) 642 return (ENOSPC); /* disk too small */ 643 644 /* 645 * XXX: We should not trust the primary header and instead 646 * use the last LBA of the disk, as defined in the standard. 647 */ 648 for (part_blkno = GPTSECTOR; ; part_blkno = gh.gh_lba_alt, 649 altheader = 1) { 650 uint32_t ghsize; 651 uint32_t ghpartsize; 652 uint32_t ghpartnum; 653 size_t gpsz; 654 655 /* read header record */ 656 bp->b_blkno = DL_BLKTOSEC(lp, part_blkno) * DL_BLKSPERSEC(lp); 657 offset = DL_BLKOFFSET(lp, part_blkno); 658 bp->b_bcount = lp->d_secsize; 659 bp->b_error = 0; /* B_ERROR and b_error may have stale data. 
*/ 660 CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR); 661 SET(bp->b_flags, B_BUSY | B_READ | B_RAW); 662 (*strat)(bp); 663 error = biowait(bp); 664 665 if (error) { 666 DPRINTF("error reading from disk\n"); 667 /*wrong*/ if (partoffp) 668 /*wrong*/ *partoffp = -1; 669 return (error); 670 } 671 672 bcopy(bp->b_data + offset, &gh, sizeof(gh)); 673 ghsize = letoh32(gh.gh_size); 674 ghpartsize = letoh32(gh.gh_part_size); 675 ghpartnum = letoh32(gh.gh_part_num); 676 677 678 if (letoh64(gh.gh_sig) != GPTSIGNATURE) 679 return (EINVAL); 680 681 /* we only support version 1.0 */ 682 if (letoh32(gh.gh_rev) != GPTREVISION) 683 return (EINVAL); 684 685 if (gpt_chk_hdr(&gh)) { 686 /* header broken, using alternate header */ 687 if (altheader) { 688 DPRINTF("alternate header also broken\n"); 689 return (EINVAL); 690 } 691 692 if (gh.gh_lba_alt >= DL_GETDSIZE(lp)) { 693 DPRINTF("alternate header's position is " 694 "bogous\n"); 695 return (EINVAL); 696 } 697 698 continue; 699 } 700 701 /* 702 * Header size must be greater than or equal to 92 and less 703 * than or equal to the logical block size. 704 */ 705 if (ghsize < GPTMINHDRSIZE || ghsize > DEV_BSIZE) 706 return (EINVAL); 707 708 if (letoh64(gh.gh_lba_start) >= DL_GETDSIZE(lp) || 709 letoh64(gh.gh_lba_end) >= DL_GETDSIZE(lp) || 710 letoh64(gh.gh_part_lba) >= DL_GETDSIZE(lp)) 711 return (EINVAL); 712 713 /* 714 * Size per partition entry shall be 128*(2**n) with n >= 0. 715 * We don't support partition entries larger than block size. 
716 */ 717 if (ghpartsize % GPTMINPARTSIZE 718 || ghpartsize > DEV_BSIZE 719 || GPT_PARTSPERSEC(&gh) == 0) { 720 DPRINTF("invalid partition size\n"); 721 return (EINVAL); 722 } 723 724 /* XXX: we don't support multiples of GPTMINPARTSIZE yet */ 725 if (letoh32(gh.gh_part_size) != GPTMINPARTSIZE) { 726 DPRINTF("partition sizes larger than %d bytes are not " 727 "supported", GPTMINPARTSIZE); 728 return (EINVAL); 729 } 730 731 /* read GPT partition entry array */ 732 gp = mallocarray(ghpartnum, sizeof(struct gpt_partition), M_DEVBUF, M_NOWAIT|M_ZERO); 733 if (gp == NULL) 734 return (ENOMEM); 735 gpsz = ghpartnum * sizeof(struct gpt_partition); 736 737 /* 738 * XXX: Fails if # of partition entries is no multiple of 739 * GPT_PARTSPERSEC(&gh) 740 */ 741 for (i = 0; i < ghpartnum / GPT_PARTSPERSEC(&gh); 742 i++) { 743 part_blkno = letoh64(gh.gh_part_lba) + i; 744 /* read partition record */ 745 bp->b_blkno = DL_BLKTOSEC(lp, part_blkno) * 746 DL_BLKSPERSEC(lp); 747 offset = DL_BLKOFFSET(lp, part_blkno); 748 bp->b_bcount = lp->d_secsize; 749 /* B_ERROR and b_error may have stale data. 
*/ 750 bp->b_error = 0; 751 CLR(bp->b_flags, B_READ | B_WRITE | B_DONE | B_ERROR); 752 SET(bp->b_flags, B_BUSY | B_READ | B_RAW); 753 (*strat)(bp); 754 error = biowait(bp); 755 if (error) { 756 /*wrong*/ if (partoffp) 757 /*wrong*/ *partoffp = -1; 758 free(gp, M_DEVBUF, gpsz); 759 return (error); 760 } 761 762 bcopy(bp->b_data + offset, gp + 763 i * GPT_PARTSPERSEC(&gh), GPT_PARTSPERSEC(&gh) * 764 sizeof(struct gpt_partition)); 765 } 766 767 if (gpt_chk_parts(&gh, gp)) { 768 DPRINTF("partition entries broken, using alternate " 769 "header\n"); 770 free(gp, M_DEVBUF, gpsz); 771 772 if (altheader) { 773 DPRINTF("alternate partition entries are also " 774 "broken\n"); 775 return (EINVAL); 776 } 777 778 continue; 779 } 780 break; 781 } 782 783 /* find OpenBSD partition */ 784 for (gp_tmp = gp, i = 0; i < letoh32(gh.gh_part_num) && ourpart == -1; 785 gp_tmp++, i++) { 786 if (letoh64(gp_tmp->gp_lba_start) > letoh64(gp_tmp->gp_lba_end) 787 || letoh64(gp_tmp->gp_lba_start) < letoh64(gh.gh_lba_start) 788 || letoh64(gp_tmp->gp_lba_end) > letoh64(gh.gh_lba_end)) 789 continue; /* entry invalid */ 790 791 uuid_dec_le(&gp_tmp->gp_type, &uuid_part); 792 if (!memcmp(&uuid_part, &uuid_openbsd, sizeof(struct uuid))) { 793 ourpart = i; /* found it */ 794 } 795 796 /* 797 * In case the disklabel read below fails, we want to 798 * provide a fake label in i-p. 799 */ 800 fstype = get_fstype(&uuid_part); 801 802 /* 803 * Don't set fstype/offset/size when just looking for 804 * the offset of the OpenBSD partition. It would 805 * invalidate the disklabel checksum! 806 * 807 * Don't try to spoof more than 8 partitions, i.e. 808 * 'i' -'p'. 
809 */ 810 if (partoffp || n >= 8) 811 continue; 812 813 pp = &lp->d_partitions[8+n]; 814 n++; 815 pp->p_fstype = fstype; 816 DL_SETPOFFSET(pp, letoh64(gp_tmp->gp_lba_start)); 817 DL_SETPSIZE(pp, letoh64(gp_tmp->gp_lba_end) 818 - letoh64(gp_tmp->gp_lba_start) + 1); 819 } 820 821 if (ourpart != -1) { 822 /* found our OpenBSD partition, so use it */ 823 gp_tmp = &gp[ourpart]; 824 gptpartoff = letoh64(gp_tmp->gp_lba_start); 825 gptpartend = letoh64(gp_tmp->gp_lba_end) + 1; 826 } else 827 spoofonly = 1; /* No disklabel to read from disk. */ 828 829 if (!partoffp) 830 /* Must not modify *lp when partoffp is set. */ 831 lp->d_npartitions = MAXPARTITIONS; 832 833 free(gp, M_DEVBUF, gpsz); 834 835 /* record the OpenBSD partition's placement for the caller */ 836 if (partoffp) 837 *partoffp = gptpartoff; 838 else { 839 DL_SETBSTART(lp, gptpartoff); 840 DL_SETBEND(lp, (gptpartend < DL_GETDSIZE(lp)) ? gptpartend : 841 DL_GETDSIZE(lp)); 842 } 843 844 /* don't read the on-disk label if we are in spoofed-only mode */ 845 if (spoofonly) 846 return (0); 847 848 bp->b_blkno = DL_BLKTOSEC(lp, gptpartoff + DOS_LABELSECTOR) * 849 DL_BLKSPERSEC(lp); 850 offset = DL_BLKOFFSET(lp, gptpartoff + DOS_LABELSECTOR); 851 bp->b_bcount = lp->d_secsize; 852 CLR(bp->b_flags, B_READ | B_WRITE | B_DONE); 853 SET(bp->b_flags, B_BUSY | B_READ | B_RAW); 854 (*strat)(bp); 855 if (biowait(bp)) 856 return (bp->b_error); 857 858 /* sub-GPT disklabels are always at a LABELOFFSET of 0 */ 859 return checkdisklabel(bp->b_data + offset, lp, gptpartoff, gptpartend); 860 } 861 862 #endif 863 864 /* 865 * Check new disk label for sensibility before setting it. 
866 */ 867 int 868 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 869 { 870 struct partition *opp, *npp; 871 struct disk *dk; 872 u_int64_t uid; 873 int i; 874 875 /* sanity clause */ 876 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 877 (nlp->d_secsize % DEV_BSIZE) != 0) 878 return (EINVAL); 879 880 /* special case to allow disklabel to be invalidated */ 881 if (nlp->d_magic == 0xffffffff) { 882 *olp = *nlp; 883 return (0); 884 } 885 886 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 887 dkcksum(nlp) != 0) 888 return (EINVAL); 889 890 /* XXX missing check if other dos partitions will be overwritten */ 891 892 for (i = 0; i < MAXPARTITIONS; i++) { 893 opp = &olp->d_partitions[i]; 894 npp = &nlp->d_partitions[i]; 895 if ((openmask & (1 << i)) && 896 (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 897 DL_GETPSIZE(npp) < DL_GETPSIZE(opp))) 898 return (EBUSY); 899 /* 900 * Copy internally-set partition information 901 * if new label doesn't include it. XXX 902 */ 903 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 904 npp->p_fragblock = opp->p_fragblock; 905 npp->p_cpg = opp->p_cpg; 906 } 907 } 908 909 /* Generate a UID if the disklabel does not already have one. */ 910 uid = 0; 911 if (memcmp(nlp->d_uid, &uid, sizeof(nlp->d_uid)) == 0) { 912 do { 913 arc4random_buf(nlp->d_uid, sizeof(nlp->d_uid)); 914 TAILQ_FOREACH(dk, &disklist, dk_link) 915 if (dk->dk_label && memcmp(dk->dk_label->d_uid, 916 nlp->d_uid, sizeof(nlp->d_uid)) == 0) 917 break; 918 } while (dk != NULL && 919 memcmp(nlp->d_uid, &uid, sizeof(nlp->d_uid)) == 0); 920 } 921 922 nlp->d_checksum = 0; 923 nlp->d_checksum = dkcksum(nlp); 924 *olp = *nlp; 925 926 disk_change = 1; 927 928 return (0); 929 } 930 931 /* 932 * Determine the size of the transfer, and make sure it is within the 933 * boundaries of the partition. Adjust transfer if needed, and signal errors or 934 * early completion. 
935 */ 936 int 937 bounds_check_with_label(struct buf *bp, struct disklabel *lp) 938 { 939 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 940 daddr_t partblocks, sz; 941 942 /* Avoid division by zero, negative offsets, and negative sizes. */ 943 if (lp->d_secpercyl == 0 || bp->b_blkno < 0 || bp->b_bcount < 0) 944 goto bad; 945 946 /* Ensure transfer is a whole number of aligned sectors. */ 947 if ((bp->b_blkno % DL_BLKSPERSEC(lp)) != 0 || 948 (bp->b_bcount % lp->d_secsize) != 0) 949 goto bad; 950 951 /* Ensure transfer starts within partition boundary. */ 952 partblocks = DL_SECTOBLK(lp, DL_GETPSIZE(p)); 953 if (bp->b_blkno > partblocks) 954 goto bad; 955 956 /* If exactly at end of partition or null transfer, return EOF. */ 957 if (bp->b_blkno == partblocks || bp->b_bcount == 0) 958 goto done; 959 960 /* Truncate request if it exceeds past the end of the partition. */ 961 sz = bp->b_bcount >> DEV_BSHIFT; 962 if (sz > partblocks - bp->b_blkno) { 963 sz = partblocks - bp->b_blkno; 964 bp->b_bcount = sz << DEV_BSHIFT; 965 } 966 967 return (0); 968 969 bad: 970 bp->b_error = EINVAL; 971 bp->b_flags |= B_ERROR; 972 done: 973 bp->b_resid = bp->b_bcount; 974 return (-1); 975 } 976 977 /* 978 * Disk error is the preface to plaintive error messages 979 * about failing disk transfers. It prints messages of the form 980 981 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 982 983 * if the offset of the error in the transfer and a disk label 984 * are both available. blkdone should be -1 if the position of the error 985 * is unknown; the disklabel pointer may be null from drivers that have not 986 * been converted to use them. The message is printed with printf 987 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 988 * The message should be completed (with at least a newline) with printf 989 * or addlog, respectively. There is no trailing space. 
990 */ 991 void 992 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 993 struct disklabel *lp) 994 { 995 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 996 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))); 997 char partname = 'a' + part; 998 daddr_t sn; 999 1000 if (pri != LOG_PRINTF) { 1001 log(pri, "%s", ""); 1002 pr = addlog; 1003 } else 1004 pr = printf; 1005 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 1006 bp->b_flags & B_READ ? "read" : "writ"); 1007 sn = bp->b_blkno; 1008 if (bp->b_bcount <= DEV_BSIZE) 1009 (*pr)("%lld", (long long)sn); 1010 else { 1011 if (blkdone >= 0) { 1012 sn += blkdone; 1013 (*pr)("%lld of ", (long long)sn); 1014 } 1015 (*pr)("%lld-%lld", (long long)bp->b_blkno, 1016 (long long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE)); 1017 } 1018 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 1019 sn += DL_SECTOBLK(lp, DL_GETPOFFSET(&lp->d_partitions[part])); 1020 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, (long long)sn, 1021 (long long)(sn / DL_SECTOBLK(lp, lp->d_secpercyl))); 1022 sn %= DL_SECTOBLK(lp, lp->d_secpercyl); 1023 (*pr)(" tn %lld sn %lld)", 1024 (long long)(sn / DL_SECTOBLK(lp, lp->d_nsectors)), 1025 (long long)(sn % DL_SECTOBLK(lp, lp->d_nsectors))); 1026 } 1027 } 1028 1029 /* 1030 * Initialize the disklist. Called by main() before autoconfiguration. 1031 */ 1032 void 1033 disk_init(void) 1034 { 1035 1036 TAILQ_INIT(&disklist); 1037 disk_count = disk_change = 0; 1038 } 1039 1040 int 1041 disk_construct(struct disk *diskp) 1042 { 1043 rw_init(&diskp->dk_lock, "dklk"); 1044 mtx_init(&diskp->dk_mtx, IPL_BIO); 1045 1046 diskp->dk_flags |= DKF_CONSTRUCTED; 1047 1048 return (0); 1049 } 1050 1051 /* 1052 * Attach a disk. 
1053 */ 1054 void 1055 disk_attach(struct device *dv, struct disk *diskp) 1056 { 1057 int majdev; 1058 1059 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 1060 disk_construct(diskp); 1061 1062 /* 1063 * Allocate and initialize the disklabel structures. Note that 1064 * it's not safe to sleep here, since we're probably going to be 1065 * called during autoconfiguration. 1066 */ 1067 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 1068 M_NOWAIT|M_ZERO); 1069 if (diskp->dk_label == NULL) 1070 panic("disk_attach: can't allocate storage for disklabel"); 1071 1072 /* 1073 * Set the attached timestamp. 1074 */ 1075 microuptime(&diskp->dk_attachtime); 1076 1077 /* 1078 * Link into the disklist. 1079 */ 1080 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 1081 ++disk_count; 1082 disk_change = 1; 1083 1084 /* 1085 * Store device structure and number for later use. 1086 */ 1087 diskp->dk_device = dv; 1088 diskp->dk_devno = NODEV; 1089 if (dv != NULL) { 1090 majdev = findblkmajor(dv); 1091 if (majdev >= 0) 1092 diskp->dk_devno = 1093 MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); 1094 } 1095 if (diskp->dk_devno != NODEV) 1096 workq_add_task(NULL, 0, disk_attach_callback, 1097 (void *)(long)(diskp->dk_devno), NULL); 1098 1099 if (softraid_disk_attach) 1100 softraid_disk_attach(diskp, 1); 1101 } 1102 1103 void 1104 disk_attach_callback(void *arg1, void *arg2) 1105 { 1106 char errbuf[100]; 1107 struct disklabel dl; 1108 struct disk *dk; 1109 dev_t dev = (dev_t)(long)arg1; 1110 1111 /* Locate disk associated with device no. */ 1112 TAILQ_FOREACH(dk, &disklist, dk_link) { 1113 if (dk->dk_devno == dev) 1114 break; 1115 } 1116 if (dk == NULL) 1117 return; 1118 1119 /* XXX: Assumes dk is part of the device softc. */ 1120 device_ref(dk->dk_device); 1121 1122 if (dk->dk_flags & (DKF_OPENED | DKF_NOLABELREAD)) 1123 goto done; 1124 1125 /* Read disklabel. 
*/ 1126 if (disk_readlabel(&dl, dev, errbuf, sizeof(errbuf)) == NULL) { 1127 add_timer_randomness(dl.d_checksum); 1128 dk->dk_flags |= DKF_LABELVALID; 1129 } 1130 1131 done: 1132 dk->dk_flags |= DKF_OPENED; 1133 device_unref(dk->dk_device); 1134 wakeup(dk); 1135 } 1136 1137 /* 1138 * Detach a disk. 1139 */ 1140 void 1141 disk_detach(struct disk *diskp) 1142 { 1143 1144 if (softraid_disk_attach) 1145 softraid_disk_attach(diskp, -1); 1146 1147 /* 1148 * Free the space used by the disklabel structures. 1149 */ 1150 free(diskp->dk_label, M_DEVBUF, sizeof(*diskp->dk_label)); 1151 1152 /* 1153 * Remove from the disklist. 1154 */ 1155 TAILQ_REMOVE(&disklist, diskp, dk_link); 1156 disk_change = 1; 1157 if (--disk_count < 0) 1158 panic("disk_detach: disk_count < 0"); 1159 } 1160 1161 int 1162 disk_openpart(struct disk *dk, int part, int fmt, int haslabel) 1163 { 1164 KASSERT(part >= 0 && part < MAXPARTITIONS); 1165 1166 /* Unless opening the raw partition, check that the partition exists. */ 1167 if (part != RAW_PART && (!haslabel || 1168 part >= dk->dk_label->d_npartitions || 1169 dk->dk_label->d_partitions[part].p_fstype == FS_UNUSED)) 1170 return (ENXIO); 1171 1172 /* Ensure the partition doesn't get changed under our feet. 
*/ 1173 switch (fmt) { 1174 case S_IFCHR: 1175 dk->dk_copenmask |= (1 << part); 1176 break; 1177 case S_IFBLK: 1178 dk->dk_bopenmask |= (1 << part); 1179 break; 1180 } 1181 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1182 1183 return (0); 1184 } 1185 1186 void 1187 disk_closepart(struct disk *dk, int part, int fmt) 1188 { 1189 KASSERT(part >= 0 && part < MAXPARTITIONS); 1190 1191 switch (fmt) { 1192 case S_IFCHR: 1193 dk->dk_copenmask &= ~(1 << part); 1194 break; 1195 case S_IFBLK: 1196 dk->dk_bopenmask &= ~(1 << part); 1197 break; 1198 } 1199 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 1200 } 1201 1202 void 1203 disk_gone(int (*open)(dev_t, int, int, struct proc *), int unit) 1204 { 1205 int bmaj, cmaj, mn; 1206 1207 /* Locate the lowest minor number to be detached. */ 1208 mn = DISKMINOR(unit, 0); 1209 1210 for (bmaj = 0; bmaj < nblkdev; bmaj++) 1211 if (bdevsw[bmaj].d_open == open) 1212 vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK); 1213 for (cmaj = 0; cmaj < nchrdev; cmaj++) 1214 if (cdevsw[cmaj].d_open == open) 1215 vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR); 1216 } 1217 1218 /* 1219 * Increment a disk's busy counter. If the counter is going from 1220 * 0 to 1, set the timestamp. 1221 */ 1222 void 1223 disk_busy(struct disk *diskp) 1224 { 1225 1226 /* 1227 * XXX We'd like to use something as accurate as microtime(), 1228 * but that doesn't depend on the system TOD clock. 1229 */ 1230 mtx_enter(&diskp->dk_mtx); 1231 if (diskp->dk_busy++ == 0) 1232 microuptime(&diskp->dk_timestamp); 1233 mtx_leave(&diskp->dk_mtx); 1234 } 1235 1236 /* 1237 * Decrement a disk's busy counter, increment the byte count, total busy 1238 * time, and reset the timestamp. 
1239 */ 1240 void 1241 disk_unbusy(struct disk *diskp, long bcount, int read) 1242 { 1243 struct timeval dv_time, diff_time; 1244 1245 mtx_enter(&diskp->dk_mtx); 1246 1247 if (diskp->dk_busy-- == 0) 1248 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 1249 1250 microuptime(&dv_time); 1251 1252 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 1253 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 1254 1255 diskp->dk_timestamp = dv_time; 1256 if (bcount > 0) { 1257 if (read) { 1258 diskp->dk_rbytes += bcount; 1259 diskp->dk_rxfer++; 1260 } else { 1261 diskp->dk_wbytes += bcount; 1262 diskp->dk_wxfer++; 1263 } 1264 } else 1265 diskp->dk_seek++; 1266 1267 mtx_leave(&diskp->dk_mtx); 1268 1269 add_disk_randomness(bcount ^ diff_time.tv_usec); 1270 } 1271 1272 int 1273 disk_lock(struct disk *dk) 1274 { 1275 return (rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR)); 1276 } 1277 1278 void 1279 disk_lock_nointr(struct disk *dk) 1280 { 1281 rw_enter_write(&dk->dk_lock); 1282 } 1283 1284 void 1285 disk_unlock(struct disk *dk) 1286 { 1287 rw_exit_write(&dk->dk_lock); 1288 } 1289 1290 int 1291 dk_mountroot(void) 1292 { 1293 char errbuf[100]; 1294 int part = DISKPART(rootdev); 1295 int (*mountrootfn)(void); 1296 struct disklabel dl; 1297 char *error; 1298 1299 error = disk_readlabel(&dl, rootdev, errbuf, sizeof(errbuf)); 1300 if (error) 1301 panic("%s", error); 1302 1303 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 1304 panic("root filesystem has size 0"); 1305 switch (dl.d_partitions[part].p_fstype) { 1306 #ifdef EXT2FS 1307 case FS_EXT2FS: 1308 { 1309 extern int ext2fs_mountroot(void); 1310 mountrootfn = ext2fs_mountroot; 1311 } 1312 break; 1313 #endif 1314 #ifdef FFS 1315 case FS_BSDFFS: 1316 { 1317 extern int ffs_mountroot(void); 1318 mountrootfn = ffs_mountroot; 1319 } 1320 break; 1321 #endif 1322 #ifdef CD9660 1323 case FS_ISO9660: 1324 { 1325 extern int cd9660_mountroot(void); 1326 mountrootfn = cd9660_mountroot; 1327 } 1328 break; 1329 #endif 1330 default: 
1331 #ifdef FFS 1332 { 1333 extern int ffs_mountroot(void); 1334 1335 printf("filesystem type %d not known.. assuming ffs\n", 1336 dl.d_partitions[part].p_fstype); 1337 mountrootfn = ffs_mountroot; 1338 } 1339 #else 1340 panic("disk 0x%x filesystem type %d not known", 1341 rootdev, dl.d_partitions[part].p_fstype); 1342 #endif 1343 } 1344 return (*mountrootfn)(); 1345 } 1346 1347 struct device * 1348 getdisk(char *str, int len, int defpart, dev_t *devp) 1349 { 1350 struct device *dv; 1351 1352 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1353 printf("use one of: exit"); 1354 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1355 if (dv->dv_class == DV_DISK) 1356 printf(" %s[a-p]", dv->dv_xname); 1357 #if defined(NFSCLIENT) 1358 if (dv->dv_class == DV_IFNET) 1359 printf(" %s", dv->dv_xname); 1360 #endif 1361 } 1362 printf("\n"); 1363 } 1364 return (dv); 1365 } 1366 1367 struct device * 1368 parsedisk(char *str, int len, int defpart, dev_t *devp) 1369 { 1370 struct device *dv; 1371 int majdev, part = defpart; 1372 char c; 1373 1374 if (len == 0) 1375 return (NULL); 1376 c = str[len-1]; 1377 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1378 part = c - 'a'; 1379 len -= 1; 1380 } 1381 1382 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1383 if (dv->dv_class == DV_DISK && 1384 strncmp(str, dv->dv_xname, len) == 0 && 1385 dv->dv_xname[len] == '\0') { 1386 majdev = findblkmajor(dv); 1387 if (majdev < 0) 1388 return NULL; 1389 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1390 break; 1391 } 1392 #if defined(NFSCLIENT) 1393 if (dv->dv_class == DV_IFNET && 1394 strncmp(str, dv->dv_xname, len) == 0 && 1395 dv->dv_xname[len] == '\0') { 1396 *devp = NODEV; 1397 break; 1398 } 1399 #endif 1400 } 1401 1402 return (dv); 1403 } 1404 1405 void 1406 setroot(struct device *bootdv, int part, int exitflags) 1407 { 1408 int majdev, unit, len, s, slept = 0; 1409 struct swdevt *swp; 1410 struct device *rootdv, *dv; 1411 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1412 struct ifnet *ifp = NULL; 
1413 struct disk *dk; 1414 u_char duid[8]; 1415 char buf[128]; 1416 #if defined(NFSCLIENT) 1417 extern char *nfsbootdevname; 1418 #endif 1419 1420 /* Ensure that all disk attach callbacks have completed. */ 1421 do { 1422 TAILQ_FOREACH(dk, &disklist, dk_link) { 1423 if (dk->dk_devno != NODEV && 1424 (dk->dk_flags & DKF_OPENED) == 0) { 1425 tsleep(dk, 0, "dkopen", hz); 1426 slept++; 1427 break; 1428 } 1429 } 1430 } while (dk != NULL && slept < 5); 1431 1432 if (slept == 5) { 1433 printf("disklabels not read:"); 1434 TAILQ_FOREACH(dk, &disklist, dk_link) 1435 if (dk->dk_devno != NODEV && 1436 (dk->dk_flags & DKF_OPENED) == 0) 1437 printf(" %s", dk->dk_name); 1438 printf("\n"); 1439 } 1440 1441 /* Locate DUID for boot disk if not already provided. */ 1442 memset(duid, 0, sizeof(duid)); 1443 if (memcmp(bootduid, duid, sizeof(bootduid)) == 0) { 1444 TAILQ_FOREACH(dk, &disklist, dk_link) 1445 if (dk->dk_device == bootdv) 1446 break; 1447 if (dk && (dk->dk_flags & DKF_LABELVALID)) 1448 bcopy(dk->dk_label->d_uid, bootduid, sizeof(bootduid)); 1449 } 1450 bcopy(bootduid, rootduid, sizeof(rootduid)); 1451 1452 #if NSOFTRAID > 0 1453 sr_map_root(); 1454 #endif 1455 1456 /* 1457 * If `swap generic' and we couldn't determine boot device, 1458 * ask the user. 
1459 */ 1460 dk = NULL; 1461 if (mountroot == NULL && bootdv == NULL) 1462 boothowto |= RB_ASKNAME; 1463 if (boothowto & RB_ASKNAME) { 1464 while (1) { 1465 printf("root device"); 1466 if (bootdv != NULL) { 1467 printf(" (default %s", bootdv->dv_xname); 1468 if (bootdv->dv_class == DV_DISK) 1469 printf("%c", 'a' + part); 1470 printf(")"); 1471 } 1472 printf(": "); 1473 s = splhigh(); 1474 cnpollc(TRUE); 1475 len = getsn(buf, sizeof(buf)); 1476 cnpollc(FALSE); 1477 splx(s); 1478 if (strcmp(buf, "exit") == 0) 1479 reboot(exitflags); 1480 if (len == 0 && bootdv != NULL) { 1481 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1482 len = strlen(buf); 1483 } 1484 if (len > 0 && buf[len - 1] == '*') { 1485 buf[--len] = '\0'; 1486 dv = getdisk(buf, len, part, &nrootdev); 1487 if (dv != NULL) { 1488 rootdv = dv; 1489 nswapdev = nrootdev; 1490 goto gotswap; 1491 } 1492 } 1493 dv = getdisk(buf, len, part, &nrootdev); 1494 if (dv != NULL) { 1495 rootdv = dv; 1496 break; 1497 } 1498 } 1499 1500 if (rootdv->dv_class == DV_IFNET) 1501 goto gotswap; 1502 1503 /* try to build swap device out of new root device */ 1504 while (1) { 1505 printf("swap device"); 1506 if (rootdv != NULL) 1507 printf(" (default %s%s)", rootdv->dv_xname, 1508 rootdv->dv_class == DV_DISK ? 
"b" : ""); 1509 printf(": "); 1510 s = splhigh(); 1511 cnpollc(TRUE); 1512 len = getsn(buf, sizeof(buf)); 1513 cnpollc(FALSE); 1514 splx(s); 1515 if (strcmp(buf, "exit") == 0) 1516 reboot(exitflags); 1517 if (len == 0 && rootdv != NULL) { 1518 switch (rootdv->dv_class) { 1519 case DV_IFNET: 1520 nswapdev = NODEV; 1521 break; 1522 case DV_DISK: 1523 nswapdev = MAKEDISKDEV(major(nrootdev), 1524 DISKUNIT(nrootdev), 1); 1525 if (nswapdev == nrootdev) 1526 continue; 1527 break; 1528 default: 1529 break; 1530 } 1531 break; 1532 } 1533 dv = getdisk(buf, len, 1, &nswapdev); 1534 if (dv) { 1535 if (dv->dv_class == DV_IFNET) 1536 nswapdev = NODEV; 1537 if (nswapdev == nrootdev) 1538 continue; 1539 break; 1540 } 1541 } 1542 gotswap: 1543 rootdev = nrootdev; 1544 dumpdev = nswapdev; 1545 swdevt[0].sw_dev = nswapdev; 1546 swdevt[1].sw_dev = NODEV; 1547 #if defined(NFSCLIENT) 1548 } else if (mountroot == nfs_mountroot) { 1549 rootdv = bootdv; 1550 rootdev = dumpdev = swapdev = NODEV; 1551 #endif 1552 } else if (mountroot == NULL && rootdev == NODEV) { 1553 /* 1554 * `swap generic' 1555 */ 1556 rootdv = bootdv; 1557 1558 if (bootdv->dv_class == DV_DISK) { 1559 memset(&duid, 0, sizeof(duid)); 1560 if (memcmp(rootduid, &duid, sizeof(rootduid)) != 0) { 1561 TAILQ_FOREACH(dk, &disklist, dk_link) 1562 if ((dk->dk_flags & DKF_LABELVALID) && 1563 dk->dk_label && memcmp(dk->dk_label->d_uid, 1564 &rootduid, sizeof(rootduid)) == 0) 1565 break; 1566 if (dk == NULL) 1567 panic("root device (%02hx%02hx%02hx%02hx" 1568 "%02hx%02hx%02hx%02hx) not found", 1569 rootduid[0], rootduid[1], rootduid[2], 1570 rootduid[3], rootduid[4], rootduid[5], 1571 rootduid[6], rootduid[7]); 1572 rootdv = dk->dk_device; 1573 } 1574 } 1575 1576 majdev = findblkmajor(rootdv); 1577 if (majdev >= 0) { 1578 /* 1579 * Root and swap are on the disk. 1580 * Assume swap is on partition b. 
1581 */ 1582 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1583 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1584 } else { 1585 /* 1586 * Root and swap are on a net. 1587 */ 1588 nswapdev = NODEV; 1589 } 1590 dumpdev = nswapdev; 1591 swdevt[0].sw_dev = nswapdev; 1592 /* swdevt[1].sw_dev = NODEV; */ 1593 } else { 1594 /* Completely pre-configured, but we want rootdv .. */ 1595 majdev = major(rootdev); 1596 if (findblkname(majdev) == NULL) 1597 return; 1598 unit = DISKUNIT(rootdev); 1599 part = DISKPART(rootdev); 1600 snprintf(buf, sizeof buf, "%s%d%c", 1601 findblkname(majdev), unit, 'a' + part); 1602 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1603 if (rootdv == NULL) 1604 panic("root device (%s) not found", buf); 1605 } 1606 1607 if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET) 1608 ifp = ifunit(rootdv->dv_xname); 1609 else if (bootdv && bootdv->dv_class == DV_IFNET) 1610 ifp = ifunit(bootdv->dv_xname); 1611 1612 if (ifp) 1613 if_addgroup(ifp, "netboot"); 1614 1615 switch (rootdv->dv_class) { 1616 #if defined(NFSCLIENT) 1617 case DV_IFNET: 1618 mountroot = nfs_mountroot; 1619 nfsbootdevname = rootdv->dv_xname; 1620 return; 1621 #endif 1622 case DV_DISK: 1623 mountroot = dk_mountroot; 1624 part = DISKPART(rootdev); 1625 break; 1626 default: 1627 printf("can't figure root, hope your kernel is right\n"); 1628 return; 1629 } 1630 1631 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1632 1633 if (dk && dk->dk_device == rootdv) 1634 printf(" (%02hx%02hx%02hx%02hx%02hx%02hx%02hx%02hx.%c)", 1635 rootduid[0], rootduid[1], rootduid[2], rootduid[3], 1636 rootduid[4], rootduid[5], rootduid[6], rootduid[7], 1637 'a' + part); 1638 1639 /* 1640 * Make the swap partition on the root drive the primary swap. 
1641 */ 1642 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1643 if (major(rootdev) == major(swp->sw_dev) && 1644 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1645 temp = swdevt[0].sw_dev; 1646 swdevt[0].sw_dev = swp->sw_dev; 1647 swp->sw_dev = temp; 1648 break; 1649 } 1650 } 1651 if (swp->sw_dev != NODEV) { 1652 /* 1653 * If dumpdev was the same as the old primary swap device, 1654 * move it to the new primary swap device. 1655 */ 1656 if (temp == dumpdev) 1657 dumpdev = swdevt[0].sw_dev; 1658 } 1659 if (swdevt[0].sw_dev != NODEV) 1660 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1661 DISKUNIT(swdevt[0].sw_dev), 1662 'a' + DISKPART(swdevt[0].sw_dev)); 1663 if (dumpdev != NODEV) 1664 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1665 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1666 printf("\n"); 1667 } 1668 1669 extern struct nam2blk nam2blk[]; 1670 1671 int 1672 findblkmajor(struct device *dv) 1673 { 1674 char buf[16], *p; 1675 int i; 1676 1677 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1678 return (-1); 1679 for (p = buf; *p; p++) 1680 if (*p >= '0' && *p <= '9') 1681 *p = '\0'; 1682 1683 for (i = 0; nam2blk[i].name; i++) 1684 if (!strcmp(buf, nam2blk[i].name)) 1685 return (nam2blk[i].maj); 1686 return (-1); 1687 } 1688 1689 char * 1690 findblkname(int maj) 1691 { 1692 int i; 1693 1694 for (i = 0; nam2blk[i].name; i++) 1695 if (nam2blk[i].maj == maj) 1696 return (nam2blk[i].name); 1697 return (NULL); 1698 } 1699 1700 char * 1701 disk_readlabel(struct disklabel *dl, dev_t dev, char *errbuf, size_t errsize) 1702 { 1703 struct vnode *vn; 1704 dev_t chrdev, rawdev; 1705 int error; 1706 1707 chrdev = blktochr(dev); 1708 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(chrdev), RAW_PART); 1709 1710 #ifdef DEBUG 1711 printf("dev=0x%x chrdev=0x%x rawdev=0x%x\n", dev, chrdev, rawdev); 1712 #endif 1713 1714 if (cdevvp(rawdev, &vn)) { 1715 snprintf(errbuf, errsize, 1716 "cannot obtain vnode for 0x%x/0x%x", dev, rawdev); 1717 
return (errbuf); 1718 } 1719 1720 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1721 if (error) { 1722 snprintf(errbuf, errsize, 1723 "cannot open disk, 0x%x/0x%x, error %d", 1724 dev, rawdev, error); 1725 goto done; 1726 } 1727 1728 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)dl, FREAD, NOCRED, curproc); 1729 if (error) { 1730 snprintf(errbuf, errsize, 1731 "cannot read disk label, 0x%x/0x%x, error %d", 1732 dev, rawdev, error); 1733 } 1734 done: 1735 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1736 vput(vn); 1737 if (error) 1738 return (errbuf); 1739 return (NULL); 1740 } 1741 1742 int 1743 disk_map(char *path, char *mappath, int size, int flags) 1744 { 1745 struct disk *dk, *mdk; 1746 u_char uid[8]; 1747 char c, part; 1748 int i; 1749 1750 /* 1751 * Attempt to map a request for a disklabel UID to the correct device. 1752 * We should be supplied with a disklabel UID which has the following 1753 * format: 1754 * 1755 * [disklabel uid] . [partition] 1756 * 1757 * Alternatively, if the DM_OPENPART flag is set the disklabel UID can 1758 * based passed on its own. 1759 */ 1760 1761 if (strchr(path, '/') != NULL) 1762 return -1; 1763 1764 /* Verify that the device name is properly formed. */ 1765 if (!((strlen(path) == 16 && (flags & DM_OPENPART)) || 1766 (strlen(path) == 18 && path[16] == '.'))) 1767 return -1; 1768 1769 /* Get partition. */ 1770 if (flags & DM_OPENPART) 1771 part = 'a' + RAW_PART; 1772 else 1773 part = path[17]; 1774 1775 if (part < 'a' || part >= 'a' + MAXPARTITIONS) 1776 return -1; 1777 1778 /* Derive label UID. 
*/ 1779 memset(uid, 0, sizeof(uid)); 1780 for (i = 0; i < 16; i++) { 1781 c = path[i]; 1782 if (c >= '0' && c <= '9') 1783 c -= '0'; 1784 else if (c >= 'a' && c <= 'f') 1785 c -= ('a' - 10); 1786 else 1787 return -1; 1788 1789 uid[i / 2] <<= 4; 1790 uid[i / 2] |= c & 0xf; 1791 } 1792 1793 mdk = NULL; 1794 TAILQ_FOREACH(dk, &disklist, dk_link) { 1795 if ((dk->dk_flags & DKF_LABELVALID) && dk->dk_label && 1796 memcmp(dk->dk_label->d_uid, uid, 1797 sizeof(dk->dk_label->d_uid)) == 0) { 1798 /* Fail if there are duplicate UIDs! */ 1799 if (mdk != NULL) 1800 return -1; 1801 mdk = dk; 1802 } 1803 } 1804 1805 if (mdk == NULL || mdk->dk_name == NULL) 1806 return -1; 1807 1808 snprintf(mappath, size, "/dev/%s%s%c", 1809 (flags & DM_OPENBLCK) ? "" : "r", mdk->dk_name, part); 1810 1811 return 0; 1812 } 1813 1814 /* 1815 * Lookup a disk device and verify that it has completed attaching. 1816 */ 1817 struct device * 1818 disk_lookup(struct cfdriver *cd, int unit) 1819 { 1820 struct device *dv; 1821 struct disk *dk; 1822 1823 dv = device_lookup(cd, unit); 1824 if (dv == NULL) 1825 return (NULL); 1826 1827 TAILQ_FOREACH(dk, &disklist, dk_link) 1828 if (dk->dk_device == dv) 1829 break; 1830 1831 if (dk == NULL) { 1832 device_unref(dv); 1833 return (NULL); 1834 } 1835 1836 return (dv); 1837 } 1838