1 /* $OpenBSD: subr_disk.c,v 1.73 2008/04/07 23:10:24 krw Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/stat.h> 48 #include <sys/syslog.h> 49 #include <sys/device.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/conf.h> 53 #include <sys/lock.h> 54 #include <sys/disk.h> 55 #include <sys/reboot.h> 56 #include <sys/dkio.h> 57 #include <sys/dkstat.h> /* XXX */ 58 #include <sys/proc.h> 59 #include <uvm/uvm_extern.h> 60 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 64 #include <net/if.h> 65 66 #include <dev/rndvar.h> 67 #include <dev/cons.h> 68 69 /* 70 * A global list of all disks attached to the system. May grow or 71 * shrink over time. 72 */ 73 struct disklist_head disklist; /* TAILQ_HEAD */ 74 int disk_count; /* number of drives in global disklist */ 75 int disk_change; /* set if a disk has been attached/detached 76 * since last we looked at this variable. This 77 * is reset by hw_sysctl() 78 */ 79 80 /* 81 * Seek sort for disks. We depend on the driver which calls us using b_resid 82 * as the current cylinder number. 83 * 84 * The argument ap structure holds a b_actf activity chain pointer on which we 85 * keep two queues, sorted in ascending cylinder order. 
The first queue holds 86 * those requests which are positioned after the current cylinder (in the first 87 * request); the second holds requests which came in after their cylinder number 88 * was passed. Thus we implement a one way scan, retracting after reaching the 89 * end of the drive to the first request on the second queue, at which time it 90 * becomes the first queue. 91 * 92 * A one-way scan is natural because of the way UNIX read-ahead blocks are 93 * allocated. 94 */ 95 96 void 97 disksort(struct buf *ap, struct buf *bp) 98 { 99 struct buf *bq; 100 101 /* If the queue is empty, then it's easy. */ 102 if (ap->b_actf == NULL) { 103 bp->b_actf = NULL; 104 ap->b_actf = bp; 105 return; 106 } 107 108 /* 109 * If we lie after the first (currently active) request, then we 110 * must locate the second request list and add ourselves to it. 111 */ 112 bq = ap->b_actf; 113 if (bp->b_cylinder < bq->b_cylinder) { 114 while (bq->b_actf) { 115 /* 116 * Check for an ``inversion'' in the normally ascending 117 * cylinder numbers, indicating the start of the second 118 * request list. 119 */ 120 if (bq->b_actf->b_cylinder < bq->b_cylinder) { 121 /* 122 * Search the second request list for the first 123 * request at a larger cylinder number. We go 124 * before that; if there is no such request, we 125 * go at end. 126 */ 127 do { 128 if (bp->b_cylinder < 129 bq->b_actf->b_cylinder) 130 goto insert; 131 if (bp->b_cylinder == 132 bq->b_actf->b_cylinder && 133 bp->b_blkno < bq->b_actf->b_blkno) 134 goto insert; 135 bq = bq->b_actf; 136 } while (bq->b_actf); 137 goto insert; /* after last */ 138 } 139 bq = bq->b_actf; 140 } 141 /* 142 * No inversions... we will go after the last, and 143 * be the first request in the second request list. 144 */ 145 goto insert; 146 } 147 /* 148 * Request is at/after the current request... 149 * sort in the first request list. 
150 */ 151 while (bq->b_actf) { 152 /* 153 * We want to go after the current request if there is an 154 * inversion after it (i.e. it is the end of the first 155 * request list), or if the next request is a larger cylinder 156 * than our request. 157 */ 158 if (bq->b_actf->b_cylinder < bq->b_cylinder || 159 bp->b_cylinder < bq->b_actf->b_cylinder || 160 (bp->b_cylinder == bq->b_actf->b_cylinder && 161 bp->b_blkno < bq->b_actf->b_blkno)) 162 goto insert; 163 bq = bq->b_actf; 164 } 165 /* 166 * Neither a second list nor a larger request... we go at the end of 167 * the first list, which is the same as the end of the whole schebang. 168 */ 169 insert: bp->b_actf = bq->b_actf; 170 bq->b_actf = bp; 171 } 172 173 /* 174 * Compute checksum for disk label. 175 */ 176 u_int 177 dkcksum(struct disklabel *lp) 178 { 179 u_int16_t *start, *end; 180 u_int16_t sum = 0; 181 182 start = (u_int16_t *)lp; 183 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 184 while (start < end) 185 sum ^= *start++; 186 return (sum); 187 } 188 189 char * 190 initdisklabel(struct disklabel *lp) 191 { 192 int i; 193 194 /* minimal requirements for archetypal disk label */ 195 if (lp->d_secsize < DEV_BSIZE) 196 lp->d_secsize = DEV_BSIZE; 197 if (DL_GETDSIZE(lp) == 0) 198 DL_SETDSIZE(lp, MAXDISKSIZE); 199 if (lp->d_secpercyl == 0) 200 return ("invalid geometry"); 201 lp->d_npartitions = RAW_PART + 1; 202 for (i = 0; i < RAW_PART; i++) { 203 DL_SETPSIZE(&lp->d_partitions[i], 0); 204 DL_SETPOFFSET(&lp->d_partitions[i], 0); 205 } 206 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 207 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 208 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 209 lp->d_version = 1; 210 lp->d_bbsize = 8192; 211 lp->d_sbsize = 64*1024; /* XXX ? */ 212 return (NULL); 213 } 214 215 /* 216 * Check an incoming block to make sure it is a disklabel, convert it to 217 * a newer version if needed, etc etc. 
218 */ 219 char * 220 checkdisklabel(void *rlp, struct disklabel *lp) 221 { 222 struct disklabel *dlp = rlp; 223 struct __partitionv0 *v0pp; 224 struct partition *pp; 225 daddr64_t disksize; 226 char *msg = NULL; 227 int i; 228 229 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) 230 msg = "no disk label"; 231 else if (dlp->d_npartitions > MAXPARTITIONS) 232 msg = "unreasonable partition count"; 233 else if (dkcksum(dlp) != 0) 234 msg = "disk label corrupted"; 235 236 if (msg) { 237 u_int16_t *start, *end, sum = 0; 238 239 /* If it is byte-swapped, attempt to convert it */ 240 if (swap32(dlp->d_magic) != DISKMAGIC || 241 swap32(dlp->d_magic2) != DISKMAGIC || 242 swap16(dlp->d_npartitions) > MAXPARTITIONS) 243 return (msg); 244 245 /* 246 * Need a byte-swap aware dkcksum varient 247 * inlined, because dkcksum uses a sub-field 248 */ 249 start = (u_int16_t *)dlp; 250 end = (u_int16_t *)&dlp->d_partitions[ 251 swap16(dlp->d_npartitions)]; 252 while (start < end) 253 sum ^= *start++; 254 if (sum != 0) 255 return (msg); 256 257 dlp->d_magic = swap32(dlp->d_magic); 258 dlp->d_type = swap16(dlp->d_type); 259 dlp->d_subtype = swap16(dlp->d_subtype); 260 261 /* d_typename and d_packname are strings */ 262 263 dlp->d_secsize = swap32(dlp->d_secsize); 264 dlp->d_nsectors = swap32(dlp->d_nsectors); 265 dlp->d_ntracks = swap32(dlp->d_ntracks); 266 dlp->d_ncylinders = swap32(dlp->d_ncylinders); 267 dlp->d_secpercyl = swap32(dlp->d_secpercyl); 268 dlp->d_secperunit = swap32(dlp->d_secperunit); 269 270 dlp->d_sparespertrack = swap16(dlp->d_sparespertrack); 271 dlp->d_sparespercyl = swap16(dlp->d_sparespercyl); 272 273 dlp->d_acylinders = swap32(dlp->d_acylinders); 274 275 dlp->d_rpm = swap16(dlp->d_rpm); 276 dlp->d_interleave = swap16(dlp->d_interleave); 277 dlp->d_trackskew = swap16(dlp->d_trackskew); 278 dlp->d_cylskew = swap16(dlp->d_cylskew); 279 dlp->d_headswitch = swap32(dlp->d_headswitch); 280 dlp->d_trkseek = swap32(dlp->d_trkseek); 281 dlp->d_flags = 
swap32(dlp->d_flags); 282 283 for (i = 0; i < NDDATA; i++) 284 dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]); 285 286 dlp->d_secperunith = swap16(dlp->d_secperunith); 287 dlp->d_version = swap16(dlp->d_version); 288 289 for (i = 0; i < NSPARE; i++) 290 dlp->d_spare[i] = swap32(dlp->d_spare[i]); 291 292 dlp->d_magic2 = swap32(dlp->d_magic2); 293 dlp->d_checksum = swap16(dlp->d_checksum); 294 295 dlp->d_npartitions = swap16(dlp->d_npartitions); 296 dlp->d_bbsize = swap32(dlp->d_bbsize); 297 dlp->d_sbsize = swap32(dlp->d_sbsize); 298 299 for (i = 0; i < MAXPARTITIONS; i++) { 300 pp = &dlp->d_partitions[i]; 301 pp->p_size = swap32(pp->p_size); 302 pp->p_offset = swap32(pp->p_offset); 303 if (dlp->d_version == 0) { 304 v0pp = (struct __partitionv0 *)pp; 305 v0pp->p_fsize = swap32(v0pp->p_fsize); 306 } else { 307 pp->p_offseth = swap16(pp->p_offseth); 308 pp->p_sizeh = swap16(pp->p_sizeh); 309 } 310 pp->p_cpg = swap16(pp->p_cpg); 311 } 312 313 dlp->d_checksum = 0; 314 dlp->d_checksum = dkcksum(dlp); 315 msg = NULL; 316 } 317 318 /* XXX should verify lots of other fields and whine a lot */ 319 320 if (msg) 321 return (msg); 322 323 /* Initial passed in lp contains the real disk size. 
*/ 324 disksize = DL_GETDSIZE(lp); 325 326 if (lp != dlp) 327 *lp = *dlp; 328 329 if (lp->d_version == 0) { 330 lp->d_version = 1; 331 lp->d_secperunith = 0; 332 333 v0pp = (struct __partitionv0 *)lp->d_partitions; 334 pp = lp->d_partitions; 335 for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) { 336 pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp-> 337 p_fsize, v0pp->p_frag); 338 pp->p_offseth = 0; 339 pp->p_sizeh = 0; 340 } 341 } 342 343 #ifdef DEBUG 344 if (DL_GETDSIZE(lp) != disksize) 345 printf("on-disk disklabel has incorrect disksize (%lld)\n", 346 DL_GETDSIZE(lp)); 347 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize) 348 printf("on-disk disklabel RAW_PART has incorrect size (%lld)\n", 349 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 350 if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0) 351 printf("on-disk disklabel RAW_PART offset != 0 (%lld)\n", 352 DL_GETPOFFSET(&lp->d_partitions[RAW_PART])); 353 #endif 354 DL_SETDSIZE(lp, disksize); 355 DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize); 356 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 357 358 lp->d_checksum = 0; 359 lp->d_checksum = dkcksum(lp); 360 return (msg); 361 } 362 363 /* 364 * If dos partition table requested, attempt to load it and 365 * find disklabel inside a DOS partition. Return buffer 366 * for use in signalling errors if requested. 367 * 368 * We would like to check if each MBR has a valid BOOT_MAGIC, but 369 * we cannot because it doesn't always exist. So.. we assume the 370 * MBR is valid. 
*/
/*
 * bp is the buffer used for every read and strat the strategy routine the
 * reads are issued through.  lp is the in-core label; its partitions from
 * index 8 upward receive spoofed entries for the non-OpenBSD DOS partitions
 * that were found.  If partoffp is non-NULL it receives the sector offset of
 * the OpenBSD MBR partition (0 if none was found, -1 on a boot record I/O
 * error).  When spoofonly is set the on-disk label is not read.
 *
 * Returns NULL on success (or the result of checkdisklabel()), otherwise
 * an error string.
 */
char *
readdoslabel(struct buf *bp, void (*strat)(struct buf *),
    struct disklabel *lp, int *partoffp, int spoofonly)
{
	struct dos_partition dp[NDOSPART], *dp2;
	u_int32_t extoff = 0;
	daddr64_t part_blkno = DOSBBSECTOR;
	int dospartoff = 0, i, ourpart = -1;
	int wander = 1, n = 0, loop = 0;

	if (lp->d_secpercyl == 0)
		return ("invalid label, d_secpercyl == 0");
	if (lp->d_secsize == 0)
		return ("invalid label, d_secsize == 0");

	/* do DOS partitions in the process of getting disklabel? */

	/*
	 * Read dos partition table, follow extended partitions.
	 * Map the partitions to disklabel entries i-p
	 *
	 * The loop ends when no extended partition was seen in the last
	 * table (wander stays 0), when 8 entries have been spoofed, or
	 * after 8 boot records have been read.
	 */
	while (wander && n < 8 && loop < 8) {
		loop++;
		wander = 0;
		if (part_blkno < extoff)
			part_blkno = extoff;

		/* read boot record */
		bp->b_blkno = part_blkno;
		bp->b_bcount = lp->d_secsize;
		bp->b_flags = B_BUSY | B_READ;
		(*strat)(bp);
		if (biowait(bp)) {
			/* the two lines below carry the original author's "wrong" marker */
/*wrong*/		if (partoffp)
/*wrong*/			*partoffp = -1;
			return ("dos partition I/O error");
		}

		/* work on a private copy of the partition table */
		bcopy(bp->b_data + DOSPARTOFF, dp, sizeof(dp));

		/* only the primary boot record is searched for our partition */
		if (ourpart == -1 && part_blkno == DOSBBSECTOR) {
			/* Search for our MBR partition */
			for (dp2=dp, i=0; i < NDOSPART && ourpart == -1;
			    i++, dp2++)
				if (letoh32(dp2->dp_size) &&
				    dp2->dp_typ == DOSPTYP_OPENBSD)
					ourpart = i;
			if (ourpart == -1)
				goto donot;
			/*
			 * This is our MBR partition. need sector
			 * address for SCSI/IDE, cylinder for
			 * ESDI/ST506/RLL
			 */
			dp2 = &dp[ourpart];
			dospartoff = letoh32(dp2->dp_start) + part_blkno;

			/*
			 * found our OpenBSD partition, finish up
			 *
			 * Note this leaves the scan loop entirely: no
			 * partitions are spoofed and d_npartitions is not
			 * updated on this path.
			 */
			if (partoffp)
				goto notfat;

			/* fill in geometry the label is still missing */
			if (lp->d_ntracks == 0)
				lp->d_ntracks = dp2->dp_ehd + 1;
			if (lp->d_nsectors == 0)
				lp->d_nsectors = DPSECT(dp2->dp_esect);
			if (lp->d_secpercyl == 0)
				lp->d_secpercyl = lp->d_ntracks *
				    lp->d_nsectors;
		}
donot:
		/*
		 * In case the disklabel read below fails, we want to
		 * provide a fake label in i-p.
		 */
		for (dp2=dp, i=0; i < NDOSPART && n < 8; i++, dp2++) {
			struct partition *pp = &lp->d_partitions[8+n];

			/* skip our own partition and entries that are empty
			 * or larger than / start beyond the disk size */
			if (dp2->dp_typ == DOSPTYP_OPENBSD)
				continue;
			if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp))
				continue;
			if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp))
				continue;
			if (letoh32(dp2->dp_size) == 0)
				continue;
			if (letoh32(dp2->dp_start))
				DL_SETPOFFSET(pp,
				    letoh32(dp2->dp_start) + part_blkno);

			DL_SETPSIZE(pp, letoh32(dp2->dp_size));

			/*
			 * n only advances for entries that become a spoofed
			 * partition; an extended entry reuses the slot.
			 */
			switch (dp2->dp_typ) {
			case DOSPTYP_UNUSED:
				pp->p_fstype = FS_UNUSED;
				n++;
				break;

			case DOSPTYP_LINUX:
				pp->p_fstype = FS_EXT2FS;
				n++;
				break;

			case DOSPTYP_NTFS:
				pp->p_fstype = FS_NTFS;
				n++;
				break;

			case DOSPTYP_FAT12:
			case DOSPTYP_FAT16S:
			case DOSPTYP_FAT16B:
			case DOSPTYP_FAT16L:
			case DOSPTYP_FAT32:
			case DOSPTYP_FAT32L:
				pp->p_fstype = FS_MSDOS;
				n++;
				break;
			case DOSPTYP_EXTEND:
			case DOSPTYP_EXTENDL:
				/*
				 * Follow the chain: the first extended entry
				 * records extoff, later ones are relative
				 * to it.  part_blkno is raised to extoff at
				 * the top of the next iteration if needed.
				 */
				part_blkno = letoh32(dp2->dp_start) + extoff;
				if (!extoff) {
					extoff = letoh32(dp2->dp_start);
					part_blkno = 0;
				}
				wander = 1;
				break;
			default:
				pp->p_fstype = FS_OTHER;
				n++;
				break;
			}
		}
	}
	lp->d_npartitions = MAXPARTITIONS;

	/*
	 * Nothing was spoofed and only the primary boot record was read:
	 * test whether the sector still in bp looks like a FAT boot sector.
	 */
	if (n == 0 && part_blkno == DOSBBSECTOR) {
		u_int16_t fattest;

		/* Check for a short jump instruction. */
		fattest = ((bp->b_data[0] << 8) & 0xff00) |
		    (bp->b_data[2] & 0xff);
		if (fattest != 0xeb90 && fattest != 0xe900)
			goto notfat;

		/* Check for a valid bytes per sector value. */
		fattest = ((bp->b_data[12] << 8) & 0xff00) |
		    (bp->b_data[11] & 0xff);
		if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0))
			goto notfat;

		/* Check the end of sector marker. */
		fattest = ((bp->b_data[510] << 8) & 0xff00) |
		    (bp->b_data[511] & 0xff);
		if (fattest != 0x55aa)
			goto notfat;

		/* Looks like a FAT filesystem. Spoof 'i'. */
		DL_SETPSIZE(&lp->d_partitions['i' - 'a'],
		    DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
		DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0);
		lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS;
	}
notfat:

	/* record the OpenBSD partition's placement for the caller */
	if (partoffp)
		*partoffp = dospartoff;

	/* don't read the on-disk label if we are in spoofed-only mode */
	if (spoofonly)
		return (NULL);

	/* dospartoff is 0 here when no OpenBSD partition was found */
	bp->b_blkno = dospartoff + DOS_LABELSECTOR;
	bp->b_bcount = lp->d_secsize;
	bp->b_flags = B_BUSY | B_READ;
	(*strat)(bp);
	if (biowait(bp))
		return ("disk label I/O error");

	/* sub-MBR disklabels are always at a LABELOFFSET of 0 */
	return checkdisklabel(bp->b_data, lp);
}

/*
 * Check new disk label for sensibility
 * before setting it.
557 */ 558 int 559 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 560 { 561 int i; 562 struct partition *opp, *npp; 563 564 /* sanity clause */ 565 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 566 (nlp->d_secsize % DEV_BSIZE) != 0) 567 return (EINVAL); 568 569 /* special case to allow disklabel to be invalidated */ 570 if (nlp->d_magic == 0xffffffff) { 571 *olp = *nlp; 572 return (0); 573 } 574 575 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 576 dkcksum(nlp) != 0) 577 return (EINVAL); 578 579 /* XXX missing check if other dos partitions will be overwritten */ 580 581 while (openmask != 0) { 582 i = ffs(openmask) - 1; 583 openmask &= ~(1 << i); 584 if (nlp->d_npartitions <= i) 585 return (EBUSY); 586 opp = &olp->d_partitions[i]; 587 npp = &nlp->d_partitions[i]; 588 if (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 589 DL_GETPSIZE(npp) < DL_GETPSIZE(opp)) 590 return (EBUSY); 591 /* 592 * Copy internally-set partition information 593 * if new label doesn't include it. XXX 594 */ 595 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 596 npp->p_fstype = opp->p_fstype; 597 npp->p_fragblock = opp->p_fragblock; 598 npp->p_cpg = opp->p_cpg; 599 } 600 } 601 nlp->d_checksum = 0; 602 nlp->d_checksum = dkcksum(nlp); 603 *olp = *nlp; 604 return (0); 605 } 606 607 /* 608 * Determine the size of the transfer, and make sure it is within the 609 * boundaries of the partition. Adjust transfer if needed, and signal errors or 610 * early completion. 
611 */ 612 int 613 bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 614 { 615 #define blockpersec(count, lp) ((count) * (((lp)->d_secsize) / DEV_BSIZE)) 616 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 617 daddr64_t sz = howmany(bp->b_bcount, DEV_BSIZE); 618 619 /* avoid division by zero */ 620 if (lp->d_secpercyl == 0) 621 goto bad; 622 623 if (bp->b_blkno < 0 || sz < 0) 624 panic("bounds_check_with_label %lld %lld\n", bp->b_blkno, sz); 625 626 /* beyond partition? */ 627 if (bp->b_blkno + sz > blockpersec(DL_GETPSIZE(p), lp)) { 628 sz = blockpersec(DL_GETPSIZE(p), lp) - bp->b_blkno; 629 if (sz == 0) { 630 /* If exactly at end of disk, return EOF. */ 631 bp->b_resid = bp->b_bcount; 632 return (-1); 633 } 634 if (sz < 0) 635 /* If past end of disk, return EINVAL. */ 636 goto bad; 637 638 /* Otherwise, truncate request. */ 639 bp->b_bcount = sz << DEV_BSHIFT; 640 } 641 642 /* calculate cylinder for disksort to order transfers with */ 643 bp->b_cylinder = (bp->b_blkno + blockpersec(DL_GETPOFFSET(p), lp)) / 644 blockpersec(lp->d_secpercyl, lp); 645 return (1); 646 647 bad: 648 bp->b_error = EINVAL; 649 bp->b_flags |= B_ERROR; 650 return (-1); 651 } 652 653 /* 654 * Disk error is the preface to plaintive error messages 655 * about failing disk transfers. It prints messages of the form 656 657 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 658 659 * if the offset of the error in the transfer and a disk label 660 * are both available. blkdone should be -1 if the position of the error 661 * is unknown; the disklabel pointer may be null from drivers that have not 662 * been converted to use them. The message is printed with printf 663 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 664 * The message should be completed (with at least a newline) with printf 665 * or addlog, respectively. There is no trailing space. 
666 */ 667 void 668 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 669 struct disklabel *lp) 670 { 671 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 672 int (*pr)(const char *, ...); 673 char partname = 'a' + part; 674 daddr64_t sn; 675 676 if (pri != LOG_PRINTF) { 677 static const char fmt[] = ""; 678 log(pri, fmt); 679 pr = addlog; 680 } else 681 pr = printf; 682 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 683 bp->b_flags & B_READ ? "read" : "writ"); 684 sn = bp->b_blkno; 685 if (bp->b_bcount <= DEV_BSIZE) 686 (*pr)("%lld", sn); 687 else { 688 if (blkdone >= 0) { 689 sn += blkdone; 690 (*pr)("%lld of ", sn); 691 } 692 (*pr)("%lld-%lld", bp->b_blkno, 693 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 694 } 695 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 696 sn += DL_GETPOFFSET(&lp->d_partitions[part]); 697 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, sn, 698 sn / lp->d_secpercyl); 699 sn %= lp->d_secpercyl; 700 (*pr)(" tn %lld sn %lld)", sn / lp->d_nsectors, 701 sn % lp->d_nsectors); 702 } 703 } 704 705 /* 706 * Initialize the disklist. Called by main() before autoconfiguration. 707 */ 708 void 709 disk_init(void) 710 { 711 712 TAILQ_INIT(&disklist); 713 disk_count = disk_change = 0; 714 } 715 716 int 717 disk_construct(struct disk *diskp, char *lockname) 718 { 719 rw_init(&diskp->dk_lock, lockname); 720 mtx_init(&diskp->dk_mtx, IPL_BIO); 721 722 diskp->dk_flags |= DKF_CONSTRUCTED; 723 724 return (0); 725 } 726 727 /* 728 * Attach a disk. 729 */ 730 void 731 disk_attach(struct disk *diskp) 732 { 733 734 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 735 disk_construct(diskp, diskp->dk_name); 736 737 /* 738 * Allocate and initialize the disklabel structures. Note that 739 * it's not safe to sleep here, since we're probably going to be 740 * called during autoconfiguration. 
741 */ 742 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 743 M_NOWAIT|M_ZERO); 744 if (diskp->dk_label == NULL) 745 panic("disk_attach: can't allocate storage for disklabel"); 746 747 /* 748 * Set the attached timestamp. 749 */ 750 microuptime(&diskp->dk_attachtime); 751 752 /* 753 * Link into the disklist. 754 */ 755 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 756 ++disk_count; 757 disk_change = 1; 758 } 759 760 /* 761 * Detach a disk. 762 */ 763 void 764 disk_detach(struct disk *diskp) 765 { 766 767 /* 768 * Free the space used by the disklabel structures. 769 */ 770 free(diskp->dk_label, M_DEVBUF); 771 772 /* 773 * Remove from the disklist. 774 */ 775 TAILQ_REMOVE(&disklist, diskp, dk_link); 776 disk_change = 1; 777 if (--disk_count < 0) 778 panic("disk_detach: disk_count < 0"); 779 } 780 781 /* 782 * Increment a disk's busy counter. If the counter is going from 783 * 0 to 1, set the timestamp. 784 */ 785 void 786 disk_busy(struct disk *diskp) 787 { 788 789 /* 790 * XXX We'd like to use something as accurate as microtime(), 791 * but that doesn't depend on the system TOD clock. 792 */ 793 mtx_enter(&diskp->dk_mtx); 794 if (diskp->dk_busy++ == 0) 795 microuptime(&diskp->dk_timestamp); 796 mtx_leave(&diskp->dk_mtx); 797 } 798 799 /* 800 * Decrement a disk's busy counter, increment the byte count, total busy 801 * time, and reset the timestamp. 
802 */ 803 void 804 disk_unbusy(struct disk *diskp, long bcount, int read) 805 { 806 struct timeval dv_time, diff_time; 807 808 mtx_enter(&diskp->dk_mtx); 809 810 if (diskp->dk_busy-- == 0) 811 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 812 813 microuptime(&dv_time); 814 815 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 816 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 817 818 diskp->dk_timestamp = dv_time; 819 if (bcount > 0) { 820 if (read) { 821 diskp->dk_rbytes += bcount; 822 diskp->dk_rxfer++; 823 } else { 824 diskp->dk_wbytes += bcount; 825 diskp->dk_wxfer++; 826 } 827 } else 828 diskp->dk_seek++; 829 830 mtx_leave(&diskp->dk_mtx); 831 832 add_disk_randomness(bcount ^ diff_time.tv_usec); 833 } 834 835 int 836 disk_lock(struct disk *dk) 837 { 838 int error; 839 840 error = rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR); 841 842 return (error); 843 } 844 845 void 846 disk_unlock(struct disk *dk) 847 { 848 rw_exit(&dk->dk_lock); 849 } 850 851 int 852 dk_mountroot(void) 853 { 854 dev_t rawdev, rrootdev; 855 int part = DISKPART(rootdev); 856 int (*mountrootfn)(void); 857 struct disklabel dl; 858 int error; 859 860 rrootdev = blktochr(rootdev); 861 rawdev = MAKEDISKDEV(major(rrootdev), DISKUNIT(rootdev), RAW_PART); 862 #ifdef DEBUG 863 printf("rootdev=0x%x rrootdev=0x%x rawdev=0x%x\n", rootdev, 864 rrootdev, rawdev); 865 #endif 866 867 /* 868 * open device, ioctl for the disklabel, and close it. 
869 */ 870 error = (cdevsw[major(rrootdev)].d_open)(rawdev, FREAD, 871 S_IFCHR, curproc); 872 if (error) 873 panic("cannot open disk, 0x%x/0x%x, error %d", 874 rootdev, rrootdev, error); 875 error = (cdevsw[major(rrootdev)].d_ioctl)(rawdev, DIOCGDINFO, 876 (caddr_t)&dl, FREAD, curproc); 877 if (error) 878 panic("cannot read disk label, 0x%x/0x%x, error %d", 879 rootdev, rrootdev, error); 880 (void) (cdevsw[major(rrootdev)].d_close)(rawdev, FREAD, 881 S_IFCHR, curproc); 882 883 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 884 panic("root filesystem has size 0"); 885 switch (dl.d_partitions[part].p_fstype) { 886 #ifdef EXT2FS 887 case FS_EXT2FS: 888 { 889 extern int ext2fs_mountroot(void); 890 mountrootfn = ext2fs_mountroot; 891 } 892 break; 893 #endif 894 #ifdef FFS 895 case FS_BSDFFS: 896 { 897 extern int ffs_mountroot(void); 898 mountrootfn = ffs_mountroot; 899 } 900 break; 901 #endif 902 #ifdef CD9660 903 case FS_ISO9660: 904 { 905 extern int cd9660_mountroot(void); 906 mountrootfn = cd9660_mountroot; 907 } 908 break; 909 #endif 910 default: 911 #ifdef FFS 912 { 913 extern int ffs_mountroot(void); 914 915 printf("filesystem type %d not known.. 
assuming ffs\n", 916 dl.d_partitions[part].p_fstype); 917 mountrootfn = ffs_mountroot; 918 } 919 #else 920 panic("disk 0x%x/0x%x filesystem type %d not known", 921 rootdev, rrootdev, dl.d_partitions[part].p_fstype); 922 #endif 923 } 924 return (*mountrootfn)(); 925 } 926 927 struct bufq * 928 bufq_default_alloc(void) 929 { 930 struct bufq_default *bq; 931 932 bq = malloc(sizeof(*bq), M_DEVBUF, M_NOWAIT|M_ZERO); 933 if (bq == NULL) 934 panic("bufq_default_alloc: no memory"); 935 936 bq->bufq.bufq_free = bufq_default_free; 937 bq->bufq.bufq_add = bufq_default_add; 938 bq->bufq.bufq_get = bufq_default_get; 939 940 return ((struct bufq *)bq); 941 } 942 943 void 944 bufq_default_free(struct bufq *bq) 945 { 946 free(bq, M_DEVBUF); 947 } 948 949 void 950 bufq_default_add(struct bufq *bq, struct buf *bp) 951 { 952 struct bufq_default *bufq = (struct bufq_default *)bq; 953 struct proc *p = bp->b_proc; 954 struct buf *head; 955 956 if (p == NULL || p->p_nice < NZERO) 957 head = &bufq->bufq_head[0]; 958 else if (p->p_nice == NZERO) 959 head = &bufq->bufq_head[1]; 960 else 961 head = &bufq->bufq_head[2]; 962 963 disksort(head, bp); 964 } 965 966 struct buf * 967 bufq_default_get(struct bufq *bq) 968 { 969 struct bufq_default *bufq = (struct bufq_default *)bq; 970 struct buf *bp, *head; 971 int i; 972 973 for (i = 0; i < 3; i++) { 974 head = &bufq->bufq_head[i]; 975 if ((bp = head->b_actf)) 976 break; 977 } 978 if (bp == NULL) 979 return (NULL); 980 head->b_actf = bp->b_actf; 981 return (bp); 982 } 983 984 #ifdef RAMDISK_HOOKS 985 static struct device fakerdrootdev = { DV_DISK, {}, NULL, 0, "rd0", NULL }; 986 #endif 987 988 struct device * 989 getdisk(char *str, int len, int defpart, dev_t *devp) 990 { 991 struct device *dv; 992 993 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 994 printf("use one of: exit"); 995 #ifdef RAMDISK_HOOKS 996 printf(" %s[a-p]", fakerdrootdev.dv_xname); 997 #endif 998 TAILQ_FOREACH(dv, &alldevs, dv_list) { 999 if (dv->dv_class == DV_DISK) 
1000 printf(" %s[a-p]", dv->dv_xname); 1001 #if defined(NFSCLIENT) 1002 if (dv->dv_class == DV_IFNET) 1003 printf(" %s", dv->dv_xname); 1004 #endif 1005 } 1006 printf("\n"); 1007 } 1008 return (dv); 1009 } 1010 1011 struct device * 1012 parsedisk(char *str, int len, int defpart, dev_t *devp) 1013 { 1014 struct device *dv; 1015 char c; 1016 int majdev, part; 1017 1018 if (len == 0) 1019 return (NULL); 1020 c = str[len-1]; 1021 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1022 part = c - 'a'; 1023 len -= 1; 1024 } else 1025 part = defpart; 1026 1027 #ifdef RAMDISK_HOOKS 1028 if (strncmp(str, fakerdrootdev.dv_xname, len) == 0) { 1029 dv = &fakerdrootdev; 1030 goto gotdisk; 1031 } 1032 #endif 1033 1034 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1035 if (dv->dv_class == DV_DISK && 1036 strncmp(str, dv->dv_xname, len) == 0 && 1037 dv->dv_xname[len] == '\0') { 1038 #ifdef RAMDISK_HOOKS 1039 gotdisk: 1040 #endif 1041 majdev = findblkmajor(dv); 1042 if (majdev < 0) 1043 panic("parsedisk"); 1044 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1045 break; 1046 } 1047 #if defined(NFSCLIENT) 1048 if (dv->dv_class == DV_IFNET && 1049 strncmp(str, dv->dv_xname, len) == 0 && 1050 dv->dv_xname[len] == '\0') { 1051 *devp = NODEV; 1052 break; 1053 } 1054 #endif 1055 } 1056 1057 return (dv); 1058 } 1059 1060 void 1061 setroot(struct device *bootdv, int part, int exitflags) 1062 { 1063 int majdev, unit, len, s; 1064 struct swdevt *swp; 1065 struct device *rootdv, *dv; 1066 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1067 struct ifnet *ifp = NULL; 1068 char buf[128]; 1069 #if defined(NFSCLIENT) 1070 extern char *nfsbootdevname; 1071 #endif 1072 1073 #ifdef RAMDISK_HOOKS 1074 bootdv = &fakerdrootdev; 1075 mountroot = NULL; 1076 part = 0; 1077 #endif 1078 1079 /* 1080 * If `swap generic' and we couldn't determine boot device, 1081 * ask the user. 
1082 */ 1083 if (mountroot == NULL && bootdv == NULL) 1084 boothowto |= RB_ASKNAME; 1085 if (boothowto & RB_ASKNAME) { 1086 while (1) { 1087 printf("root device"); 1088 if (bootdv != NULL) { 1089 printf(" (default %s", bootdv->dv_xname); 1090 if (bootdv->dv_class == DV_DISK) 1091 printf("%c", 'a' + part); 1092 printf(")"); 1093 } 1094 printf(": "); 1095 s = splhigh(); 1096 cnpollc(TRUE); 1097 len = getsn(buf, sizeof(buf)); 1098 cnpollc(FALSE); 1099 splx(s); 1100 if (strcmp(buf, "exit") == 0) 1101 boot(exitflags); 1102 if (len == 0 && bootdv != NULL) { 1103 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1104 len = strlen(buf); 1105 } 1106 if (len > 0 && buf[len - 1] == '*') { 1107 buf[--len] = '\0'; 1108 dv = getdisk(buf, len, part, &nrootdev); 1109 if (dv != NULL) { 1110 rootdv = dv; 1111 nswapdev = nrootdev; 1112 goto gotswap; 1113 } 1114 } 1115 dv = getdisk(buf, len, part, &nrootdev); 1116 if (dv != NULL) { 1117 rootdv = dv; 1118 break; 1119 } 1120 } 1121 1122 if (rootdv->dv_class == DV_IFNET) 1123 goto gotswap; 1124 1125 /* try to build swap device out of new root device */ 1126 while (1) { 1127 printf("swap device"); 1128 if (rootdv != NULL) 1129 printf(" (default %s%s)", rootdv->dv_xname, 1130 rootdv->dv_class == DV_DISK ? 
"b" : ""); 1131 printf(": "); 1132 s = splhigh(); 1133 cnpollc(TRUE); 1134 len = getsn(buf, sizeof(buf)); 1135 cnpollc(FALSE); 1136 splx(s); 1137 if (strcmp(buf, "exit") == 0) 1138 boot(exitflags); 1139 if (len == 0 && rootdv != NULL) { 1140 switch (rootdv->dv_class) { 1141 case DV_IFNET: 1142 nswapdev = NODEV; 1143 break; 1144 case DV_DISK: 1145 nswapdev = MAKEDISKDEV(major(nrootdev), 1146 DISKUNIT(nrootdev), 1); 1147 if (nswapdev == nrootdev) 1148 continue; 1149 break; 1150 default: 1151 break; 1152 } 1153 break; 1154 } 1155 dv = getdisk(buf, len, 1, &nswapdev); 1156 if (dv) { 1157 if (dv->dv_class == DV_IFNET) 1158 nswapdev = NODEV; 1159 if (nswapdev == nrootdev) 1160 continue; 1161 break; 1162 } 1163 } 1164 gotswap: 1165 rootdev = nrootdev; 1166 dumpdev = nswapdev; 1167 swdevt[0].sw_dev = nswapdev; 1168 swdevt[1].sw_dev = NODEV; 1169 #if defined(NFSCLIENT) 1170 } else if (mountroot == nfs_mountroot) { 1171 rootdv = bootdv; 1172 rootdev = dumpdev = swapdev = NODEV; 1173 #endif 1174 } else if (mountroot == NULL) { 1175 /* 1176 * `swap generic' or RAMDISK_HOOKS -- use the 1177 * device we were told to 1178 */ 1179 rootdv = bootdv; 1180 majdev = findblkmajor(rootdv); 1181 if (majdev >= 0) { 1182 /* 1183 * Root and swap are on the disk. 1184 * Assume swap is on partition b. 1185 */ 1186 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1187 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1188 } else { 1189 /* 1190 * Root and swap are on a net. 1191 */ 1192 nswapdev = NODEV; 1193 } 1194 dumpdev = nswapdev; 1195 swdevt[0].sw_dev = nswapdev; 1196 /* swdevt[1].sw_dev = NODEV; */ 1197 } else { 1198 /* Completely pre-configured, but we want rootdv .. 
*/ 1199 majdev = major(rootdev); 1200 if (findblkname(majdev) == NULL) 1201 return; 1202 unit = DISKUNIT(rootdev); 1203 part = DISKPART(rootdev); 1204 snprintf(buf, sizeof buf, "%s%d%c", 1205 findblkname(majdev), unit, 'a' + part); 1206 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1207 } 1208 1209 if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET) 1210 ifp = ifunit(rootdv->dv_xname); 1211 else if (bootdv && bootdv->dv_class == DV_IFNET) 1212 ifp = ifunit(bootdv->dv_xname); 1213 1214 if (ifp) 1215 if_addgroup(ifp, "netboot"); 1216 1217 switch (rootdv->dv_class) { 1218 #if defined(NFSCLIENT) 1219 case DV_IFNET: 1220 mountroot = nfs_mountroot; 1221 nfsbootdevname = rootdv->dv_xname; 1222 return; 1223 #endif 1224 case DV_DISK: 1225 mountroot = dk_mountroot; 1226 part = DISKPART(rootdev); 1227 break; 1228 default: 1229 printf("can't figure root, hope your kernel is right\n"); 1230 return; 1231 } 1232 1233 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1234 1235 /* 1236 * Make the swap partition on the root drive the primary swap. 1237 */ 1238 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1239 if (major(rootdev) == major(swp->sw_dev) && 1240 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1241 temp = swdevt[0].sw_dev; 1242 swdevt[0].sw_dev = swp->sw_dev; 1243 swp->sw_dev = temp; 1244 break; 1245 } 1246 } 1247 if (swp->sw_dev != NODEV) { 1248 /* 1249 * If dumpdev was the same as the old primary swap device, 1250 * move it to the new primary swap device. 
1251 */ 1252 if (temp == dumpdev) 1253 dumpdev = swdevt[0].sw_dev; 1254 } 1255 if (swdevt[0].sw_dev != NODEV) 1256 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1257 DISKUNIT(swdevt[0].sw_dev), 1258 'a' + DISKPART(swdevt[0].sw_dev)); 1259 if (dumpdev != NODEV) 1260 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1261 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1262 printf("\n"); 1263 } 1264 1265 extern struct nam2blk nam2blk[]; 1266 1267 int 1268 findblkmajor(struct device *dv) 1269 { 1270 char *name = dv->dv_xname; 1271 int i; 1272 1273 for (i = 0; nam2blk[i].name; i++) 1274 if (!strncmp(name, nam2blk[i].name, strlen(nam2blk[i].name))) 1275 return (nam2blk[i].maj); 1276 return (-1); 1277 } 1278 1279 char * 1280 findblkname(int maj) 1281 { 1282 int i; 1283 1284 for (i = 0; nam2blk[i].name; i++) 1285 if (nam2blk[i].maj == maj) 1286 return (nam2blk[i].name); 1287 return (NULL); 1288 } 1289