1 /* $OpenBSD: subr_disk.c,v 1.17 1999/11/17 04:31:22 d Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kernel.h> 48 #include <sys/malloc.h> 49 #include <sys/fcntl.h> 50 #include <sys/buf.h> 51 #include <sys/stat.h> 52 #include <sys/syslog.h> 53 #include <sys/time.h> 54 #include <sys/disklabel.h> 55 #include <sys/conf.h> 56 #include <sys/disk.h> 57 #include <sys/dkio.h> 58 #include <sys/dkstat.h> /* XXX */ 59 60 #include <dev/rndvar.h> 61 62 /* 63 * A global list of all disks attached to the system. May grow or 64 * shrink over time. 65 */ 66 struct disklist_head disklist; /* TAILQ_HEAD */ 67 int disk_count; /* number of drives in global disklist */ 68 69 /* 70 * Seek sort for disks. We depend on the driver which calls us using b_resid 71 * as the current cylinder number. 72 * 73 * The argument ap structure holds a b_actf activity chain pointer on which we 74 * keep two queues, sorted in ascending cylinder order. The first queue holds 75 * those requests which are positioned after the current cylinder (in the first 76 * request); the second holds requests which came in after their cylinder number 77 * was passed. Thus we implement a one way scan, retracting after reaching the 78 * end of the drive to the first request on the second queue, at which time it 79 * becomes the first queue. 80 * 81 * A one-way scan is natural because of the way UNIX read-ahead blocks are 82 * allocated. 83 */ 84 85 void 86 disksort(ap, bp) 87 register struct buf *ap, *bp; 88 { 89 register struct buf *bq; 90 91 /* If the queue is empty, then it's easy. */ 92 if (ap->b_actf == NULL) { 93 bp->b_actf = NULL; 94 ap->b_actf = bp; 95 return; 96 } 97 98 /* 99 * If we lie after the first (currently active) request, then we 100 * must locate the second request list and add ourselves to it. 101 */ 102 bq = ap->b_actf; 103 if (bp->b_cylinder < bq->b_cylinder) { 104 while (bq->b_actf) { 105 /* 106 * Check for an ``inversion'' in the normally ascending 107 * cylinder numbers, indicating the start of the second 108 * request list. 109 */ 110 if (bq->b_actf->b_cylinder < bq->b_cylinder) { 111 /* 112 * Search the second request list for the first 113 * request at a larger cylinder number. We go 114 * before that; if there is no such request, we 115 * go at end. 116 */ 117 do { 118 if (bp->b_cylinder < 119 bq->b_actf->b_cylinder) 120 goto insert; 121 if (bp->b_cylinder == 122 bq->b_actf->b_cylinder && 123 bp->b_blkno < bq->b_actf->b_blkno) 124 goto insert; 125 bq = bq->b_actf; 126 } while (bq->b_actf); 127 goto insert; /* after last */ 128 } 129 bq = bq->b_actf; 130 } 131 /* 132 * No inversions... we will go after the last, and 133 * be the first request in the second request list. 134 */ 135 goto insert; 136 } 137 /* 138 * Request is at/after the current request... 139 * sort in the first request list. 140 */ 141 while (bq->b_actf) { 142 /* 143 * We want to go after the current request if there is an 144 * inversion after it (i.e. it is the end of the first 145 * request list), or if the next request is a larger cylinder 146 * than our request. 147 */ 148 if (bq->b_actf->b_cylinder < bq->b_cylinder || 149 bp->b_cylinder < bq->b_actf->b_cylinder || 150 (bp->b_cylinder == bq->b_actf->b_cylinder && 151 bp->b_blkno < bq->b_actf->b_blkno)) 152 goto insert; 153 bq = bq->b_actf; 154 } 155 /* 156 * Neither a second list nor a larger request... we go at the end of 157 * the first list, which is the same as the end of the whole schebang. 158 */ 159 insert: bp->b_actf = bq->b_actf; 160 bq->b_actf = bp; 161 } 162 163 /* 164 * Compute checksum for disk label. 165 */ 166 u_int 167 dkcksum(lp) 168 register struct disklabel *lp; 169 { 170 register u_int16_t *start, *end; 171 register u_int16_t sum = 0; 172 173 start = (u_int16_t *)lp; 174 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 175 while (start < end) 176 sum ^= *start++; 177 return (sum); 178 } 179 180 /* 181 * Disk error is the preface to plaintive error messages 182 * about failing disk transfers. It prints messages of the form 183 184 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 185 186 * if the offset of the error in the transfer and a disk label 187 * are both available. blkdone should be -1 if the position of the error 188 * is unknown; the disklabel pointer may be null from drivers that have not 189 * been converted to use them. The message is printed with printf 190 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 191 * The message should be completed (with at least a newline) with printf 192 * or addlog, respectively. There is no trailing space. 193 */ 194 void 195 diskerr(bp, dname, what, pri, blkdone, lp) 196 register struct buf *bp; 197 char *dname, *what; 198 int pri, blkdone; 199 register struct disklabel *lp; 200 { 201 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 202 register int (*pr) __P((const char *, ...)); 203 char partname = 'a' + part; 204 int sn; 205 206 if (pri != LOG_PRINTF) { 207 static const char fmt[] = ""; 208 log(pri, fmt); 209 pr = addlog; 210 } else 211 pr = printf; 212 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 213 bp->b_flags & B_READ ? "read" : "writ"); 214 sn = bp->b_blkno; 215 if (bp->b_bcount <= DEV_BSIZE) 216 (*pr)("%d", sn); 217 else { 218 if (blkdone >= 0) { 219 sn += blkdone; 220 (*pr)("%d of ", sn); 221 } 222 (*pr)("%d-%d", bp->b_blkno, 223 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 224 } 225 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 226 #ifdef tahoe 227 sn *= DEV_BSIZE / lp->d_secsize; /* XXX */ 228 #endif 229 sn += lp->d_partitions[part].p_offset; 230 (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn, 231 sn / lp->d_secpercyl); 232 sn %= lp->d_secpercyl; 233 (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors); 234 } 235 } 236 237 /* 238 * Initialize the disklist. Called by main() before autoconfiguration. 239 */ 240 void 241 disk_init() 242 { 243 244 TAILQ_INIT(&disklist); 245 disk_count = 0; 246 } 247 248 /* 249 * Searches the disklist for the disk corresponding to the 250 * name provided. 251 */ 252 struct disk * 253 disk_find(name) 254 char *name; 255 { 256 struct disk *diskp; 257 258 if ((name == NULL) || (disk_count <= 0)) 259 return (NULL); 260 261 for (diskp = disklist.tqh_first; diskp != NULL; 262 diskp = diskp->dk_link.tqe_next) 263 if (strcmp(diskp->dk_name, name) == 0) 264 return (diskp); 265 266 return (NULL); 267 } 268 269 /* 270 * Attach a disk. 271 */ 272 void 273 disk_attach(diskp) 274 struct disk *diskp; 275 { 276 int s; 277 278 /* 279 * Allocate and initialize the disklabel structures. Note that 280 * it's not safe to sleep here, since we're probably going to be 281 * called during autoconfiguration. 282 */ 283 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT); 284 diskp->dk_cpulabel = malloc(sizeof(struct cpu_disklabel), M_DEVBUF, 285 M_NOWAIT); 286 if ((diskp->dk_label == NULL) || (diskp->dk_cpulabel == NULL)) 287 panic("disk_attach: can't allocate storage for disklabel"); 288 289 bzero(diskp->dk_label, sizeof(struct disklabel)); 290 bzero(diskp->dk_cpulabel, sizeof(struct cpu_disklabel)); 291 292 /* 293 * Set the attached timestamp. 294 */ 295 s = splclock(); 296 diskp->dk_attachtime = mono_time; 297 splx(s); 298 299 /* 300 * Link into the disklist. 301 */ 302 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 303 ++disk_count; 304 } 305 306 /* 307 * Detach a disk. 308 */ 309 void 310 disk_detach(diskp) 311 struct disk *diskp; 312 { 313 314 /* 315 * Free the space used by the disklabel structures. 316 */ 317 free(diskp->dk_label, M_DEVBUF); 318 free(diskp->dk_cpulabel, M_DEVBUF); 319 320 /* 321 * Remove from the disklist. 322 */ 323 TAILQ_REMOVE(&disklist, diskp, dk_link); 324 if (--disk_count < 0) 325 panic("disk_detach: disk_count < 0"); 326 } 327 328 /* 329 * Increment a disk's busy counter. If the counter is going from 330 * 0 to 1, set the timestamp. 331 */ 332 void 333 disk_busy(diskp) 334 struct disk *diskp; 335 { 336 int s; 337 338 /* 339 * XXX We'd like to use something as accurate as microtime(), 340 * but that doesn't depend on the system TOD clock. 341 */ 342 if (diskp->dk_busy++ == 0) { 343 s = splclock(); 344 diskp->dk_timestamp = mono_time; 345 splx(s); 346 } 347 } 348 349 /* 350 * Decrement a disk's busy counter, increment the byte count, total busy 351 * time, and reset the timestamp. 352 */ 353 void 354 disk_unbusy(diskp, bcount) 355 struct disk *diskp; 356 long bcount; 357 { 358 int s; 359 struct timeval dv_time, diff_time; 360 361 if (diskp->dk_busy-- == 0) 362 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 363 364 s = splclock(); 365 dv_time = mono_time; 366 splx(s); 367 368 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 369 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 370 371 diskp->dk_timestamp = dv_time; 372 if (bcount > 0) { 373 diskp->dk_bytes += bcount; 374 diskp->dk_xfer++; 375 } 376 diskp->dk_seek++; 377 378 add_disk_randomness(bcount ^ diff_time.tv_usec); 379 } 380 381 /* 382 * Reset the metrics counters on the given disk. Note that we cannot 383 * reset the busy counter, as it may case a panic in disk_unbusy(). 384 * We also must avoid playing with the timestamp information, as it 385 * may skew any pending transfer results. 386 */ 387 void 388 disk_resetstat(diskp) 389 struct disk *diskp; 390 { 391 int s = splbio(), t; 392 393 diskp->dk_xfer = 0; 394 diskp->dk_bytes = 0; 395 diskp->dk_seek = 0; 396 397 t = splclock(); 398 diskp->dk_attachtime = mono_time; 399 splx(t); 400 401 timerclear(&diskp->dk_time); 402 403 splx(s); 404 } 405 406 407 int 408 dk_mountroot() 409 { 410 dev_t rawdev, rrootdev; 411 int part = DISKPART(rootdev); 412 int (*mountrootfn) __P((void)); 413 extern struct proc *curproc; 414 struct disklabel dl; 415 int error; 416 417 rrootdev = blktochr(rootdev); 418 rawdev = MAKEDISKDEV(major(rrootdev), DISKUNIT(rootdev), RAW_PART); 419 printf("rootdev=0x%x rrootdev=0x%x rawdev=0x%x\n", rootdev, 420 rrootdev, rawdev); 421 422 /* 423 * open device, ioctl for the disklabel, and close it. 424 */ 425 error = (cdevsw[major(rrootdev)].d_open)(rawdev, FREAD, 426 S_IFCHR, curproc); 427 if (error) 428 panic("cannot open disk, 0x%x/0x%x, error %d", 429 rootdev, rrootdev, error); 430 error = (cdevsw[major(rrootdev)].d_ioctl)(rawdev, DIOCGDINFO, 431 (caddr_t)&dl, FREAD, curproc); 432 if (error) 433 panic("cannot read disk label, 0x%x/0x%x, error %d", 434 rootdev, rrootdev, error); 435 (void) (cdevsw[major(rrootdev)].d_close)(rawdev, FREAD, 436 S_IFCHR, curproc); 437 438 if (dl.d_partitions[part].p_size == 0) 439 panic("root filesystem has size 0"); 440 switch (dl.d_partitions[part].p_fstype) { 441 #ifdef EXT2FS 442 case FS_EXT2FS: 443 { 444 extern int ext2fs_mountroot __P((void)); 445 mountrootfn = ext2fs_mountroot; 446 } 447 break; 448 #endif 449 #ifdef FFS 450 case FS_BSDFFS: 451 { 452 extern int ffs_mountroot __P((void)); 453 mountrootfn = ffs_mountroot; 454 } 455 break; 456 #endif 457 #ifdef LFS 458 case FS_BSDLFS: 459 { 460 extern int lfs_mountroot __P((void)); 461 mountrootfn = lfs_mountroot; 462 } 463 break; 464 #endif 465 #ifdef CD9660 466 case FS_ISO9660: 467 { 468 extern int cd9660_mountroot __P((void)); 469 mountrootfn = cd9660_mountroot; 470 } 471 break; 472 #endif 473 default: 474 #ifdef FFS 475 { 476 extern int ffs_mountroot __P((void)); 477 478 printf("filesystem type %d not known.. assuming ffs\n", 479 dl.d_partitions[part].p_fstype); 480 mountrootfn = ffs_mountroot; 481 } 482 #else 483 panic("disk 0x%x/0x%x filesystem type %d not known", 484 rootdev, rrootdev, dl.d_partitions[part].p_fstype); 485 #endif 486 } 487 return (*mountrootfn)(); 488 } 489