1 /* $NetBSD: subr_disk.c,v 1.42 2002/08/30 15:43:40 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1999, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1988, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_disksubr.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.42 2002/08/30 15:43:40 hannken Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/syslog.h>
#include <sys/disklabel.h>
#include <sys/disk.h>
#include <sys/sysctl.h>

/*
 * A global list of all disks attached to the system.  May grow or
 * shrink over time.
 */
struct disklist_head disklist;	/* TAILQ_HEAD */
int disk_count;			/* number of drives in global disklist */
struct simplelock disklist_slock = SIMPLELOCK_INITIALIZER;

/*
 * Compute checksum for disk label.
 *
 * XORs the label, viewed as a sequence of 16-bit words, from the start
 * of the structure up to (but not including) the end of the last
 * defined partition entry, d_partitions[d_npartitions].
 * NOTE(review): a correct on-disk label presumably XORs to zero with
 * d_checksum included -- confirm against disklabel validation callers.
 */
u_int
dkcksum(struct disklabel *lp)
{
	u_short *start, *end;
	u_short sum = 0;

	start = (u_short *)lp;
	end = (u_short *)&lp->d_partitions[lp->d_npartitions];
	while (start < end)
		sum ^= *start++;
	return (sum);
}

/*
 * Disk error is the preface to plaintive error messages
 * about failing disk transfers.  It prints messages of the form

hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)

 * if the offset of the error in the transfer and a disk label
 * are both available.  blkdone should be -1 if the position of the error
 * is unknown; the disklabel pointer may be null from drivers that have not
 * been converted to use them.  The message is printed with printf
 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
 * The message should be completed (with at least a newline) with printf
 * or addlog, respectively.  There is no trailing space.
 */
void
diskerr(const struct buf *bp, const char *dname, const char *what, int pri,
    int blkdone, const struct disklabel *lp)
{
	int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev);
	void (*pr)(const char *, ...);
	char partname = 'a' + part;
	/*
	 * NOTE(review): sn is an int; assumes b_blkno, blkdone and the
	 * partition offset sum fits in int -- confirm for large disks.
	 */
	int sn;

	if (pri != LOG_PRINTF) {
		/*
		 * Emit an empty record at the requested priority so the
		 * message built below via addlog() is presumably attached
		 * to a log entry of that priority.
		 */
		static const char fmt[] = "";
		log(pri, fmt);
		pr = addlog;
	} else
		pr = printf;
	(*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
	    bp->b_flags & B_READ ? "read" : "writ");
	sn = bp->b_blkno;
	if (bp->b_bcount <= DEV_BSIZE)
		/* Single-block transfer: just the block number. */
		(*pr)("%d", sn);
	else {
		if (blkdone >= 0) {
			/* Position of the error within the transfer. */
			sn += blkdone;
			(*pr)("%d of ", sn);
		}
		/* Print the whole transfer range, inclusive. */
		(*pr)("%d-%d", bp->b_blkno,
		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
	}
	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
		/*
		 * Translate the partition-relative block into an absolute
		 * disk position: block, cylinder, track, sector.
		 */
		sn += lp->d_partitions[part].p_offset;
		(*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
		    sn / lp->d_secpercyl);
		sn %= lp->d_secpercyl;
		(*pr)(" tn %d sn %d)", sn / lp->d_nsectors,
		    sn % lp->d_nsectors);
	}
}

/*
 * Initialize the disklist.  Called by main() before autoconfiguration.
 */
void
disk_init(void)
{

	TAILQ_INIT(&disklist);
	disk_count = 0;
}

/*
 * Searches the disklist for the disk corresponding to the
 * name provided.
 */
struct disk *
disk_find(char *name)
{
	struct disk *diskp;

	if ((name == NULL) || (disk_count <= 0))
		return (NULL);

	simple_lock(&disklist_slock);
	for (diskp = TAILQ_FIRST(&disklist); diskp != NULL;
	    diskp = TAILQ_NEXT(diskp, dk_link))
		if (strcmp(diskp->dk_name, name) == 0) {
			/*
			 * NOTE(review): the pointer is returned after the
			 * list lock is dropped; presumably the disk cannot
			 * be detached out from under the caller -- confirm.
			 */
			simple_unlock(&disklist_slock);
			return (diskp);
		}
	simple_unlock(&disklist_slock);

	return (NULL);
}

/*
 * Attach a disk.
 */
void
disk_attach(struct disk *diskp)
{
	int s;

	/*
	 * Allocate and initialize the disklabel structures.  Note that
	 * it's not safe to sleep here, since we're probably going to be
	 * called during autoconfiguration.
	 */
	diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT);
	diskp->dk_cpulabel = malloc(sizeof(struct cpu_disklabel), M_DEVBUF,
	    M_NOWAIT);
	if ((diskp->dk_label == NULL) || (diskp->dk_cpulabel == NULL))
		panic("disk_attach: can't allocate storage for disklabel");

	memset(diskp->dk_label, 0, sizeof(struct disklabel));
	memset(diskp->dk_cpulabel, 0, sizeof(struct cpu_disklabel));

	/*
	 * Set the attached timestamp.
	 */
	s = splclock();
	diskp->dk_attachtime = mono_time;
	splx(s);

	/*
	 * Link into the disklist.
	 *
	 * NOTE(review): disk_count is modified outside disklist_slock here
	 * and in disk_detach() -- confirm attach/detach are serialized by
	 * the caller (e.g. autoconfiguration).
	 */
	simple_lock(&disklist_slock);
	TAILQ_INSERT_TAIL(&disklist, diskp, dk_link);
	simple_unlock(&disklist_slock);
	++disk_count;
}

/*
 * Detach a disk.
 */
void
disk_detach(struct disk *diskp)
{

	/*
	 * Remove from the disklist.
	 */
	if (--disk_count < 0)
		panic("disk_detach: disk_count < 0");
	simple_lock(&disklist_slock);
	TAILQ_REMOVE(&disklist, diskp, dk_link);
	simple_unlock(&disklist_slock);

	/*
	 * Free the space used by the disklabel structures.
	 */
	free(diskp->dk_label, M_DEVBUF);
	free(diskp->dk_cpulabel, M_DEVBUF);
}

/*
 * Increment a disk's busy counter.  If the counter is going from
 * 0 to 1, set the timestamp.
 */
void
disk_busy(struct disk *diskp)
{
	int s;

	/*
	 * XXX We'd like to use something as accurate as microtime(),
	 * but that doesn't depend on the system TOD clock.
	 */
	if (diskp->dk_busy++ == 0) {
		/* Transition idle -> busy: record when work started. */
		s = splclock();
		diskp->dk_timestamp = mono_time;
		splx(s);
	}
}

/*
 * Decrement a disk's busy counter, increment the byte count, total busy
 * time, and reset the timestamp.
 */
void
disk_unbusy(struct disk *diskp, long bcount)
{
	int s;
	struct timeval dv_time, diff_time;

	/* Underflow check: unbusy without a matching disk_busy(). */
	if (diskp->dk_busy-- == 0) {
		printf("%s: dk_busy < 0\n", diskp->dk_name);
		panic("disk_unbusy");
	}

	s = splclock();
	dv_time = mono_time;
	splx(s);

	/* Accumulate the elapsed time since the last timestamp. */
	timersub(&dv_time, &diskp->dk_timestamp, &diff_time);
	timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time);

	diskp->dk_timestamp = dv_time;
	if (bcount > 0) {
		diskp->dk_bytes += bcount;
		diskp->dk_xfer++;
	}
}

/*
 * Reset the metrics counters on the given disk.  Note that we cannot
 * reset the busy counter, as it may case a panic in disk_unbusy().
 * We also must avoid playing with the timestamp information, as it
 * may skew any pending transfer results.
 */
void
disk_resetstat(struct disk *diskp)
{
	int s = splbio(), t;

	diskp->dk_xfer = 0;
	diskp->dk_bytes = 0;

	/* Restart the attach time so rates are computed from "now". */
	t = splclock();
	diskp->dk_attachtime = mono_time;
	splx(t);

	timerclear(&diskp->dk_time);

	splx(s);
}

/*
 * sysctl helper: copy out the names of all attached disks as a single
 * space-separated, NUL-terminated string.  With a NULL buffer, only the
 * required size is reported via *sizep.
 */
int
sysctl_disknames(void *vwhere, size_t *sizep)
{
	char buf[DK_DISKNAMELEN + 1];
	char *where = vwhere;
	struct disk *diskp;
	size_t needed, left, slen;
	int error, first;

	first = 1;
	error = 0;
	needed = 0;
	left = *sizep;

	simple_lock(&disklist_slock);
	for (diskp = TAILQ_FIRST(&disklist); diskp != NULL;
	    diskp = TAILQ_NEXT(diskp, dk_link)) {
		if (where == NULL)
			/* Size query only: name plus separator/NUL. */
			needed += strlen(diskp->dk_name) + 1;
		else {
			memset(buf, 0, sizeof(buf));
			if (first) {
				strncpy(buf, diskp->dk_name, sizeof(buf));
				first = 0;
			} else {
				/* Subsequent names are space-prefixed. */
				buf[0] = ' ';
				strncpy(buf + 1, diskp->dk_name,
				    sizeof(buf) - 1);
			}
			buf[DK_DISKNAMELEN] = '\0';
			slen = strlen(buf);
			if (left < slen + 1)
				break;
			/* +1 to copy out the trailing NUL byte */
			error = copyout(buf, where, slen + 1);
			if (error)
				break;
			/*
			 * Advance by slen only (not slen + 1): the next
			 * iteration's copyout overwrites this NUL, so only
			 * the final name leaves the string terminated.
			 */
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	simple_unlock(&disklist_slock);
	*sizep = needed;
	return (error);
}

/*
 * sysctl helper: copy out a struct disk_sysctl of statistics for each
 * attached disk.  With a NULL buffer, report the required size.
 */
int
sysctl_diskstats(int *name, u_int namelen, void *vwhere, size_t *sizep)
{
	struct disk_sysctl sdisk;
	struct disk *diskp;
	char *where = vwhere;
	size_t tocopy, left;
	int error;

	if (where == NULL) {
		*sizep = disk_count * sizeof(struct disk_sysctl);
		return (0);
	}

	/*
	 * NOTE(review): name[0], when given, sets the per-entry copy size;
	 * presumably this lets older userland request a shorter (earlier
	 * version) structure -- confirm.  Entries still advance by tocopy.
	 */
	if (namelen == 0)
		tocopy = sizeof(sdisk);
	else
		tocopy = name[0];

	error = 0;
	left = *sizep;
	memset(&sdisk, 0, sizeof(sdisk));
	*sizep = 0;

	simple_lock(&disklist_slock);
	TAILQ_FOREACH(diskp, &disklist, dk_link) {
		if (left < sizeof(struct disk_sysctl))
			break;
		strncpy(sdisk.dk_name, diskp->dk_name, sizeof(sdisk.dk_name));
		sdisk.dk_xfer = diskp->dk_xfer;
		sdisk.dk_seek = diskp->dk_seek;
		sdisk.dk_bytes = diskp->dk_bytes;
		sdisk.dk_attachtime_sec = diskp->dk_attachtime.tv_sec;
		sdisk.dk_attachtime_usec = diskp->dk_attachtime.tv_usec;
		sdisk.dk_timestamp_sec = diskp->dk_timestamp.tv_sec;
		sdisk.dk_timestamp_usec = diskp->dk_timestamp.tv_usec;
		sdisk.dk_time_sec = diskp->dk_time.tv_sec;
		sdisk.dk_time_usec = diskp->dk_time.tv_usec;
		sdisk.dk_busy = diskp->dk_busy;

		error = copyout(&sdisk, where, min(tocopy, sizeof(sdisk)));
		if (error)
			break;
		where += tocopy;
		*sizep += tocopy;
		left -= tocopy;
	}
	simple_unlock(&disklist_slock);
	return (error);
}


/* Private state for the first-come first-served strategy. */
struct bufq_fcfs {
	TAILQ_HEAD(, buf) bq_head;	/* actual list of buffers */
};

/* Private state for the one-way elevator (disksort) strategy. */
struct bufq_disksort {
	TAILQ_HEAD(, buf) bq_head;	/* actual list of buffers */
};

#define PRIO_READ_BURST		48
#define PRIO_WRITE_REQ		16

/* Private state for the read-priority strategy. */
struct bufq_prio {
	TAILQ_HEAD(, buf) bq_read, bq_write; /* actual list of buffers */
	struct buf *bq_write_next;	/* next request in bq_write */
	struct buf *bq_next;		/* current request */
	int bq_read_burst;		/* # of consecutive reads */
};


/*
 * Check if two buf's are in ascending order.
 *
 * Compares by cylinder first when BUFQ_SORT_CYLINDER is selected, then
 * by raw block number.  When either argument is NULL the pair counts as
 * "in order" only if bq is NULL (a NULL bq sorts after everything).
 */
static __inline int
buf_inorder(struct buf *bp, struct buf *bq, int sortby)
{
	int r;

	if (bp == NULL || bq == NULL)
		return(bq == NULL);

	if (sortby == BUFQ_SORT_CYLINDER)
		r = bp->b_cylinder - bq->b_cylinder;
	else
		r = 0;

	if (r == 0)
		r = bp->b_rawblkno - bq->b_rawblkno;

	return(r <= 0);
}


/*
 * First-come first-served sort for disks.
 *
 * Requests are appended to the queue without any reordering.
 */
static void
bufq_fcfs_put(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_fcfs *fcfs = bufq->bq_private;

	TAILQ_INSERT_TAIL(&fcfs->bq_head, bp, b_actq);
}

/*
 * Return the head request; dequeue it if "remove" is set.
 * Returns NULL when the queue is empty.
 */
static struct buf *
bufq_fcfs_get(struct bufq_state *bufq, int remove)
{
	struct bufq_fcfs *fcfs = bufq->bq_private;
	struct buf *bp;

	bp = TAILQ_FIRST(&fcfs->bq_head);

	if (bp != NULL && remove)
		TAILQ_REMOVE(&fcfs->bq_head, bp, b_actq);

	return(bp);
}


/*
 * Seek sort for disks.
 *
 * There are actually two queues, sorted in ascendening order.  The first
 * queue holds those requests which are positioned after the current block;
 * the second holds requests which came in after their position was passed.
 * Thus we implement a one-way scan, retracting after reaching the end of
 * the drive to the first request on the second queue, at which time it
 * becomes the first queue.
 *
 * A one-way scan is natural because of the way UNIX read-ahead blocks are
 * allocated.
 */
static void
bufq_disksort_put(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_disksort *disksort = bufq->bq_private;
	struct buf *bq, *nbq;
	int sortby;

	sortby = bufq->bq_flags & BUFQ_SORT_MASK;

	bq = TAILQ_FIRST(&disksort->bq_head);

	/*
	 * If the queue is empty it's easy; we just go on the end.
	 */
	if (bq == NULL) {
		TAILQ_INSERT_TAIL(&disksort->bq_head, bp, b_actq);
		return;
	}

	/*
	 * If we lie before the currently active request, then we
	 * must locate the second request list and add ourselves to it.
	 */
	if (buf_inorder(bp, bq, sortby)) {
		while ((nbq = TAILQ_NEXT(bq, b_actq)) != NULL) {
			/*
			 * Check for an ``inversion'' in the normally ascending
			 * block numbers, indicating the start of the second
			 * request list.
			 */
			if (buf_inorder(nbq, bq, sortby)) {
				/*
				 * Search the second request list for the first
				 * request at a larger block number.  We go
				 * after that; if there is no such request, we
				 * go at the end.
				 */
				do {
					if (buf_inorder(bp, nbq, sortby))
						goto insert;
					bq = nbq;
				} while ((nbq =
				    TAILQ_NEXT(bq, b_actq)) != NULL);
				goto insert;		/* after last */
			}
			bq = nbq;
		}
		/*
		 * No inversions... we will go after the last, and
		 * be the first request in the second request list.
		 */
		goto insert;
	}
	/*
	 * Request is at/after the current request...
	 * sort in the first request list.
	 */
	while ((nbq = TAILQ_NEXT(bq, b_actq)) != NULL) {
		/*
		 * We want to go after the current request if there is an
		 * inversion after it (i.e. it is the end of the first
		 * request list), or if the next request is a larger cylinder
		 * than our request.
		 */
		if (buf_inorder(nbq, bq, sortby) ||
		    buf_inorder(bp, nbq, sortby))
			goto insert;
		bq = nbq;
	}
	/*
	 * Neither a second list nor a larger request... we go at the end of
	 * the first list, which is the same as the end of the whole schebang.
	 */
insert:	TAILQ_INSERT_AFTER(&disksort->bq_head, bq, bp, b_actq);
}

/*
 * Return the head request; dequeue it if "remove" is set.
 * Returns NULL when the queue is empty.
 */
static struct buf *
bufq_disksort_get(struct bufq_state *bufq, int remove)
{
	struct bufq_disksort *disksort = bufq->bq_private;
	struct buf *bp;

	bp = TAILQ_FIRST(&disksort->bq_head);

	if (bp != NULL && remove)
		TAILQ_REMOVE(&disksort->bq_head, bp, b_actq);

	return(bp);
}


/*
 * Seek sort for disks.
 *
 * There are two queues.  The first queue holds read requests; the second
 * holds write requests.  The read queue is first-come first-served; the
 * write queue is sorted in ascendening block order.
 * The read queue is processed first.
 * After PRIO_READ_BURST consecutive
 * read requests with non-empty write queue PRIO_WRITE_REQ requests from
 * the write queue will be processed.
 */
static void
bufq_prio_put(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_prio *prio = bufq->bq_private;
	struct buf *bq;
	int sortby;

	sortby = bufq->bq_flags & BUFQ_SORT_MASK;

	/*
	 * If it's a read request append it to the list.
	 */
	if ((bp->b_flags & B_READ) == B_READ) {
		TAILQ_INSERT_TAIL(&prio->bq_read, bp, b_actq);
		return;
	}

	bq = TAILQ_FIRST(&prio->bq_write);

	/*
	 * If the write list is empty, simply append it to the list.
	 */
	if (bq == NULL) {
		TAILQ_INSERT_TAIL(&prio->bq_write, bp, b_actq);
		prio->bq_write_next = bp;
		return;
	}

	/*
	 * If we lie after the next request, insert after this request.
	 * (Start the scan at bq_write_next instead of the list head.)
	 */
	if (buf_inorder(prio->bq_write_next, bp, sortby))
		bq = prio->bq_write_next;

	/*
	 * Search for the first request at a larger block number.
	 * We go before this request if it exists.
	 */
	while (bq != NULL && buf_inorder(bq, bp, sortby))
		bq = TAILQ_NEXT(bq, b_actq);

	if (bq != NULL)
		TAILQ_INSERT_BEFORE(bq, bp, b_actq);
	else
		TAILQ_INSERT_TAIL(&prio->bq_write, bp, b_actq);
}

/*
 * Return the next request per the read-priority policy; dequeue it if
 * "remove" is set.  The chosen request is cached in bq_next so that a
 * peek (remove == 0) and the following removal agree.
 */
static struct buf *
bufq_prio_get(struct bufq_state *bufq, int remove)
{
	struct bufq_prio *prio = bufq->bq_private;
	struct buf *bp;

	/*
	 * If no current request, get next from the lists.
	 */
	if (prio->bq_next == NULL) {
		/*
		 * If at least one list is empty, select the other.
		 */
		if (TAILQ_FIRST(&prio->bq_read) == NULL) {
			prio->bq_next = prio->bq_write_next;
			prio->bq_read_burst = 0;
		} else if (prio->bq_write_next == NULL) {
			prio->bq_next = TAILQ_FIRST(&prio->bq_read);
			prio->bq_read_burst = 0;
		} else {
			/*
			 * Both list have requests.  Select the read list up
			 * to PRIO_READ_BURST times, then select the write
			 * list PRIO_WRITE_REQ times.
			 */
			if (prio->bq_read_burst++ < PRIO_READ_BURST)
				prio->bq_next = TAILQ_FIRST(&prio->bq_read);
			else if (prio->bq_read_burst <
			    PRIO_READ_BURST + PRIO_WRITE_REQ)
				prio->bq_next = prio->bq_write_next;
			else {
				prio->bq_next = TAILQ_FIRST(&prio->bq_read);
				prio->bq_read_burst = 0;
			}
		}
	}

	bp = prio->bq_next;

	if (prio->bq_next != NULL && remove) {
		if ((prio->bq_next->b_flags & B_READ) == B_READ)
			TAILQ_REMOVE(&prio->bq_read, prio->bq_next, b_actq);
		else {
			TAILQ_REMOVE(&prio->bq_write, prio->bq_next, b_actq);
			/*
			 * Advance the write pointer.
			 * (A write selected above is always bq_write_next,
			 * so advance from there; wrap to the list head when
			 * the end of the sorted write list is reached.)
			 */
			prio->bq_write_next =
			    TAILQ_NEXT(prio->bq_write_next, b_actq);
			if (prio->bq_write_next == NULL)
				prio->bq_write_next =
				    TAILQ_FIRST(&prio->bq_write);
		}

		prio->bq_next = NULL;
	}

	return(bp);
}

/*
 * Create a device buffer queue.
729 */ 730 void 731 bufq_alloc(struct bufq_state *bufq, int flags) 732 { 733 struct bufq_fcfs *fcfs; 734 struct bufq_disksort *disksort; 735 struct bufq_prio *prio; 736 737 bufq->bq_flags = flags; 738 739 switch (flags & BUFQ_SORT_MASK) { 740 case BUFQ_SORT_RAWBLOCK: 741 case BUFQ_SORT_CYLINDER: 742 break; 743 case 0: 744 if ((flags & BUFQ_METHOD_MASK) == BUFQ_FCFS) 745 break; 746 /* FALLTHROUGH */ 747 default: 748 panic("bufq_alloc: sort out of range"); 749 } 750 751 switch (flags & BUFQ_METHOD_MASK) { 752 case BUFQ_FCFS: 753 bufq->bq_get = bufq_fcfs_get; 754 bufq->bq_put = bufq_fcfs_put; 755 MALLOC(bufq->bq_private, struct bufq_fcfs *, 756 sizeof(struct bufq_fcfs), M_DEVBUF, M_ZERO); 757 fcfs = (struct bufq_fcfs *)bufq->bq_private; 758 TAILQ_INIT(&fcfs->bq_head); 759 break; 760 case BUFQ_DISKSORT: 761 bufq->bq_get = bufq_disksort_get; 762 bufq->bq_put = bufq_disksort_put; 763 MALLOC(bufq->bq_private, struct bufq_disksort *, 764 sizeof(struct bufq_disksort), M_DEVBUF, M_ZERO); 765 disksort = (struct bufq_disksort *)bufq->bq_private; 766 TAILQ_INIT(&disksort->bq_head); 767 break; 768 case BUFQ_READ_PRIO: 769 bufq->bq_get = bufq_prio_get; 770 bufq->bq_put = bufq_prio_put; 771 MALLOC(bufq->bq_private, struct bufq_prio *, 772 sizeof(struct bufq_prio), M_DEVBUF, M_ZERO); 773 prio = (struct bufq_prio *)bufq->bq_private; 774 TAILQ_INIT(&prio->bq_read); 775 TAILQ_INIT(&prio->bq_write); 776 break; 777 default: 778 panic("bufq_alloc: method out of range"); 779 } 780 } 781 782 /* 783 * Destroy a device buffer queue. 784 */ 785 void 786 bufq_free(struct bufq_state *bufq) 787 { 788 KASSERT(bufq->bq_private != NULL); 789 KASSERT(BUFQ_PEEK(bufq) == NULL); 790 791 FREE(bufq->bq_private, M_DEVBUF); 792 bufq->bq_get = NULL; 793 bufq->bq_put = NULL; 794 } 795