/*	$OpenBSD: kern_bufq.c,v 1.26 2013/11/20 23:52:42 dlg Exp $	*/
/*
 * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
 * Copyright (c) 2010 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/queue.h>

#include <sys/disklabel.h>

SLIST_HEAD(, bufq)	bufqs = SLIST_HEAD_INITIALIZER(bufqs);
struct mutex		bufqs_mtx = MUTEX_INITIALIZER(IPL_NONE);
int			bufqs_stop;

struct bufq_impl {
	void		*(*impl_create)(void);
	void		 (*impl_destroy)(void *);

	void		 (*impl_queue)(void *, struct buf *);
	struct buf	*(*impl_dequeue)(void *);
	void		 (*impl_requeue)(void *, struct buf *);
	int		 (*impl_peek)(void *);
};

void		*bufq_fifo_create(void);
void		 bufq_fifo_destroy(void *);
void		 bufq_fifo_queue(void *, struct buf *);
struct buf	*bufq_fifo_dequeue(void *);
void		 bufq_fifo_requeue(void *, struct buf *);
int		 bufq_fifo_peek(void *);

void		*bufq_nscan_create(void);
void		 bufq_nscan_destroy(void *);
void		 bufq_nscan_queue(void *, struct buf *);
struct buf	*bufq_nscan_dequeue(void *);
void		 bufq_nscan_requeue(void *, struct buf *);
int		 bufq_nscan_peek(void *);

const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
	{
		bufq_fifo_create,
		bufq_fifo_destroy,
		bufq_fifo_queue,
		bufq_fifo_dequeue,
		bufq_fifo_requeue,
		bufq_fifo_peek
	},
	{
		bufq_nscan_create,
		bufq_nscan_destroy,
		bufq_nscan_queue,
		bufq_nscan_dequeue,
		bufq_nscan_requeue,
		bufq_nscan_peek
	}
};

int
bufq_init(struct bufq *bq, int type)
{
	u_int hi = BUFQ_HI, low = BUFQ_LOW;

	if (type >= BUFQ_HOWMANY)
		panic("bufq_init: type %i unknown", type);

	/*
	 * Ensure that writes can't consume the entire amount of kva
	 * available to the buffer cache if we only have a limited
	 * amount of kva available to us.
	 */
	if (hi >= (bcstats.kvaslots / 16)) {
		hi = bcstats.kvaslots / 16;
		if (hi < 2)
			hi = 2;
		low = hi / 2;
	}

	mtx_init(&bq->bufq_mtx, IPL_BIO);
	bq->bufq_hi = hi;
	bq->bufq_low = low;
	bq->bufq_type = type;
	bq->bufq_impl = &bufq_impls[type];
	bq->bufq_data = bq->bufq_impl->impl_create();
	if (bq->bufq_data == NULL) {
		/*
		 * We should actually return failure here so that disks
		 * attaching after boot in low-memory situations don't
		 * panic the system.
		 */
		panic("bufq init fail");
	}

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqinit", 0);
	}
	SLIST_INSERT_HEAD(&bufqs, bq, bufq_entries);
	mtx_leave(&bufqs_mtx);

	return (0);
}
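
/*
 * A minimal sketch of how a disk driver might consume this API; the
 * softc layout and the xx_* names are hypothetical, not from this
 * file.  The strategy routine queues bufs, the start routine feeds
 * them to the hardware, and the completion path pairs every queued
 * buf with bufq_done() before biodone():
 *
 *	xx_attach():	bufq_init(&sc->sc_bufq, BUFQ_DEFAULT);
 *
 *	void
 *	xx_strategy(struct buf *bp)
 *	{
 *		struct xx_softc *sc = xx_lookup(bp->b_dev);
 *
 *		bufq_queue(&sc->sc_bufq, bp);
 *		xx_start(sc);
 *	}
 *
 *	void
 *	xx_start(struct xx_softc *sc)
 *	{
 *		struct buf *bp;
 *
 *		while ((bp = bufq_dequeue(&sc->sc_bufq)) != NULL) {
 *			if (xx_submit(sc, bp) != 0) {
 *				bufq_requeue(&sc->sc_bufq, bp);
 *				break;
 *			}
 *		}
 *	}
 *
 *	void
 *	xx_complete(struct xx_softc *sc, struct buf *bp)
 *	{
 *		bufq_done(&sc->sc_bufq, bp);
 *		biodone(bp);
 *	}
 */
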
int
bufq_switch(struct bufq *bq, int type)
{
	void		*data;
	void		*odata;
	int		 otype;
	struct buf	*bp;
	int		 ret;

	mtx_enter(&bq->bufq_mtx);
	ret = (bq->bufq_type == type);
	mtx_leave(&bq->bufq_mtx);
	if (ret)
		return (0);

	data = bufq_impls[type].impl_create();
	if (data == NULL)
		return (ENOMEM);

	mtx_enter(&bq->bufq_mtx);
	if (bq->bufq_type != type) {	/* might have changed during create */
		odata = bq->bufq_data;
		otype = bq->bufq_type;

		while ((bp = bufq_impls[otype].impl_dequeue(odata)) != NULL)
			bufq_impls[type].impl_queue(data, bp);

		bq->bufq_data = data;
		bq->bufq_type = type;
		bq->bufq_impl = &bufq_impls[type];
	} else {
		otype = type;
		odata = data;
	}
	mtx_leave(&bq->bufq_mtx);

	bufq_impls[otype].impl_destroy(odata);

	return (0);
}

void
bufq_destroy(struct bufq *bq)
{
	bufq_drain(bq);

	bq->bufq_impl->impl_destroy(bq->bufq_data);
	bq->bufq_data = NULL;

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqdest", 0);
	}
	SLIST_REMOVE(&bufqs, bq, bufq, bufq_entries);
	mtx_leave(&bufqs_mtx);
}

void
bufq_queue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	while (bq->bufq_stop) {
		msleep(&bq->bufq_stop, &bq->bufq_mtx, PRIBIO, "bqqueue", 0);
	}

	bp->b_bq = bq;
	bq->bufq_outstanding++;
	bq->bufq_impl->impl_queue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

struct buf *
bufq_dequeue(struct bufq *bq)
{
	struct buf	*bp;

	mtx_enter(&bq->bufq_mtx);
	bp = bq->bufq_impl->impl_dequeue(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (bp);
}

void
bufq_requeue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	bq->bufq_impl->impl_requeue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

int
bufq_peek(struct bufq *bq)
{
	int		 rv;

	mtx_enter(&bq->bufq_mtx);
	rv = bq->bufq_impl->impl_peek(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (rv);
}

void
bufq_drain(struct bufq *bq)
{
	struct buf	*bp;
	int		 s;

	while ((bp = bufq_dequeue(bq)) != NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
	}
}

void
bufq_wait(struct bufq *bq, struct buf *bp)
{
	if (bq->bufq_hi) {
		assertwaitok();
		mtx_enter(&bq->bufq_mtx);
		while (bq->bufq_outstanding >= bq->bufq_hi) {
			bq->bufq_waiting++;
			msleep(&bq->bufq_waiting, &bq->bufq_mtx,
			    PRIBIO, "bqwait", 0);
			bq->bufq_waiting--;
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

void
bufq_done(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	bq->bufq_outstanding--;
	KASSERT(bq->bufq_outstanding >= 0);
	if (bq->bufq_stop && bq->bufq_outstanding == 0)
		wakeup(&bq->bufq_outstanding);
	if (bq->bufq_waiting && bq->bufq_outstanding < bq->bufq_low)
		wakeup(&bq->bufq_waiting);
	mtx_leave(&bq->bufq_mtx);
	bp->b_bq = NULL;
}
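
/*
 * bufq_quiesce() and bufq_restart() below bracket a span during which
 * no new I/O may be queued on any bufq in the system, typically around
 * suspend/resume.  A sketch of the intended pairing; the xx_* hooks
 * are hypothetical:
 *
 *	xx_suspend()
 *	{
 *		bufq_quiesce();		(drains outstanding I/O,
 *					 then blocks new queueing)
 *		... power down the controller ...
 *	}
 *
 *	xx_resume()
 *	{
 *		... power the controller back up ...
 *		bufq_restart();		(wakes bufq_queue() sleepers)
 *	}
 *
 * While quiesced, bufq_queue() callers sleep on their bufq's
 * bufq_stop, and bufq_init()/bufq_destroy() sleep on the global
 * bufqs_stop, which is what makes the unlocked list walk in
 * bufq_quiesce() safe.
 */
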
void
bufq_quiesce(void)
{
	struct bufq	*bq;

	mtx_enter(&bufqs_mtx);
	bufqs_stop = 1;
	mtx_leave(&bufqs_mtx);
	/*
	 * We can safely walk the list since it can't be modified as
	 * long as bufqs_stop is non-zero.
	 */
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 1;
		while (bq->bufq_outstanding) {
			msleep(&bq->bufq_outstanding, &bq->bufq_mtx,
			    PRIBIO, "bqquies", 0);
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

void
bufq_restart(void)
{
	struct bufq	*bq;

	mtx_enter(&bufqs_mtx);
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 0;
		wakeup(&bq->bufq_stop);
		mtx_leave(&bq->bufq_mtx);
	}
	bufqs_stop = 0;
	wakeup(&bufqs_stop);
	mtx_leave(&bufqs_mtx);
}

/*
 * fifo implementation
 */

void *
bufq_fifo_create(void)
{
	struct bufq_fifo_head	*head;

	head = malloc(sizeof(*head), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (head == NULL)
		return (NULL);

	SIMPLEQ_INIT(head);

	return (head);
}

void
bufq_fifo_destroy(void *data)
{
	free(data, M_DEVBUF);
}

void
bufq_fifo_queue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_TAIL(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

struct buf *
bufq_fifo_dequeue(void *data)
{
	struct bufq_fifo_head	*head = data;
	struct buf		*bp;

	bp = SIMPLEQ_FIRST(head);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(head, b_bufq.bufq_data_fifo.bqf_entries);

	return (bp);
}

void
bufq_fifo_requeue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_HEAD(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

int
bufq_fifo_peek(void *data)
{
	struct bufq_fifo_head	*head = data;

	return (SIMPLEQ_FIRST(head) != NULL);
}

/*
 * nscan implementation
 */

#define BUF_INORDER(ba, bb)	((ba)->b_blkno < (bb)->b_blkno)

#define dsentries b_bufq.bufq_data_nscan.bqf_entries

struct bufq_nscan_data {
	struct bufq_nscan_head	sorted;
	struct bufq_nscan_head	fifo;
	int			leftoverroom; /* Remaining number of buffer inserts allowed */
};

void	bufq_nscan_resort(struct bufq_nscan_data *data);
void	bufq_simple_nscan(struct bufq_nscan_head *, struct buf *);
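
/*
 * The discipline works in batches: bufs arrive on the fifo list in
 * submission order, and dequeue drains a sorted list that is refilled
 * from up to BUFQ_NSCAN_N fifo entries at a time, ordered by ascending
 * b_blkno.  A small worked example (the block numbers are made up):
 * if bufs for blocks 9, 3, and 7 are queued while the sorted list is
 * empty, the first dequeue resorts them to 3, 7, 9 and returns 3.  A
 * buf for block 8 queued while that batch is still draining may join
 * it via the leftoverroom check in bufq_nscan_queue(); a buf for
 * block 1 would not, since servicing it would mean seeking backwards,
 * so it waits on the fifo for the next batch.
 */
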
void
bufq_simple_nscan(struct bufq_nscan_head *head, struct buf *bp)
{
	struct buf	*cur, *prev;

	prev = NULL;
	/*
	 * We look for the first slot where we would fit, then insert
	 * after the element we just passed.
	 */
	SIMPLEQ_FOREACH(cur, head, dsentries) {
		if (BUF_INORDER(bp, cur))
			break;
		prev = cur;
	}
	if (prev)
		SIMPLEQ_INSERT_AFTER(head, prev, bp, dsentries);
	else
		SIMPLEQ_INSERT_HEAD(head, bp, dsentries);
}

/*
 * Take up to BUFQ_NSCAN_N elements from the fifo queue and sort them.
 */
void
bufq_nscan_resort(struct bufq_nscan_data *data)
{
	struct bufq_nscan_head	*fifo = &data->fifo;
	struct bufq_nscan_head	*sorted = &data->sorted;
	int			 count, segmentsize = BUFQ_NSCAN_N;
	struct buf		*bp;

	for (count = 0; count < segmentsize; count++) {
		bp = SIMPLEQ_FIRST(fifo);
		if (bp == NULL)
			break;
		SIMPLEQ_REMOVE_HEAD(fifo, dsentries);
		bufq_simple_nscan(sorted, bp);
	}
	data->leftoverroom = segmentsize - count;
}

void *
bufq_nscan_create(void)
{
	struct bufq_nscan_data	*data;

	data = malloc(sizeof(*data), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (data == NULL)
		return (NULL);
	SIMPLEQ_INIT(&data->sorted);
	SIMPLEQ_INIT(&data->fifo);

	return (data);
}

void
bufq_nscan_destroy(void *vdata)
{
	free(vdata, M_DEVBUF);
}

void
bufq_nscan_queue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data	*data = vdata;

	/*
	 * If the previous sorted segment was small, we will continue
	 * packing in bufs as long as they're in order.
	 */
	if (data->leftoverroom) {
		struct buf *next = SIMPLEQ_FIRST(&data->sorted);
		if (next && BUF_INORDER(next, bp)) {
			bufq_simple_nscan(&data->sorted, bp);
			data->leftoverroom--;
			return;
		}
	}

	SIMPLEQ_INSERT_TAIL(&data->fifo, bp, dsentries);
}

struct buf *
bufq_nscan_dequeue(void *vdata)
{
	struct bufq_nscan_data	*data = vdata;
	struct bufq_nscan_head	*sorted = &data->sorted;
	struct buf		*bp;

	if (SIMPLEQ_FIRST(sorted) == NULL)
		bufq_nscan_resort(data);

	bp = SIMPLEQ_FIRST(sorted);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(sorted, dsentries);

	return (bp);
}

void
bufq_nscan_requeue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data	*data = vdata;

	SIMPLEQ_INSERT_HEAD(&data->fifo, bp, dsentries);
}

int
bufq_nscan_peek(void *vdata)
{
	struct bufq_nscan_data	*data = vdata;

	return (SIMPLEQ_FIRST(&data->sorted) != NULL) ||
	    (SIMPLEQ_FIRST(&data->fifo) != NULL);
}
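
/*
 * A hedged sketch of the flow control provided by bufq_wait() and
 * bufq_done(); the caller below and its helpers are hypothetical.
 * A producer issuing many asynchronous writes can call bufq_wait()
 * before each one: it sleeps once bufq_outstanding reaches bufq_hi
 * and is woken by bufq_done() when completions bring the count back
 * under bufq_low, keeping writes from monopolizing the buffer cache
 * kva.
 *
 *	for (;;) {
 *		bp = xx_next_write(sc);
 *		if (bp == NULL)
 *			break;
 *		bufq_wait(&sc->sc_bufq, bp);	(may sleep)
 *		bufq_queue(&sc->sc_bufq, bp);
 *		xx_start(sc);
 *	}
 */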