/*	$NetBSD: bufq_priocscan.c,v 1.18 2014/01/28 12:50:54 martin Exp $	*/

/*-
 * Copyright (c)2004,2005,2006,2008,2009,2011,2012 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.18 2014/01/28 12:50:54 martin Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/bufq_impl.h>
#include <sys/kmem.h>
#include <sys/rbtree.h>

#undef PRIOCSCAN_USE_GLOBAL_POSITION

/*
 * Cyclical scan (CSCAN)
 */
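/*
 * Requests are kept in a red-black tree ordered by disk address
 * (cylinder/rawblkno, or rawblkno alone, depending on the sort mode).
 * The queue remembers the position just past the last request served
 * and hands out the first request at or beyond that position, wrapping
 * around to the lowest address once the scan passes the end.
 */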

struct cscan_key {
	daddr_t k_rawblkno;
	int k_cylinder;
};

struct cscan_queue {
	rb_tree_t cq_buffers;		/* ordered list of buffers */
#if !defined(PRIOCSCAN_USE_GLOBAL_POSITION)
	struct cscan_key cq_lastkey;	/* key of last request */
#endif /* !defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
	int cq_sortby;			/* BUFQ_SORT_MASK */
	rb_tree_ops_t cq_ops;
};

static signed int
buf_cmp(const struct buf *b1, const struct buf *b2, int sortby)
{

	if (buf_inorder(b2, b1, sortby)) {
		return 1;	/* b1 > b2 */
	}
	if (buf_inorder(b1, b2, sortby)) {
		return -1;	/* b1 < b2 */
	}
	return 0;
}

/* return positive if n1 > n2 */
static signed int
cscan_tree_compare_nodes(void *context, const void *n1, const void *n2)
{
	const struct cscan_queue * const q = context;
	const struct buf * const b1 = n1;
	const struct buf * const b2 = n2;
	const int sortby = q->cq_sortby;
	const int diff = buf_cmp(b1, b2, sortby);

	/*
	 * XXX rawblkno/cylinder might not be unique.  eg. unbuffered i/o
	 */

	if (diff != 0) {
		return diff;
	}

	/*
	 * XXX rawblkno/cylinder might not be unique.  eg. unbuffered i/o
	 */
	if (b1 > b2) {
		return 1;
	}
	if (b1 < b2) {
		return -1;
	}
	return 0;
}

/* return positive if n1 > k2 */
static signed int
cscan_tree_compare_key(void *context, const void *n1, const void *k2)
{
	const struct cscan_queue * const q = context;
	const struct buf * const b1 = n1;
	const struct cscan_key * const key = k2;
	const struct buf tmp = {
		.b_rawblkno = key->k_rawblkno,
		.b_cylinder = key->k_cylinder,
	};
	const struct buf *b2 = &tmp;
	const int sortby = q->cq_sortby;

	return buf_cmp(b1, b2, sortby);
}

static void __unused
cscan_dump(struct cscan_queue *cq)
{
	const int sortby = cq->cq_sortby;
	struct buf *bp;

	RB_TREE_FOREACH(bp, &cq->cq_buffers) {
		if (sortby == BUFQ_SORT_RAWBLOCK) {
			printf(" %jd", (intmax_t)bp->b_rawblkno);
		} else {
			printf(" %jd/%jd",
			    (intmax_t)bp->b_cylinder, (intmax_t)bp->b_rawblkno);
		}
	}
}

static inline bool
cscan_empty(struct cscan_queue *q)
{

	/* XXX this might do more work than necessary */
	return rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT) == NULL;
}

static void
cscan_put(struct cscan_queue *q, struct buf *bp)
{
	struct buf *obp __diagused;

	obp = rb_tree_insert_node(&q->cq_buffers, bp);
	KASSERT(obp == bp); /* see cscan_tree_compare_nodes */
}

static struct buf *
cscan_get(struct cscan_queue *q, int remove, struct cscan_key *key)
{
	struct buf *bp;

	bp = rb_tree_find_node_geq(&q->cq_buffers, key);
	KDASSERT(bp == NULL || cscan_tree_compare_key(q, bp, key) >= 0);
	if (bp == NULL) {
		bp = rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT);
		KDASSERT(cscan_tree_compare_key(q, bp, key) < 0);
	}
	if (bp != NULL && remove) {
#if defined(DEBUG)
		struct buf *nbp;
#endif /* defined(DEBUG) */

		rb_tree_remove_node(&q->cq_buffers, bp);
		/*
		 * remember the head position.
		 */
		key->k_cylinder = bp->b_cylinder;
		key->k_rawblkno = bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
#if defined(DEBUG)
		nbp = rb_tree_find_node_geq(&q->cq_buffers, key);
		if (nbp != NULL && cscan_tree_compare_nodes(q, nbp, bp) < 0) {
			panic("%s: wrong order %p < %p\n", __func__,
			    nbp, bp);
		}
#endif /* defined(DEBUG) */
	}
	return bp;
}

static void
cscan_init(struct cscan_queue *q, int sortby)
{
	static const rb_tree_ops_t cscan_tree_ops = {
		.rbto_compare_nodes = cscan_tree_compare_nodes,
		.rbto_compare_key = cscan_tree_compare_key,
		.rbto_node_offset = offsetof(struct buf, b_u.u_rbnode),
		.rbto_context = NULL,
	};

	q->cq_sortby = sortby;
	/* XXX copy ops to workaround rbtree.h API limitation */
	q->cq_ops = cscan_tree_ops;
	q->cq_ops.rbto_context = q;
	rb_tree_init(&q->cq_buffers, &q->cq_ops);
}

/*
 * Per-priority CSCAN.
 *
 * XXX probably we should have a way to raise
 * priority of the on-queue requests.
 */
#define	PRIOCSCAN_NQUEUE	3

struct priocscan_queue {
	struct cscan_queue q_queue;
	unsigned int q_burst;
};

struct bufq_priocscan {
	struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE];

#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
	/*
	 * XXX using "global" head position can reduce positioning time
	 * when switching between queues.
	 * although it might affect fairness.
	 */
	struct cscan_key bq_lastkey;
#endif
};

/*
 * how many requests to serve from a queue while requests are pending
 * on other queues.
 *
 * XXX tune
 * be careful: while making these values larger likely
 * increases the total throughput, it can also increase latencies
 * for some workloads.
 */
const int priocscan_burst[] = {
	64, 16, 4
};
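/*
 * The burst values correspond to the bq_queue[] slots: index 0 is the
 * highest priority queue (BPRIO_TIMECRITICAL, see
 * bufq_priocscan_selectqueue below) and gets the largest burst.
 */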

static void bufq_priocscan_init(struct bufq_state *);
static void bufq_priocscan_put(struct bufq_state *, struct buf *);
static struct buf *bufq_priocscan_get(struct bufq_state *, int);

BUFQ_DEFINE(priocscan, 40, bufq_priocscan_init);

static inline struct cscan_queue *bufq_priocscan_selectqueue(
    struct bufq_priocscan *, const struct buf *);

static inline struct cscan_queue *
bufq_priocscan_selectqueue(struct bufq_priocscan *q, const struct buf *bp)
{
	static const int priocscan_priomap[] = {
		[BPRIO_TIMENONCRITICAL] = 2,
		[BPRIO_TIMELIMITED] = 1,
		[BPRIO_TIMECRITICAL] = 0
	};

	return &q->bq_queue[priocscan_priomap[BIO_GETPRIO(bp)]].q_queue;
}

static void
bufq_priocscan_put(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_priocscan *q = bufq->bq_private;
	struct cscan_queue *cq;

	cq = bufq_priocscan_selectqueue(q, bp);
	cscan_put(cq, bp);
}
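/*
 * bufq_priocscan_get:
 *	pick the next request to serve.
 *
 *	Scan the per-priority queues from highest to lowest priority and
 *	serve the highest priority non-empty queue that still has burst
 *	budget left.  When every non-empty queue has exhausted its budget,
 *	refill the budgets from priocscan_burst[] and start over from the
 *	highest priority.  A lone non-empty queue is served without
 *	consuming its budget.
 */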
static struct buf *
bufq_priocscan_get(struct bufq_state *bufq, int remove)
{
	struct bufq_priocscan *q = bufq->bq_private;
	struct priocscan_queue *pq, *npq;
	struct priocscan_queue *first; /* highest priority non-empty queue */
	const struct priocscan_queue *epq;
	struct buf *bp;
	bool single; /* true if there's only one non-empty queue */

	/*
	 * find the highest priority non-empty queue.
	 */
	pq = &q->bq_queue[0];
	epq = pq + PRIOCSCAN_NQUEUE;
	for (; pq < epq; pq++) {
		if (!cscan_empty(&pq->q_queue)) {
			break;
		}
	}
	if (pq == epq) {
		/*
		 * all our queues are empty.  there's nothing to serve.
		 */
		return NULL;
	}
	first = pq;

	/*
	 * scan the rest of the queues.
	 *
	 * if we have two or more non-empty queues, we serve the highest
	 * priority one with a non-zero burst count.
	 */
	single = true;
	for (npq = pq + 1; npq < epq; npq++) {
		if (!cscan_empty(&npq->q_queue)) {
			/*
			 * we found another non-empty queue.
			 * it means that a queue needs to consume its burst
			 * count to be served.
			 */
			single = false;

			/*
			 * check if our current candidate queue has already
			 * exhausted its burst count.
			 */
			if (pq->q_burst > 0) {
				break;
			}
			pq = npq;
		}
	}
	if (single) {
		/*
		 * there's only one non-empty queue.
		 * just serve it without consuming its burst count.
		 */
		KASSERT(pq == first);
	} else {
		/*
		 * there are two or more non-empty queues.
		 */
		if (pq->q_burst == 0) {
			/*
			 * no queue can be served because they have already
			 * exhausted their burst counts.
			 */
			unsigned int i;
#ifdef DEBUG
			for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
				pq = &q->bq_queue[i];
				if (!cscan_empty(&pq->q_queue) && pq->q_burst) {
					panic("%s: inconsist", __func__);
				}
			}
#endif /* DEBUG */
			/*
			 * reset burst counts.
			 */
			if (remove) {
				for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
					pq = &q->bq_queue[i];
					pq->q_burst = priocscan_burst[i];
				}
			}

			/*
			 * serve the highest priority non-empty queue.
			 */
			pq = first;
		}
		/*
		 * consume the burst count.
		 *
		 * XXX account only by number of requests.  is it good enough?
		 */
		if (remove) {
			KASSERT(pq->q_burst > 0);
			pq->q_burst--;
		}
	}

	/*
	 * finally, get a request from the selected queue.
	 */
	KDASSERT(!cscan_empty(&pq->q_queue));
	bp = cscan_get(&pq->q_queue, remove,
#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
	    &q->bq_lastkey
#else /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
	    &pq->q_queue.cq_lastkey
#endif /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
	    );
	KDASSERT(bp != NULL);
	KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp));

	return bp;
}

static struct buf *
bufq_priocscan_cancel(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_priocscan * const q = bufq->bq_private;
	unsigned int i;

	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
		struct cscan_queue * const cq = &q->bq_queue[i].q_queue;
		struct buf *it;

		/*
		 * XXX probably could be faster but the cancel functionality
		 * is not widely used anyway.
		 */
		RB_TREE_FOREACH(it, &cq->cq_buffers) {
			if (it == bp) {
				rb_tree_remove_node(&cq->cq_buffers, bp);
				return bp;
			}
		}
	}
	return NULL;
}

static void
bufq_priocscan_fini(struct bufq_state *bufq)
{

	KASSERT(bufq->bq_private != NULL);
	kmem_free(bufq->bq_private, sizeof(struct bufq_priocscan));
}

static void
bufq_priocscan_init(struct bufq_state *bufq)
{
	struct bufq_priocscan *q;
	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
	unsigned int i;

	bufq->bq_get = bufq_priocscan_get;
	bufq->bq_put = bufq_priocscan_put;
	bufq->bq_cancel = bufq_priocscan_cancel;
	bufq->bq_fini = bufq_priocscan_fini;
	bufq->bq_private = kmem_zalloc(sizeof(struct bufq_priocscan), KM_SLEEP);

	q = bufq->bq_private;
	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
		struct cscan_queue *cq = &q->bq_queue[i].q_queue;

		cscan_init(cq, sortby);
	}
}
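
/*
 * Usage sketch (illustrative only): bufq_alloc(9), bufq_put(9), bufq_get(9)
 * and BIO_SETPRIO() are the generic bufq/buffer interfaces declared
 * elsewhere, and "sc" stands for a hypothetical driver softc.
 *
 *	error = bufq_alloc(&sc->sc_bufq, "priocscan", BUFQ_SORT_RAWBLOCK);
 *	...
 *	BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 *	bufq_put(sc->sc_bufq, bp);
 *	...
 *	bp = bufq_get(sc->sc_bufq);
 */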