xref: /netbsd-src/sys/kern/bufq_priocscan.c (revision a5847cc334d9a7029f6352b847e9e8d71a0f9e0c)
1 /*	$NetBSD: bufq_priocscan.c,v 1.16 2011/11/07 08:44:16 hannken Exp $	*/
2 
3 /*-
4  * Copyright (c)2004,2005,2006,2008,2009 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.16 2011/11/07 08:44:16 hannken Exp $");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/buf.h>
35 #include <sys/bufq.h>
36 #include <sys/bufq_impl.h>
37 #include <sys/kmem.h>
38 
39 /*
40  * Cyclical scan (CSCAN)
41  */
42 TAILQ_HEAD(bqhead, buf);
43 struct cscan_queue {
44 	struct bqhead cq_head[2];	/* actual lists of buffers */
45 	unsigned int cq_idx;		/* current list index */
46 	int cq_lastcylinder;		/* b_cylinder of the last request */
47 	daddr_t cq_lastrawblkno;	/* b_rawblkno of the last request */
48 };
49 
50 static inline int cscan_empty(const struct cscan_queue *);
51 static void cscan_put(struct cscan_queue *, struct buf *, int);
52 static struct buf *cscan_get(struct cscan_queue *, int);
53 static void cscan_init(struct cscan_queue *);
54 
55 static inline int
56 cscan_empty(const struct cscan_queue *q)
57 {
58 
59 	return TAILQ_EMPTY(&q->cq_head[0]) && TAILQ_EMPTY(&q->cq_head[1]);
60 }
61 
62 static void
63 cscan_put(struct cscan_queue *q, struct buf *bp, int sortby)
64 {
65 	struct buf tmp;
66 	struct buf *it;
67 	struct bqhead *bqh;
68 	unsigned int idx;
69 
70 	tmp.b_cylinder = q->cq_lastcylinder;
71 	tmp.b_rawblkno = q->cq_lastrawblkno;
72 
73 	if (buf_inorder(bp, &tmp, sortby))
74 		idx = 1 - q->cq_idx;
75 	else
76 		idx = q->cq_idx;
77 
78 	bqh = &q->cq_head[idx];
79 
80 	TAILQ_FOREACH(it, bqh, b_actq)
81 		if (buf_inorder(bp, it, sortby))
82 			break;
83 
84 	if (it != NULL)
85 		TAILQ_INSERT_BEFORE(it, bp, b_actq);
86 	else
87 		TAILQ_INSERT_TAIL(bqh, bp, b_actq);
88 }
89 
90 static struct buf *
91 cscan_get(struct cscan_queue *q, int remove)
92 {
93 	unsigned int idx = q->cq_idx;
94 	struct bqhead *bqh;
95 	struct buf *bp;
96 
97 	bqh = &q->cq_head[idx];
98 	bp = TAILQ_FIRST(bqh);
99 
100 	if (bp == NULL) {
101 		/* switch queue */
102 		idx = 1 - idx;
103 		bqh = &q->cq_head[idx];
104 		bp = TAILQ_FIRST(bqh);
105 	}
106 
107 	KDASSERT((bp != NULL && !cscan_empty(q)) ||
108 	         (bp == NULL && cscan_empty(q)));
109 
110 	if (bp != NULL && remove) {
111 		q->cq_idx = idx;
112 		TAILQ_REMOVE(bqh, bp, b_actq);
113 
114 		q->cq_lastcylinder = bp->b_cylinder;
115 		q->cq_lastrawblkno =
116 		    bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
117 	}
118 
119 	return (bp);
120 }
121 
122 static void
123 cscan_init(struct cscan_queue *q)
124 {
125 
126 	TAILQ_INIT(&q->cq_head[0]);
127 	TAILQ_INIT(&q->cq_head[1]);
128 }
129 
130 
131 /*
132  * Per-prioritiy CSCAN.
133  *
134  * XXX probably we should have a way to raise
135  * priority of the on-queue requests.
136  */
137 #define	PRIOCSCAN_NQUEUE	3
138 
139 struct priocscan_queue {
140 	struct cscan_queue q_queue;
141 	unsigned int q_burst;
142 };
143 
144 struct bufq_priocscan {
145 	struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE];
146 
147 #if 0
148 	/*
149 	 * XXX using "global" head position can reduce positioning time
150 	 * when switching between queues.
151 	 * although it might affect against fairness.
152 	 */
153 	daddr_t bq_lastrawblkno;
154 	int bq_lastcylinder;
155 #endif
156 };
157 
158 /*
159  * how many requests to serve when having pending requests on other queues.
160  *
161  * XXX tune
162  * be careful: while making these values larger likely
163  * increases the total throughput, it can also increase latencies
164  * for some workloads.
165  */
166 const int priocscan_burst[] = {
167 	64, 16, 4
168 };
169 
170 static void bufq_priocscan_init(struct bufq_state *);
171 static void bufq_priocscan_put(struct bufq_state *, struct buf *);
172 static struct buf *bufq_priocscan_get(struct bufq_state *, int);
173 
174 BUFQ_DEFINE(priocscan, 40, bufq_priocscan_init);
175 
176 static inline struct cscan_queue *bufq_priocscan_selectqueue(
177     struct bufq_priocscan *, const struct buf *);
178 
179 static inline struct cscan_queue *
180 bufq_priocscan_selectqueue(struct bufq_priocscan *q, const struct buf *bp)
181 {
182 	static const int priocscan_priomap[] = {
183 		[BPRIO_TIMENONCRITICAL] = 2,
184 		[BPRIO_TIMELIMITED] = 1,
185 		[BPRIO_TIMECRITICAL] = 0
186 	};
187 
188 	return &q->bq_queue[priocscan_priomap[BIO_GETPRIO(bp)]].q_queue;
189 }
190 
191 static void
192 bufq_priocscan_put(struct bufq_state *bufq, struct buf *bp)
193 {
194 	struct bufq_priocscan *q = bufq->bq_private;
195 	struct cscan_queue *cq;
196 	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
197 
198 	cq = bufq_priocscan_selectqueue(q, bp);
199 	cscan_put(cq, bp, sortby);
200 }
201 
202 static struct buf *
203 bufq_priocscan_get(struct bufq_state *bufq, int remove)
204 {
205 	struct bufq_priocscan *q = bufq->bq_private;
206 	struct priocscan_queue *pq, *npq;
207 	struct priocscan_queue *first; /* highest priority non-empty queue */
208 	const struct priocscan_queue *epq;
209 	struct buf *bp;
210 	bool single; /* true if there's only one non-empty queue */
211 
212 	/*
213 	 * find the highest priority non-empty queue.
214 	 */
215 	pq = &q->bq_queue[0];
216 	epq = pq + PRIOCSCAN_NQUEUE;
217 	for (; pq < epq; pq++) {
218 		if (!cscan_empty(&pq->q_queue)) {
219 			break;
220 		}
221 	}
222 	if (pq == epq) {
223 		/*
224 		 * all our queues are empty.  there's nothing to serve.
225 		 */
226 		return NULL;
227 	}
228 	first = pq;
229 
230 	/*
231 	 * scan the rest of queues.
232 	 *
233 	 * if we have two or more non-empty queues, we serve the highest
234 	 * priority one with non-zero burst count.
235 	 */
236 	single = true;
237 	for (npq = pq + 1; npq < epq; npq++) {
238 		if (!cscan_empty(&npq->q_queue)) {
239 			/*
240 			 * we found another non-empty queue.
241 			 * it means that a queue needs to consume its burst
242 			 * count to be served.
243 			 */
244 			single = false;
245 
246 			/*
247 			 * check if our current candidate queue has already
248 			 * exhausted its burst count.
249 			 */
250 			if (pq->q_burst > 0) {
251 				break;
252 			}
253 			pq = npq;
254 		}
255 	}
256 	if (single) {
257 		/*
258 		 * there's only a non-empty queue.
259 		 * just serve it without consuming its burst count.
260 		 */
261 		KASSERT(pq == first);
262 	} else {
263 		/*
264 		 * there are two or more non-empty queues.
265 		 */
266 		if (pq->q_burst == 0) {
267 			/*
268 			 * no queues can be served because they have already
269 			 * exhausted their burst count.
270 			 */
271 			unsigned int i;
272 #ifdef DEBUG
273 			for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
274 				pq = &q->bq_queue[i];
275 				if (!cscan_empty(&pq->q_queue) && pq->q_burst) {
276 					panic("%s: inconsist", __func__);
277 				}
278 			}
279 #endif /* DEBUG */
280 			/*
281 			 * reset burst counts.
282 			 */
283 			if (remove) {
284 				for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
285 					pq = &q->bq_queue[i];
286 					pq->q_burst = priocscan_burst[i];
287 				}
288 			}
289 
290 			/*
291 			 * serve the highest priority non-empty queue.
292 			 */
293 			pq = first;
294 		}
295 		/*
296 		 * consume the burst count.
297 		 *
298 		 * XXX account only by number of requests.  is it good enough?
299 		 */
300 		if (remove) {
301 			KASSERT(pq->q_burst > 0);
302 			pq->q_burst--;
303 		}
304 	}
305 
306 	/*
307 	 * finally, get a request from the selected queue.
308 	 */
309 	KDASSERT(!cscan_empty(&pq->q_queue));
310 	bp = cscan_get(&pq->q_queue, remove);
311 	KDASSERT(bp != NULL);
312 	KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp));
313 
314 	return bp;
315 }
316 
317 static struct buf *
318 bufq_priocscan_cancel(struct bufq_state *bufq, struct buf *bp)
319 {
320 	struct bufq_priocscan * const q = bufq->bq_private;
321 	unsigned int i, j;
322 
323 	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
324 		struct cscan_queue * const cq = &q->bq_queue[i].q_queue;
325 		for (j = 0; j < 2; j++) {
326 			struct bqhead * const bqh = &cq->cq_head[j];
327 			struct buf *bq;
328 
329 			TAILQ_FOREACH(bq, bqh, b_actq) {
330 				if (bq == bp) {
331 					TAILQ_REMOVE(bqh, bp, b_actq);
332 					return bp;
333 				}
334 			}
335 		}
336 	}
337 	return NULL;
338 }
339 
340 static void
341 bufq_priocscan_fini(struct bufq_state *bufq)
342 {
343 
344 	KASSERT(bufq->bq_private != NULL);
345 	kmem_free(bufq->bq_private, sizeof(struct bufq_priocscan));
346 }
347 
348 static void
349 bufq_priocscan_init(struct bufq_state *bufq)
350 {
351 	struct bufq_priocscan *q;
352 	unsigned int i;
353 
354 	bufq->bq_get = bufq_priocscan_get;
355 	bufq->bq_put = bufq_priocscan_put;
356 	bufq->bq_cancel = bufq_priocscan_cancel;
357 	bufq->bq_fini = bufq_priocscan_fini;
358 	bufq->bq_private = kmem_zalloc(sizeof(struct bufq_priocscan), KM_SLEEP);
359 
360 	q = bufq->bq_private;
361 	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
362 		struct cscan_queue *cq = &q->bq_queue[i].q_queue;
363 
364 		cscan_init(cq);
365 	}
366 }
367