xref: /openbsd-src/sys/kern/kern_bufq.c (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
/*	$OpenBSD: kern_bufq.c,v 1.26 2013/11/20 23:52:42 dlg Exp $	*/
/*
 * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
 * Copyright (c) 2010 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/queue.h>

#include <sys/disklabel.h>

SLIST_HEAD(, bufq)	bufqs = SLIST_HEAD_INITIALIZER(bufqs);
struct mutex		bufqs_mtx = MUTEX_INITIALIZER(IPL_NONE);
int			bufqs_stop;

struct bufq_impl {
	void		*(*impl_create)(void);
	void		 (*impl_destroy)(void *);

	void		 (*impl_queue)(void *, struct buf *);
	struct buf	*(*impl_dequeue)(void *);
	void		 (*impl_requeue)(void *, struct buf *);
	int		 (*impl_peek)(void *);
};

void		*bufq_fifo_create(void);
void		 bufq_fifo_destroy(void *);
void		 bufq_fifo_queue(void *, struct buf *);
struct buf	*bufq_fifo_dequeue(void *);
void		 bufq_fifo_requeue(void *, struct buf *);
int		 bufq_fifo_peek(void *);

void		*bufq_nscan_create(void);
void		 bufq_nscan_destroy(void *);
void		 bufq_nscan_queue(void *, struct buf *);
struct buf	*bufq_nscan_dequeue(void *);
void		 bufq_nscan_requeue(void *, struct buf *);
int		 bufq_nscan_peek(void *);

const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
	{
		bufq_fifo_create,
		bufq_fifo_destroy,
		bufq_fifo_queue,
		bufq_fifo_dequeue,
		bufq_fifo_requeue,
		bufq_fifo_peek
	},
	{
		bufq_nscan_create,
		bufq_nscan_destroy,
		bufq_nscan_queue,
		bufq_nscan_dequeue,
		bufq_nscan_requeue,
		bufq_nscan_peek
	}
};

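/*
 * bufq_impls is a small dispatch table: each queueing discipline
 * exports a constructor, a destructor and queue/dequeue/requeue/peek
 * operations, and the bufq_* wrappers below call through
 * bq->bufq_impl.  The initializers must stay in the order of the
 * BUFQ_* type constants (BUFQ_FIFO and BUFQ_NSCAN in <sys/buf.h>)
 * that index this table.  A minimal, hedged sketch of a caller
 * selecting a discipline, with sc and sc_bufq standing in for a
 * hypothetical driver softc:
 *
 *	if (bufq_init(&sc->sc_bufq, BUFQ_DEFAULT) != 0)
 *		return;
 */
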
int
bufq_init(struct bufq *bq, int type)
{
	u_int hi = BUFQ_HI, low = BUFQ_LOW;

	if (type >= BUFQ_HOWMANY)
		panic("bufq_init: type %i unknown", type);

	/*
	 * Ensure that writes can't consume the entire amount of kva
	 * available to the buffer cache if we only have a limited amount
	 * of kva available to us.
	 */
	if (hi >= (bcstats.kvaslots / 16)) {
		hi = bcstats.kvaslots / 16;
		if (hi < 2)
			hi = 2;
		low = hi / 2;
	}

	mtx_init(&bq->bufq_mtx, IPL_BIO);
	bq->bufq_hi = hi;
	bq->bufq_low = low;
	bq->bufq_type = type;
	bq->bufq_impl = &bufq_impls[type];
	bq->bufq_data = bq->bufq_impl->impl_create();
	if (bq->bufq_data == NULL) {
		/*
		 * We should actually return failure so disks attaching after
		 * boot in low memory situations don't panic the system.
		 */
		panic("bufq init fail");
	}

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqinit", 0);
	}
	SLIST_INSERT_HEAD(&bufqs, bq, bufq_entries);
	mtx_leave(&bufqs_mtx);

	return (0);
}
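
/*
 * A hedged sketch of the intended producer/consumer pattern.  The
 * names sc, sc_bufq and mydrv_* are hypothetical and not part of this
 * file:
 *
 *	mydrv_strategy(): producer side
 *		bufq_queue(&sc->sc_bufq, bp);
 *		mydrv_start(sc);
 *
 *	mydrv_start(): consumer side
 *		while ((bp = bufq_dequeue(&sc->sc_bufq)) != NULL) {
 *			if (the hardware cannot take another transfer) {
 *				bufq_requeue(&sc->sc_bufq, bp);
 *				break;
 *			}
 *			... hand bp to the hardware ...
 *		}
 *
 * bufq_queue() records the owning bufq in bp->b_bq and bumps
 * bufq_outstanding; bufq_done() is the matching call on the completion
 * path that drops the count again.
 */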

int
bufq_switch(struct bufq *bq, int type)
{
	void		*data;
	void		*odata;
	int		otype;
	struct buf	*bp;
	int		ret;

	mtx_enter(&bq->bufq_mtx);
	ret = (bq->bufq_type == type);
	mtx_leave(&bq->bufq_mtx);
	if (ret)
		return (0);

	data = bufq_impls[type].impl_create();
	if (data == NULL)
		return (ENOMEM);

	mtx_enter(&bq->bufq_mtx);
	if (bq->bufq_type != type) { /* might have changed during create */
		odata = bq->bufq_data;
		otype = bq->bufq_type;

		while ((bp = bufq_impls[otype].impl_dequeue(odata)) != NULL)
			bufq_impls[type].impl_queue(data, bp);

		bq->bufq_data = data;
		bq->bufq_type = type;
		bq->bufq_impl = &bufq_impls[type];
	} else {
		otype = type;
		odata = data;
	}
	mtx_leave(&bq->bufq_mtx);

	bufq_impls[otype].impl_destroy(odata);

	return (0);
}
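
/*
 * Design note: bufq_switch() builds the new discipline's container
 * before taking the bufq mutex, so the queue is only held locked while
 * the pending bufs are moved across.  The type is re-checked under the
 * mutex ("might have changed during create"), and whichever container
 * loses the race is destroyed after the mutex is dropped.  A hedged
 * sketch of a caller (the surrounding ioctl path is hypothetical):
 *
 *	if ((error = bufq_switch(&sc->sc_bufq, BUFQ_FIFO)) != 0)
 *		return (error);
 */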

void
bufq_destroy(struct bufq *bq)
{
	bufq_drain(bq);

	bq->bufq_impl->impl_destroy(bq->bufq_data);
	bq->bufq_data = NULL;

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqdest", 0);
	}
	SLIST_REMOVE(&bufqs, bq, bufq, bufq_entries);
	mtx_leave(&bufqs_mtx);
}


void
bufq_queue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	while (bq->bufq_stop) {
		msleep(&bq->bufq_stop, &bq->bufq_mtx, PRIBIO, "bqqueue", 0);
	}

	bp->b_bq = bq;
	bq->bufq_outstanding++;
	bq->bufq_impl->impl_queue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

struct buf *
bufq_dequeue(struct bufq *bq)
{
	struct buf	*bp;

	mtx_enter(&bq->bufq_mtx);
	bp = bq->bufq_impl->impl_dequeue(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (bp);
}

void
bufq_requeue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	bq->bufq_impl->impl_requeue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

int
bufq_peek(struct bufq *bq)
{
	int		rv;

	mtx_enter(&bq->bufq_mtx);
	rv = bq->bufq_impl->impl_peek(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (rv);
}

void
bufq_drain(struct bufq *bq)
{
	struct buf	*bp;
	int		 s;

	while ((bp = bufq_dequeue(bq)) != NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
	}
}

void
bufq_wait(struct bufq *bq, struct buf *bp)
{
	if (bq->bufq_hi) {
		assertwaitok();
		mtx_enter(&bq->bufq_mtx);
		while (bq->bufq_outstanding >= bq->bufq_hi) {
			bq->bufq_waiting++;
			msleep(&bq->bufq_waiting, &bq->bufq_mtx,
			    PRIBIO, "bqwait", 0);
			bq->bufq_waiting--;
		}
		mtx_leave(&bq->bufq_mtx);
	}
}
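
/*
 * bufq_wait() provides simple throttling: once bufq_outstanding
 * reaches the high-water mark bufq_hi, the caller sleeps until
 * bufq_done() drains the count below bufq_low.  A hedged sketch of a
 * write path using it (sc and sc_bufq are hypothetical):
 *
 *	if (!ISSET(bp->b_flags, B_READ))
 *		bufq_wait(&sc->sc_bufq, bp);
 *	bufq_queue(&sc->sc_bufq, bp);
 *
 * The assertwaitok() above documents that bufq_wait() may only be
 * called from a context that is allowed to sleep.
 */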

void
bufq_done(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	bq->bufq_outstanding--;
	KASSERT(bq->bufq_outstanding >= 0);
	if (bq->bufq_stop && bq->bufq_outstanding == 0)
		wakeup(&bq->bufq_outstanding);
	if (bq->bufq_waiting && bq->bufq_outstanding < bq->bufq_low)
		wakeup(&bq->bufq_waiting);
	mtx_leave(&bq->bufq_mtx);
	bp->b_bq = NULL;
}

void
bufq_quiesce(void)
{
	struct bufq		*bq;

	mtx_enter(&bufqs_mtx);
	bufqs_stop = 1;
	mtx_leave(&bufqs_mtx);
	/*
	 * We can safely walk the list since it can't be modified as
	 * long as bufqs_stop is non-zero.
	 */
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 1;
		while (bq->bufq_outstanding) {
			msleep(&bq->bufq_outstanding, &bq->bufq_mtx,
			    PRIBIO, "bqquies", 0);
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

void
bufq_restart(void)
{
	struct bufq		*bq;

	mtx_enter(&bufqs_mtx);
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 0;
		wakeup(&bq->bufq_stop);
		mtx_leave(&bq->bufq_mtx);
	}
	bufqs_stop = 0;
	wakeup(&bufqs_stop);
	mtx_leave(&bufqs_mtx);
}
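
/*
 * bufq_quiesce() and bufq_restart() bracket a period in which no new
 * I/O may be started: quiesce marks every registered bufq stopped (so
 * bufq_queue() sleeps) and waits for outstanding transfers to finish;
 * restart clears the flags and wakes the sleepers.  A hedged sketch of
 * the intended pairing, e.g. around a suspend sequence:
 *
 *	bufq_quiesce();
 *	... suspend the machine or the controllers ...
 *	bufq_restart();
 */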


/*
 * fifo implementation
 */

void *
bufq_fifo_create(void)
{
	struct bufq_fifo_head	*head;

	head = malloc(sizeof(*head), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (head == NULL)
		return (NULL);

	SIMPLEQ_INIT(head);

	return (head);
}

void
bufq_fifo_destroy(void *data)
{
	free(data, M_DEVBUF);
}

void
bufq_fifo_queue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_TAIL(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

struct buf *
bufq_fifo_dequeue(void *data)
{
	struct bufq_fifo_head	*head = data;
	struct buf		*bp;

	bp = SIMPLEQ_FIRST(head);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(head, b_bufq.bufq_data_fifo.bqf_entries);

	return (bp);
}

void
bufq_fifo_requeue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_HEAD(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

int
bufq_fifo_peek(void *data)
{
	struct bufq_fifo_head	*head = data;

	return (SIMPLEQ_FIRST(head) != NULL);
}

/*
 * nscan implementation
 */

#define BUF_INORDER(ba, bb) ((ba)->b_blkno < (bb)->b_blkno)

#define dsentries b_bufq.bufq_data_nscan.bqf_entries

struct bufq_nscan_data {
	struct bufq_nscan_head sorted;
	struct bufq_nscan_head fifo;
	int leftoverroom; /* Remaining number of buffer inserts allowed */
};

void bufq_nscan_resort(struct bufq_nscan_data *data);
void bufq_simple_nscan(struct bufq_nscan_head *, struct buf *);

void
bufq_simple_nscan(struct bufq_nscan_head *head, struct buf *bp)
{
	struct buf *cur, *prev;

	prev = NULL;
	/*
	 * We look for the first slot where we would fit, then insert
	 * after the element we just passed.
	 */
	SIMPLEQ_FOREACH(cur, head, dsentries) {
		if (BUF_INORDER(bp, cur))
			break;
		prev = cur;
	}
	if (prev)
		SIMPLEQ_INSERT_AFTER(head, prev, bp, dsentries);
	else
		SIMPLEQ_INSERT_HEAD(head, bp, dsentries);
}
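
/*
 * Worked example of the insertion above: with the sorted list holding
 * buffers at block numbers 8, 64 and 128, queueing a buffer at block
 * 96 walks past 8 and 64, stops at 128 (BUF_INORDER(96, 128) is true)
 * and inserts after 64, giving 8, 64, 96, 128.  A buffer at block 4
 * stops at the very first element and is inserted at the head.
 */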

/*
 * Move up to BUFQ_NSCAN_N bufs from the fifo list into the sorted list.
 */
void
bufq_nscan_resort(struct bufq_nscan_data *data)
{
	struct bufq_nscan_head *fifo = &data->fifo;
	struct bufq_nscan_head *sorted = &data->sorted;
	int count, segmentsize = BUFQ_NSCAN_N;
	struct buf *bp;

	for (count = 0; count < segmentsize; count++) {
		bp = SIMPLEQ_FIRST(fifo);
		if (!bp)
			break;
		SIMPLEQ_REMOVE_HEAD(fifo, dsentries);
		bufq_simple_nscan(sorted, bp);
	}
	data->leftoverroom = segmentsize - count;
}
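
/*
 * The resort therefore works on batches of up to BUFQ_NSCAN_N buffers;
 * leftoverroom records how much of the current batch went unused so
 * bufq_nscan_queue() can keep appending in-order bufs to the sorted
 * list instead of starting a new batch.  For example, with a
 * BUFQ_NSCAN_N of, say, 128 and only 100 bufs waiting on the fifo
 * list, leftoverroom ends up at 28 after the resort.
 */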

void *
bufq_nscan_create(void)
{
	struct bufq_nscan_data *data;

	data = malloc(sizeof(*data), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!data)
		return NULL;
	SIMPLEQ_INIT(&data->sorted);
	SIMPLEQ_INIT(&data->fifo);

	return data;
}

void
bufq_nscan_destroy(void *vdata)
{
	free(vdata, M_DEVBUF);
}

void
bufq_nscan_queue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	/*
	 * If the previous sorted segment was small, we will continue
	 * packing in bufs as long as they're in order.
	 */
	if (data->leftoverroom) {
		struct buf *next = SIMPLEQ_FIRST(&data->sorted);
		if (next && BUF_INORDER(next, bp)) {
			bufq_simple_nscan(&data->sorted, bp);
			data->leftoverroom--;
			return;
		}
	}

	SIMPLEQ_INSERT_TAIL(&data->fifo, bp, dsentries);
}

struct buf *
bufq_nscan_dequeue(void *vdata)
{
	struct bufq_nscan_data *data = vdata;
	struct bufq_nscan_head *sorted = &data->sorted;
	struct buf	*bp;

	if (SIMPLEQ_FIRST(sorted) == NULL)
		bufq_nscan_resort(data);

	bp = SIMPLEQ_FIRST(sorted);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(sorted, dsentries);

	return (bp);
}

void
bufq_nscan_requeue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	SIMPLEQ_INSERT_HEAD(&data->fifo, bp, dsentries);
}

int
bufq_nscan_peek(void *vdata)
{
	struct bufq_nscan_data *data = vdata;

	return (SIMPLEQ_FIRST(&data->sorted) != NULL) ||
	    (SIMPLEQ_FIRST(&data->fifo) != NULL);
}