xref: /openbsd-src/sys/kern/kern_bufq.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$OpenBSD: kern_bufq.c,v 1.19 2010/09/03 10:51:53 dlg Exp $	*/
2 /*
3  * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
4  * Copyright (c) 2010 David Gwynne <dlg@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/kernel.h>
22 #include <sys/malloc.h>
23 #include <sys/mutex.h>
24 #include <sys/buf.h>
25 #include <sys/errno.h>
26 #include <sys/queue.h>
27 
28 #include <sys/disklabel.h>
29 
30 SLIST_HEAD(, bufq)	bufqs = SLIST_HEAD_INITIALIZER(&bufq);
31 struct mutex		bufqs_mtx = MUTEX_INITIALIZER(IPL_NONE);
32 int			bufqs_stop;
33 
/*
 * Operations vector for one queueing discipline.  Every hook except
 * impl_create receives the private data pointer that impl_create returned.
 * The field order is relied upon by the bufq_impls[] table.
 */
struct bufq_impl {
	/* allocate the discipline's private data; NULL on failure */
	void		*(*impl_create)(void);
	/* release the private data */
	void		 (*impl_destroy)(void *data);

	/* add a buf to the queue */
	void		 (*impl_queue)(void *data, struct buf *bp);
	/* remove and return the next buf, or NULL if there is none */
	struct buf	*(*impl_dequeue)(void *data);
	/* put a previously dequeued buf back at the front */
	void		 (*impl_requeue)(void *data, struct buf *bp);
	/* non-zero if at least one buf is queued */
	int		 (*impl_peek)(void *data);
};
43 
/* disksort discipline hooks */
void		*bufq_disksort_create(void);
void		 bufq_disksort_destroy(void *data);
void		 bufq_disksort_queue(void *data, struct buf *bp);
struct buf	*bufq_disksort_dequeue(void *data);
void		 bufq_disksort_requeue(void *data, struct buf *bp);
int		 bufq_disksort_peek(void *data);

/* fifo discipline hooks */
void		*bufq_fifo_create(void);
void		 bufq_fifo_destroy(void *data);
void		 bufq_fifo_queue(void *data, struct buf *bp);
struct buf	*bufq_fifo_dequeue(void *data);
void		 bufq_fifo_requeue(void *data, struct buf *bp);
int		 bufq_fifo_peek(void *data);
57 
58 const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
59 	{
60 		bufq_disksort_create,
61 		bufq_disksort_destroy,
62 		bufq_disksort_queue,
63 		bufq_disksort_dequeue,
64 		bufq_disksort_requeue,
65 		bufq_disksort_peek
66 	},
67 	{
68 		bufq_fifo_create,
69 		bufq_fifo_destroy,
70 		bufq_fifo_queue,
71 		bufq_fifo_dequeue,
72 		bufq_fifo_requeue,
73 		bufq_fifo_peek
74 	}
75 };
76 
77 int
78 bufq_init(struct bufq *bq, int type)
79 {
80 	if (type > BUFQ_HOWMANY)
81 		panic("bufq_init: type %i unknown", type);
82 
83 	mtx_init(&bq->bufq_mtx, IPL_BIO);
84 	bq->bufq_type = type;
85 	bq->bufq_impl = &bufq_impls[type];
86 	bq->bufq_data = bq->bufq_impl->impl_create();
87 	if (bq->bufq_data == NULL) {
88 		/*
89 		 * we should actually return failure so disks attaching after
90 		 * boot in low memory situations dont panic the system.
91 		 */
92 		panic("bufq init fail");
93 	}
94 
95 	mtx_enter(&bufqs_mtx);
96 	while (bufqs_stop) {
97 		msleep(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqinit", 0);
98 	}
99 	SLIST_INSERT_HEAD(&bufqs, bq, bufq_entries);
100 	mtx_leave(&bufqs_mtx);
101 
102 	return (0);
103 }
104 
105 int
106 bufq_switch(struct bufq *bq, int type)
107 {
108 	void		*data;
109 	void		*odata;
110 	int		otype;
111 	struct buf	*bp;
112 	int		ret;
113 
114 	mtx_enter(&bq->bufq_mtx);
115 	ret = (bq->bufq_type == type);
116 	mtx_leave(&bq->bufq_mtx);
117 	if (ret)
118 		return (0);
119 
120 	data = bufq_impls[type].impl_create();
121 	if (data == NULL)
122 		return (ENOMEM);
123 
124 	mtx_enter(&bq->bufq_mtx);
125 	if (bq->bufq_type != type) { /* might have changed during create */
126 		odata = bq->bufq_data;
127 		otype = bq->bufq_type;
128 
129 		while ((bp = bufq_impls[otype].impl_dequeue(odata)) != NULL)
130 			bufq_impls[type].impl_queue(data, bp);
131 
132 		bq->bufq_data = data;
133 		bq->bufq_type = type;
134 		bq->bufq_impl = &bufq_impls[type];
135 	} else {
136 		otype = type;
137 		odata = data;
138 	}
139 	mtx_leave(&bq->bufq_mtx);
140 
141 	bufq_impls[otype].impl_destroy(odata);
142 
143 	return (0);
144 }
145 
146 void
147 bufq_destroy(struct bufq *bq)
148 {
149 	bufq_drain(bq);
150 
151 	bq->bufq_impl->impl_destroy(bq->bufq_data);
152 	bq->bufq_data = NULL;
153 
154 	mtx_enter(&bufqs_mtx);
155 	while (bufqs_stop) {
156 		msleep(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqdest", 0);
157 	}
158 	SLIST_REMOVE(&bufqs, bq, bufq, bufq_entries);
159 	mtx_leave(&bufqs_mtx);
160 }
161 
162 
163 void
164 bufq_queue(struct bufq *bq, struct buf *bp)
165 {
166 	mtx_enter(&bq->bufq_mtx);
167 	while (bq->bufq_stop) {
168 		msleep(&bq->bufq_stop, &bq->bufq_mtx, PRIBIO, "bqqueue", 0);
169 	}
170 
171 	bp->b_bq = bq;
172 	bq->bufq_outstanding++;
173 	bq->bufq_impl->impl_queue(bq->bufq_data, bp);
174 	mtx_leave(&bq->bufq_mtx);
175 }
176 
177 struct buf *
178 bufq_dequeue(struct bufq *bq)
179 {
180 	struct buf	*bp;
181 
182 	mtx_enter(&bq->bufq_mtx);
183 	bp = bq->bufq_impl->impl_dequeue(bq->bufq_data);
184 	mtx_leave(&bq->bufq_mtx);
185 
186 	return (bp);
187 }
188 
189 void
190 bufq_requeue(struct bufq *bq, struct buf *bp)
191 {
192 	mtx_enter(&bq->bufq_mtx);
193 	bq->bufq_impl->impl_requeue(bq->bufq_data, bp);
194 	mtx_leave(&bq->bufq_mtx);
195 }
196 
197 int
198 bufq_peek(struct bufq *bq)
199 {
200 	int		rv;
201 
202 	mtx_enter(&bq->bufq_mtx);
203 	rv = bq->bufq_impl->impl_peek(bq->bufq_data);
204 	mtx_leave(&bq->bufq_mtx);
205 
206 	return (rv);
207 }
208 
209 void
210 bufq_drain(struct bufq *bq)
211 {
212 	struct buf	*bp;
213 	int		 s;
214 
215 	while ((bp = bufq_dequeue(bq)) != NULL) {
216 		bp->b_error = ENXIO;
217 		bp->b_flags |= B_ERROR;
218 		s = splbio();
219 		biodone(bp);
220 		splx(s);
221 	}
222 }
223 
224 void
225 bufq_done(struct bufq *bq, struct buf *bp)
226 {
227 	mtx_enter(&bq->bufq_mtx);
228 	bq->bufq_outstanding--;
229 	KASSERT(bq->bufq_outstanding >= 0);
230 	if (bq->bufq_stop && bq->bufq_outstanding == 0)
231 		wakeup(&bq->bufq_outstanding);
232 	mtx_leave(&bq->bufq_mtx);
233 	bp->b_bq = NULL;
234 }
235 
236 void
237 bufq_quiesce(void)
238 {
239 	struct bufq		*bq;
240 
241 	mtx_enter(&bufqs_mtx);
242 	bufqs_stop = 1;
243 	mtx_leave(&bufqs_mtx);
244 	/*
245 	 * We can safely walk the list since it can't be modified as
246 	 * long as bufqs_stop is non-zero.
247 	 */
248 	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
249 		mtx_enter(&bq->bufq_mtx);
250 		bq->bufq_stop = 1;
251 		while (bq->bufq_outstanding) {
252 			msleep(&bq->bufq_outstanding, &bq->bufq_mtx,
253 			    PRIBIO, "bqquies", 0);
254 		}
255 		mtx_leave(&bq->bufq_mtx);
256 	}
257 }
258 
259 void
260 bufq_restart(void)
261 {
262 	struct bufq		*bq;
263 
264 	mtx_enter(&bufqs_mtx);
265 	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
266 		mtx_enter(&bq->bufq_mtx);
267 		bq->bufq_stop = 0;
268 		wakeup(&bq->bufq_stop);
269 		mtx_leave(&bq->bufq_mtx);
270 	}
271 	bufqs_stop = 0;
272 	wakeup(&bufqs_stop);
273 	mtx_leave(&bufqs_mtx);
274 }
275 
276 /*
277  * disksort implementation.
278  */
279 
280 void *
281 bufq_disksort_create(void)
282 {
283 	return (malloc(sizeof(struct buf), M_DEVBUF, M_NOWAIT | M_ZERO));
284 }
285 
286 void
287 bufq_disksort_destroy(void *data)
288 {
289 	free(data, M_DEVBUF);
290 }
291 
/*
 * Insert bp into the queue by handing it to disksort() together with the
 * queue head.
 */
void
bufq_disksort_queue(void *data, struct buf *bp)
{
	struct buf	*head = data;

	disksort(head, bp);
}
297 
298 struct buf *
299 bufq_disksort_dequeue(void *data)
300 {
301 	struct buf	*bufq = data;
302 	struct buf	*bp;
303 
304 	bp = bufq->b_actf;
305 	if (bp != NULL)
306 		bufq->b_actf = bp->b_actf;
307 	if (bufq->b_actf == NULL)
308 		bufq->b_actb = &bufq->b_actf;
309 
310 	return (bp);
311 }
312 
313 void
314 bufq_disksort_requeue(void *data, struct buf *bp)
315 {
316 	struct buf	*bufq = data;
317 
318 	bp->b_actf = bufq->b_actf;
319 	bufq->b_actf = bp;
320 	if (bp->b_actf == NULL)
321 		bufq->b_actb = &bp->b_actf;
322 }
323 
324 int
325 bufq_disksort_peek(void *data)
326 {
327 	struct buf	*bufq = data;
328 
329 	return (bufq->b_actf != NULL);
330 }
331 
332 /*
333  * fifo implementation
334  */
335 
336 void *
337 bufq_fifo_create(void)
338 {
339 	struct bufq_fifo_head	*head;
340 
341 	head = malloc(sizeof(*head), M_DEVBUF, M_NOWAIT | M_ZERO);
342 	if (head == NULL)
343 		return (NULL);
344 
345 	SIMPLEQ_INIT(head);
346 
347 	return (head);
348 }
349 
350 void
351 bufq_fifo_destroy(void *data)
352 {
353 	free(data, M_DEVBUF);
354 }
355 
356 void
357 bufq_fifo_queue(void *data, struct buf *bp)
358 {
359 	struct bufq_fifo_head	*head = data;
360 
361 	SIMPLEQ_INSERT_TAIL(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
362 }
363 
364 struct buf *
365 bufq_fifo_dequeue(void *data)
366 {
367 	struct bufq_fifo_head	*head = data;
368 	struct buf		*bp;
369 
370 	bp = SIMPLEQ_FIRST(head);
371 	if (bp != NULL)
372 		SIMPLEQ_REMOVE_HEAD(head, b_bufq.bufq_data_fifo.bqf_entries);
373 
374 	return (bp);
375 }
376 
377 void
378 bufq_fifo_requeue(void *data, struct buf *bp)
379 {
380 	struct bufq_fifo_head	*head = data;
381 
382 	SIMPLEQ_INSERT_HEAD(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
383 }
384 
/*
 * Return 1 if the fifo holds at least one buf, 0 otherwise.
 */
int
bufq_fifo_peek(void *data)
{
	struct bufq_fifo_head	*fifo = data;

	if (SIMPLEQ_FIRST(fifo) == NULL)
		return (0);

	return (1);
}
392