/*	$OpenBSD: vfs_biomem.c,v 1.18 2011/09/19 14:48:04 beck Exp $ */
/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

int buf_nkvmsleep;

extern struct bcachestats bcstats;
/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the pages could be very painful. Fortunately voff_t is
 * 64 bits, so we can just increment buf_page_offset all the time and ignore
 * wraparound. Even if you reuse 4GB worth of buffers every second
 * you'll still run out of time_t faster than buffers.
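 * (At 4GB per second a 64-bit offset lasts 2^32 seconds, roughly 136
 * years, while a signed 32-bit time_t overflows after about 68.)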
 *
 * XXX - the spl locking in here is extreme paranoia right now until I figure
 *       it all out.
 */
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

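/*
 * Reserve a contiguous chunk of kernel VA for buffer mappings and set up
 * the uvm object that backs all buffer cache pages.
 */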
void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(UVM_PROT_NONE,
	    UVM_PROT_NONE, UVM_INH_NONE, UVM_ADV_NORMAL, 0)))
		panic("bufinit: can't reserve VM for buffers");
	buf_kva_end = buf_kva_start + size;

	/* Contiguous mapping */
	bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;

	buf_object = &buf_object_store;

	uvm_objinit(buf_object, NULL, 1);
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
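/*
 * The usual pattern is to busy a buffer with buf_acquire() (or with
 * buf_acquire_unmapped() when the mapping is not needed) before working
 * on it and to call buf_release() afterwards, which returns the mapping
 * to the reuse list.
 */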
void
buf_acquire(struct buf *bp)
{
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);

	s = splbio();
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);

	splx(s);
}

/*
 * Busy a buffer, but don't map it.
 * If it already has a mapping, we keep it, but we also leave it on the
 * reuse list since we assume that the mapping won't be used anymore.
 */
void
buf_acquire_unmapped(struct buf *bp)
{
	int s;

	s = splbio();
	SET(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

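/*
 * Map the buffer's pages into kernel VA. A buffer that already has a
 * mapping is simply pulled off the reuse list; otherwise an address is
 * taken from the preallocated range while any is left, and after that
 * the mapping of a buffer on the reuse list is stolen, sleeping until
 * one shows up if the list is empty.
 */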
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			while ((vbp = TAILQ_FIRST(&buf_valist)) == NULL) {
				buf_needva++;
				buf_nkvmsleep++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ|VM_PROT_WRITE);
			pmap_update(pmap_kernel());
		}
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;

	CLR(bp->b_flags, B_NOTMAPPED);
}

void
buf_release(struct buf *bp)
{
	int s;

	KASSERT(bp->b_flags & B_BUSY);
	KASSERT((bp->b_data != NULL) || (bp->b_flags & B_NOTMAPPED));

	s = splbio();
	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail++;
		if (buf_needva) {
			buf_needva--;
			wakeup_one(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * not to drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;
	int s;

	s = splbio();

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL) {
		splx(s);
		return (0);
	}

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY)) {		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	} else
		CLR(bp->b_flags, B_BUSY);
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail++;
	splx(s);

	return (1);
}

/*
 * Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		/*
		 * Note: the space we drop here actually belongs to the
		 * other buffers read in by bread_cluster.
		 */
		bp->b_bufsize = newsize;
	}
}

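/*
 * Strip the kernel mapping from a non-busy buffer and return its KVA to
 * the caller for reuse. A buffer that was marked B_RELEASED is freed
 * here as well.
 */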
vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);

	s = splbio();
	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail--;
	va = (vaddr_t)bp->b_data;
	bp->b_data = 0;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	splx(s);

	return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	voff_t offs;
	int s;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);

	s = splbio();

	offs = buf_page_offset;
	buf_page_offset += size;

	KASSERT(buf_page_offset > 0);

	uvm_pagealloc_multi(buf_object, offs, size, UVM_PLA_WAITOK);
	bcstats.numbufpages += atop(size);
	bp->b_pobj = buf_object;
	bp->b_poffs = offs;
	bp->b_bufsize = size;
	splx(s);
}

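/*
 * Return all pages backing the buffer to UVM. The buffer must already
 * have been unmapped.
 */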
void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;
	int s;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);

	s = splbio();

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
	splx(s);
}

/*
 * XXX - it might make sense to make a buf_realloc_pages to avoid
 *       bouncing through the free list all the time.
 */