/*	$OpenBSD: vfs_biomem.c,v 1.12 2009/08/09 17:45:02 art Exp $ */
/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/mount.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

/*
 * Kernel virtual address range reserved for buffer mappings; buf_kva_start
 * advances as the pre-allocated space is handed out by buf_map().
 */
vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;			/* waiters for buffer kva */
TAILQ_HEAD(,buf) buf_valist;	/* mapped, non-busy buffers; kva stealable */

int buf_nkvmsleep;		/* times we slept waiting for buffer kva */

extern struct bcachestats bcstats;

/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the offsets could be very painful. Fortunately voff_t is
 * 64 bits, so we can just keep incrementing buf_page_offset and ignore
 * wraparound. Even if you consume 4GB worth of buffer offsets every second,
 * a 32-bit time_t will overflow before the 64-bit offset does.
 *
 * XXX - the spl locking in here is extreme paranoia right now until I figure
 *       it all out.
 */
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

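/*
 * Reserve a range of kernel virtual address space for buffer mappings and
 * set up the uvm object that backs buffer pages.
 */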
void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(UVM_PROT_NONE,
	    UVM_PROT_NONE, UVM_INH_NONE, UVM_ADV_NORMAL, 0)))
		panic("bufinit: can't reserve VM for buffers");
	buf_kva_end = buf_kva_start + size;

	buf_object = &buf_object_store;

	buf_object->pgops = NULL;
	RB_INIT(&buf_object->memt);
	buf_object->uo_npages = 0;
	buf_object->uo_refs = 1;
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
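/*
 * Rough usage sketch (illustrative, not code from this file): a caller
 * that has backed a buffer with buf_alloc_pages() would typically busy
 * and map it before touching the data, then release it afterwards:
 *
 *	buf_acquire(bp);
 *	... read or write bp->b_data ...
 *	buf_release(bp);
 *
 * buf_acquire_unmapped() can be used instead when the data does not need
 * to be touched, avoiding the cost of a kva mapping.
 */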
void
buf_acquire(struct buf *bp)
{
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);

	s = splbio();
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);

	splx(s);
}

/*
 * Busy a buffer, but don't map it.
 * If it already has a mapping, we keep that mapping, but we also leave
 * the buffer on the free-kva list so the mapping can be stolen, since
 * we assume it won't be used anymore.
 */
void
buf_acquire_unmapped(struct buf *bp)
{
	int s;

	s = splbio();
	SET(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

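/*
 * Map a buffer's pages into kernel virtual address space. The kva comes
 * from the pre-allocated range until that runs out, after which it is
 * stolen from a buffer on the free-kva list (sleeping until one is
 * available).
 */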
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			while ((vbp = TAILQ_FIRST(&buf_valist)) == NULL) {
				buf_needva++;
				buf_nkvmsleep++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ|VM_PROT_WRITE);
			pmap_update(pmap_kernel());
		}
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
	}

	bcstats.busymapped++;

	CLR(bp->b_flags, B_NOTMAPPED);
}

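/*
 * Release a busy buffer. Any kva mapping is kept but is put back on the
 * free-kva list so it can be stolen; a waiter for kva, if any, is woken.
 */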
void
buf_release(struct buf *bp)
{
	int s;

	KASSERT(bp->b_flags & B_BUSY);
	KASSERT((bp->b_data != NULL) || (bp->b_flags & B_NOTMAPPED));

	s = splbio();
	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		if (buf_needva) {
			buf_needva--;
			wakeup_one(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * not to drop kvm, since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;
	int s;

	s = splbio();

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL) {
		splx(s);
		return (0);
	}

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY))		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
	else
		CLR(bp->b_flags, B_BUSY);
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);

	splx(s);

	return (1);
}

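/*
 * Shrink a buffer's mapped size to newsize, unmapping the kva beyond the
 * new end.
 */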
void
buf_shrink_mem(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		bp->b_bufsize = newsize;
	}
}

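/*
 * Strip the kva mapping from a non-busy buffer and return the now-free
 * virtual address. If the buffer was only kept around for its mapping
 * (B_RELEASED), it is returned to the buf pool here.
 */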
vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);

	s = splbio();
	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	va = (vaddr_t)bp->b_data;
	bp->b_data = NULL;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	splx(s);

	return (va);
}

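/*
 * Allocate wired pages for a buffer at a fresh range of offsets in
 * buf_object. The pages are not mapped here; buf_map() does that.
 */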
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	struct vm_page *pg;
	voff_t offs, i;
	int s;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);

	s = splbio();

	offs = buf_page_offset;
	buf_page_offset += size;

	KASSERT(buf_page_offset > 0);

	for (i = 0; i < atop(size); i++) {
#if defined(DEBUG) || 1
		if ((pg = uvm_pagelookup(buf_object, offs + ptoa(i))))
			panic("buf_alloc_pages: overlap buf: %p page: %p",
			    bp, pg);
#endif

		while ((pg = uvm_pagealloc(buf_object, offs + ptoa(i),
			    NULL, 0)) == NULL) {
			uvm_wait("buf_alloc_pages");
		}
		pg->wire_count = 1;
		atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
		bcstats.numbufpages++;
	}

	bp->b_pobj = buf_object;
	bp->b_poffs = offs;
	bp->b_bufsize = size;
	splx(s);
}

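/*
 * Free all pages backing a buffer and detach it from buf_object.
 * The buffer must not be mapped.
 */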
void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;
	int s;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);

	s = splbio();

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
	splx(s);
}

/*
 * XXX - it might make sense to make a buf_realloc_pages to avoid
 *       bouncing through the free list all the time.
 */