/*	$OpenBSD: vfs_biomem.c,v 1.4 2008/11/08 23:20:50 pedro Exp $ */
/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/mount.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

int buf_nkvmsleep;

extern struct bcachestats bcstats;

/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the pages could be very painful. Fortunately voff_t is
 * 64 bits, so we can just increment buf_page_offset all the time and ignore
 * wraparound. Even if you reuse 4GB worth of buffers every second
 * you'll still run out of time_t faster than buffers.
 *
 * XXX - the spl locking in here is extreme paranoia right now until I figure
 *       it all out.
 */
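/*
 * The arithmetic behind the claim above: voff_t is a signed 64-bit type,
 * so the offset can grow by 2^63 bytes before it goes negative.  Consuming
 * 4GB (2^32 bytes) of offset space per second exhausts that after 2^31
 * seconds, roughly 68 years, well past where a 32-bit time_t wraps.
 */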
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

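/*
 * Reserve 'size' bytes of kernel VA for buffer mappings and initialize
 * the uvm object that is used purely as page storage for buffers.
 */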
void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(UVM_PROT_NONE,
	    UVM_PROT_NONE, UVM_INH_NONE, UVM_ADV_NORMAL, 0)))
		panic("bufinit: can't reserve VM for buffers");
	buf_kva_end = buf_kva_start + size;

	buf_object = &buf_object_store;

	buf_object->pgops = NULL;
	TAILQ_INIT(&buf_object->memq);
	buf_object->uo_npages = 0;
	buf_object->uo_refs = 1;
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
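/*
 * In outline: buf_alloc_pages() attaches wired pages to a buffer,
 * buf_acquire() marks it busy and maps those pages into kernel VA
 * (stealing a mapping from an idle buffer on buf_valist once the
 * pre-allocated range runs out), and buf_release() unbusies it and
 * parks the mapping back on buf_valist for reuse.
 */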
void
buf_acquire(struct buf *bp)
{
	vaddr_t va;
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);

	s = splbio();
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			while ((vbp = TAILQ_FIRST(&buf_valist)) == NULL) {
				buf_needva++;
				buf_nkvmsleep++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ|VM_PROT_WRITE);
			pmap_update(pmap_kernel());
		}
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
	}
	splx(s);
}

/*
 * Busy a buffer, but don't map it.
 * If it has a mapping, we keep it, but we also keep the mapping on
 * the list since we assume that it won't be used anymore.
 */
void
buf_acquire_unmapped(struct buf *bp)
{
	int s;

	s = splbio();
	SET(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

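/*
 * Unbusy the buffer.  If it is mapped, leave the mapping intact and put
 * it at the tail of buf_valist so the kva can be reused, waking one
 * waiter in buf_acquire() if anyone is short on kva.
 */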
void
buf_release(struct buf *bp)
{
	int s;

	KASSERT(bp->b_flags & B_BUSY);
	KASSERT((bp->b_data != NULL) || (bp->b_flags & B_NOTMAPPED));

	s = splbio();
	if (bp->b_data) {
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		if (buf_needva) {
			buf_needva--;
			wakeup_one(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;
	int s;

	s = splbio();

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL) {
		splx(s);
		return (0);
	}

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY))		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
	else
		CLR(bp->b_flags, B_BUSY);
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);

	splx(s);

	return (1);
}

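/*
 * Strip the kernel mapping from a buffer on buf_valist and hand its kva
 * back to the caller.  If the buffer was already marked B_RELEASED by
 * buf_dealloc_mem(), this is also where it finally gets freed.
 */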
vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);

	s = splbio();
	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	va = (vaddr_t)bp->b_data;
	bp->b_data = NULL;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	splx(s);

	return (va);
}

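/*
 * Attach 'size' bytes worth of freshly allocated, wired pages to the
 * buffer at a never-reused offset in buf_object.  The pages are not
 * mapped here; buf_acquire() does that.
 */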
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	struct vm_page *pg;
	voff_t offs, i;
	int s;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);

	s = splbio();

	offs = buf_page_offset;
	buf_page_offset += size;

	KASSERT(buf_page_offset > 0);

	for (i = 0; i < atop(size); i++) {
#if defined(DEBUG) || 1
		if ((pg = uvm_pagelookup(buf_object, offs + ptoa(i))))
			panic("buf_alloc_pages: overlap buf: %p page: %p",
			    bp, pg);
#endif

		while ((pg = uvm_pagealloc(buf_object, offs + ptoa(i),
			    NULL, 0)) == NULL) {
			uvm_wait("buf_alloc_pages");
		}
		pg->wire_count = 1;
		atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
		bcstats.numbufpages++;
	}

	bp->b_pobj = buf_object;
	bp->b_poffs = offs;
	bp->b_bufsize = size;
	splx(s);
}

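/*
 * Detach and free all pages backing the buffer.  The buffer must already
 * be unmapped (b_data == NULL); the pages are unwired and returned to uvm.
 */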
void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;
	int s;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);

	s = splbio();

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
	splx(s);
}

/*
 * XXX - it might make sense to make a buf_realloc_pages to avoid
 *       bouncing through the free list all the time.
 */